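Updated scrapy spider again: silence ScrapyDeprecationWarning, initialise event_date to None, reset skipping to -1 on the TL;DR marker, keep collecting server text until a line containing ":270" (the server port) is seen, and only record map names starting with ze_, mg_, de_ or zr_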
This commit is contained in:
parent
de66ce5b3c
commit
9c260f31a2
@ -1,5 +1,7 @@
|
|||||||
import scrapy
|
import scrapy
|
||||||
import traceback
|
import traceback
|
||||||
|
import warnings
|
||||||
|
warnings.filterwarnings("ignore", category=scrapy.exceptions.ScrapyDeprecationWarning)
|
||||||
from scrapy_settings import EXT_SETTINGS
|
from scrapy_settings import EXT_SETTINGS
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
|
|
||||||
@ -59,17 +61,20 @@ class unloze_spider(scrapy.Spider):
|
|||||||
next_event_server = False
|
next_event_server = False
|
||||||
next_event_time = False
|
next_event_time = False
|
||||||
next_event_reward = False
|
next_event_reward = False
|
||||||
|
event_date = None
|
||||||
for r in response.xpath("//*[contains(text(),'TL;DR')]/../../..//text()").extract():
|
for r in response.xpath("//*[contains(text(),'TL;DR')]/../../..//text()").extract():
|
||||||
if "\n" in r or len(r) < 4 or "\t" in r or skipping > 0:
|
if "\n" in r or len(r) < 4 or "\t" in r or skipping > 0:
|
||||||
if r.lower() == "tl;dr":
|
if r.lower() == "tl;dr":
|
||||||
skipping -= 1
|
#skipping -= 1
|
||||||
|
skipping = -1
|
||||||
continue
|
continue
|
||||||
if "server" in r.lower() and "time" not in r.lower():
|
if "server" in r.lower() and "time" not in r.lower() and next_event_server is not None:
|
||||||
next_event_server = True
|
next_event_server = True
|
||||||
continue
|
continue
|
||||||
if next_event_server:
|
if next_event_server:
|
||||||
event_server += r
|
event_server += r
|
||||||
next_event_server = False
|
if ":270" in r: #server port generally
|
||||||
|
next_event_server = None
|
||||||
|
|
||||||
if "maps" in r.lower() and "rewards" not in r.lower():
|
if "maps" in r.lower() and "rewards" not in r.lower():
|
||||||
next_event_maps = True
|
next_event_maps = True
|
||||||
@ -81,7 +86,8 @@ class unloze_spider(scrapy.Spider):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if next_event_maps:
|
if next_event_maps:
|
||||||
event_maps += f"{r} "
|
if r.startswith('ze_') or r.startswith('mg_') or r.startswith('de_') or r.startswith('zr_'):
|
||||||
|
event_maps += f"{r} "
|
||||||
|
|
||||||
if next_event_date:
|
if next_event_date:
|
||||||
next_event_date= False
|
next_event_date= False
|
||||||
@ -100,6 +106,7 @@ class unloze_spider(scrapy.Spider):
|
|||||||
event_reward = r
|
event_reward = r
|
||||||
next_event_reward = False
|
next_event_reward = False
|
||||||
|
|
||||||
|
|
||||||
self.item["event_title"] = event_title
|
self.item["event_title"] = event_title
|
||||||
self.item["event_date"] = event_date
|
self.item["event_date"] = event_date
|
||||||
self.item["event_time"] = event_time
|
self.item["event_time"] = event_time
|
||||||
|
Loading…
Reference in New Issue
Block a user