updated scrapy spider again
This commit is contained in:
parent
de66ce5b3c
commit
9c260f31a2
@ -1,5 +1,7 @@
|
||||
import scrapy
|
||||
import traceback
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore", category=scrapy.exceptions.ScrapyDeprecationWarning)
|
||||
from scrapy_settings import EXT_SETTINGS
|
||||
from pprint import pprint
|
||||
|
||||
@ -59,17 +61,20 @@ class unloze_spider(scrapy.Spider):
|
||||
next_event_server = False
|
||||
next_event_time = False
|
||||
next_event_reward = False
|
||||
event_date = None
|
||||
for r in response.xpath("//*[contains(text(),'TL;DR')]/../../..//text()").extract():
|
||||
if "\n" in r or len(r) < 4 or "\t" in r or skipping > 0:
|
||||
if r.lower() == "tl;dr":
|
||||
skipping -= 1
|
||||
#skipping -= 1
|
||||
skipping = -1
|
||||
continue
|
||||
if "server" in r.lower() and "time" not in r.lower():
|
||||
if "server" in r.lower() and "time" not in r.lower() and next_event_server is not None:
|
||||
next_event_server = True
|
||||
continue
|
||||
if next_event_server:
|
||||
event_server += r
|
||||
next_event_server = False
|
||||
if ":270" in r: #server port generally
|
||||
next_event_server = None
|
||||
|
||||
if "maps" in r.lower() and "rewards" not in r.lower():
|
||||
next_event_maps = True
|
||||
@ -81,7 +86,8 @@ class unloze_spider(scrapy.Spider):
|
||||
continue
|
||||
|
||||
if next_event_maps:
|
||||
event_maps += f"{r} "
|
||||
if r.startswith('ze_') or r.startswith('mg_') or r.startswith('de_') or r.startswith('zr_'):
|
||||
event_maps += f"{r} "
|
||||
|
||||
if next_event_date:
|
||||
next_event_date= False
|
||||
@ -100,6 +106,7 @@ class unloze_spider(scrapy.Spider):
|
||||
event_reward = r
|
||||
next_event_reward = False
|
||||
|
||||
|
||||
self.item["event_title"] = event_title
|
||||
self.item["event_date"] = event_date
|
||||
self.item["event_time"] = event_time
|
||||
@ -116,4 +123,4 @@ class unloze_spider(scrapy.Spider):
|
||||
sys.exit(1)
|
||||
|
||||
#pprint(self.item)
|
||||
return self.item
|
||||
return self.item
|
||||
|
Loading…
Reference in New Issue
Block a user