updated scrapy spider again

This commit is contained in:
jenz 2024-04-24 13:35:52 +02:00
parent de66ce5b3c
commit 9c260f31a2

View File

@@ -1,5 +1,7 @@
import scrapy
import traceback
import warnings
warnings.filterwarnings("ignore", category=scrapy.exceptions.ScrapyDeprecationWarning)
from scrapy_settings import EXT_SETTINGS
from pprint import pprint
@@ -59,17 +61,20 @@ class unloze_spider(scrapy.Spider):
next_event_server = False
next_event_time = False
next_event_reward = False
event_date = None
for r in response.xpath("//*[contains(text(),'TL;DR')]/../../..//text()").extract():
if "\n" in r or len(r) < 4 or "\t" in r or skipping > 0:
if r.lower() == "tl;dr":
skipping -= 1
#skipping -= 1
skipping = -1
continue
if "server" in r.lower() and "time" not in r.lower():
if "server" in r.lower() and "time" not in r.lower() and next_event_server is not None:
next_event_server = True
continue
if next_event_server:
event_server += r
next_event_server = False
if ":270" in r: #server port generally
next_event_server = None
if "maps" in r.lower() and "rewards" not in r.lower():
next_event_maps = True
@@ -81,7 +86,8 @@ class unloze_spider(scrapy.Spider):
continue
if next_event_maps:
event_maps += f"{r} "
if r.startswith('ze_') or r.startswith('mg_') or r.startswith('de_') or r.startswith('zr_'):
event_maps += f"{r} "
if next_event_date:
next_event_date= False
@@ -100,6 +106,7 @@ class unloze_spider(scrapy.Spider):
event_reward = r
next_event_reward = False
self.item["event_title"] = event_title
self.item["event_date"] = event_date
self.item["event_time"] = event_time
@@ -116,4 +123,4 @@ class unloze_spider(scrapy.Spider):
sys.exit(1)
#pprint(self.item)
return self.item
return self.item