From 9c260f31a24571e67dfe5c2c1a6ec2f948e5630a Mon Sep 17 00:00:00 2001 From: jenz Date: Wed, 24 Apr 2024 13:35:52 +0200 Subject: [PATCH] updated scrapy spider again --- event_notification/python/scrape_event.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/event_notification/python/scrape_event.py b/event_notification/python/scrape_event.py index 83863037..aa5d65d1 100644 --- a/event_notification/python/scrape_event.py +++ b/event_notification/python/scrape_event.py @@ -1,5 +1,7 @@ import scrapy import traceback +import warnings +warnings.filterwarnings("ignore", category=scrapy.exceptions.ScrapyDeprecationWarning) from scrapy_settings import EXT_SETTINGS from pprint import pprint @@ -59,17 +61,20 @@ class unloze_spider(scrapy.Spider): next_event_server = False next_event_time = False next_event_reward = False + event_date = None for r in response.xpath("//*[contains(text(),'TL;DR')]/../../..//text()").extract(): if "\n" in r or len(r) < 4 or "\t" in r or skipping > 0: if r.lower() == "tl;dr": - skipping -= 1 + #skipping -= 1 + skipping = -1 continue - if "server" in r.lower() and "time" not in r.lower(): + if "server" in r.lower() and "time" not in r.lower() and next_event_server is not None: next_event_server = True continue if next_event_server: event_server += r - next_event_server = False + if ":270" in r: #server port generally + next_event_server = None if "maps" in r.lower() and "rewards" not in r.lower(): next_event_maps = True @@ -81,7 +86,8 @@ class unloze_spider(scrapy.Spider): continue if next_event_maps: - event_maps += f"{r} " + if r.startswith('ze_') or r.startswith('mg_') or r.startswith('de_') or r.startswith('zr_'): + event_maps += f"{r} " if next_event_date: next_event_date= False @@ -100,6 +106,7 @@ class unloze_spider(scrapy.Spider): event_reward = r next_event_reward = False + self.item["event_title"] = event_title self.item["event_date"] = event_date self.item["event_time"] = event_time @@ -116,4 +123,4 @@ class unloze_spider(scrapy.Spider): sys.exit(1) #pprint(self.item) - return self.item + return self.item