import scrapy
import traceback
from scrapy_settings import EXT_SETTINGS
from pprint import pprint

class unloze_spider(scrapy.Spider):
    """
    Main unloze event scraper.

    Starts at the listing page given by ``item["url"]``, follows the first
    listed thread that is neither a poll nor a nomination thread, and fills
    ``item`` in place with the event details found around that thread's
    "TL;DR" marker.
    """

    # Scrapy's Spider base class refuses to initialise without a name.
    name = "unloze_spider"
    custom_settings = EXT_SETTINGS

    # Thread links scraped from the listing page are prefixed with this host.
    BASE_URL = "https://unloze.com"
    # A thread link containing any of these markers is skipped.
    IGNORED_THREAD_MARKERS = ("poll", "nomination-thread")
    # Fields without which the scraped event is treated as unusable.
    REQUIRED_FIELDS = ("event_date", "event_time", "event_reward")

    def __init__(self, item, **kwargs):
        """
        Store the item to populate and the listing URL to start from.

        item: mutable mapping that must contain the key "url"; it is updated
              in place by parse2 and returned as the scraped result.
        kwargs: forwarded untouched to scrapy.Spider.__init__.
        """
        super().__init__(**kwargs)
        self.url = item["url"]
        self.item = item

    def start_requests(self):
        """
        Issue the single request for the listing page.
        """
        yield scrapy.Request(url=self.url, callback=self.parse)

    def _abort(self, reason):
        """
        Stop the spider from inside a callback.

        sys.exit() does not work here: the SystemExit is caught by the
        callback machinery and merely logged. CloseSpider is the exception
        Scrapy documents for this purpose.
        """
        # Local import keeps this class independent of a new file-level import.
        from scrapy.exceptions import CloseSpider

        raise CloseSpider(reason)

    def parse(self, response):
        """
        Parsing content in the events sections.

        Yields a request for the first thread link that contains none of
        IGNORED_THREAD_MARKERS; closes the spider if there is no such link.
        """
        threads = response.xpath("//div[@class='structItem-title']/@uix-href").extract()
        newest_thread = next(
            (
                thread
                for thread in threads
                if not any(marker in thread.lower() for marker in self.IGNORED_THREAD_MARKERS)
            ),
            None,
        )

        if newest_thread is None:
            print("no thread found. url: ", response.url)
            self._abort("no_thread_found")

        yield scrapy.Request(
            url=self.BASE_URL + newest_thread,
            callback=self.parse2,
        )

    @staticmethod
    def _extract_event_fields(text_nodes, skipping):
        """
        Walk the text nodes and pick up the value that follows each label.

        text_nodes: iterable of strings in document order.
        skipping: number of exact "tl;dr" text nodes that must be passed
                  before label matching starts.

        Returns a dict with the keys event_server, event_maps, event_date,
        event_time and event_reward. Server and maps default to "", the
        other three to None when their label was never followed by a value.

        The order of the checks below is significant: a node consumed as a
        value still falls through to the later label checks.
        """
        event_server = ""
        event_maps = ""
        event_date = None
        event_time = None
        event_reward = None
        expect_server = False
        expect_maps = False
        expect_date = False
        expect_time = False
        expect_reward = False

        for text in text_nodes:
            # Layout whitespace, very short fragments and everything before
            # the TL;DR marker(s) carry no event data.
            if "\n" in text or len(text) < 4 or "\t" in text or skipping > 0:
                if text.lower() == "tl;dr":
                    skipping -= 1
                continue

            lowered = text.lower()

            if "server" in lowered and "time" not in lowered:
                expect_server = True
                continue
            if expect_server:
                event_server += text
                expect_server = False

            if "maps" in lowered and "rewards" not in lowered:
                expect_maps = True
                continue

            # The date label ends the map list.
            if "date" in lowered:
                expect_maps = False
                expect_date = True
                continue

            # Every node between the maps label and the date label is
            # collected as a map, each followed by a single space.
            if expect_maps:
                event_maps += f"{text} "

            if expect_date:
                expect_date = False
                event_date = text

            if "time" in lowered and "server" not in lowered:
                expect_time = True
                continue
            if expect_time:
                event_time = text
                expect_time = False

            if "rewards" in lowered:
                expect_reward = True
                continue
            if expect_reward:
                event_reward = text
                expect_reward = False

        return {
            "event_server": event_server,
            "event_maps": event_maps,
            "event_date": event_date,
            "event_time": event_time,
            "event_reward": event_reward,
        }

    def parse2(self, response):
        """
        Parsing content on the actual newest event thread.

        Fills self.item with event_title, event_date, event_time,
        event_server, event_maps, event_reward and event_url, then returns
        it. If anything goes wrong, or a required field is missing, the
        traceback and URL are printed and the spider is closed without
        returning or modifying the item.
        """
        try:
            # The title is the last path segment of the URL, cut at the last "."
            event_title = response.url.rsplit(".", 1)[0].rsplit("/", 1)[1]

            # Event managers format their threads differently, so the only
            # anchor relied upon is the "TL;DR" marker. This assumes nobody
            # else writes "TL;DR" in their post.
            skipping = len(response.xpath("//*[contains(text(),'TL;DR')]").extract())
            text_nodes = response.xpath("//*[contains(text(),'TL;DR')]/../../..//text()").extract()
            fields = self._extract_event_fields(text_nodes, skipping)

            # Fail with an explicit message instead of an accidental
            # UnboundLocalError, and before the item is touched.
            missing = [key for key in self.REQUIRED_FIELDS if fields[key] is None]
            if missing:
                raise ValueError("missing event fields: " + ", ".join(missing))

            self.item["event_title"] = event_title
            self.item["event_date"] = fields["event_date"]
            self.item["event_time"] = fields["event_time"]
            self.item["event_server"] = fields["event_server"]
            self.item["event_maps"] = fields["event_maps"]
            self.item["event_reward"] = fields["event_reward"]
            self.item["event_url"] = response.url

        except Exception:
            error_msg = traceback.format_exc()
            print("traceback msg: ", error_msg)
            print("url: ", response.url)
            self._abort("event_parse_failed")

        return self.item