#!/home/nonroot/event_scrapy/venv/bin/python3
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from twisted.internet import reactor, defer
from scrape_event import unloze_spider


@defer.inlineCallbacks
def handle_urls(result, runner, reactor):
    """Crawl every item in ``result`` one after another, then stop the reactor.

    Args:
        result: Iterable of items; each one is passed to ``unloze_spider``
            through the ``item`` keyword argument of ``runner.crawl``.
        runner: Object whose ``crawl(spider_cls, item=...)`` call is yielded
            on (``main`` passes a ``CrawlerRunner``).
        reactor: Reactor to shut down once crawling is over.

    Returns:
        The Deferred produced by ``inlineCallbacks``. If a crawl fails, the
        failure is propagated through it after the reactor stop has been
        requested.
    """
    try:
        for item in result:
            # Yielding waits for this crawl to finish before the next starts.
            yield runner.crawl(unloze_spider, item=item)
    finally:
        # This is what ends reactor.run(). It must happen even when a crawl
        # fails, otherwise the caller would block in reactor.run() forever.
        # callWhenRunning() invokes stop() immediately on a running reactor
        # and postpones it until startup otherwise, so an empty ``result``
        # (which gets here before reactor.run() was called) no longer ends
        # in ReactorNotRunning.
        reactor.callWhenRunning(reactor.stop)
def main():
    """Build one blank event record per forum URL and crawl them in order."""
    forum_urls = (
        "https://unloze.com/forums/events.79/",  # mg
        "https://unloze.com/forums/events.76/",  # ze
        "https://unloze.com/forums/events.80/",  # zr
        # jb has no events yet: "https://unloze.com/forums/events.90/"
    )
    # Every event field starts out as None; only the url is known up front.
    blank_fields = (
        "event_title",
        "event_server",
        "event_maps",
        "event_date",
        "event_time",
        "event_reward",
    )
    result = [
        {**dict.fromkeys(blank_fields), "url": forum_url}
        for forum_url in forum_urls
    ]

    runner = CrawlerRunner(get_project_settings())
    handle_urls(result, runner, reactor)
    # Blocks here until handle_urls stops the reactor.
    reactor.run()
    print("reactor finish")
# Only start crawling when this file is executed as a script.
if __name__ == "__main__":
    main()