import sys

from scrapy.crawler import CrawlerProcess
from scrapy import spiderloader
from scrapy.utils.project import get_project_settings

from database.models import Page, db


def crawl():
    """Create the page tables and run every spider whose name contains "crawl"."""
    try:
        db.create_tables(Page.__subclasses__())
        settings = get_project_settings()
        process = CrawlerProcess(settings)
        spiders = spiderloader.SpiderLoader.from_settings(settings)
        for spider in spiders.list():
            if "crawl" in spider:
                process.crawl(spider)
        process.start()
    except Exception as e:
        print(e)


def update():
    """Run every spider whose name contains "update" to refresh pages already crawled."""
    try:
        settings = get_project_settings()
        process = CrawlerProcess(settings)
        spiders = spiderloader.SpiderLoader.from_settings(settings)
        for spider in spiders.list():
            if "update" in spider:
                process.crawl(spider)
        process.start()
    except Exception as e:
        print(e)


def show_help():
    print("Launch all crawlers => " + sys.argv[0] + " crawl")
    print("Update all pages already crawled => " + sys.argv[0] + " update")


if __name__ == '__main__':
    if len(sys.argv) == 2:
        if sys.argv[1] == "crawl":
            crawl()
        elif sys.argv[1] == "update":
            update()
        else:
            show_help()
    else:
        show_help()