diff options
author | neodarz <neodarz@neodarz.net> | 2019-02-22 00:00:19 +0100 |
---|---|---|
committer | neodarz <neodarz@neodarz.net> | 2019-02-22 00:00:19 +0100 |
commit | e12eb8bd99c1b6695873632e0540fa07379671d4 (patch) | |
tree | 967ea553732aab7d7709da4e5ff5c6d8a72aa182 /app.py | |
parent | 582cfba635143c3fd00a914acc4c8e65587e5a21 (diff) | |
download | khanindexer-e12eb8bd99c1b6695873632e0540fa07379671d4.tar.xz khanindexer-e12eb8bd99c1b6695873632e0540fa07379671d4.zip |
Add a way to specify a crawl to launch
Diffstat (limited to 'app.py')
-rw-r--r-- | app.py | 25 |
1 files changed, 21 insertions, 4 deletions
```diff
@@ -6,15 +6,27 @@ from scrapy.utils.project import get_project_settings
 
 from database.models import Page, db
 
-def crawl():
+def crawl(the_spider=""):
     try:
         db.create_tables(Page.__subclasses__())
         settings = get_project_settings()
         process = CrawlerProcess(settings)
         spiders = spiderloader.SpiderLoader.from_settings(settings)
-        for spider in spiders.list():
-            if "crawl" in spider:
-                process.crawl(spider)
+
+        if the_spider == "":
+            for spider in spiders.list():
+                if "crawl" in spider:
+                    process.crawl(spider)
+        elif the_spider in spiders.list():
+            process.crawl(the_spider)
+        else:
+            print("`"+the_spider+"` is not a valid spider.")
+            print('Valid spider are: ')
+            for spider in spiders.list():
+                if "crawl" in spider:
+                    print(" "+spider)
+            sys.exit(1)
+
         process.start()
     except Exception as e:
         print(e)
@@ -44,5 +56,10 @@ if __name__ == '__main__':
             update()
         else:
             show_help()
+    elif len(sys.argv) == 3:
+        if sys.argv[1] == "crawl":
+            crawl(sys.argv[2])
+        else:
+            show_help()
     else:
         show_help()
```