-rw-r--r-- | README.md |  7
-rw-r--r-- | app.py    | 25
2 files changed, 28 insertions, 4 deletions
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -49,6 +49,13 @@ To launch all the crawlers, use the following command:
 python app.py crawl
 ```
 
+You can also specify a spider to crawl, for example `nevrax_crawler`, with the
+command:
+
+```
+python app.py crawl nevrax_crawler
+```
+
 # Indexing
 
 Before launching an indexing or searching command, you must verify that the folder of
diff --git a/app.py b/app.py
--- a/app.py
+++ b/app.py
@@ -6,15 +6,27 @@ from scrapy.utils.project import get_project_settings
 from database.models import Page, db
 
 
-def crawl():
+def crawl(the_spider=""):
     try:
         db.create_tables(Page.__subclasses__())
         settings = get_project_settings()
         process = CrawlerProcess(settings)
         spiders = spiderloader.SpiderLoader.from_settings(settings)
+
+        if the_spider == "":
+            for spider in spiders.list():
+                if "crawl" in spider:
+                    process.crawl(spider)
+        elif the_spider in spiders.list():
+            process.crawl(the_spider)
+        else:
+            print("`"+the_spider+"` is not a valid spider.")
+            print('Valid spiders are:')
+            for spider in spiders.list():
+                if "crawl" in spider:
+                    print(" "+spider)
+            sys.exit(1)
+
         process.start()
     except Exception as e:
         print(e)
@@ -44,5 +56,10 @@ if __name__ == '__main__':
             update()
         else:
             show_help()
+    elif len(sys.argv) == 3:
+        if sys.argv[1] == "crawl":
+            crawl(sys.argv[2])
+        else:
+            show_help()
     else:
         show_help()
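Assembled outside of the diff, the new dispatch logic reads as below. This is a minimal sketch, assuming it runs from inside a Scrapy project (so `get_project_settings()` can locate `scrapy.cfg`); the database table setup and the `index`/`update` commands from app.py are left out.

```python
import sys

from scrapy import spiderloader
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings


def crawl(the_spider=""):
    settings = get_project_settings()
    process = CrawlerProcess(settings)
    spiders = spiderloader.SpiderLoader.from_settings(settings)

    if the_spider == "":
        # No name given: schedule every crawler-type spider.
        for spider in spiders.list():
            if "crawl" in spider:
                process.crawl(spider)
    elif the_spider in spiders.list():
        # A known name: schedule just that spider.
        process.crawl(the_spider)
    else:
        # Unknown name: print the valid choices and exit with an error.
        print("`" + the_spider + "` is not a valid spider.")
        print("Valid spiders are:")
        for spider in spiders.list():
            if "crawl" in spider:
                print(" " + spider)
        sys.exit(1)

    # start() blocks until all scheduled crawls have finished.
    process.start()
```

Note that the `"crawl"` substring filter only applies when no name is given: an explicit name is accepted as long as it appears in `spiders.list()`, even if it does not contain `crawl`.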