From e12eb8bd99c1b6695873632e0540fa07379671d4 Mon Sep 17 00:00:00 2001
From: neodarz <neodarz@neodarz.net>
Date: Fri, 22 Feb 2019 00:00:19 +0100
Subject: Add a way to specify a crawl to launch

---
 README.md |  7 +++++++
 app.py    | 25 +++++++++++++++++++++----
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index c5fcd6a..34ce041 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,13 @@ For launch all the crawler use the following command:
 python app.py crawl
 ```
 
+You can also specify a single spider to crawl, for example `nevrax_crawler`,
+with the command:
+
+```
+python app.py crawl nevrax_crawler
+```
+
 # Indexing
 
 Before lauch indexing or searching command you must verifiy that the folder of
diff --git a/app.py b/app.py
index ad44a14..9c1c302 100644
--- a/app.py
+++ b/app.py
@@ -6,15 +6,27 @@ from scrapy.utils.project import get_project_settings
 
 from database.models import Page, db
 
-def crawl():
+def crawl(the_spider=""):
     try:
         db.create_tables(Page.__subclasses__())
         settings = get_project_settings()
         process = CrawlerProcess(settings)
         spiders = spiderloader.SpiderLoader.from_settings(settings)
-        for spider in spiders.list():
-            if "crawl" in spider:
-                process.crawl(spider)
+
+        if the_spider == "":
+            for spider in spiders.list():
+                if "crawl" in spider:
+                    process.crawl(spider)
+        elif the_spider in spiders.list():
+            process.crawl(the_spider)
+        else:
+            print("`"+the_spider+"` is not a valid spider.")
+            print('Valid spider are: ')
+            for spider in spiders.list():
+                if "crawl" in spider:
+                    print("    "+spider)
+            sys.exit(1)
+
         process.start()
     except Exception as e:
         print(e)
@@ -44,5 +56,10 @@ if __name__ == '__main__':
             update()
         else:
             show_help()
+    elif len(sys.argv) == 3:
+        if sys.argv[1] == "crawl":
+            crawl(sys.argv[2])
+        else:
+            show_help()
     else:
         show_help()
-- 
cgit v1.2.1