aboutsummaryrefslogtreecommitdiff
path: root/app.py
diff options
context:
space:
mode:
Diffstat (limited to 'app.py')
-rw-r--r--app.py25
1 files changed, 21 insertions, 4 deletions
diff --git a/app.py b/app.py
index ad44a14..9c1c302 100644
--- a/app.py
+++ b/app.py
@@ -6,15 +6,27 @@ from scrapy.utils.project import get_project_settings
from database.models import Page, db
-def crawl():
+def crawl(the_spider=""):
try:
db.create_tables(Page.__subclasses__())
settings = get_project_settings()
process = CrawlerProcess(settings)
spiders = spiderloader.SpiderLoader.from_settings(settings)
- for spider in spiders.list():
- if "crawl" in spider:
- process.crawl(spider)
+
+ if the_spider == "":
+ for spider in spiders.list():
+ if "crawl" in spider:
+ process.crawl(spider)
+ elif the_spider in spiders.list():
+ process.crawl(the_spider)
+ else:
+ print("`"+the_spider+"` is not a valid spider.")
+ print('Valid spider are: ')
+ for spider in spiders.list():
+ if "crawl" in spider:
+ print(" "+spider)
+ sys.exit(1)
+
process.start()
except Exception as e:
print(e)
@@ -44,5 +56,10 @@ if __name__ == '__main__':
update()
else:
show_help()
+ elif len(sys.argv) == 3:
+ if sys.argv[1] == "crawl":
+ crawl(sys.argv[2])
+ else:
+ show_help()
else:
show_help()