about | summary | refs | log | tree | commit | diff
path: root/crawler/nevrax/pipelines.py
diff options
context:
space:
mode:
author neodarz <neodarz@neodarz.net> 2019-01-19 00:02:57 +0100
committer neodarz <neodarz@neodarz.net> 2019-01-19 00:02:57 +0100
commit a3f01580faf6caee4abcc8e682567b87380857b9 (patch)
tree dbf5e0a9866b3aac7e7bb64f2eab8c005e1c28cd /crawler/nevrax/pipelines.py
parent 073e919ef198a04da1e5ed28a7dfbc5d9681fc14 (diff)
download khanindexer-a3f01580faf6caee4abcc8e682567b87380857b9.tar.xz
khanindexer-a3f01580faf6caee4abcc8e682567b87380857b9.zip
Add khanindex nevrax indexation
Diffstat (limited to 'crawler/nevrax/pipelines.py')
-rw-r--r-- crawler/nevrax/pipelines.py 19
1 files changed, 19 insertions, 0 deletions
diff --git a/crawler/nevrax/pipelines.py b/crawler/nevrax/pipelines.py
new file mode 100644
index 0000000..775d5df
--- /dev/null
+++ b/crawler/nevrax/pipelines.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+import logging
+
+from database.models import Nevrax
+
+
+class NevraxPipeline(object):
+
+ def process_item(self, item, spider):
+ try:
+ page = Nevrax.get(Nevrax.url == item['url'])
+ q = Nevrax.update(**item).where(Nevrax.url == item['url'])
+ q.execute()
+ logging.info("Update item {}".format(page))
+ except Nevrax.DoesNotExist:
+ page = Nevrax.create(**item)
+ logging.info("Create item {}".format(page))
+ logging.info('Item {} stored in db'.format(page))
+ return item