diff options
author | neodarz <neodarz@neodarz.net> | 2019-01-19 00:02:57 +0100 |
---|---|---|
committer | neodarz <neodarz@neodarz.net> | 2019-01-19 00:02:57 +0100 |
commit | a3f01580faf6caee4abcc8e682567b87380857b9 (patch) | |
tree | dbf5e0a9866b3aac7e7bb64f2eab8c005e1c28cd /crawler/nevrax/pipelines.py | |
parent | 073e919ef198a04da1e5ed28a7dfbc5d9681fc14 (diff) | |
download | khanindexer-a3f01580faf6caee4abcc8e682567b87380857b9.tar.xz khanindexer-a3f01580faf6caee4abcc8e682567b87380857b9.zip |
Add khanindex nevrax indexation
Diffstat (limited to '')
-rw-r--r-- | crawler/nevrax/pipelines.py | 19 |
1 files changed, 19 insertions, 0 deletions
# -*- coding: utf-8 -*-
import logging

from database.models import Nevrax


class NevraxPipeline(object):
    """Item pipeline that stores each processed item in the ``Nevrax`` model."""

    def process_item(self, item, spider):
        """Update the ``Nevrax`` row whose URL matches the item, or create it.

        The row is looked up by ``item['url']``. When a row is found, every
        field of ``item`` is written to the row(s) with that URL; when the
        lookup raises ``Nevrax.DoesNotExist``, a new row is created from the
        item's fields instead.

        Args:
            item: Mapping of field names to values; must contain ``'url'``.
            spider: Not used by this method.

        Returns:
            The same ``item`` object that was passed in.
        """
        # NOTE(review): block structure was reconstructed from a diff that had
        # been flattened onto a single line; the final log call and the return
        # are assumed to sit at method level -- confirm against the repository.
        url = item['url']
        try:
            page = Nevrax.get(Nevrax.url == url)
        except Nevrax.DoesNotExist:
            page = Nevrax.create(**item)
            outcome = "Create item {}"
        else:
            Nevrax.update(**item).where(Nevrax.url == url).execute()
            # `page` was fetched before the update, so the message shows the
            # instance as it was loaded, not re-read from the database.
            outcome = "Update item {}"
        logging.info(outcome.format(page))
        logging.info('Item {} stored in db'.format(page))
        return item