From ad6212da067fdc05a8564e79943692fd9d466110 Mon Sep 17 00:00:00 2001
From: neodarz
Date: Sun, 3 Feb 2019 20:02:18 +0100
Subject: Add ability to update a page in db if updated

---
 crawler/neodarznet/pipelines.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'crawler/neodarznet/pipelines.py')

diff --git a/crawler/neodarznet/pipelines.py b/crawler/neodarznet/pipelines.py
index fbfebbb..1f3a9fc 100644
--- a/crawler/neodarznet/pipelines.py
+++ b/crawler/neodarznet/pipelines.py
@@ -9,8 +9,9 @@ class NeodarznetPipeline(object):
     def process_item(self, item, spider):
         try:
             page = Neodarznet.get(Neodarznet.url == item['url'])
-            q = Neodarznet.update(**item).where(Neodarznet.url == item['url'])
-            q.execute()
+            if page.content_length != item['content_length']:
+                q = Neodarznet.update(**item).where(Neodarznet.url == item['url'])
+                q.execute()
             logging.info("Update item {}".format(page))
         except Neodarznet.DoesNotExist:
             page = Neodarznet.create(**item)
-- 
cgit v1.2.1