diff options
Diffstat (limited to 'crawler/nevrax/pipelines.py')
-rw-r--r-- | crawler/nevrax/pipelines.py | 5 |
1 file changed, 3 insertions, 2 deletions
diff --git a/crawler/nevrax/pipelines.py b/crawler/nevrax/pipelines.py
index 775d5df..dbbb782 100644
--- a/crawler/nevrax/pipelines.py
+++ b/crawler/nevrax/pipelines.py
@@ -9,8 +9,9 @@ class NevraxPipeline(object):
     def process_item(self, item, spider):
         try:
             page = Nevrax.get(Nevrax.url == item['url'])
-            q = Nevrax.update(**item).where(Nevrax.url == item['url'])
-            q.execute()
+            if page.content_length != item['content_length']:
+                q = Nevrax.update(**item).where(Nevrax.url == item['url'])
+                q.execute()
             logging.info("Update item {}".format(page))
         except Nevrax.DoesNotExist:
             page = Nevrax.create(**item)