2 files changed, 5 insertions, 3 deletions
diff --git a/crawler/neodarznet/pipelines.py b/crawler/neodarznet/pipelines.py
index fbfebbb..1f3a9fc 100644
--- a/crawler/neodarznet/pipelines.py
+++ b/crawler/neodarznet/pipelines.py
@@ -9,8 +9,9 @@ class NeodarznetPipeline(object):
     def process_item(self, item, spider):
         try:
             page = Neodarznet.get(Neodarznet.url == item['url'])
-            q = Neodarznet.update(**item).where(Neodarznet.url == item['url'])
-            q.execute()
+            if page.content_length != item['content_length']:
+                q = Neodarznet.update(**item).where(Neodarznet.url == item['url'])
+                q.execute()
             logging.info("Update item {}".format(page))
         except Neodarznet.DoesNotExist:
             page = Neodarznet.create(**item)
diff --git a/crawler/neodarznet/spiders/scrape.py b/crawler/neodarznet/spiders/scrape.py
index e16ede2..0f54c4a 100644
--- a/crawler/neodarznet/spiders/scrape.py
+++ b/crawler/neodarznet/spiders/scrape.py
@@ -39,5 +39,6 @@ class NeodarznetSpider(CrawlSpider):
         yield {
                 'url': response.url,
                 'title': response.css('title::text').extract_first(),
-                'content': ''.join(sel.select("//div[@class='bodya']//text()").extract()).strip()
+                'content': ''.join(sel.select("//div[@class='bodya']//text()").extract()).strip(),
+                'content_length': len(response.body)
         }