From ad6212da067fdc05a8564e79943692fd9d466110 Mon Sep 17 00:00:00 2001 From: neodarz Date: Sun, 3 Feb 2019 20:02:18 +0100 Subject: Add ability to update a page in db if updated --- crawler/neodarznet/spiders/scrape.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'crawler/neodarznet/spiders') diff --git a/crawler/neodarznet/spiders/scrape.py b/crawler/neodarznet/spiders/scrape.py index e16ede2..0f54c4a 100644 --- a/crawler/neodarznet/spiders/scrape.py +++ b/crawler/neodarznet/spiders/scrape.py @@ -39,5 +39,6 @@ class NeodarznetSpider(CrawlSpider): yield { 'url': response.url, 'title': response.css('title::text').extract_first(), - 'content': ''.join(sel.select("//div[@class='bodya']//text()").extract()).strip() + 'content': ''.join(sel.select("//div[@class='bodya']//text()").extract()).strip(), + 'content_length': len(response.body) } -- cgit v1.2.1