From 74dbf4defed8ae348a327e4674d917b3dd869713 Mon Sep 17 00:00:00 2001 From: neodarz Date: Wed, 6 Feb 2019 00:02:31 +0100 Subject: Add date when page are crawled --- crawler/neodarznet/spiders/scrape.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'crawler/neodarznet/spiders/scrape.py') diff --git a/crawler/neodarznet/spiders/scrape.py b/crawler/neodarznet/spiders/scrape.py index 0f54c4a..bd97067 100644 --- a/crawler/neodarznet/spiders/scrape.py +++ b/crawler/neodarznet/spiders/scrape.py @@ -4,6 +4,8 @@ from scrapy.spiders import CrawlSpider, Rule from scrapy.linkextractors import LinkExtractor from scrapy import Selector +import datetime + class NeodarznetSpider(CrawlSpider): name = "neodarznet" custom_settings = { @@ -40,5 +42,6 @@ class NeodarznetSpider(CrawlSpider): 'url': response.url, 'title': response.css('title::text').extract_first(), 'content': ''.join(sel.select("//div[@class='bodya']//text()").extract()).strip(), - 'content_length': len(response.body) + 'content_length': len(response.body), + 'date_updated': datetime.datetime.now() } -- cgit v1.2.1