diff options
author | neodarz <neodarz@neodarz.net> | 2019-02-06 00:02:31 +0100 |
---|---|---|
committer | neodarz <neodarz@neodarz.net> | 2019-02-06 00:02:31 +0100 |
commit | 74dbf4defed8ae348a327e4674d917b3dd869713 (patch) | |
tree | 36cf3c0f45b443616eccf4473e6c02e15e6a2347 | |
parent | ad6212da067fdc05a8564e79943692fd9d466110 (diff) | |
download | khanindexer-74dbf4defed8ae348a327e4674d917b3dd869713.tar.xz khanindexer-74dbf4defed8ae348a327e4674d917b3dd869713.zip |
Add date when pages are crawled
Diffstat (limited to '')
-rw-r--r-- | crawler/neodarznet/spiders/scrape.py | 5 | ||||
-rw-r--r-- | crawler/nevrax/spiders/scrape.py | 4 | ||||
-rw-r--r-- | database/models.py | 5 |
3 files changed, 11 insertions, 3 deletions
diff --git a/crawler/neodarznet/spiders/scrape.py b/crawler/neodarznet/spiders/scrape.py
index 0f54c4a..bd97067 100644
--- a/crawler/neodarznet/spiders/scrape.py
+++ b/crawler/neodarznet/spiders/scrape.py
@@ -4,6 +4,8 @@ from scrapy.spiders import CrawlSpider, Rule
 from scrapy.linkextractors import LinkExtractor
 from scrapy import Selector
 
+import datetime
+
 class NeodarznetSpider(CrawlSpider):
     name = "neodarznet"
     custom_settings = {
@@ -40,5 +42,6 @@ class NeodarznetSpider(CrawlSpider):
             'url': response.url,
             'title': response.css('title::text').extract_first(),
             'content': ''.join(sel.select("//div[@class='bodya']//text()").extract()).strip(),
-            'content_length': len(response.body)
+            'content_length': len(response.body),
+            'date_updated': datetime.datetime.now()
         }
diff --git a/crawler/nevrax/spiders/scrape.py b/crawler/nevrax/spiders/scrape.py
index 785ec3f..d27aecf 100644
--- a/crawler/nevrax/spiders/scrape.py
+++ b/crawler/nevrax/spiders/scrape.py
@@ -5,6 +5,7 @@ from scrapy.linkextractors import LinkExtractor
 from scrapy import Selector
 
 import config
+import datetime
 
 class NevraxSpider(CrawlSpider):
     name = "nevrax"
@@ -42,5 +43,6 @@ class NevraxSpider(CrawlSpider):
             'url': response.url,
             'title': response.css('title::text').extract_first(),
             'content': ''.join(sel.select("//body//text()").extract()).strip(),
-            'content_length': len(response.body)
+            'content_length': len(response.body),
+            'date_updated': datetime.datetime.now()
         }
diff --git a/database/models.py b/database/models.py
index a4c3f65..af7b6f5 100644
--- a/database/models.py
+++ b/database/models.py
@@ -1,7 +1,9 @@
-from peewee import Model, CharField, TextField, IntegerField, PostgresqlDatabase
+from peewee import Model, CharField, TextField, IntegerField, DateTimeField, PostgresqlDatabase
 
 import config
 
+import datetime
+
 db = PostgresqlDatabase(config.DB, host=config.DB_HOST, port=config.DB_PORT, user=config.DB_USER, password=config.DB_PASS, autocommit=True, autorollback=True)
 
 class Page(Model):
@@ -12,6 +14,7 @@ class Page(Model):
     title = CharField(null=True)
     content = TextField(null=True)
     content_length = IntegerField()
+    date_updated = DateTimeField(default=datetime.datetime.now)
 
     class Meta:
         database = db