diff options
Diffstat (limited to '')
-rw-r--r-- | crawler/nevrax/spiders/scrape.py | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/crawler/nevrax/spiders/scrape.py b/crawler/nevrax/spiders/scrape.py
index 8a7b8ec..785ec3f 100644
--- a/crawler/nevrax/spiders/scrape.py
+++ b/crawler/nevrax/spiders/scrape.py
@@ -41,5 +41,6 @@ class NevraxSpider(CrawlSpider):
         yield {
             'url': response.url,
             'title': response.css('title::text').extract_first(),
-            'content': ''.join(sel.select("//body//text()").extract()).strip()
+            'content': ''.join(sel.select("//body//text()").extract()).strip(),
+            'content_length': len(response.body)
         }