From f0b4712c897ee35f2d79cf0408f480c2c0bb41da Mon Sep 17 00:00:00 2001
From: neodarz
Date: Sun, 13 Jan 2019 11:22:16 +0100
Subject: Save all crawled data in database

---
 crawler/neodarznet/pipelines.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/crawler/neodarznet/pipelines.py b/crawler/neodarznet/pipelines.py
index 71e7865..6703796 100644
--- a/crawler/neodarznet/pipelines.py
+++ b/crawler/neodarznet/pipelines.py
@@ -1,6 +1,19 @@
 # -*- coding: utf-8 -*-
+import logging
+
+from database.models import Page
 
 
 class NeodarznetPipeline(object):
-    def process_time(self, item, spider):
+
+    def process_item(self, item, spider):
+        try:
+            page = Page.get(Page.url == item['url'])
+            q = Page.update(**item).where(Page.url == item['url'])
+            q.execute()
+            logging.info("Update item {}".format(page))
+        except Page.DoesNotExist:
+            page = Page.create(**item)
+            logging.info("Create item {}".format(page))
+        logging.info('Item {} stored in db'.format(page))
         return item
--
cgit v1.2.1
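
Note on the change above: process_item is the hook Scrapy actually calls on an
item pipeline (the old process_time name was never invoked), and the try/except
implements an update-or-create against a Peewee model, keyed on the item's url.
The patch does not include database/models.py, so the following is only a sketch
of what the assumed Page model could look like; the backend, database file name,
and every field except url (the upsert key used above) are guesses:

import peewee

# Assumed backend and file name; the real project may configure this differently.
db = peewee.SqliteDatabase('crawler.db')

class Page(peewee.Model):
    # url is the only field the pipeline demonstrably relies on:
    # it is the lookup key that decides between update and create.
    url = peewee.TextField(unique=True)
    # Hypothetical extra fields. The pipeline expands the item with **item,
    # so the model must define one column per item key.
    title = peewee.TextField(null=True)
    content = peewee.TextField(null=True)

    class Meta:
        database = db

db.connect()
db.create_tables([Page])

For the pipeline to run at all, it must also be enabled in the Scrapy settings,
e.g. ITEM_PIPELINES = {'crawler.neodarznet.pipelines.NeodarznetPipeline': 300}
(module path inferred from the repository layout).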