aboutsummaryrefslogtreecommitdiff
path: root/crawler/nevrax/pipelines.py
blob: dbbb782a28379353b65a50679ca0d222944769f8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# -*- coding: utf-8 -*-
import logging

from database.models import Nevrax


class NevraxPipeline(object):
    """Item pipeline that stores crawled pages through the ``Nevrax`` model.

    NOTE(review): the ``process_item(item, spider)`` signature and the
    ``get`` / ``create`` / ``update().where().execute()`` calls look like a
    Scrapy pipeline on top of a peewee model -- confirm against the project
    settings and ``database.models``.
    """

    def process_item(self, item, spider):
        """Create or refresh the ``Nevrax`` row matching ``item['url']``.

        A row is created when none exists for the URL.  An existing row is
        rewritten only when its stored ``content_length`` differs from the
        one carried by ``item``; otherwise the database is left untouched.

        :param item: mapping with at least ``url`` and ``content_length``
            keys; all of its keys are forwarded as column values.
        :param spider: spider that produced the item (unused here).
        :returns: ``item`` unchanged, so later pipeline stages can use it.
        """
        url = item['url']
        try:
            # Keep the try body minimal: only the lookup is expected to
            # raise DoesNotExist.
            page = Nevrax.get(Nevrax.url == url)
        except Nevrax.DoesNotExist:
            page = Nevrax.create(**item)
            logging.info("Create item %s", page)
        else:
            if page.content_length != item['content_length']:
                Nevrax.update(**item).where(Nevrax.url == url).execute()
                # Log the update only when one was actually executed.
                # ``page`` is the instance fetched before the update, so it
                # still carries the previous column values.
                logging.info("Update item %s", page)
        logging.info('Item %s stored in db', page)
        return item