From a3f01580faf6caee4abcc8e682567b87380857b9 Mon Sep 17 00:00:00 2001
From: neodarz
Date: Sat, 19 Jan 2019 00:02:57 +0100
Subject: Add khanindex nevrax indexation

---
 crawler/neodarznet/settings.py       | 12 ------------
 crawler/neodarznet/spiders/scrape.py |  9 +++++++--
 2 files changed, 7 insertions(+), 14 deletions(-)
 delete mode 100644 crawler/neodarznet/settings.py

(limited to 'crawler/neodarznet')

diff --git a/crawler/neodarznet/settings.py b/crawler/neodarznet/settings.py
deleted file mode 100644
index 2e5f184..0000000
--- a/crawler/neodarznet/settings.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-
-BOT_NAME = 'neodarznet'
-
-SPIDER_MODULES = ['crawler.neodarznet.spiders']
-NEWSPIDER_MODULE = 'crawler.neodarznet.spiders'
-
-ROBOTSTXT_OBEY = True
-
-DEPTH_LIMIT = 0
-
-ITEM_PIPELINES = {'crawler.neodarznet.pipelines.NeodarznetPipeline': 0}
diff --git a/crawler/neodarznet/spiders/scrape.py b/crawler/neodarznet/spiders/scrape.py
index a32a3e4..e16ede2 100644
--- a/crawler/neodarznet/spiders/scrape.py
+++ b/crawler/neodarznet/spiders/scrape.py
@@ -4,8 +4,13 @@ from scrapy.spiders import CrawlSpider, Rule
 from scrapy.linkextractors import LinkExtractor
 from scrapy import Selector
 
-class ScrapSpider(CrawlSpider):
-    name = "scrape"
+class NeodarznetSpider(CrawlSpider):
+    name = "neodarznet"
+    custom_settings = {
+        'ITEM_PIPELINES': {
+            'crawler.neodarznet.pipelines.NeodarznetPipeline': 0
+        }
+    }
     allow_domains = ['neodarz.net']
     start_urls = [
         'https://neodarz.net/',
-- 
cgit v1.2.1