aboutsummaryrefslogtreecommitdiff
path: root/crawler/neodarznet
diff options
context:
space:
mode:
author neodarz <neodarz@neodarz.net> 2019-01-19 00:02:57 +0100
committer neodarz <neodarz@neodarz.net> 2019-01-19 00:02:57 +0100
commit a3f01580faf6caee4abcc8e682567b87380857b9 (patch)
tree dbf5e0a9866b3aac7e7bb64f2eab8c005e1c28cd /crawler/neodarznet
parent 073e919ef198a04da1e5ed28a7dfbc5d9681fc14 (diff)
download khanindexer-a3f01580faf6caee4abcc8e682567b87380857b9.tar.xz
khanindexer-a3f01580faf6caee4abcc8e682567b87380857b9.zip
Add khanindex nevrax indexation
Diffstat (limited to 'crawler/neodarznet')
-rw-r--r-- crawler/neodarznet/settings.py 12
-rw-r--r-- crawler/neodarznet/spiders/scrape.py 9
2 files changed, 7 insertions, 14 deletions
diff --git a/crawler/neodarznet/settings.py b/crawler/neodarznet/settings.py
deleted file mode 100644
index 2e5f184..0000000
--- a/crawler/neodarznet/settings.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-
-BOT_NAME = 'neodarznet'
-
-SPIDER_MODULES = ['crawler.neodarznet.spiders']
-NEWSPIDER_MODULE = 'crawler.neodarznet.spiders'
-
-ROBOTSTXT_OBEY = True
-
-DEPTH_LIMIT = 0
-
-ITEM_PIPELINES = {'crawler.neodarznet.pipelines.NeodarznetPipeline': 0}
diff --git a/crawler/neodarznet/spiders/scrape.py b/crawler/neodarznet/spiders/scrape.py
index a32a3e4..e16ede2 100644
--- a/crawler/neodarznet/spiders/scrape.py
+++ b/crawler/neodarznet/spiders/scrape.py
@@ -4,8 +4,13 @@ from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy import Selector
-class ScrapSpider(CrawlSpider):
- name = "scrape"
+class NeodarznetSpider(CrawlSpider):
+ name = "neodarznet"
+ custom_settings = {
+ 'ITEM_PIPELINES': {
+ 'crawler.neodarznet.pipelines.NeodarznetPipeline': 0
+ }
+ }
allow_domains = ['neodarz.net']
start_urls = [
'https://neodarz.net/',