import scrapy
import sys
from scrapy.crawler import CrawlerProcess
from scrapy import spiderloader
from scrapy.utils.project import get_project_settings
from flask import Flask, request, jsonify
import json
from sphinx import sphinx
from database.models import Page, db
import config

app = Flask(__name__)


def _validation_error(query, index):
    """Return the error payload for a bad query/index pair, or ``None``.

    Both arguments are the raw values read from the request's query string:
    ``None`` when the parameter is absent, ``''`` when it is present but
    empty. The checks run in a fixed priority order, so a request with
    several problems reports only the first one.
    """
    if query is None:
        msg = 'Use `search` attribute for make a search'
    elif index is None:
        msg = 'Use `index` attribute to precise an index'
    elif index == '':
        msg = '`index` cannot be null'
    elif query == '':
        msg = '`search` cannot be null'
    else:
        return None
    return {'error': 1, 'msg': msg}


@app.route("/", methods=['GET', 'POST'])
def search():
    """Handle ``/``: run a search and return the result as JSON.

    Reads the ``search`` and ``index`` query-string parameters. When both
    are present and non-empty the call is delegated to
    ``sphinx.search(query, index)``; otherwise a JSON object of the form
    ``{'error': 1, 'msg': ...}`` describing the first problem is returned.
    """
    query = request.args.get('search')
    index = request.args.get('index')

    error = _validation_error(query, index)
    response = sphinx.search(query, index) if error is None else error
    return jsonify(response)


def crawl():
    """Create the database tables, then run every spider of the project.

    Tables are created for every subclass of ``Page``. Spider names are
    discovered through Scrapy's ``SpiderLoader`` using the project settings,
    each one is scheduled on a single ``CrawlerProcess``, and the process is
    then started.
    """
    # NOTE(review): any failure is only printed; the caller gets no signal
    # (no re-raise, no exit code). Kept as-is to preserve current behaviour.
    try:
        db.create_tables(Page.__subclasses__())
        settings = get_project_settings()
        process = CrawlerProcess(settings)
        loader = spiderloader.SpiderLoader.from_settings(settings)
        for spider_name in loader.list():
            process.crawl(spider_name)
        process.start()
    except Exception as e:
        print(e)


def main():
    """Start the Flask server with debug mode and the reloader enabled."""
    # NOTE(review): debug=True enables Flask's interactive debugger; confirm
    # this entry point is never used for a publicly reachable deployment.
    app.run(debug=True, use_reloader=True)


def show_help():
    """Print the supported command-line invocations."""
    script = sys.argv[0]
    print(f"Launch server => {script}")
    print(f"Launch all crawler => {script} crawl")


if __name__ == '__main__':
    # No argument: serve; the single argument "crawl": crawl; anything
    # else: print usage.
    if len(sys.argv) == 1:
        main()
    elif len(sys.argv) == 2 and sys.argv[1] == "crawl":
        crawl()
    else:
        show_help()