From 70eea5c1874ef3fffe59c5ab0a03dae25beb2d25 Mon Sep 17 00:00:00 2001
From: neodarz
Date: Sun, 21 Jan 2018 05:50:43 +0100
Subject: Add code to directly build and start a preconfigured searx

---
 README.md                  |   9 +++
 RyzomForum.py              |  84 ----------------------------
 devryzom.py                | 106 -----------------------------------
 forgeryzom.py              | 126 ------------------------------------------
 install.sh                 |  17 ++++++
 khaganatForum.py           | 146 -------------------------------------------------
 khaganatWikhan.py          |  84 ----------------------------
 ryzomcoreConfluence.py     | 130 -------------------------------------------
 settings.yml               |  89 ------------------------------
 src/RyzomForum.py          |  84 ++++++++++++++++++++++++++++
 src/devryzom.py            | 106 +++++++++++++++++++++++++++++++++++
 src/forgeryzom.py          | 126 ++++++++++++++++++++++++++++++++++++++++++
 src/khaganatForum.py       | 146 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/khaganatWikhan.py      |  84 ++++++++++++++++++++++++++++
 src/ryzomcoreConfluence.py | 130 +++++++++++++++++++++++++++++++++++++++++++
 src/settings.yml           | 108 ++++++++++++++++++++++++++++++++++++
 start.sh                   |   3 +
 17 files changed, 813 insertions(+), 765 deletions(-)
 delete mode 100644 RyzomForum.py
 delete mode 100644 devryzom.py
 delete mode 100644 forgeryzom.py
 create mode 100755 install.sh
 delete mode 100644 khaganatForum.py
 delete mode 100644 khaganatWikhan.py
 delete mode 100644 ryzomcoreConfluence.py
 delete mode 100644 settings.yml
 create mode 100644 src/RyzomForum.py
 create mode 100644 src/devryzom.py
 create mode 100644 src/forgeryzom.py
 create mode 100644 src/khaganatForum.py
 create mode 100644 src/khaganatWikhan.py
 create mode 100644 src/ryzomcoreConfluence.py
 create mode 100644 src/settings.yml
 create mode 100755 start.sh

diff --git a/README.md b/README.md
index 7bdb819..9ab6708 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,15 @@ Si "moteurs de recherche" est entre guillmet, c'est que ce ne sont pas des moteu
 * khaganat.net/forum: !kf
 * ryzom.com/forum: !rc
 
+# Installation
+
+Pour installer il suffit de lancer `install.sh` afin d'installer searx et de
+le configurer directement.
+
+# Usage
+
+Pour lancer le serveur, il suffit de lancer `start.sh`.
+ # Développement (searx utilise python 2.7, ça peut toujours être utile comme info) diff --git a/RyzomForum.py b/RyzomForum.py deleted file mode 100644 index 083696c..0000000 --- a/RyzomForum.py +++ /dev/null @@ -1,84 +0,0 @@ -# Doku Wiki -# -# @website https://www.dokuwiki.org/ -# @provide-api yes -# (https://www.dokuwiki.org/devel:xmlrpc) -# -# @using-api no -# @results HTML -# @stable yes -# @parse (general) url, title, content - -from urllib import urlencode -from lxml.html import fromstring -from searx.engines.xpath import extract_text - -# engine dependent config -categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' -paging = False -language_support = False -number_of_results = 5 - -# search-url -# Doku is OpenSearch compatible -base_url = 'https://khaganat.net' -search_url = '/wikhan/?do=search'\ - '&{query}' -# TODO '&startRecord={offset}'\ -# TODO '&maximumRecords={limit}'\ - - -# do search-request -def request(query, params): - - params['url'] = base_url +\ - search_url.format(query=urlencode({'id': query})) - - return params - - -# get response from search-request -def response(resp): - results = [] - - doc = fromstring(resp.text) - - # parse results - # Quickhits - for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'): - try: - res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] - except: - continue - - if not res_url: - continue - - title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) - - # append result - results.append({'title': title, - 'content': "", - 'url': base_url + res_url}) - - # Search results - for r in doc.xpath('//dl[@class="search_results"]/*'): - try: - if r.tag == "dt": - res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] - title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) - elif r.tag == "dd": - content = extract_text(r.xpath('.')) - - # append result - results.append({'title': title, - 'content': content, - 'url': base_url + res_url}) - except: - continue - - if not res_url: - continue - - # return results - return results diff --git a/devryzom.py b/devryzom.py deleted file mode 100644 index 7201096..0000000 --- a/devryzom.py +++ /dev/null @@ -1,106 +0,0 @@ -# Doku Wiki -# -# @website https://www.dokuwiki.org/ -# @provide-api yes -# (https://www.dokuwiki.org/devel:xmlrpc) -# -# @using-api no -# @results HTML -# @stable yes -# @parse (general) url, title, content - -from urllib import urlencode -from lxml.html import fromstring -from searx.engines.xpath import extract_text -from datetime import datetime - -# engine dependent config -categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' -paging = False -language_support = False -number_of_results = 5 - -# search-url -# Doku is OpenSearch compatible -base_url = 'http://dev.ryzom.com' -search_url = '/search?wiki_pages=1&{query}' -#search_url = '/wikhan/?do=search'\ -# '&{query}' -# TODO '&startRecord={offset}'\ -# TODO '&maximumRecords={limit}'\ - - -# do search-request -def request(query, params): - - params['url'] = base_url +\ - search_url.format(query=urlencode({'q': query})) - - return params - - -# get response from search-request -def response(resp): - results = [] - - doc = fromstring(resp.text) - - # parse results - # Quickhits - i = 0 - for r in doc.xpath('//dl[@id="search-results"]/dt'): - try: - res_url = r.xpath('.//a/@href')[-1] - except: - continue - - if not res_url: - continue - - title = extract_text(r.xpath('.//a')) - - i = i + 1 - - y = 0 - - - for s in doc.xpath('//dl[@id="search-results"]/dd'): - y = y + 1 - if 
y == i: - content = extract_text(s.xpath('.//span[@class="description"]')) - - dataBrut = extract_text(s.xpath('.//span[@class="author"]')) - data = dataBrut.split(' ') - date = data[0].split('/') - - - # append result - results.append({'title': title, - 'content': content, - 'url': base_url + res_url, - 'publishedDate': datetime(int(date[2]), int(date[0]), int(date[1]), 0, 0, 0)}) - - # append result - #results.append({'content': content}) - - # Search results - #for r in doc.xpath('//dl[@class="search_results"]/*'): - # try: - # if r.tag == "dt": - # res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] - # title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) - # elif r.tag == "dd": - # content = extract_text(r.xpath('.')) - - # append result - # results.append({'title': title, - # 'content': content, - # 'url': base_url + res_url}) - # except: - # continue - - # if not res_url: - # continue - - # return results - return results diff --git a/forgeryzom.py b/forgeryzom.py deleted file mode 100644 index 850ae43..0000000 --- a/forgeryzom.py +++ /dev/null @@ -1,126 +0,0 @@ -# Doku Wiki -# -# @website https://www.dokuwiki.org/ -# @provide-api yes -# (https://www.dokuwiki.org/devel:xmlrpc) -# -# @using-api no -# @results HTML -# @stable yes -# @parse (general) url, title, content - -from urllib import urlencode -from lxml.html import fromstring -from searx.engines.xpath import extract_text -from datetime import datetime - -# engine dependent config -categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' -paging = False -language_support = False -number_of_results = 5 - -# search-url -# Doku is OpenSearch compatible -base_url = 'http://forge.ryzom.com' -search_url = '/wiki/W/api.php?action=query'\ - '&{query}' -# TODO '&startRecord={offset}'\ -# TODO '&maximumRecords={limit}'\ - - -# do search-request -def request(query, params): - - params['url'] = base_url +\ - search_url.format(query=urlencode({'search': query})) - - return params - - -# get response from search-request -def response(resp): - results = [] - - doc = fromstring(resp.text) - - # parse results - # Quickhits - for r in doc.xpath('//ul[@class="mw-search-results"]/li'): - try: - res_url = r.xpath('.//div[@class="mw-search-result-heading"]/a/@href')[-1] - except: - continue - - if not res_url: - continue - - title = extract_text(r.xpath('.//div[@class="mw-search-result-heading"]/a/@title')) - - content = extract_text(r.xpath('.//div[@class="searchresult"]')) - - dataBrut = extract_text(r.xpath('.//div[@class="mw-search-result-data"]')) - - data = dataBrut.split('-') - - - - #date = '-'.join(dataS) - adatetime = data[1] - data = adatetime.split(',') - date = data[1] - Thedate = date.split(' ') - - - if Thedate[2] == "January": - ThedateMonth = 1 - elif Thedate[2] == "February": - ThedateMonth = 2 - elif Thedate[2] == "March": - ThedateMonth = 3 - elif Thedate[2] == "April": - ThedateMonth = 4 - elif Thedate[2] == "May": - ThedateMonth = 5 - elif Thedate[2] == "June": - ThedateMonth = 6 - elif Thedate[2] == "July": - ThedateMonth = 7 - elif Thedate[2] == "August": - ThedateMonth = 8 - elif Thedate[2] == "September": - ThedateMonth = 9 - elif Thedate[2] == "October": - ThedateMonth = 10 - elif Thedate[2] == "November": - ThedateMonth = 11 - else: - ThedateMonth = 12 - - # append result - results.append({'title': title, - 'content': content, - 'url': base_url + res_url, - 'publishedDate': datetime(int(Thedate[3]), ThedateMonth, int(Thedate[1]), 3, 1, 42)}) - - # Search results - #for r in 
doc.xpath('//dl[@class="search_results"]/*'): - # try: - # if r.tag == "dt": - # res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] - # title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) - # elif r.tag == "dd": - # content = extract_text(r.xpath('.')) - - # append result - # results.append({'title': title, - # 'content': content, - # 'url': base_url + res_url}) - # except: - # continue - - # if not res_url: - # continue - - # return results - return results diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..4793895 --- /dev/null +++ b/install.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +git clone https://github.com/asciimoo/searx.git +cd searx +git checkout v0.13.1 +cd .. +ln -s ../../../src/devryzom.py searx/searx/engines/devryzom.py +ln -s ../../../src/khaganatWikhan.py searx/searx/engines/khaganatWikhan.py +ln -s ../../../src/khaganatForum.py searx/searx/engines/khaganatForum.py +ln -s ../../../src/forgeryzom.py searx/searx/engines/forgeryzom.py +ln -s ../../../src/ryzomcoreConfluence.py searx/searx/engines/ryzomcoreConfluence.py +ln -s ../../../src/RyzomForum.py searx/searx/engines/RyzomForum.py +cp src/settings.yml searx/searx/settings.yml +virtualenv venv -p python2.7 +source venv/bin/activate +./searx/manage.sh update_packages + diff --git a/khaganatForum.py b/khaganatForum.py deleted file mode 100644 index 781a1cf..0000000 --- a/khaganatForum.py +++ /dev/null @@ -1,146 +0,0 @@ -# -*- coding: utf-8 -*- -# Doku Wiki -# -# @website https://www.dokuwiki.org/ -# @provide-api yes -# (https://www.dokuwiki.org/devel:xmlrpc) -# -# @using-api no -# @results HTML -# @stable yes -# @parse (general) url, title, content - -from urllib import urlencode -from lxml.html import fromstring -from searx.engines.xpath import extract_text -from datetime import datetime - -# engine dependent config -categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' -paging = False -language_support = False -number_of_results = 5 - -# search-url -# Doku is OpenSearch compatible -base_url = 'https://khaganat.net' -search_url = '/forum/index.php?action=search2&{query}' -#search_url = '/wikhan/?do=search'\ -# '&{query}' -# TODO '&startRecord={offset}'\ -# TODO '&maximumRecords={limit}'\ - - -# do search-request -def request(query, params): - - params['url'] = base_url +\ - search_url.format(query=urlencode({'search': query})) - - return params - - -# get response from search-request -def response(resp): - results = [] - - doc = fromstring(resp.text) - - # parse results - # Quickhits - for r in doc.xpath('//div[@id="main_content_section"]/div/div/div'): - try: - res_url = r.xpath('.//div[@class="topic_details floatleft"]/h5/a/@href')[-1] - except: - continue - - if not res_url: - continue - - try: - title = extract_text(r.xpath('.//div[@class="topic_details floatleft"]/h5')) - except: - continue - try: - content = extract_text(r.xpath('.//div[@class="list_posts double_height"]')) - except: - content = "" - - try: - dateBrut = extract_text(r.xpath('.//div[@class="topic_details floatleft"]/span/em')).encode('utf-8') - except: - dateBrut = "01 janvier 1970 à 01:01:01".encode('utf-8') - date = dateBrut.split(' ') - year = date[2] - day = date[0] - french_text_month = date[1] - time = date[4] - #dataBrut = extract_text(r.xpath('.//span[@class="date"]')) - - #data = dataBrut.split('-') - - - - #date = '-'.join(dataS) - #adatetime = data[1] - #data = adatetime.split(',') - #date = data[1] - #Thedate = dataBrut.split(' ') - #TheDay = Thedate[1].split(',') - - - if french_text_month 
== "janvier": - Month = 1 - elif french_text_month.decode('utf-8') == "février".decode('utf-8'): - Month = 2 - elif french_text_month == "mars": - Month = 3 - elif french_text_month == "avril": - Month = 4 - elif french_text_month == "mai": - Month = 5 - elif french_text_month == "juin": - Month = 6 - elif french_text_month == "juillet": - Month = 7 - elif french_text_month.decode('utf-8') == "août".decode('utf-8'): - Month = 8 - elif french_text_month == "septembre": - Month = 9 - elif french_text_month == "octobre": - Month = 10 - elif french_text_month == "novembre": - Month = 11 - else: - Month = 12 - - - # append result - results.append({'title': title, - 'content': content, - 'url': base_url + res_url, - 'publishedDate': datetime(int(year), Month, int(day), 3, 1, 42)}) - - - - # Search results - #for r in doc.xpath('//dl[@class="search_results"]/*'): - # try: - # if r.tag == "dt": - # res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] - # title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) - # elif r.tag == "dd": - # content = extract_text(r.xpath('.')) - - # append result - # results.append({'title': title, - # 'content': content, - # 'url': base_url + res_url}) - # except: - # continue - - # if not res_url: - # continue - - # return results - return results diff --git a/khaganatWikhan.py b/khaganatWikhan.py deleted file mode 100644 index 083696c..0000000 --- a/khaganatWikhan.py +++ /dev/null @@ -1,84 +0,0 @@ -# Doku Wiki -# -# @website https://www.dokuwiki.org/ -# @provide-api yes -# (https://www.dokuwiki.org/devel:xmlrpc) -# -# @using-api no -# @results HTML -# @stable yes -# @parse (general) url, title, content - -from urllib import urlencode -from lxml.html import fromstring -from searx.engines.xpath import extract_text - -# engine dependent config -categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' -paging = False -language_support = False -number_of_results = 5 - -# search-url -# Doku is OpenSearch compatible -base_url = 'https://khaganat.net' -search_url = '/wikhan/?do=search'\ - '&{query}' -# TODO '&startRecord={offset}'\ -# TODO '&maximumRecords={limit}'\ - - -# do search-request -def request(query, params): - - params['url'] = base_url +\ - search_url.format(query=urlencode({'id': query})) - - return params - - -# get response from search-request -def response(resp): - results = [] - - doc = fromstring(resp.text) - - # parse results - # Quickhits - for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'): - try: - res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] - except: - continue - - if not res_url: - continue - - title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) - - # append result - results.append({'title': title, - 'content': "", - 'url': base_url + res_url}) - - # Search results - for r in doc.xpath('//dl[@class="search_results"]/*'): - try: - if r.tag == "dt": - res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] - title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) - elif r.tag == "dd": - content = extract_text(r.xpath('.')) - - # append result - results.append({'title': title, - 'content': content, - 'url': base_url + res_url}) - except: - continue - - if not res_url: - continue - - # return results - return results diff --git a/ryzomcoreConfluence.py b/ryzomcoreConfluence.py deleted file mode 100644 index 5d35c9f..0000000 --- a/ryzomcoreConfluence.py +++ /dev/null @@ -1,130 +0,0 @@ -# Doku Wiki -# -# @website https://www.dokuwiki.org/ -# @provide-api yes -# 
(https://www.dokuwiki.org/devel:xmlrpc) -# -# @using-api no -# @results HTML -# @stable yes -# @parse (general) url, title, content - -from urllib import urlencode -from lxml.html import fromstring -from searx.engines.xpath import extract_text -from datetime import datetime - -# engine dependent config -categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' -paging = False -language_support = False -number_of_results = 5 - -# search-url -# Doku is OpenSearch compatible -base_url = 'https://ryzomcore.atlassian.net' -search_url = '/wiki/dosearchsite.action?{query}' -#search_url = '/wikhan/?do=search'\ -# '&{query}' -# TODO '&startRecord={offset}'\ -# TODO '&maximumRecords={limit}'\ - - -# do search-request -def request(query, params): - - params['url'] = base_url +\ - search_url.format(query=urlencode({'queryString': query})) - - return params - - -# get response from search-request -def response(resp): - results = [] - - doc = fromstring(resp.text) - - # parse results - # Quickhits - for r in doc.xpath('//ol[@class="search-results cql"]/li'): - try: - res_url = r.xpath('.//a[@class="search-result-link visitable"]/@href')[-1] - except: - continue - - if not res_url: - continue - - title = extract_text(r.xpath('.//a[@class="search-result-link visitable"]')) - - content = extract_text(r.xpath('.//div[@class="highlights"]')) - - dataBrut = extract_text(r.xpath('.//span[@class="date"]')) - - #data = dataBrut.split('-') - - - - #date = '-'.join(dataS) - #adatetime = data[1] - #data = adatetime.split(',') - #date = data[1] - Thedate = dataBrut.split(' ') - TheDay = Thedate[1].split(',') - - - if Thedate[0] == "Jan": - ThedateMonth = 1 - elif Thedate[0] == "Feb": - ThedateMonth = 2 - elif Thedate[0] == "Mar": - ThedateMonth = 3 - elif Thedate[0] == "Apr": - ThedateMonth = 4 - elif Thedate[0] == "May": - ThedateMonth = 5 - elif Thedate[0] == "Jun": - ThedateMonth = 6 - elif Thedate[0] == "Jul": - ThedateMonth = 7 - elif Thedate[0] == "Aug": - ThedateMonth = 8 - elif Thedate[0] == "Sep": - ThedateMonth = 9 - elif Thedate[0] == "Oct": - ThedateMonth = 10 - elif Thedate[0] == "Nov": - ThedateMonth = 11 - else: - ThedateMonth = 12 - - # append result - results.append({'title': title, - 'content': content, - 'url': base_url + res_url, - 'publishedDate': datetime(int(Thedate[2]), ThedateMonth, int(TheDay[0]), 3, 1, 42)}) - - - - # Search results - #for r in doc.xpath('//dl[@class="search_results"]/*'): - # try: - # if r.tag == "dt": - # res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] - # title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) - # elif r.tag == "dd": - # content = extract_text(r.xpath('.')) - - # append result - # results.append({'title': title, - # 'content': content, - # 'url': base_url + res_url}) - # except: - # continue - - # if not res_url: - # continue - - # return results - return results diff --git a/settings.yml b/settings.yml deleted file mode 100644 index 76f1c3d..0000000 --- a/settings.yml +++ /dev/null @@ -1,89 +0,0 @@ -general: - debug : False # Debug mode, only for development - instance_name : "searx" # displayed name - -search: - safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict - autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default - language : "all" - -server: - port : 8888 - bind_address : "0.0.0.0" # address to listen on - secret_key : "e6a21d96debe828f1cad62074bec30a2" # change this! 
- base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/" - image_proxy : False # Proxying image results through searx - -ui: - themes_path : "" # Custom ui themes path - leave it blank if you didn't change - default_theme : oscar # ui theme - default_locale : "" # Default interface locale - leave blank to detect from browser information or use codes from the 'locales' config section - -# searx supports result proxification using an external service: https://github.com/asciimoo/morty -# uncomment below section if you have running morty proxy -#result_proxy: -# url : http://127.0.0.1:3000/ -# key : your_morty_proxy_key - -outgoing: # communication with search engines - request_timeout : 2.0 # seconds - useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator - pool_connections : 100 # Number of different hosts - pool_maxsize : 10 # Number of simultaneous requests by host -# uncomment below section if you want to use a proxy -# see http://docs.python-requests.org/en/latest/user/advanced/#proxies -# SOCKS proxies are also supported: see http://docs.python-requests.org/en/master/user/advanced/#socks -# proxies : -# http : http://127.0.0.1:8080 -# https: http://127.0.0.1:8080 -# uncomment below section only if you have more than one network interface -# which can be the source of outgoing search requests -# source_ips: -# - 1.1.1.1 -# - 1.1.1.2 - -engines: - - name : khaganat.net/wikhan - engine : khaganatWikhan - shortcut : wik - - - name : ryzomcore.atlassian.net/wiki - engine : ryzomcoreConfluence - shortcut : rcc - - - name : forge.ryzom.com - engine : forgeryzom - shortcut: fr - - - name: dev.ryzom.com - engine: devryzom - shortcut: dr - - - name: khaganat.net/forum - engine: khaganatForum - shortcut: kf - - - name: app.ryzom.com app forum - engine: RyzomForum - shortcut: rc - - -locales: - en : English - bg : Български (Bulgarian) - de : Deutsch (German) - el_GR : Ελληνικά (Greek_Greece) - eo : Esperanto (Esperanto) - es : Español (Spanish) - fr : Français (French) - he : עברית (Hebrew) - hu : Magyar (Hungarian) - it : Italiano (Italian) - ja : 日本語 (Japanese) - nl : Nederlands (Dutch) - pt : Português (Portuguese) - pt_BR : Português (Portuguese_Brazil) - ro : Română (Romanian) - ru : Русский (Russian) - tr : Türkçe (Turkish) - zh : 中文 (Chinese) diff --git a/src/RyzomForum.py b/src/RyzomForum.py new file mode 100644 index 0000000..083696c --- /dev/null +++ b/src/RyzomForum.py @@ -0,0 +1,84 @@ +# Doku Wiki +# +# @website https://www.dokuwiki.org/ +# @provide-api yes +# (https://www.dokuwiki.org/devel:xmlrpc) +# +# @using-api no +# @results HTML +# @stable yes +# @parse (general) url, title, content + +from urllib import urlencode +from lxml.html import fromstring +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' +paging = False +language_support = False +number_of_results = 5 + +# search-url +# Doku is OpenSearch compatible +base_url = 'https://khaganat.net' +search_url = '/wikhan/?do=search'\ + '&{query}' +# TODO '&startRecord={offset}'\ +# TODO '&maximumRecords={limit}'\ + + +# do search-request +def request(query, params): + + params['url'] = base_url +\ + search_url.format(query=urlencode({'id': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + doc = fromstring(resp.text) + + # parse results + # Quickhits + for r in 
doc.xpath('//div[@class="search_quickresult"]/ul/li'): + try: + res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + except: + continue + + if not res_url: + continue + + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + + # append result + results.append({'title': title, + 'content': "", + 'url': base_url + res_url}) + + # Search results + for r in doc.xpath('//dl[@class="search_results"]/*'): + try: + if r.tag == "dt": + res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + elif r.tag == "dd": + content = extract_text(r.xpath('.')) + + # append result + results.append({'title': title, + 'content': content, + 'url': base_url + res_url}) + except: + continue + + if not res_url: + continue + + # return results + return results diff --git a/src/devryzom.py b/src/devryzom.py new file mode 100644 index 0000000..7201096 --- /dev/null +++ b/src/devryzom.py @@ -0,0 +1,106 @@ +# Doku Wiki +# +# @website https://www.dokuwiki.org/ +# @provide-api yes +# (https://www.dokuwiki.org/devel:xmlrpc) +# +# @using-api no +# @results HTML +# @stable yes +# @parse (general) url, title, content + +from urllib import urlencode +from lxml.html import fromstring +from searx.engines.xpath import extract_text +from datetime import datetime + +# engine dependent config +categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' +paging = False +language_support = False +number_of_results = 5 + +# search-url +# Doku is OpenSearch compatible +base_url = 'http://dev.ryzom.com' +search_url = '/search?wiki_pages=1&{query}' +#search_url = '/wikhan/?do=search'\ +# '&{query}' +# TODO '&startRecord={offset}'\ +# TODO '&maximumRecords={limit}'\ + + +# do search-request +def request(query, params): + + params['url'] = base_url +\ + search_url.format(query=urlencode({'q': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + doc = fromstring(resp.text) + + # parse results + # Quickhits + i = 0 + for r in doc.xpath('//dl[@id="search-results"]/dt'): + try: + res_url = r.xpath('.//a/@href')[-1] + except: + continue + + if not res_url: + continue + + title = extract_text(r.xpath('.//a')) + + i = i + 1 + + y = 0 + + + for s in doc.xpath('//dl[@id="search-results"]/dd'): + y = y + 1 + if y == i: + content = extract_text(s.xpath('.//span[@class="description"]')) + + dataBrut = extract_text(s.xpath('.//span[@class="author"]')) + data = dataBrut.split(' ') + date = data[0].split('/') + + + # append result + results.append({'title': title, + 'content': content, + 'url': base_url + res_url, + 'publishedDate': datetime(int(date[2]), int(date[0]), int(date[1]), 0, 0, 0)}) + + # append result + #results.append({'content': content}) + + # Search results + #for r in doc.xpath('//dl[@class="search_results"]/*'): + # try: + # if r.tag == "dt": + # res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + # title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + # elif r.tag == "dd": + # content = extract_text(r.xpath('.')) + + # append result + # results.append({'title': title, + # 'content': content, + # 'url': base_url + res_url}) + # except: + # continue + + # if not res_url: + # continue + + # return results + return results diff --git a/src/forgeryzom.py b/src/forgeryzom.py new file mode 100644 index 0000000..850ae43 --- /dev/null +++ b/src/forgeryzom.py @@ -0,0 +1,126 @@ +# Doku Wiki +# +# @website https://www.dokuwiki.org/ +# @provide-api yes +# 
(https://www.dokuwiki.org/devel:xmlrpc) +# +# @using-api no +# @results HTML +# @stable yes +# @parse (general) url, title, content + +from urllib import urlencode +from lxml.html import fromstring +from searx.engines.xpath import extract_text +from datetime import datetime + +# engine dependent config +categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' +paging = False +language_support = False +number_of_results = 5 + +# search-url +# Doku is OpenSearch compatible +base_url = 'http://forge.ryzom.com' +search_url = '/wiki/W/api.php?action=query'\ + '&{query}' +# TODO '&startRecord={offset}'\ +# TODO '&maximumRecords={limit}'\ + + +# do search-request +def request(query, params): + + params['url'] = base_url +\ + search_url.format(query=urlencode({'search': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + doc = fromstring(resp.text) + + # parse results + # Quickhits + for r in doc.xpath('//ul[@class="mw-search-results"]/li'): + try: + res_url = r.xpath('.//div[@class="mw-search-result-heading"]/a/@href')[-1] + except: + continue + + if not res_url: + continue + + title = extract_text(r.xpath('.//div[@class="mw-search-result-heading"]/a/@title')) + + content = extract_text(r.xpath('.//div[@class="searchresult"]')) + + dataBrut = extract_text(r.xpath('.//div[@class="mw-search-result-data"]')) + + data = dataBrut.split('-') + + + + #date = '-'.join(dataS) + adatetime = data[1] + data = adatetime.split(',') + date = data[1] + Thedate = date.split(' ') + + + if Thedate[2] == "January": + ThedateMonth = 1 + elif Thedate[2] == "February": + ThedateMonth = 2 + elif Thedate[2] == "March": + ThedateMonth = 3 + elif Thedate[2] == "April": + ThedateMonth = 4 + elif Thedate[2] == "May": + ThedateMonth = 5 + elif Thedate[2] == "June": + ThedateMonth = 6 + elif Thedate[2] == "July": + ThedateMonth = 7 + elif Thedate[2] == "August": + ThedateMonth = 8 + elif Thedate[2] == "September": + ThedateMonth = 9 + elif Thedate[2] == "October": + ThedateMonth = 10 + elif Thedate[2] == "November": + ThedateMonth = 11 + else: + ThedateMonth = 12 + + # append result + results.append({'title': title, + 'content': content, + 'url': base_url + res_url, + 'publishedDate': datetime(int(Thedate[3]), ThedateMonth, int(Thedate[1]), 3, 1, 42)}) + + # Search results + #for r in doc.xpath('//dl[@class="search_results"]/*'): + # try: + # if r.tag == "dt": + # res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + # title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + # elif r.tag == "dd": + # content = extract_text(r.xpath('.')) + + # append result + # results.append({'title': title, + # 'content': content, + # 'url': base_url + res_url}) + # except: + # continue + + # if not res_url: + # continue + + # return results + return results diff --git a/src/khaganatForum.py b/src/khaganatForum.py new file mode 100644 index 0000000..781a1cf --- /dev/null +++ b/src/khaganatForum.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +# Doku Wiki +# +# @website https://www.dokuwiki.org/ +# @provide-api yes +# (https://www.dokuwiki.org/devel:xmlrpc) +# +# @using-api no +# @results HTML +# @stable yes +# @parse (general) url, title, content + +from urllib import urlencode +from lxml.html import fromstring +from searx.engines.xpath import extract_text +from datetime import datetime + +# engine dependent config +categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' +paging = False +language_support = False +number_of_results = 5 + +# 
search-url +# Doku is OpenSearch compatible +base_url = 'https://khaganat.net' +search_url = '/forum/index.php?action=search2&{query}' +#search_url = '/wikhan/?do=search'\ +# '&{query}' +# TODO '&startRecord={offset}'\ +# TODO '&maximumRecords={limit}'\ + + +# do search-request +def request(query, params): + + params['url'] = base_url +\ + search_url.format(query=urlencode({'search': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + doc = fromstring(resp.text) + + # parse results + # Quickhits + for r in doc.xpath('//div[@id="main_content_section"]/div/div/div'): + try: + res_url = r.xpath('.//div[@class="topic_details floatleft"]/h5/a/@href')[-1] + except: + continue + + if not res_url: + continue + + try: + title = extract_text(r.xpath('.//div[@class="topic_details floatleft"]/h5')) + except: + continue + try: + content = extract_text(r.xpath('.//div[@class="list_posts double_height"]')) + except: + content = "" + + try: + dateBrut = extract_text(r.xpath('.//div[@class="topic_details floatleft"]/span/em')).encode('utf-8') + except: + dateBrut = "01 janvier 1970 à 01:01:01".encode('utf-8') + date = dateBrut.split(' ') + year = date[2] + day = date[0] + french_text_month = date[1] + time = date[4] + #dataBrut = extract_text(r.xpath('.//span[@class="date"]')) + + #data = dataBrut.split('-') + + + + #date = '-'.join(dataS) + #adatetime = data[1] + #data = adatetime.split(',') + #date = data[1] + #Thedate = dataBrut.split(' ') + #TheDay = Thedate[1].split(',') + + + if french_text_month == "janvier": + Month = 1 + elif french_text_month.decode('utf-8') == "février".decode('utf-8'): + Month = 2 + elif french_text_month == "mars": + Month = 3 + elif french_text_month == "avril": + Month = 4 + elif french_text_month == "mai": + Month = 5 + elif french_text_month == "juin": + Month = 6 + elif french_text_month == "juillet": + Month = 7 + elif french_text_month.decode('utf-8') == "août".decode('utf-8'): + Month = 8 + elif french_text_month == "septembre": + Month = 9 + elif french_text_month == "octobre": + Month = 10 + elif french_text_month == "novembre": + Month = 11 + else: + Month = 12 + + + # append result + results.append({'title': title, + 'content': content, + 'url': base_url + res_url, + 'publishedDate': datetime(int(year), Month, int(day), 3, 1, 42)}) + + + + # Search results + #for r in doc.xpath('//dl[@class="search_results"]/*'): + # try: + # if r.tag == "dt": + # res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + # title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + # elif r.tag == "dd": + # content = extract_text(r.xpath('.')) + + # append result + # results.append({'title': title, + # 'content': content, + # 'url': base_url + res_url}) + # except: + # continue + + # if not res_url: + # continue + + # return results + return results diff --git a/src/khaganatWikhan.py b/src/khaganatWikhan.py new file mode 100644 index 0000000..083696c --- /dev/null +++ b/src/khaganatWikhan.py @@ -0,0 +1,84 @@ +# Doku Wiki +# +# @website https://www.dokuwiki.org/ +# @provide-api yes +# (https://www.dokuwiki.org/devel:xmlrpc) +# +# @using-api no +# @results HTML +# @stable yes +# @parse (general) url, title, content + +from urllib import urlencode +from lxml.html import fromstring +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' +paging = False +language_support = False +number_of_results = 5 + +# search-url +# Doku is 
OpenSearch compatible +base_url = 'https://khaganat.net' +search_url = '/wikhan/?do=search'\ + '&{query}' +# TODO '&startRecord={offset}'\ +# TODO '&maximumRecords={limit}'\ + + +# do search-request +def request(query, params): + + params['url'] = base_url +\ + search_url.format(query=urlencode({'id': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + doc = fromstring(resp.text) + + # parse results + # Quickhits + for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'): + try: + res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + except: + continue + + if not res_url: + continue + + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + + # append result + results.append({'title': title, + 'content': "", + 'url': base_url + res_url}) + + # Search results + for r in doc.xpath('//dl[@class="search_results"]/*'): + try: + if r.tag == "dt": + res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + elif r.tag == "dd": + content = extract_text(r.xpath('.')) + + # append result + results.append({'title': title, + 'content': content, + 'url': base_url + res_url}) + except: + continue + + if not res_url: + continue + + # return results + return results diff --git a/src/ryzomcoreConfluence.py b/src/ryzomcoreConfluence.py new file mode 100644 index 0000000..5d35c9f --- /dev/null +++ b/src/ryzomcoreConfluence.py @@ -0,0 +1,130 @@ +# Doku Wiki +# +# @website https://www.dokuwiki.org/ +# @provide-api yes +# (https://www.dokuwiki.org/devel:xmlrpc) +# +# @using-api no +# @results HTML +# @stable yes +# @parse (general) url, title, content + +from urllib import urlencode +from lxml.html import fromstring +from searx.engines.xpath import extract_text +from datetime import datetime + +# engine dependent config +categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' +paging = False +language_support = False +number_of_results = 5 + +# search-url +# Doku is OpenSearch compatible +base_url = 'https://ryzomcore.atlassian.net' +search_url = '/wiki/dosearchsite.action?{query}' +#search_url = '/wikhan/?do=search'\ +# '&{query}' +# TODO '&startRecord={offset}'\ +# TODO '&maximumRecords={limit}'\ + + +# do search-request +def request(query, params): + + params['url'] = base_url +\ + search_url.format(query=urlencode({'queryString': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + doc = fromstring(resp.text) + + # parse results + # Quickhits + for r in doc.xpath('//ol[@class="search-results cql"]/li'): + try: + res_url = r.xpath('.//a[@class="search-result-link visitable"]/@href')[-1] + except: + continue + + if not res_url: + continue + + title = extract_text(r.xpath('.//a[@class="search-result-link visitable"]')) + + content = extract_text(r.xpath('.//div[@class="highlights"]')) + + dataBrut = extract_text(r.xpath('.//span[@class="date"]')) + + #data = dataBrut.split('-') + + + + #date = '-'.join(dataS) + #adatetime = data[1] + #data = adatetime.split(',') + #date = data[1] + Thedate = dataBrut.split(' ') + TheDay = Thedate[1].split(',') + + + if Thedate[0] == "Jan": + ThedateMonth = 1 + elif Thedate[0] == "Feb": + ThedateMonth = 2 + elif Thedate[0] == "Mar": + ThedateMonth = 3 + elif Thedate[0] == "Apr": + ThedateMonth = 4 + elif Thedate[0] == "May": + ThedateMonth = 5 + elif Thedate[0] == "Jun": + ThedateMonth = 6 + elif Thedate[0] == "Jul": + ThedateMonth = 7 + elif Thedate[0] == "Aug": + 
ThedateMonth = 8 + elif Thedate[0] == "Sep": + ThedateMonth = 9 + elif Thedate[0] == "Oct": + ThedateMonth = 10 + elif Thedate[0] == "Nov": + ThedateMonth = 11 + else: + ThedateMonth = 12 + + # append result + results.append({'title': title, + 'content': content, + 'url': base_url + res_url, + 'publishedDate': datetime(int(Thedate[2]), ThedateMonth, int(TheDay[0]), 3, 1, 42)}) + + + + # Search results + #for r in doc.xpath('//dl[@class="search_results"]/*'): + # try: + # if r.tag == "dt": + # res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + # title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + # elif r.tag == "dd": + # content = extract_text(r.xpath('.')) + + # append result + # results.append({'title': title, + # 'content': content, + # 'url': base_url + res_url}) + # except: + # continue + + # if not res_url: + # continue + + # return results + return results diff --git a/src/settings.yml b/src/settings.yml new file mode 100644 index 0000000..db1cf22 --- /dev/null +++ b/src/settings.yml @@ -0,0 +1,108 @@ +general: + debug : False # Debug mode, only for development + instance_name : "searx" # displayed name + +search: + safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict + autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default + language : "all" + +server: + port : 8888 + bind_address : "127.0.0.1" # address to listen on + secret_key : "ultrasecretkey" # change this! + base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/" + image_proxy : False # Proxying image results through searx + http_protocol_version : "1.0" # 1.0 and 1.1 are supported + +ui: + static_path : "" # Custom static path - leave it blank if you didn't change + templates_path : "" # Custom templates path - leave it blank if you didn't change + default_theme : oscar # ui theme + default_locale : "" # Default interface locale - leave blank to detect from browser information or use codes from the 'locales' config section + +# searx supports result proxification using an external service: https://github.com/asciimoo/morty +# uncomment below section if you have running morty proxy +#result_proxy: +# url : http://127.0.0.1:3000/ +# key : your_morty_proxy_key + +outgoing: # communication with search engines + request_timeout : 2.0 # seconds + useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator + pool_connections : 100 # Number of different hosts + pool_maxsize : 10 # Number of simultaneous requests by host +# uncomment below section if you want to use a proxy +# see http://docs.python-requests.org/en/latest/user/advanced/#proxies +# SOCKS proxies are also supported: see http://docs.python-requests.org/en/master/user/advanced/#socks +# proxies : +# http : http://127.0.0.1:8080 +# https: http://127.0.0.1:8080 +# uncomment below section only if you have more than one network interface +# which can be the source of outgoing search requests +# source_ips: +# - 1.1.1.1 +# - 1.1.1.2 + +engines: + - name : khaganat.net/wikhan + engine : khaganatWikhan + shortcut : wik + + - name : ryzomcore.atlassian.net/wiki + engine : ryzomcoreConfluence + shortcut : rcc + + - name : forge.ryzom.com + engine : forgeryzom + shortcut: fr + + - name: dev.ryzom.com + engine: devryzom + shortcut: dr + + - name: khaganat.net/forum + engine: khaganatForum + shortcut: kf + + - name: app.ryzom.com app forum + 
engine: RyzomForum + shortcut: rc + + +locales: + en : English + ar : العَرَبِيَّة (Arabic) + bg : Български (Bulgarian) + cs : Čeština (Czech) + da : Dansk (Danish) + de : Deutsch (German) + el_GR : Ελληνικά (Greek_Greece) + eo : Esperanto (Esperanto) + es : Español (Spanish) + fi : Suomi (Finnish) + fr : Français (French) + he : עברית (Hebrew) + hr : Hrvatski (Croatian) + hu : Magyar (Hungarian) + it : Italiano (Italian) + ja : 日本語 (Japanese) + nl : Nederlands (Dutch) + pt : Português (Portuguese) + pt_BR : Português (Portuguese_Brazil) + ro : Română (Romanian) + ru : Русский (Russian) + sk : Slovenčina (Slovak) + sl : Slovenski (Slovene) + sr : српски (Serbian) + sv : Svenska (Swedish) + tr : Türkçe (Turkish) + uk : українська мова (Ukrainian) + zh : 中文 (Chinese) + +doi_resolvers : + oadoi.org : 'https://oadoi.org/' + doi.org : 'https://doi.org/' + doai.io : 'http://doai.io/' + +default_doi_resolver : 'oadoi.org' diff --git a/start.sh b/start.sh new file mode 100755 index 0000000..7745c4b --- /dev/null +++ b/start.sh @@ -0,0 +1,3 @@ +#!/bin/bash +source ./venv/bin/activate +python2.7 searx/searx/webapp.py -- cgit v1.2.1
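
For reference, a minimal end-to-end use of the two helper scripts added by this patch could look like the shell session below. This is only a sketch: the checkout directory name and repository URL placeholder are hypothetical, and it assumes bash, git, virtualenv and Python 2.7 are available, as install.sh expects.

    $ git clone <repository-url> khaganat-searx   # hypothetical clone target
    $ cd khaganat-searx
    $ ./install.sh   # clones searx v0.13.1, symlinks the engines from src/, copies src/settings.yml, installs deps into ./venv
    $ ./start.sh     # activates ./venv and runs searx/searx/webapp.py

With the bundled src/settings.yml the instance listens on http://127.0.0.1:8888, and the custom engines are reachable through the !wik, !rcc, !fr, !dr, !kf and !rc shortcuts.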