From 70eea5c1874ef3fffe59c5ab0a03dae25beb2d25 Mon Sep 17 00:00:00 2001
From: neodarz
Date: Sun, 21 Jan 2018 05:50:43 +0100
Subject: Add code to build and start a preconfigured searx directly

---
 src/RyzomForum.py          |  84 ++++++++++++++++++++++++++
 src/devryzom.py            | 106 ++++++++++++++++++++++++++++++++
 src/forgeryzom.py          | 126 ++++++++++++++++++++++++++++++++++++++
 src/khaganatForum.py       | 146 +++++++++++++++++++++++++++++++++++++++++++++
 src/khaganatWikhan.py      |  84 ++++++++++++++++++++++++++
 src/ryzomcoreConfluence.py | 130 ++++++++++++++++++++++++++++++++++++++++
 src/settings.yml           | 108 +++++++++++++++++++++++++++++++++
 7 files changed, 784 insertions(+)
 create mode 100644 src/RyzomForum.py
 create mode 100644 src/devryzom.py
 create mode 100644 src/forgeryzom.py
 create mode 100644 src/khaganatForum.py
 create mode 100644 src/khaganatWikhan.py
 create mode 100644 src/ryzomcoreConfluence.py
 create mode 100644 src/settings.yml

diff --git a/src/RyzomForum.py b/src/RyzomForum.py
new file mode 100644
index 0000000..083696c
--- /dev/null
+++ b/src/RyzomForum.py
@@ -0,0 +1,84 @@
+# Doku Wiki
+#
+# @website     https://www.dokuwiki.org/
+# @provide-api yes
+#              (https://www.dokuwiki.org/devel:xmlrpc)
+#
+# @using-api   no
+# @results     HTML
+# @stable      yes
+# @parse       (general) url, title, content
+#
+# NOTE: settings.yml registers this engine for the app.ryzom.com forum, but
+# the code is still an unmodified copy of khaganatWikhan.py: it queries the
+# khaganat.net DokuWiki and parses DokuWiki markup. It has to be adapted
+# before it can actually search app.ryzom.com.
+
+from urllib import urlencode
+from lxml.html import fromstring
+from searx.engines.xpath import extract_text
+
+# engine dependent config
+categories = ['general']  # TODO 'images', 'music', 'videos', 'files'
+paging = False
+language_support = False
+number_of_results = 5
+
+# search-url
+# DokuWiki is OpenSearch compatible
+base_url = 'https://khaganat.net'
+search_url = '/wikhan/?do=search'\
+    '&{query}'
+# TODO '&startRecord={offset}'\
+# TODO '&maximumRecords={limit}'\
+
+
+# do search-request
+def request(query, params):
+    params['url'] = base_url +\
+        search_url.format(query=urlencode({'id': query}))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    doc = fromstring(resp.text)
+
+    # parse results
+    # quickhits (page-name matches)
+    for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'):
+        try:
+            res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
+        except IndexError:
+            continue
+
+        if not res_url:
+            continue
+
+        title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
+
+        # append result
+        results.append({'title': title,
+                        'content': "",
+                        'url': base_url + res_url})
+
+    # fulltext results: each <dt> carries the link, the following <dd> the
+    # snippet, so remember the link until the matching snippet arrives
+    res_url = None
+    title = None
+    for r in doc.xpath('//dl[@class="search_results"]/*'):
+        try:
+            if r.tag == "dt":
+                res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
+                title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
+            elif r.tag == "dd" and res_url:
+                content = extract_text(r.xpath('.'))
+
+                # append result
+                results.append({'title': title,
+                                'content': content,
+                                'url': base_url + res_url})
+        except IndexError:
+            continue
+
+    # return results
+    return results
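The engine above follows searx's two-function contract: request() builds the outgoing URL and response() turns the fetched page into result dicts. request() can be checked from a plain Python 2 shell without running searx; a minimal sketch, assuming the file has been copied into the searx/engines/ package so the import resolves (the query string is an arbitrary example):

    from searx.engines import RyzomForum

    # request() only fills in params['url'], so a bare dict is enough here
    params = RyzomForum.request('mon general', {'url': None})
    print(params['url'])
    # expected: https://khaganat.net/wikhan/?do=search&id=mon+general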
diff --git a/src/devryzom.py b/src/devryzom.py
new file mode 100644
index 0000000..7201096
--- /dev/null
+++ b/src/devryzom.py
@@ -0,0 +1,106 @@
+# Redmine (dev.ryzom.com)
+#
+# @website     http://dev.ryzom.com
+# @provide-api no (the HTML search page is scraped)
+#
+# @using-api   no
+# @results     HTML
+# @stable      yes
+# @parse       (general) url, title, content, publishedDate
+
+from urllib import urlencode
+from lxml.html import fromstring
+from searx.engines.xpath import extract_text
+from datetime import datetime
+
+# engine dependent config
+categories = ['general']  # TODO 'images', 'music', 'videos', 'files'
+paging = False
+language_support = False
+number_of_results = 5
+
+# search-url
+base_url = 'http://dev.ryzom.com'
+search_url = '/search?wiki_pages=1&{query}'
+# TODO '&startRecord={offset}'\
+# TODO '&maximumRecords={limit}'\
+
+
+# do search-request
+def request(query, params):
+    params['url'] = base_url +\
+        search_url.format(query=urlencode({'q': query}))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    doc = fromstring(resp.text)
+
+    # Redmine renders one <dt> (title link) and one <dd> (description,
+    # author, date) per hit, so walk the two lists in step
+    titles = doc.xpath('//dl[@id="search-results"]/dt')
+    details = doc.xpath('//dl[@id="search-results"]/dd')
+
+    for r, s in zip(titles, details):
+        try:
+            res_url = r.xpath('.//a/@href')[-1]
+        except IndexError:
+            continue
+
+        if not res_url:
+            continue
+
+        title = extract_text(r.xpath('.//a'))
+        content = extract_text(s.xpath('.//span[@class="description"]'))
+
+        # the author span starts with an "MM/DD/YYYY" date
+        dataBrut = extract_text(s.xpath('.//span[@class="author"]'))
+        month, day, year = dataBrut.split(' ')[0].split('/')
+
+        # append result
+        results.append({'title': title,
+                        'content': content,
+                        'url': base_url + res_url,
+                        'publishedDate': datetime(int(year), int(month), int(day), 0, 0, 0)})
+
+    return results
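response() can be smoke-tested the same way by feeding it a canned page instead of a live dev.ryzom.com answer. A sketch, again assuming the module sits in searx/engines/; the HTML fragment is a hand-written approximation of Redmine's search markup (one dt/dd pair per hit), not a captured page:

    from collections import namedtuple

    from searx.engines import devryzom

    # the engine only reads resp.text, so a namedtuple stands in for the
    # real requests response object
    Resp = namedtuple('Resp', ['text'])
    html = '''
    <dl id="search-results">
      <dt><a href="/issues/42">Sample issue</a></dt>
      <dd><span class="description">Some descriptive text</span>
          <span class="author">01/21/2018 05:50</span></dd>
    </dl>
    '''

    for hit in devryzom.response(Resp(text=html)):
        print(hit['url'], hit['publishedDate'])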
diff --git a/src/forgeryzom.py b/src/forgeryzom.py
new file mode 100644
index 0000000..850ae43
--- /dev/null
+++ b/src/forgeryzom.py
@@ -0,0 +1,126 @@
+# MediaWiki (forge.ryzom.com)
+#
+# @website     http://forge.ryzom.com
+# @provide-api yes (MediaWiki api.php)
+#
+# @using-api   no
+# @results     HTML
+# @stable      yes
+# @parse       (general) url, title, content, publishedDate
+
+from urllib import urlencode
+from lxml.html import fromstring
+from searx.engines.xpath import extract_text
+from datetime import datetime
+
+# engine dependent config
+categories = ['general']  # TODO 'images', 'music', 'videos', 'files'
+paging = False
+language_support = False
+number_of_results = 5
+
+# search-url
+# FIXME: api.php with action=query answers with API data (XML/JSON),
+# while response() below parses the HTML of the Special:Search results
+# page; one of the two sides has to be adjusted before this engine works.
+base_url = 'http://forge.ryzom.com'
+search_url = '/wiki/W/api.php?action=query'\
+    '&{query}'
+# TODO '&startRecord={offset}'\
+# TODO '&maximumRecords={limit}'\
+
+# month-name lookup; unknown names fall back to December, like the
+# if/elif chain this table replaces
+MONTH_NUMBER = {'January': 1, 'February': 2, 'March': 3, 'April': 4,
+                'May': 5, 'June': 6, 'July': 7, 'August': 8,
+                'September': 9, 'October': 10, 'November': 11,
+                'December': 12}
+
+
+# do search-request
+def request(query, params):
+    params['url'] = base_url +\
+        search_url.format(query=urlencode({'search': query}))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    doc = fromstring(resp.text)
+
+    # parse results
+    for r in doc.xpath('//ul[@class="mw-search-results"]/li'):
+        try:
+            res_url = r.xpath('.//div[@class="mw-search-result-heading"]/a/@href')[-1]
+        except IndexError:
+            continue
+
+        if not res_url:
+            continue
+
+        title = extract_text(r.xpath('.//div[@class="mw-search-result-heading"]/a/@title'))
+        content = extract_text(r.xpath('.//div[@class="searchresult"]'))
+
+        # the result-data line ends with "... - hh:mm, DD Month YYYY"
+        dataBrut = extract_text(r.xpath('.//div[@class="mw-search-result-data"]'))
+        day, month_name, year = dataBrut.split('-')[1].split(',')[1].split(' ')[1:4]
+
+        # append result (MediaWiki does not publish a full timestamp here,
+        # hence the arbitrary 03:01:42)
+        results.append({'title': title,
+                        'content': content,
+                        'url': base_url + res_url,
+                        'publishedDate': datetime(int(year), MONTH_NUMBER.get(month_name, 12), int(day), 3, 1, 42)})
+
+    return results
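As an alternative to the MONTH_NUMBER table, Python's own parser understands English month names as long as the process runs under the default C locale; a sketch of that option, not what the engine currently does:

    from datetime import datetime

    # '%d %B %Y' matches strings such as '21 January 2018'
    published = datetime.strptime('21 January 2018', '%d %B %Y')

The table form was kept in the engine because strptime raises ValueError on unexpected input, while the .get(name, 12) lookup degrades silently, matching the behavior of the original if/elif chain.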
diff --git a/src/khaganatForum.py b/src/khaganatForum.py
new file mode 100644
index 0000000..781a1cf
--- /dev/null
+++ b/src/khaganatForum.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+# SMF forum (khaganat.net)
+#
+# @website     https://khaganat.net/forum
+# @provide-api no (the HTML search page is scraped)
+#
+# @using-api   no
+# @results     HTML
+# @stable      yes
+# @parse       (general) url, title, content, publishedDate
+
+from urllib import urlencode
+from lxml.html import fromstring
+from searx.engines.xpath import extract_text
+from datetime import datetime
+
+# engine dependent config
+categories = ['general']  # TODO 'images', 'music', 'videos', 'files'
+paging = False
+language_support = False
+number_of_results = 5
+
+# search-url
+base_url = 'https://khaganat.net'
+search_url = '/forum/index.php?action=search2&{query}'
+# TODO '&startRecord={offset}'\
+# TODO '&maximumRecords={limit}'\
+
+# the forum is localised in French; unknown names fall back to December,
+# like the if/elif chain this table replaces
+FRENCH_MONTH = {u'janvier': 1, u'février': 2, u'mars': 3, u'avril': 4,
+                u'mai': 5, u'juin': 6, u'juillet': 7, u'août': 8,
+                u'septembre': 9, u'octobre': 10, u'novembre': 11,
+                u'décembre': 12}
+
+
+# do search-request
+def request(query, params):
+    params['url'] = base_url +\
+        search_url.format(query=urlencode({'search': query}))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    doc = fromstring(resp.text)
+
+    # parse results
+    for r in doc.xpath('//div[@id="main_content_section"]/div/div/div'):
+        try:
+            res_url = r.xpath('.//div[@class="topic_details floatleft"]/h5/a/@href')[-1]
+        except IndexError:
+            continue
+
+        if not res_url:
+            continue
+
+        try:
+            title = extract_text(r.xpath('.//div[@class="topic_details floatleft"]/h5'))
+        except Exception:
+            continue
+
+        try:
+            content = extract_text(r.xpath('.//div[@class="list_posts double_height"]'))
+        except Exception:
+            content = ""
+
+        # the post date looks like "21 janvier 2018 à 05:50:43"
+        try:
+            date_text = extract_text(r.xpath('.//div[@class="topic_details floatleft"]/span/em'))
+        except Exception:
+            date_text = u'01 janvier 1970 à 01:01:01'
+
+        date = date_text.split(' ')
+        day, month_name, year = date[0], date[1], date[2]
+
+        # append result (the time of day is dropped, hence the arbitrary
+        # 03:01:42)
+        results.append({'title': title,
+                        'content': content,
+                        'url': base_url + res_url,
+                        'publishedDate': datetime(int(year), FRENCH_MONTH.get(month_name, 12), int(day), 3, 1, 42)})
+
+    return results
diff --git a/src/khaganatWikhan.py b/src/khaganatWikhan.py
new file mode 100644
index 0000000..083696c
--- /dev/null
+++ b/src/khaganatWikhan.py
@@ -0,0 +1,84 @@
+# Doku Wiki
+#
+# @website     https://www.dokuwiki.org/
+# @provide-api yes
+#              (https://www.dokuwiki.org/devel:xmlrpc)
+#
+# @using-api   no
+# @results     HTML
+# @stable      yes
+# @parse       (general) url, title, content
+
+from urllib import urlencode
+from lxml.html import fromstring
+from searx.engines.xpath import extract_text
+
+# engine dependent config
+categories = ['general']  # TODO 'images', 'music', 'videos', 'files'
+paging = False
+language_support = False
+number_of_results = 5
+
+# search-url
+# DokuWiki is OpenSearch compatible
+base_url = 'https://khaganat.net'
+search_url = '/wikhan/?do=search'\
+    '&{query}'
+# TODO '&startRecord={offset}'\
+# TODO '&maximumRecords={limit}'\
+
+
+# do search-request
+def request(query, params):
+    params['url'] = base_url +\
+        search_url.format(query=urlencode({'id': query}))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    doc = fromstring(resp.text)
+
+    # parse results
+    # quickhits (page-name matches)
+    for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'):
+        try:
+            res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
+        except IndexError:
+            continue
+
+        if not res_url:
+            continue
+
+        title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
+
+        # append result
+        results.append({'title': title,
+                        'content': "",
+                        'url': base_url + res_url})
+
+    # fulltext results: each <dt> carries the link, the following <dd> the
+    # snippet, so remember the link until the matching snippet arrives
+    res_url = None
+    title = None
+    for r in doc.xpath('//dl[@class="search_results"]/*'):
+        try:
+            if r.tag == "dt":
+                res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
+                title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
+            elif r.tag == "dd" and res_url:
+                content = extract_text(r.xpath('.'))
+
+                # append result
+                results.append({'title': title,
+                                'content': content,
+                                'url': base_url + res_url})
+        except IndexError:
+            continue
+
+    # return results
+    return results
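khaganatWikhan.py is byte-for-byte the same scraper as RyzomForum.py, only registered under another name. A later cleanup could factor the shared DokuWiki parsing into one helper that both engines import; the sketch below uses the hypothetical name dokuwiki_quickhits and only covers the quick-result list:

    from lxml.html import fromstring

    from searx.engines.xpath import extract_text


    def dokuwiki_quickhits(text, base_url):
        # parse the DokuWiki "quickhits" block shared by both engines
        results = []
        for r in fromstring(text).xpath('//div[@class="search_quickresult"]/ul/li'):
            links = r.xpath('.//a[@class="wikilink1"]/@href')
            if not links:
                continue
            title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
            results.append({'title': title,
                            'content': "",
                            'url': base_url + links[-1]})
        return results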
diff --git a/src/ryzomcoreConfluence.py b/src/ryzomcoreConfluence.py
new file mode 100644
index 0000000..5d35c9f
--- /dev/null
+++ b/src/ryzomcoreConfluence.py
@@ -0,0 +1,130 @@
+# Confluence (ryzomcore.atlassian.net)
+#
+# @website     https://ryzomcore.atlassian.net/wiki
+# @provide-api yes (Confluence REST API, not used here)
+#
+# @using-api   no
+# @results     HTML
+# @stable      yes
+# @parse       (general) url, title, content, publishedDate
+
+from urllib import urlencode
+from lxml.html import fromstring
+from searx.engines.xpath import extract_text
+from datetime import datetime
+
+# engine dependent config
+categories = ['general']  # TODO 'images', 'music', 'videos', 'files'
+paging = False
+language_support = False
+number_of_results = 5
+
+# search-url
+base_url = 'https://ryzomcore.atlassian.net'
+search_url = '/wiki/dosearchsite.action?{query}'
+# TODO '&startRecord={offset}'\
+# TODO '&maximumRecords={limit}'\
+
+# abbreviated month lookup; unknown names fall back to December, like the
+# if/elif chain this table replaces
+MONTH_NUMBER = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4,
+                'May': 5, 'Jun': 6, 'Jul': 7, 'Aug': 8,
+                'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}
+
+
+# do search-request
+def request(query, params):
+    params['url'] = base_url +\
+        search_url.format(query=urlencode({'queryString': query}))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    doc = fromstring(resp.text)
+
+    # parse results
+    for r in doc.xpath('//ol[@class="search-results cql"]/li'):
+        try:
+            res_url = r.xpath('.//a[@class="search-result-link visitable"]/@href')[-1]
+        except IndexError:
+            continue
+
+        if not res_url:
+            continue
+
+        title = extract_text(r.xpath('.//a[@class="search-result-link visitable"]'))
+        content = extract_text(r.xpath('.//div[@class="highlights"]'))
+
+        # the date span looks like "Jan 21, 2018"
+        dataBrut = extract_text(r.xpath('.//span[@class="date"]'))
+        month_name, day, year = dataBrut.replace(',', '').split(' ')
+
+        # append result (no time of day is published, hence the arbitrary
+        # 03:01:42)
+        results.append({'title': title,
+                        'content': content,
+                        'url': base_url + res_url,
+                        'publishedDate': datetime(int(year), MONTH_NUMBER.get(month_name, 12), int(day), 3, 1, 42)})
+
+    return results
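Scraping dosearchsite.action is fragile because Atlassian is free to change that markup. Confluence also ships a JSON search API that a future version of this engine could use instead; a sketch only, assuming the instance keeps the standard Cloud path /wiki/rest/api/search, allows anonymous access, and returns entries with a title field:

    import requests

    # CQL full-text query against the hypothetical standard endpoint
    r = requests.get('https://ryzomcore.atlassian.net/wiki/rest/api/search',
                     params={'cql': 'siteSearch ~ "pipeline"', 'limit': 5})
    for hit in r.json().get('results', []):
        print(hit.get('title'))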
diff --git a/src/settings.yml b/src/settings.yml
new file mode 100644
index 0000000..db1cf22
--- /dev/null
+++ b/src/settings.yml
@@ -0,0 +1,108 @@
+general:
+    debug : False # Debug mode, only for development
+    instance_name : "searx" # displayed name
+
+search:
+    safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict
+    autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default
+    language : "all"
+
+server:
+    port : 8888
+    bind_address : "127.0.0.1" # address to listen on
+    secret_key : "ultrasecretkey" # change this!
+    base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
+    image_proxy : False # Proxy image results through searx
+    http_protocol_version : "1.0" # 1.0 and 1.1 are supported
+
+ui:
+    static_path : "" # Custom static path - leave blank if you did not change it
+    templates_path : "" # Custom templates path - leave blank if you did not change it
+    default_theme : oscar # ui theme
+    default_locale : "" # Default interface locale - leave blank to detect from browser information or use codes from the 'locales' config section
+
+# searx supports result proxification using an external service: https://github.com/asciimoo/morty
+# uncomment below section if you have a running morty proxy
+#result_proxy:
+#    url : http://127.0.0.1:3000/
+#    key : your_morty_proxy_key
+
+outgoing: # communication with search engines
+    request_timeout : 2.0 # seconds
+    useragent_suffix : "" # suffix of searx_useragent, can contain information such as a contact address for the administrator
+    pool_connections : 100 # Number of different hosts
+    pool_maxsize : 10 # Number of simultaneous requests by host
+# uncomment below section if you want to use a proxy
+# see http://docs.python-requests.org/en/latest/user/advanced/#proxies
+# SOCKS proxies are also supported: see http://docs.python-requests.org/en/master/user/advanced/#socks
+#    proxies :
+#        http : http://127.0.0.1:8080
+#        https: http://127.0.0.1:8080
+# uncomment below section only if you have more than one network interface
+# which can be the source of outgoing search requests
+#    source_ips:
+#        - 1.1.1.1
+#        - 1.1.1.2
+
+engines:
+  - name : khaganat.net/wikhan
+    engine : khaganatWikhan
+    shortcut : wik
+
+  - name : ryzomcore.atlassian.net/wiki
+    engine : ryzomcoreConfluence
+    shortcut : rcc
+
+  - name : forge.ryzom.com
+    engine : forgeryzom
+    shortcut : fr
+
+  - name : dev.ryzom.com
+    engine : devryzom
+    shortcut : dr
+
+  - name : khaganat.net/forum
+    engine : khaganatForum
+    shortcut : kf
+
+  - name : app.ryzom.com forum
+    engine : RyzomForum
+    shortcut : rc
+
+locales:
+    en : English
+    ar : العَرَبِيَّة (Arabic)
+    bg : Български (Bulgarian)
+    cs : Čeština (Czech)
+    da : Dansk (Danish)
+    de : Deutsch (German)
+    el_GR : Ελληνικά (Greek_Greece)
+    eo : Esperanto (Esperanto)
+    es : Español (Spanish)
+    fi : Suomi (Finnish)
+    fr : Français (French)
+    he : עברית (Hebrew)
+    hr : Hrvatski (Croatian)
+    hu : Magyar (Hungarian)
+    it : Italiano (Italian)
+    ja : 日本語 (Japanese)
+    nl : Nederlands (Dutch)
+    pt : Português (Portuguese)
+    pt_BR : Português (Portuguese_Brazil)
+    ro : Română (Romanian)
+    ru : Русский (Russian)
+    sk : Slovenčina (Slovak)
+    sl : Slovenski (Slovene)
+    sr : српски (Serbian)
+    sv : Svenska (Swedish)
+    tr : Türkçe (Turkish)
+    uk : українська мова (Ukrainian)
+    zh : 中文 (Chinese)
+
+doi_resolvers :
+    oadoi.org : 'https://oadoi.org/'
+    doi.org : 'https://doi.org/'
+    doai.io : 'http://doai.io/'
+
+default_doi_resolver : 'oadoi.org'
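With this settings.yml in place, searx listens on 127.0.0.1:8888 and each engine is reachable through its shortcut used as a bang prefix in the query. A quick way to confirm the wiring once the instance is running, sketched with requests against the plain HTML endpoint (machine-readable output would need searx's extra format configuration):

    import requests

    # '!wik' restricts the query to the khaganat.net/wikhan engine
    r = requests.get('http://127.0.0.1:8888/search',
                     params={'q': '!wik kami'})
    print(r.status_code)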