From 70eea5c1874ef3fffe59c5ab0a03dae25beb2d25 Mon Sep 17 00:00:00 2001
From: neodarz
Date: Sun, 21 Jan 2018 05:50:43 +0100
Subject: Add code to directly build and start a preconfigured searx

---
 forgeryzom.py | 126 --------------------------------------------------------
 1 file changed, 126 deletions(-)
 delete mode 100644 forgeryzom.py

diff --git a/forgeryzom.py b/forgeryzom.py
deleted file mode 100644
index 850ae43..0000000
--- a/forgeryzom.py
+++ /dev/null
@@ -1,126 +0,0 @@
-# Doku Wiki
-#
-# @website     https://www.dokuwiki.org/
-# @provide-api yes
-#              (https://www.dokuwiki.org/devel:xmlrpc)
-#
-# @using-api   no
-# @results     HTML
-# @stable      yes
-# @parse       (general) url, title, content
-
-from urllib import urlencode
-from lxml.html import fromstring
-from searx.engines.xpath import extract_text
-from datetime import datetime
-
-# engine dependent config
-categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'
-paging = False
-language_support = False
-number_of_results = 5
-
-# search-url
-# Doku is OpenSearch compatible
-base_url = 'http://forge.ryzom.com'
-search_url = '/wiki/W/api.php?action=query'\
-             '&{query}'
-# TODO '&startRecord={offset}'\
-# TODO '&maximumRecords={limit}'\
-
-
-# do search-request
-def request(query, params):
-
-    params['url'] = base_url +\
-        search_url.format(query=urlencode({'search': query}))
-
-    return params
-
-
-# get response from search-request
-def response(resp):
-    results = []
-
-    doc = fromstring(resp.text)
-
-    # parse results
-    # Quickhits
-    for r in doc.xpath('//ul[@class="mw-search-results"]/li'):
-        try:
-            res_url = r.xpath('.//div[@class="mw-search-result-heading"]/a/@href')[-1]
-        except:
-            continue
-
-        if not res_url:
-            continue
-
-        title = extract_text(r.xpath('.//div[@class="mw-search-result-heading"]/a/@title'))
-
-        content = extract_text(r.xpath('.//div[@class="searchresult"]'))
-
-        dataBrut = extract_text(r.xpath('.//div[@class="mw-search-result-data"]'))
-
-        data = dataBrut.split('-')
-
-
-
-        #date = '-'.join(dataS)
-        adatetime = data[1]
-        data = adatetime.split(',')
-        date = data[1]
-        Thedate = date.split(' ')
-
-
-        if Thedate[2] == "January":
-            ThedateMonth = 1
-        elif Thedate[2] == "February":
-            ThedateMonth = 2
-        elif Thedate[2] == "March":
-            ThedateMonth = 3
-        elif Thedate[2] == "April":
-            ThedateMonth = 4
-        elif Thedate[2] == "May":
-            ThedateMonth = 5
-        elif Thedate[2] == "June":
-            ThedateMonth = 6
-        elif Thedate[2] == "July":
-            ThedateMonth = 7
-        elif Thedate[2] == "August":
-            ThedateMonth = 8
-        elif Thedate[2] == "September":
-            ThedateMonth = 9
-        elif Thedate[2] == "October":
-            ThedateMonth = 10
-        elif Thedate[2] == "November":
-            ThedateMonth = 11
-        else:
-            ThedateMonth = 12
-
-        # append result
-        results.append({'title': title,
-                        'content': content,
-                        'url': base_url + res_url,
-                        'publishedDate': datetime(int(Thedate[3]), ThedateMonth, int(Thedate[1]), 3, 1, 42)})
-
-    # Search results
-    #for r in doc.xpath('//dl[@class="search_results"]/*'):
-    #    try:
-    #        if r.tag == "dt":
-    #            res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
-    #            title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
-    #        elif r.tag == "dd":
-    #            content = extract_text(r.xpath('.'))
-
-            # append result
-    #            results.append({'title': title,
-    #                            'content': content,
-    #                            'url': base_url + res_url})
-    #    except:
-    #        continue
-
-    #    if not res_url:
-    #        continue
-
-    # return results
-    return results
--
cgit v1.2.1
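
Note on the date handling in the deleted response(): despite the "Doku Wiki" header
comment, the XPath targets MediaWiki markup (mw-search-results), and the publication
date is recovered with a twelve-branch if/elif chain that maps any unrecognized month
name to December and a bare except: that swallows every error. A minimal sketch of a
tighter alternative is below. It assumes the mw-search-result-data text has the shape
the original indexing implies (something like "5.2 KB (637 words) - 13:41, 22 January
2018"); the sample string and the parse_result_date helper are illustrative, not part
of the deleted engine.

    from datetime import datetime


    def parse_result_date(data_brut):
        """Parse the datetime from a MediaWiki 'mw-search-result-data' text."""
        # keep only the part after " - ", which the original split('-') aimed at
        _, _, tail = data_brut.partition(' - ')
        try:
            # e.g. "13:41, 22 January 2018" -> datetime(2018, 1, 22, 13, 41)
            return datetime.strptime(tail.strip(), '%H:%M, %d %B %Y')
        except ValueError:
            # unrecognized layout: report no date instead of guessing December
            return None


    print(parse_result_date('5.2 KB (637 words) - 13:41, 22 January 2018'))
    # -> 2018-01-22 13:41:00

strptime's %B matches English month names under the default C locale, which is what
MediaWiki emits here, so the whole month table collapses into one format string.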