# Doku Wiki
#
# @website     https://www.dokuwiki.org/
# @provide-api yes
#              (https://www.dokuwiki.org/devel:xmlrpc)
#
# @using-api   no
# @results     HTML
# @stable      yes
# @parse       (general)    url, title, content, publishedDate
#
# NOTE(review): the header and some comments below mention DokuWiki, but the
# code queries '/w/index.php?search=...' and scrapes elements whose classes
# are 'mw-search-results' / 'mw-search-result-*'.  That looks like MediaWiki
# search markup rather than DokuWiki -- confirm and update the header.

from datetime import datetime

try:
    from urllib import urlencode  # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3

from lxml.html import fromstring

from searx.engines.xpath import extract_text

# engine dependent config
categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'
paging = False
language_support = False
number_of_results = 5  # not referenced by the code in this file

# search-url
# Doku is OpenSearch compatible
base_url = 'https://en.wiki.ryzom.com'
search_url = '/w/index.php?{query}'
# TODO             '&startRecord={offset}'\
# TODO             '&maximumRecords={limit}'\

# English month names as they are expected in the result data line.
_MONTHS = {
    'January': 1,
    'February': 2,
    'March': 3,
    'April': 4,
    'May': 5,
    'June': 6,
    'July': 7,
    'August': 8,
    'September': 9,
    'October': 10,
    'November': 11,
    'December': 12,
}


def _parse_published_date(result_data):
    """Extract the publication date from a result's data line.

    The text is expected to look like
    ``<anything> - <time>, <day> <Month> <year>``, for example
    ``"12 KB (1,785 words) - 14:32, 5 March 2019"``.  Only day, month and
    year are used; the time of day of the returned ``datetime`` is the fixed
    placeholder 03:01:42, not the time found in the text.

    Raises ``IndexError`` or ``ValueError`` when the text does not follow
    that layout, including when the month is not an English month name.
    """
    # Text after the first '-' is "<time>, <date>"; text after its first ','
    # is the date itself.
    date_part = result_data.split('-')[1].split(',')[1]

    # Whitespace split so leading or repeated blanks do not shift the fields.
    day, month_name, year = date_part.split()[:3]

    if month_name not in _MONTHS:
        # Refuse to guess: an unknown month must not silently become a
        # valid-looking date.
        raise ValueError('unknown month name: %r' % (month_name,))

    return datetime(int(year), _MONTHS[month_name], int(day), 3, 1, 42)


# do search-request
def request(query, params):
    """Set ``params['url']`` to the search URL for ``query``.

    The URL is ``base_url`` followed by ``search_url`` with ``query``
    URL-encoded as the ``search`` parameter.  Returns the same ``params``
    dict that was passed in.
    """
    params['url'] = base_url + \
        search_url.format(query=urlencode({'search': query}))

    return params


# get response from search-request
def response(resp):
    """Parse the HTML in ``resp.text`` into a list of result dicts.

    Each result has the keys ``title``, ``content``, ``url`` (prefixed with
    ``base_url``) and ``publishedDate``.  Entries from which any of these
    cannot be extracted are skipped, so one malformed entry never fails the
    whole response.
    """
    results = []

    doc = fromstring(resp.text)

    # parse results
    for item in doc.xpath('//ul[@class="mw-search-results"]/li'):
        try:
            res_url = item.xpath(
                './/div[@class="mw-search-result-heading"]/a/@href')[-1]
            title = extract_text(item.xpath(
                './/div[@class="mw-search-result-heading"]/a/@title'))
            content = extract_text(
                item.xpath('.//div[@class="searchresult"]'))
            result_data = extract_text(
                item.xpath('.//div[@class="mw-search-result-data"]'))
            published_date = _parse_published_date(result_data)
        except Exception:
            # Best effort: the extraction helpers and the date parsing can
            # fail in several ways on unexpected markup.  Skip this entry,
            # but let KeyboardInterrupt / SystemExit propagate.
            continue

        # append result
        results.append({'title': title,
                        'content': content,
                        'url': base_url + res_url,
                        'publishedDate': published_date})

    # return results
    return results