# -*- coding: utf-8 -*-
# Khaganat forum search
#
# NOTE(review): this header originally described the Doku Wiki engine; the
# code below queries the search page of the khaganat.net forum instead.
#
# @website     https://khaganat.net/
# @provide-api unknown (TODO confirm)
#
# @using-api   no
# @results     HTML
# @stable      yes
# @parse       (general) url, title, content, publishedDate

try:
    from urllib import urlencode  # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3

from lxml.html import fromstring
from searx.engines.xpath import extract_text
from datetime import datetime

# engine dependent config
categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'
paging = False
language_support = False
number_of_results = 5

# search-url
base_url = 'https://khaganat.net'
search_url = '/forum/index.php?action=search2&{query}'
# TODO             '&startRecord={offset}'\
# TODO             '&maximumRecords={limit}'\

# French month names as they appear in the forum's post dates.
_FRENCH_MONTHS = {
    u'janvier': 1,
    u'février': 2,
    u'mars': 3,
    u'avril': 4,
    u'mai': 5,
    u'juin': 6,
    u'juillet': 7,
    u'août': 8,
    u'septembre': 9,
    u'octobre': 10,
    u'novembre': 11,
    u'décembre': 12,
}

# The time of day is not parsed from the page.  The original implementation
# hard-coded 03:01:42; the value is kept so emitted dates do not change.
_PLACEHOLDER_TIME = (3, 1, 42)

# XPath fragments shared by the result extraction below.
_RESULTS_XPATH = '//div[@id="main_content_section"]/div/div/div'
_DETAILS_XPATH = './/div[@class="topic_details floatleft"]'
_CONTENT_XPATH = './/div[@class="list_posts double_height"]'


# do search-request
def request(query, params):
    """Fill ``params['url']`` with the forum search URL for *query*.

    The query is URL-encoded as the ``search`` parameter and appended to
    ``base_url + search_url``.  Returns the same *params* dict.
    """
    params['url'] = base_url + \
        search_url.format(query=urlencode({'search': query}))

    return params


def _parse_published_date(raw_date):
    """Parse text starting with ``<day> <French month> <year>`` into a datetime.

    Returns ``None`` when the text has fewer than three tokens, the month
    name is not a known French month, or day/year are not valid numbers.
    Unknown months are rejected rather than being treated as December.
    """
    if isinstance(raw_date, bytes):
        raw_date = raw_date.decode('utf-8')

    tokens = raw_date.split()
    if len(tokens) < 3:
        return None

    day, month_name, year = tokens[:3]
    month = _FRENCH_MONTHS.get(month_name.lower())
    if month is None:
        return None

    try:
        return datetime(int(year), month, int(day), *_PLACEHOLDER_TIME)
    except ValueError:
        # non-numeric day/year, or an impossible calendar date
        return None


# get response from search-request
def response(resp):
    """Extract search results from the HTML in ``resp.text``.

    Returns a list of dicts with the keys ``title``, ``content`` and ``url``.
    ``publishedDate`` is added only when the post date could be parsed.
    Candidate nodes that lack a result link are skipped.
    """
    results = []

    doc = fromstring(resp.text)

    for node in doc.xpath(_RESULTS_XPATH):
        try:
            # [-1] raises IndexError for nodes that are not search results
            res_url = node.xpath(_DETAILS_XPATH + '/h5/a/@href')[-1]
            title = extract_text(node.xpath(_DETAILS_XPATH + '/h5'))
            content = extract_text(node.xpath(_CONTENT_XPATH))

            date_nodes = node.xpath(_DETAILS_XPATH + '/span/em')
            published = None
            if date_nodes:
                published = _parse_published_date(extract_text(date_nodes))
        except Exception:
            # Best effort: a malformed entry must not break the whole page.
            # Unlike a bare ``except:`` this lets KeyboardInterrupt and
            # SystemExit propagate.
            continue

        result = {'title': title,
                  'content': content,
                  'url': res_url}
        if published is not None:
            result['publishedDate'] = published

        # append result
        results.append(result)

    # return results
    return results