# -*- coding: utf-8 -*-
"""
 Khaganat forum (SMF forum search)

 @website     https://khaganat.net
 @provide-api no
 @using-api   no
 @results     HTML
 @stable      no (scrapes forum HTML, which may change)
 @parse       url, title, content, publishedDate
"""

from urllib.parse import urlencode
from datetime import datetime

from lxml.html import fromstring
from searx.engines.xpath import extract_text

# engine dependent config
categories = ['general']  # TODO: 'images', 'music', 'videos', 'files'
paging = False
language_support = False
number_of_results = 5

# search-url (SMF search endpoint)
base_url = 'https://khaganat.net'
search_url = '/forum/index.php?action=search2&{query}'

# The forum renders dates in French ("21 janvier 2018 à 05:50:43");
# map month names to month numbers instead of a long if/elif chain.
_FRENCH_MONTHS = {
    'janvier': 1,
    'février': 2,
    'mars': 3,
    'avril': 4,
    'mai': 5,
    'juin': 6,
    'juillet': 7,
    'août': 8,
    'septembre': 9,
    'octobre': 10,
    'novembre': 11,
    'décembre': 12,
}

# Fallback date string used when a result carries no parsable date.
_EPOCH_DATE_TEXT = '01 janvier 1970 à 01:01:01'


def request(query, params):
    """Build the search request.

    Parameters:
        query  -- the raw user query string
        params -- searx request-params dict; 'url' is filled in

    Returns the mutated ``params`` dict (searx engine convention).
    """
    params['url'] = base_url + search_url.format(
        query=urlencode({'search': query}))
    return params


def _parse_french_date(date_text):
    """Parse a forum date like '21 janvier 2018 à 05:50:43' into a datetime.

    Only day/month/year are used; the time-of-day is fixed at 03:01:42
    to match the original engine's behavior (the forum's time field was
    never wired into the datetime).  Unknown month names fall back to
    December, as the original if/elif chain did.
    """
    parts = date_text.split(' ')
    day = int(parts[0])
    month = _FRENCH_MONTHS.get(parts[1], 12)
    year = int(parts[2])
    return datetime(year, month, day, 3, 1, 42)


def response(resp):
    """Parse the forum's search-results HTML page.

    Parameters:
        resp -- requests-style response object; only ``resp.text`` is read

    Returns a list of searx result dicts with keys:
    title, content, url, publishedDate.
    """
    results = []
    doc = fromstring(resp.text)

    for result in doc.xpath('//div[@id="main_content_section"]/div/div/div'):
        # A result row must at least link to a topic; skip decorative divs.
        try:
            res_url = result.xpath(
                './/div[@class="topic_details floatleft"]/h5/a/@href')[-1]
        except IndexError:
            continue
        if not res_url:
            continue

        title = extract_text(
            result.xpath('.//div[@class="topic_details floatleft"]/h5'))

        # The post excerpt is optional; fall back to an empty snippet.
        content_nodes = result.xpath(
            './/div[@class="list_posts double_height"]')
        content = extract_text(content_nodes) if content_nodes else ''

        # Date is best-effort: fall back to the epoch on any parse failure
        # rather than dropping an otherwise valid result.
        date_text = extract_text(result.xpath(
            './/div[@class="topic_details floatleft"]/span/em'))
        try:
            published = _parse_french_date(date_text or _EPOCH_DATE_TEXT)
        except (IndexError, ValueError):
            published = _parse_french_date(_EPOCH_DATE_TEXT)

        results.append({
            'title': title,
            'content': content,
            'url': base_url + res_url,
            'publishedDate': published,
        })

    return results