diff options
author | neodarz <neodarz@neodarz.net> | 2018-01-21 05:50:43 +0100 |
---|---|---|
committer | neodarz <neodarz@neodarz.net> | 2018-01-21 05:50:43 +0100 |
commit | 70eea5c1874ef3fffe59c5ab0a03dae25beb2d25 (patch) | |
tree | dae09eba5bcee087d1d8e98c968c022d89c73401 /khaganatForum.py | |
parent | 80a16a605576f85e15ffd0a3b4baf2f1dbaa2ab4 (diff) | |
download | ryzomcore_searx-70eea5c1874ef3fffe59c5ab0a03dae25beb2d25.tar.xz ryzomcore_searx-70eea5c1874ef3fffe59c5ab0a03dae25beb2d25.zip |
Add code to directly build and start a preconfigured searx
Diffstat (limited to 'khaganatForum.py')
-rw-r--r-- | khaganatForum.py | 146 |
1 file changed, 0 insertions, 146 deletions
# -*- coding: utf-8 -*-
"""
Khaganat forum search engine for searx.

Scrapes the SMF forum search results at https://khaganat.net/forum
(the forum exposes no API, so results are parsed from HTML).

@website     https://khaganat.net/forum
@provide-api no
@using-api   no
@results     HTML
@stable      yes
@parse       (general) url, title, content, publishedDate
"""

try:
    from urllib import urlencode            # Python 2
except ImportError:
    from urllib.parse import urlencode      # Python 3

from datetime import datetime

from lxml.html import fromstring
from searx.engines.xpath import extract_text

# engine dependent config
categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'
paging = False
language_support = False
number_of_results = 5

# search-url (SMF "search2" action performs the actual search)
base_url = 'https://khaganat.net'
search_url = '/forum/index.php?action=search2&{query}'

# French month name -> month number, used to parse the forum's dates.
# Unknown names fall back to 12 (December), matching the original logic.
_FRENCH_MONTHS = {
    u'janvier': 1,
    u'février': 2,
    u'mars': 3,
    u'avril': 4,
    u'mai': 5,
    u'juin': 6,
    u'juillet': 7,
    u'août': 8,
    u'septembre': 9,
    u'octobre': 10,
    u'novembre': 11,
    u'décembre': 12,
}

# Fallback timestamp when a result's date cannot be parsed.
_EPOCH_FALLBACK = datetime(1970, 1, 1, 3, 1, 42)


def _parse_french_date(text):
    """Parse a forum date like u'21 janvier 2018 à 05:50:43'.

    :param text: extracted date text (unicode); format is
                 'DD <french month> YYYY à HH:MM:SS'
    :return: a ``datetime``; ``_EPOCH_FALLBACK`` when the text is
             missing, truncated, or otherwise malformed.
    """
    try:
        parts = text.split(' ')
        day = int(parts[0])
        month = _FRENCH_MONTHS.get(parts[1].lower(), 12)
        year = int(parts[2])
        # parts[3] is the literal 'à'; parts[4] is the HH:MM:SS time.
        hour, minute, second = (int(p) for p in parts[4].split(':'))
        return datetime(year, month, day, hour, minute, second)
    except (AttributeError, IndexError, ValueError):
        # Malformed or absent date: do not fail the whole result.
        return _EPOCH_FALLBACK


def request(query, params):
    """Build the search request.

    :param query: user query string
    :param params: searx request params dict; ``params['url']`` is set
                   to the full forum search URL.
    :return: the (mutated) ``params`` dict.
    """
    params['url'] = base_url + \
        search_url.format(query=urlencode({'search': query}))
    return params


def response(resp):
    """Parse the forum's HTML search results.

    :param resp: HTTP response object whose ``.text`` holds the page.
    :return: list of result dicts with ``title``, ``content``, ``url``
             and ``publishedDate`` keys.
    """
    results = []
    doc = fromstring(resp.text)

    for result in doc.xpath('//div[@id="main_content_section"]/div/div/div'):
        # Topic link: skip the row entirely when no link is present.
        links = result.xpath(
            './/div[@class="topic_details floatleft"]/h5/a/@href')
        if not links:
            continue
        res_url = links[-1]

        title_nodes = result.xpath(
            './/div[@class="topic_details floatleft"]/h5')
        if not title_nodes:
            continue
        title = extract_text(title_nodes)

        # The post excerpt is optional; an empty string is acceptable.
        content_nodes = result.xpath(
            './/div[@class="list_posts double_height"]')
        content = extract_text(content_nodes) if content_nodes else ""

        date_nodes = result.xpath(
            './/div[@class="topic_details floatleft"]/span/em')
        date_text = extract_text(date_nodes) if date_nodes else ""
        published = _parse_french_date(date_text)

        results.append({'title': title,
                        'content': content,
                        'url': base_url + res_url,
                        'publishedDate': published})

    return results