diff options
author | neodarz <neodarz@neodarz.net> | 2016-12-20 19:10:39 +0100 |
---|---|---|
committer | neodarz <neodarz@neodarz.net> | 2016-12-20 19:10:39 +0100 |
commit | 7332ce6d5f72305f1589383c6694e030821d4d86 (patch) | |
tree | 2137a09d90617f3cd24c1d491426bf5287918a90 /RyzomForum.py | |
download | ryzomcore_searx-7332ce6d5f72305f1589383c6694e030821d4d86.tar.xz ryzomcore_searx-7332ce6d5f72305f1589383c6694e030821d4d86.zip |
Initial release :)
Diffstat (limited to 'RyzomForum.py')
-rw-r--r-- | RyzomForum.py | 84 |
1 files changed, 84 insertions, 0 deletions
# Doku Wiki
#
# @website     https://www.dokuwiki.org/
# @provide-api yes
#              (https://www.dokuwiki.org/devel:xmlrpc)
#
# @using-api   no
# @results     HTML
# @stable      yes
# @parse       (general)    url, title, content
#
# File: RyzomForum.py
# Scrapes the HTML search page served under ``base_url + search_url`` and
# turns the "quick hits" list and the full-text result list into result dicts.

try:
    from urllib import urlencode  # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3

from lxml.html import fromstring
from searx.engines.xpath import extract_text

# engine dependent config
categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'
paging = False
language_support = False
number_of_results = 5

# search-url
# Doku is OpenSearch compatible
base_url = 'https://khaganat.net'
search_url = '/wikhan/?do=search'\
             '&{query}'
# TODO             '&startRecord={offset}'\
# TODO             '&maximumRecords={limit}'\

# XPath (relative to a result node) selecting the anchors that carry the
# ``wikilink1`` class; both result lists use the same markup for their links.
_WIKILINK_XPATH = './/a[@class="wikilink1"]'


def _wikilink(node):
    """Return ``(href, title)`` for the last ``wikilink1`` anchor under *node*.

    Returns ``None`` when *node* contains no such anchor or when the anchor's
    ``href`` is empty, so callers can skip the entry without relying on
    exception handling.
    """
    hrefs = node.xpath(_WIKILINK_XPATH + '/@href')
    if not hrefs or not hrefs[-1]:
        return None
    title = extract_text(node.xpath(_WIKILINK_XPATH + '/@title'))
    return hrefs[-1], title


# do search-request
def request(query, params):
    """Fill ``params['url']`` with the search URL for *query*.

    The query is URL-encoded as the ``id`` parameter and appended to
    ``base_url + search_url``. The (mutated) *params* dict is returned.
    """
    params['url'] = base_url +\
        search_url.format(query=urlencode({'id': query}))

    return params


# get response from search-request
def response(resp):
    """Parse the search page in ``resp.text`` into a list of result dicts.

    Each result is a dict with the keys ``title``, ``content`` and ``url``.
    Quick hits come first (with empty ``content``), followed by the full-text
    results, where every ``<dd>`` snippet is paired with the link found in the
    most recent preceding ``<dt>``.
    """
    results = []

    doc = fromstring(resp.text)

    # parse results
    # Quickhits
    for item in doc.xpath('//div[@class="search_quickresult"]/ul/li'):
        link = _wikilink(item)
        if link is None:
            continue

        res_url, title = link

        # append result
        results.append({'title': title,
                        'content': "",
                        'url': base_url + res_url})

    # Search results
    # ``pending`` holds the link of the last <dt>; it starts out (and is reset
    # to) None so that a <dd> without a usable <dt> before it is skipped
    # instead of being emitted with a stale title/url.
    pending = None
    for node in doc.xpath('//dl[@class="search_results"]/*'):
        if node.tag == "dt":
            pending = _wikilink(node)
        elif node.tag == "dd" and pending is not None:
            res_url, title = pending

            # append result
            results.append({'title': title,
                            'content': extract_text(node.xpath('.')),
                            'url': base_url + res_url})

    # return results
    return results