# -*- coding: utf-8 -*-
"""
 Khaganat forum (SMF forum search)

 @website     https://khaganat.net
 @provide-api no
 @using-api   no
 @results     HTML
 @stable      no (scrapes forum HTML, which may change)
 @parse       url, title, content, publishedDate
"""

from urllib.parse import urlencode
from datetime import datetime

from lxml.html import fromstring
from searx.engines.xpath import extract_text

# engine dependent config
categories = ['general']  # TODO: 'images', 'music', 'videos', 'files'
paging = False
language_support = False
number_of_results = 5

# search-url (SMF search endpoint)
base_url = 'https://khaganat.net'
search_url = '/forum/index.php?action=search2&{query}'

# The forum renders dates in French ("21 janvier 2018 à 05:50:43");
# map month names to month numbers instead of a long if/elif chain.
_FRENCH_MONTHS = {
    'janvier': 1,
    'février': 2,
    'mars': 3,
    'avril': 4,
    'mai': 5,
    'juin': 6,
    'juillet': 7,
    'août': 8,
    'septembre': 9,
    'octobre': 10,
    'novembre': 11,
    'décembre': 12,
}

# Fallback date string used when a result carries no parsable date.
_EPOCH_DATE_TEXT = '01 janvier 1970 à 01:01:01'


def request(query, params):
    """Build the search request.

    Parameters:
        query  -- the raw user query string
        params -- searx request-params dict; 'url' is filled in

    Returns the mutated ``params`` dict (searx engine convention).
    """
    params['url'] = base_url + search_url.format(
        query=urlencode({'search': query}))
    return params


def _parse_french_date(date_text):
    """Parse a forum date like '21 janvier 2018 à 05:50:43' into a datetime.

    Only day/month/year are used; the time-of-day is fixed at 03:01:42
    to match the original engine's behavior (the forum's time field was
    never wired into the datetime).  Unknown month names fall back to
    December, as the original if/elif chain did.
    """
    parts = date_text.split(' ')
    day = int(parts[0])
    month = _FRENCH_MONTHS.get(parts[1], 12)
    year = int(parts[2])
    return datetime(year, month, day, 3, 1, 42)


def response(resp):
    """Parse the forum's search-results HTML page.

    Parameters:
        resp -- requests-style response object; only ``resp.text`` is read

    Returns a list of searx result dicts with keys:
    title, content, url, publishedDate.
    """
    results = []
    doc = fromstring(resp.text)

    for result in doc.xpath('//div[@id="main_content_section"]/div/div/div'):
        # A result row must at least link to a topic; skip decorative divs.
        try:
            res_url = result.xpath(
                './/div[@class="topic_details floatleft"]/h5/a/@href')[-1]
        except IndexError:
            continue
        if not res_url:
            continue

        title = extract_text(
            result.xpath('.//div[@class="topic_details floatleft"]/h5'))

        # The post excerpt is optional; fall back to an empty snippet.
        content_nodes = result.xpath(
            './/div[@class="list_posts double_height"]')
        content = extract_text(content_nodes) if content_nodes else ''

        # Date is best-effort: fall back to the epoch on any parse failure
        # rather than dropping an otherwise valid result.
        date_text = extract_text(result.xpath(
            './/div[@class="topic_details floatleft"]/span/em'))
        try:
            published = _parse_french_date(date_text or _EPOCH_DATE_TEXT)
        except (IndexError, ValueError):
            published = _parse_french_date(_EPOCH_DATE_TEXT)

        results.append({
            'title': title,
            'content': content,
            'url': base_url + res_url,
            'publishedDate': published,
        })

    return results