Clean Khaganat forum engine code

author: neodarz <neodarz@neodarz.net> 2019-01-20 18:37:25 +0100
committer: neodarz <neodarz@neodarz.net> 2019-01-20 18:37:25 +0100
commit: 20662085498866c855b60aa3f1b0e32600fdb3cd (patch)
tree: 21c952215ace17e2a926a57e83b75230a01e8ce4 /src
parent: 2ea03ea09a1b7ea64d78bc6589b6aaf5de884e26 (diff)
download: ryzomcore_searx-20662085498866c855b60aa3f1b0e32600fdb3cd.tar.xz
ryzomcore_searx-20662085498866c855b60aa3f1b0e32600fdb3cd.zip
1 file changed, 40 insertions, 89 deletions
diff --git a/src/khaganatForum.py b/src/khaganatForum.py
index a129dc6..862eada 100644
--- a/src/khaganatForum.py
+++ b/src/khaganatForum.py
@@ -25,8 +25,6 @@ number_of_results = 5
 # Doku is OpenSearch compatible
 base_url = 'https://khaganat.net'
 search_url = '/forum/index.php?action=search2&{query}'
-#search_url = '/wikhan/?do=search'\
-#        '&{query}'
 # TODO             '&startRecord={offset}'\
 # TODO             '&maximumRecords={limit}'\
 
@@ -46,101 +44,54 @@ def response(resp):
 
     doc = fromstring(resp.text)
 
-    # parse results
-    # Quickhits
+    # Search
     for r in doc.xpath('//div[@id="main_content_section"]/div/div/div'):
         try:
             res_url = r.xpath('.//div[@class="topic_details floatleft"]/h5/a/@href')[-1]
-        except:
-            continue
-
-        if not res_url:
-            continue
-
-        try:
             title = extract_text(r.xpath('.//div[@class="topic_details floatleft"]/h5')) 
-        except:
-            continue
-        try:
             content = extract_text(r.xpath('.//div[@class="list_posts double_height"]'))
-        except:
-            content = ""
 
-        try:
             dateBrut = extract_text(r.xpath('.//div[@class="topic_details floatleft"]/span/em')).encode('utf-8')
+            date = dateBrut.split(' ')
+            year = date[2]
+            day = date[0]
+            french_text_month = date[1]
+            time = date[4]
+
+            if french_text_month == "janvier":
+                Month = 1
+            elif french_text_month.decode('utf-8') == "février".decode('utf-8'):
+                Month = 2
+            elif french_text_month  == "mars":
+                Month = 3
+            elif french_text_month == "avril":
+                Month = 4
+            elif french_text_month == "mai":
+                Month = 5
+            elif french_text_month == "juin":
+                Month = 6
+            elif french_text_month == "juillet":
+                Month = 7
+            elif french_text_month.decode('utf-8') == "août".decode('utf-8'):
+                Month = 8
+            elif french_text_month == "septembre":
+                Month = 9
+            elif french_text_month == "octobre":
+                 Month = 10
+            elif french_text_month == "novembre":
+                Month = 11
+            else:
+                Month = 12
+
+
+            # append result
+            results.append({'title': title,
+                            'content': content,
+                            'url': res_url,
+                            'publishedDate': datetime(int(year), Month, int(day), 3, 1, 42)})
+
         except:
-            dateBrut = "01 janvier 1970 à 01:01:01".encode('utf-8')
-        date = dateBrut.split(' ')
-        year = date[2]
-        day = date[0]
-        french_text_month = date[1]
-        time = date[4]
-        #dataBrut = extract_text(r.xpath('.//span[@class="date"]'))
-
-        #data = dataBrut.split('-')
-
-
-
-        #date = '-'.join(dataS)
-        #adatetime = data[1]
-        #data = adatetime.split(',')
-        #date = data[1]
-        #Thedate = dataBrut.split(' ')
-        #TheDay = Thedate[1].split(',')
-
-
-        if french_text_month == "janvier":
-            Month = 1
-        elif french_text_month.decode('utf-8') == "février".decode('utf-8'):
-            Month = 2
-        elif french_text_month  == "mars":
-            Month = 3
-        elif french_text_month == "avril":
-            Month = 4
-        elif french_text_month == "mai":
-            Month = 5
-        elif french_text_month == "juin":
-            Month = 6
-        elif french_text_month == "juillet":
-            Month = 7
-        elif french_text_month.decode('utf-8') == "août".decode('utf-8'):
-            Month = 8
-        elif french_text_month == "septembre":
-            Month = 9
-        elif french_text_month == "octobre":
-             Month = 10
-        elif french_text_month == "novembre":
-            Month = 11
-        else:
-            Month = 12
-
-
-        # append result
-        results.append({'title': title,
-                        'content': content,
-                        'url': res_url,
-                        'publishedDate': datetime(int(year), Month, int(day), 3, 1, 42)})
-
-
-
-    # Search results
-    #for r in doc.xpath('//dl[@class="search_results"]/*'):
-    #    try:
-    #        if r.tag == "dt":
-    #            res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
-    #            title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
-    #        elif r.tag == "dd":
-    #            content = extract_text(r.xpath('.'))
-
-                # append result
-    #            results.append({'title': title,
-    #                            'content': content,
-    #                            'url': base_url + res_url})
-    #    except:
-    #        continue
-
-    #    if not res_url:
-    #        continue
+            continue
 
     # return results
     return results
author	neodarz <neodarz@neodarz.net>	2019-01-20 18:37:25 +0100
committer	neodarz <neodarz@neodarz.net>	2019-01-20 18:37:25 +0100
commit	20662085498866c855b60aa3f1b0e32600fdb3cd (patch)
tree	21c952215ace17e2a926a57e83b75230a01e8ce4 /src
parent	2ea03ea09a1b7ea64d78bc6589b6aaf5de884e26 (diff)
download	ryzomcore_searx-20662085498866c855b60aa3f1b0e32600fdb3cd.tar.xz ryzomcore_searx-20662085498866c855b60aa3f1b0e32600fdb3cd.zip