about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author neodarz <neodarz@neodarz.net> 2019-01-20 18:37:25 +0100
committer neodarz <neodarz@neodarz.net> 2019-01-20 18:37:25 +0100
commit 20662085498866c855b60aa3f1b0e32600fdb3cd (patch)
tree 21c952215ace17e2a926a57e83b75230a01e8ce4 /src
parent 2ea03ea09a1b7ea64d78bc6589b6aaf5de884e26 (diff)
download ryzomcore_searx-20662085498866c855b60aa3f1b0e32600fdb3cd.tar.xz
ryzomcore_searx-20662085498866c855b60aa3f1b0e32600fdb3cd.zip
Clean Khaganat forum engine code
Diffstat (limited to 'src')
-rw-r--r-- src/khaganatForum.py 129
1 files changed, 40 insertions, 89 deletions
diff --git a/src/khaganatForum.py b/src/khaganatForum.py
index a129dc6..862eada 100644
--- a/src/khaganatForum.py
+++ b/src/khaganatForum.py
@@ -25,8 +25,6 @@ number_of_results = 5
# Doku is OpenSearch compatible
base_url = 'https://khaganat.net'
search_url = '/forum/index.php?action=search2&{query}'
-#search_url = '/wikhan/?do=search'\
-# '&{query}'
# TODO '&startRecord={offset}'\
# TODO '&maximumRecords={limit}'\
@@ -46,101 +44,54 @@ def response(resp):
doc = fromstring(resp.text)
- # parse results
- # Quickhits
+ # Search
for r in doc.xpath('//div[@id="main_content_section"]/div/div/div'):
try:
res_url = r.xpath('.//div[@class="topic_details floatleft"]/h5/a/@href')[-1]
- except:
- continue
-
- if not res_url:
- continue
-
- try:
title = extract_text(r.xpath('.//div[@class="topic_details floatleft"]/h5'))
- except:
- continue
- try:
content = extract_text(r.xpath('.//div[@class="list_posts double_height"]'))
- except:
- content = ""
- try:
dateBrut = extract_text(r.xpath('.//div[@class="topic_details floatleft"]/span/em')).encode('utf-8')
+ date = dateBrut.split(' ')
+ year = date[2]
+ day = date[0]
+ french_text_month = date[1]
+ time = date[4]
+
+ if french_text_month == "janvier":
+ Month = 1
+ elif french_text_month.decode('utf-8') == "février".decode('utf-8'):
+ Month = 2
+ elif french_text_month == "mars":
+ Month = 3
+ elif french_text_month == "avril":
+ Month = 4
+ elif french_text_month == "mai":
+ Month = 5
+ elif french_text_month == "juin":
+ Month = 6
+ elif french_text_month == "juillet":
+ Month = 7
+ elif french_text_month.decode('utf-8') == "août".decode('utf-8'):
+ Month = 8
+ elif french_text_month == "septembre":
+ Month = 9
+ elif french_text_month == "octobre":
+ Month = 10
+ elif french_text_month == "novembre":
+ Month = 11
+ else:
+ Month = 12
+
+
+ # append result
+ results.append({'title': title,
+ 'content': content,
+ 'url': res_url,
+ 'publishedDate': datetime(int(year), Month, int(day), 3, 1, 42)})
+
except:
- dateBrut = "01 janvier 1970 à 01:01:01".encode('utf-8')
- date = dateBrut.split(' ')
- year = date[2]
- day = date[0]
- french_text_month = date[1]
- time = date[4]
- #dataBrut = extract_text(r.xpath('.//span[@class="date"]'))
-
- #data = dataBrut.split('-')
-
-
-
- #date = '-'.join(dataS)
- #adatetime = data[1]
- #data = adatetime.split(',')
- #date = data[1]
- #Thedate = dataBrut.split(' ')
- #TheDay = Thedate[1].split(',')
-
-
- if french_text_month == "janvier":
- Month = 1
- elif french_text_month.decode('utf-8') == "février".decode('utf-8'):
- Month = 2
- elif french_text_month == "mars":
- Month = 3
- elif french_text_month == "avril":
- Month = 4
- elif french_text_month == "mai":
- Month = 5
- elif french_text_month == "juin":
- Month = 6
- elif french_text_month == "juillet":
- Month = 7
- elif french_text_month.decode('utf-8') == "août".decode('utf-8'):
- Month = 8
- elif french_text_month == "septembre":
- Month = 9
- elif french_text_month == "octobre":
- Month = 10
- elif french_text_month == "novembre":
- Month = 11
- else:
- Month = 12
-
-
- # append result
- results.append({'title': title,
- 'content': content,
- 'url': res_url,
- 'publishedDate': datetime(int(year), Month, int(day), 3, 1, 42)})
-
-
-
- # Search results
- #for r in doc.xpath('//dl[@class="search_results"]/*'):
- # try:
- # if r.tag == "dt":
- # res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
- # title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
- # elif r.tag == "dd":
- # content = extract_text(r.xpath('.'))
-
- # append result
- # results.append({'title': title,
- # 'content': content,
- # 'url': base_url + res_url})
- # except:
- # continue
-
- # if not res_url:
- # continue
+ continue
# return results
return results