diff options
-rw-r--r-- | src/khaganatForum.py | 129 |
1 files changed, 40 insertions, 89 deletions
diff --git a/src/khaganatForum.py b/src/khaganatForum.py index a129dc6..862eada 100644 --- a/src/khaganatForum.py +++ b/src/khaganatForum.py @@ -25,8 +25,6 @@ number_of_results = 5 # Doku is OpenSearch compatible base_url = 'https://khaganat.net' search_url = '/forum/index.php?action=search2&{query}' -#search_url = '/wikhan/?do=search'\ -# '&{query}' # TODO '&startRecord={offset}'\ # TODO '&maximumRecords={limit}'\ @@ -46,101 +44,54 @@ def response(resp): doc = fromstring(resp.text) - # parse results - # Quickhits + # Search for r in doc.xpath('//div[@id="main_content_section"]/div/div/div'): try: res_url = r.xpath('.//div[@class="topic_details floatleft"]/h5/a/@href')[-1] - except: - continue - - if not res_url: - continue - - try: title = extract_text(r.xpath('.//div[@class="topic_details floatleft"]/h5')) - except: - continue - try: content = extract_text(r.xpath('.//div[@class="list_posts double_height"]')) - except: - content = "" - try: dateBrut = extract_text(r.xpath('.//div[@class="topic_details floatleft"]/span/em')).encode('utf-8') + date = dateBrut.split(' ') + year = date[2] + day = date[0] + french_text_month = date[1] + time = date[4] + + if french_text_month == "janvier": + Month = 1 + elif french_text_month.decode('utf-8') == "février".decode('utf-8'): + Month = 2 + elif french_text_month == "mars": + Month = 3 + elif french_text_month == "avril": + Month = 4 + elif french_text_month == "mai": + Month = 5 + elif french_text_month == "juin": + Month = 6 + elif french_text_month == "juillet": + Month = 7 + elif french_text_month.decode('utf-8') == "août".decode('utf-8'): + Month = 8 + elif french_text_month == "septembre": + Month = 9 + elif french_text_month == "octobre": + Month = 10 + elif french_text_month == "novembre": + Month = 11 + else: + Month = 12 + + + # append result + results.append({'title': title, + 'content': content, + 'url': res_url, + 'publishedDate': datetime(int(year), Month, int(day), 3, 1, 42)}) + except: - dateBrut = "01 janvier 1970 à 01:01:01".encode('utf-8') - date = dateBrut.split(' ') - year = date[2] - day = date[0] - french_text_month = date[1] - time = date[4] - #dataBrut = extract_text(r.xpath('.//span[@class="date"]')) - - #data = dataBrut.split('-') - - - - #date = '-'.join(dataS) - #adatetime = data[1] - #data = adatetime.split(',') - #date = data[1] - #Thedate = dataBrut.split(' ') - #TheDay = Thedate[1].split(',') - - - if french_text_month == "janvier": - Month = 1 - elif french_text_month.decode('utf-8') == "février".decode('utf-8'): - Month = 2 - elif french_text_month == "mars": - Month = 3 - elif french_text_month == "avril": - Month = 4 - elif french_text_month == "mai": - Month = 5 - elif french_text_month == "juin": - Month = 6 - elif french_text_month == "juillet": - Month = 7 - elif french_text_month.decode('utf-8') == "août".decode('utf-8'): - Month = 8 - elif french_text_month == "septembre": - Month = 9 - elif french_text_month == "octobre": - Month = 10 - elif french_text_month == "novembre": - Month = 11 - else: - Month = 12 - - - # append result - results.append({'title': title, - 'content': content, - 'url': res_url, - 'publishedDate': datetime(int(year), Month, int(day), 3, 1, 42)}) - - - - # Search results - #for r in doc.xpath('//dl[@class="search_results"]/*'): - # try: - # if r.tag == "dt": - # res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] - # title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) - # elif r.tag == "dd": - # content = extract_text(r.xpath('.')) - - # append result - # results.append({'title': title, - # 'content': content, - # 'url': base_url + res_url}) - # except: - # continue - - # if not res_url: - # continue + continue # return results return results |