diff options
-rw-r--r-- | src/forgeryzom.py | 113 |
1 file changed, 43 insertions, 70 deletions
diff --git a/src/forgeryzom.py b/src/forgeryzom.py index 71238ff..76aa50a 100644 --- a/src/forgeryzom.py +++ b/src/forgeryzom.py @@ -48,78 +48,51 @@ def response(resp): for r in doc.xpath('//ul[@class="mw-search-results"]/li'): try: res_url = r.xpath('.//div[@class="mw-search-result-heading"]/a/@href')[-1] - except: - continue + title = extract_text(r.xpath('.//div[@class="mw-search-result-heading"]/a/@title')) + content = extract_text(r.xpath('.//div[@class="searchresult"]')) + + dataBrut = extract_text(r.xpath('.//div[@class="mw-search-result-data"]')) + data = dataBrut.split('-') + + adatetime = data[1] + data = adatetime.split(',') + date = data[1] + Thedate = date.split(' ') + + + if Thedate[2] == "January": + ThedateMonth = 1 + elif Thedate[2] == "February": + ThedateMonth = 2 + elif Thedate[2] == "March": + ThedateMonth = 3 + elif Thedate[2] == "April": + ThedateMonth = 4 + elif Thedate[2] == "May": + ThedateMonth = 5 + elif Thedate[2] == "June": + ThedateMonth = 6 + elif Thedate[2] == "July": + ThedateMonth = 7 + elif Thedate[2] == "August": + ThedateMonth = 8 + elif Thedate[2] == "September": + ThedateMonth = 9 + elif Thedate[2] == "October": + ThedateMonth = 10 + elif Thedate[2] == "November": + ThedateMonth = 11 + else: + ThedateMonth = 12 + + # append result + results.append({'title': title, + 'content': content, + 'url': base_url + res_url, + 'publishedDate': datetime(int(Thedate[3]), ThedateMonth, int(Thedate[1]), 3, 1, 42)}) - if not res_url: + except: continue - title = extract_text(r.xpath('.//div[@class="mw-search-result-heading"]/a/@title')) - - content = extract_text(r.xpath('.//div[@class="searchresult"]')) - - dataBrut = extract_text(r.xpath('.//div[@class="mw-search-result-data"]')) - - data = dataBrut.split('-') - - - - #date = '-'.join(dataS) - adatetime = data[1] - data = adatetime.split(',') - date = data[1] - Thedate = date.split(' ') - - - if Thedate[2] == "January": - ThedateMonth = 1 - elif Thedate[2] == "February": - ThedateMonth = 2 
- elif Thedate[2] == "March": - ThedateMonth = 3 - elif Thedate[2] == "April": - ThedateMonth = 4 - elif Thedate[2] == "May": - ThedateMonth = 5 - elif Thedate[2] == "June": - ThedateMonth = 6 - elif Thedate[2] == "July": - ThedateMonth = 7 - elif Thedate[2] == "August": - ThedateMonth = 8 - elif Thedate[2] == "September": - ThedateMonth = 9 - elif Thedate[2] == "October": - ThedateMonth = 10 - elif Thedate[2] == "November": - ThedateMonth = 11 - else: - ThedateMonth = 12 - - # append result - results.append({'title': title, - 'content': content, - 'url': base_url + res_url, - 'publishedDate': datetime(int(Thedate[3]), ThedateMonth, int(Thedate[1]), 3, 1, 42)}) - - # Search results - #for r in doc.xpath('//dl[@class="search_results"]/*'): - # try: - # if r.tag == "dt": - # res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] - # title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) - # elif r.tag == "dd": - # content = extract_text(r.xpath('.')) - - # append result - # results.append({'title': title, - # 'content': content, - # 'url': base_url + res_url}) - # except: - # continue - - # if not res_url: - # continue - # return results return results |