diff options
-rw-r--r-- | requirements.txt | 2 | ||||
-rw-r--r-- | search.py | 51 |
2 files changed, 53 insertions, 0 deletions
"""Scrape fr.lolix.org job offers ("offres") and print those matching a keyword.

Usage: python search.py [keyword]

Walks the paginated search results, opens every offer link found, and prints
the URLs of offers whose page text matches the optional keyword (treated as a
regular expression; an empty keyword matches every offer).

Requires: requests, beautifulsoup4 (see requirements.txt).
"""
import re
import sys

import requests
from bs4 import BeautifulSoup

URL_PREFIX = 'http://fr.lolix.org/search/offre/'
# NOTE(review): the published diff showed "®ionid" -- HTML-entity mangling of
# "&regionid" ("&reg" was rendered as the registered-trademark sign).
# Restored to the intended query-string parameter here.
DEFAULT_OPTION = '&mode=find&posteid=0&regionid=0&contratid=0'


def _build_search_url(page_num):
    """Return the search-results URL for the given zero-based page number."""
    return URL_PREFIX + 'search.php?page=' + str(page_num) + DEFAULT_OPTION


def _fetch_soup(url):
    """GET *url* and return the parsed BeautifulSoup document."""
    return BeautifulSoup(requests.get(url).text, 'html.parser')


def _collect_offer_links(table, keyword):
    """Return offer URLs in *table* whose offer page text matches *keyword*.

    Each candidate link is fetched so the full offer text can be searched.
    """
    found = []
    for row in table.find_all('tr'):
        for cell in row.find_all('td'):
            for link in cell.find_all('a', href=True):
                href = link.attrs['href']
                # Offer links look like "offre.php?id=NNN"; skip anything else.
                # (raw string: '\?' is an invalid escape in a plain literal)
                if not re.search(r'\?id', href):
                    continue
                offer_url = URL_PREFIX + str(href)
                offer_soup = _fetch_soup(offer_url)
                # Empty keyword matches everything -- original behavior.
                if re.search(keyword, offer_soup.text):
                    found.append(offer_url)
    return found


def main():
    """Entry point: walk result pages until the site reports no more offers."""
    keyword = sys.argv[1] if len(sys.argv) == 2 else ""

    offres = []
    page_num = 0
    more_pages = True  # renamed from the inverted-meaning "empty" flag
    while more_pages:
        # Fetch at the top of the loop: the original fetched one extra,
        # discarded page after detecting the end of the results.
        soup = _fetch_soup(_build_search_url(page_num))
        for contenu in soup.find_all(class_='Contenu'):
            for table in contenu.find_all('table'):
                if not table.find_all(class_='ListeTitre'):
                    continue
                print("Parsing page " + str(page_num) + "...")
                # "Aucune" ("none") in the results table means we have walked
                # past the last page of offers.
                if re.search('Aucune', table.text):
                    more_pages = False
                else:
                    offres.extend(_collect_offer_links(table, keyword))
        page_num = page_num + 1

    if offres:
        # Fixed plural agreement: "trouvée" -> "trouvées".
        print("\nOffres trouvées: \n")
        for offre in offres:
            print(offre)
    else:
        print("Aucune offre à ce jour ne correspond à vos critères de recherche.")


if __name__ == "__main__":
    main()