"""Scan the fr.lolix.org job listings and print the offers matching a pattern.

Usage: pass an optional regular expression as the single command-line
argument.  Every offer linked from the paginated search results is
downloaded and its text is searched with that expression; the URLs of the
matching offers are printed at the end.  Without an argument the pattern is
empty and therefore matches every offer.
"""
import re
import sys

import requests
from bs4 import BeautifulSoup

URL_PREFIX = 'http://fr.lolix.org/search/offre/'
# Query-string tail appended after the page number.  The third parameter is
# "regionid"; the source previously carried it as a mangled "(R)ionid"
# (the "&reg" prefix had been decoded as an HTML entity).
DEFAULT_OPTION = '&mode=find&posteid=0&regionid=0&contratid=0'
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30
# Links whose href contains "?id" are treated as links to a single offer.
OFFER_LINK_RE = re.compile(r'\?id')
# A listing table containing this text stops the pagination (presumably the
# site's "no offer" message -- confirm against the live page).
END_MARKER_RE = re.compile('Aucune')


def _fetch_soup(session, url):
    """Download *url* and return its parsed HTML.

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _offer_hrefs(table):
    """Yield the href of every offer link found in the cells of *table*."""
    for row in table.find_all("tr"):
        for cell in row.find_all("td"):
            for anchor in cell.find_all('a', href=True):
                href = str(anchor.attrs['href'])
                if OFFER_LINK_RE.search(href):
                    yield href


def _scan_listing(soup, page_num):
    """Extract offer links from one search-result page.

    Returns:
        A ``(hrefs, last_page)`` tuple.  ``hrefs`` lists the offer hrefs in
        page order.  ``last_page`` is true when a listing table carries the
        end marker, or when the page holds no listing table at all; the
        latter keeps the caller from paginating forever over error pages or
        a changed layout.
    """
    hrefs = []
    found_listing = False
    reached_end = False
    for block in soup.find_all(class_='Contenu'):
        for table in block.find_all('table'):
            # Only tables that contain a "ListeTitre" element are listings.
            if not table.find_all(class_="ListeTitre"):
                continue
            found_listing = True
            print("Parsing page "+str(page_num)+"...")
            if END_MARKER_RE.search(table.text):
                reached_end = True
            else:
                hrefs.extend(_offer_hrefs(table))
    return hrefs, reached_end or not found_listing


def find_offers(pattern):
    """Return the URLs of all offers whose page text matches *pattern*.

    Args:
        pattern: regular expression searched in the text of each offer page.

    Returns:
        Matching offer URLs, without duplicates, in discovery order.

    Raises:
        re.error: if *pattern* is not a valid regular expression.
        requests.RequestException: if a listing page cannot be downloaded.
    """
    # Compile up front so that a bad pattern fails before any network use.
    matcher = re.compile(pattern)
    matches = []
    seen = set()
    page_num = 0
    with requests.Session() as session:
        while True:
            listing_url = (URL_PREFIX + 'search.php?page=' + str(page_num)
                           + DEFAULT_OPTION)
            hrefs, last_page = _scan_listing(
                _fetch_soup(session, listing_url), page_num)
            for href in hrefs:
                offer_url = URL_PREFIX + href
                # Nested tables/cells can expose the same link several
                # times; download and report each offer only once.
                if offer_url in seen:
                    continue
                seen.add(offer_url)
                try:
                    offer_soup = _fetch_soup(session, offer_url)
                except requests.RequestException as err:
                    # One unreachable offer must not abort the whole scan.
                    print("Skipping " + offer_url + ": " + str(err),
                          file=sys.stderr)
                    continue
                if matcher.search(offer_soup.text):
                    matches.append(offer_url)
            if last_page:
                return matches
            page_num += 1


def main():
    """Run the search with the optional command-line pattern and print results."""
    arg = sys.argv[1] if len(sys.argv) == 2 else ""
    offres = find_offers(arg)
    if offres:
        print("\nOffres trouvée: \n")
        for offre in offres:
            print(offre)
    else:
        print("Aucune offre à ce jour ne correspond à vos critères de recherche.")


if __name__ == "__main__":
    main()