-rw-r--r--  requirements.txt  |  2
-rw-r--r--  search.py         | 59
2 files changed, 61 insertions, 0 deletions
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1f3e778
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+beautifulsoup4
+requests
diff --git a/search.py b/search.py
new file mode 100644
index 0000000..305f39d
--- /dev/null
+++ b/search.py
@@ -0,0 +1,59 @@
+import re
+import sys
+
+import requests
+from bs4 import BeautifulSoup
+
+# Base URL of the Lolix job-offer search and the default query options.
+url_prefix = 'http://fr.lolix.org/search/offre/'
+default_option = '&mode=find&posteid=0&regionid=0&contratid=0'
+
+# Optional keyword given on the command line; an empty string matches every offer.
+if len(sys.argv) == 2:
+    arg = sys.argv[1]
+else:
+    arg = ""
+
+# Fetch the first result page.
+page_num = 0
+url = url_prefix + 'search.php?page=' + str(page_num) + default_option
+page = requests.get(url)
+soup = BeautifulSoup(page.text, 'html.parser')
+contenu = soup.find_all(class_='Contenu')
+offres = []
+
+# Walk through the result pages until the site reports that no offer is left
+# ("Aucune" appears in the listing table).
+more_pages = True
+while more_pages:
+    for el in contenu:
+        for ele in el.find_all('table'):
+            if ele.find_all(class_="ListeTitre"):
+                print("Parsing page " + str(page_num) + "...")
+                if re.search('Aucune', ele.text):
+                    more_pages = False
+                else:
+                    # Every listing row links to an offer page (href containing "?id").
+                    for row in ele.find_all("tr"):
+                        for cell in row.find_all("td"):
+                            for a in cell.find_all('a', href=True):
+                                if re.search(r'\?id', a.attrs['href']):
+                                    urlArt = url_prefix + str(a.attrs['href'])
+                                    pageArt = requests.get(urlArt)
+                                    soupArt = BeautifulSoup(pageArt.text, "html.parser")
+                                    # Keep the offer if its page matches the keyword (treated as a regex).
+                                    if re.search(arg, soupArt.text):
+                                        offres.append(urlArt)
+    # Move on to the next result page.
+    page_num = page_num + 1
+    url = url_prefix + "search.php?page=" + str(page_num) + default_option
+    page = requests.get(url)
+    soup = BeautifulSoup(page.text, 'html.parser')
+    contenu = soup.find_all(class_='Contenu')
+
+if len(offres) != 0:
+    print("\nOffres trouvées :\n")
+    for offre in offres:
+        print(offre)
+else:
+    print("Aucune offre à ce jour ne correspond à vos critères de recherche.")