# path: root/search.py
# blob: 305f39d576200d80807e13a78272da1b8e58552e
import requests, re, sys
from bs4 import BeautifulSoup

# Command-line scraper for the offer listing on fr.lolix.org: walks the
# paginated search results, downloads every linked offer page and prints the
# URLs of the offers whose text matches the optional keyword passed as the
# single command-line argument (no argument means "match everything").

url_prefix = 'http://fr.lolix.org/search/offre/'
default_option = '&mode=find&posteid=0&regionid=0&contratid=0'

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 30


def search_url(page_num):
    """Return the URL of result page *page_num* of the offer search."""
    return url_prefix + 'search.php?page=' + str(page_num) + default_option


def keyword_pattern(arg):
    """Compile the user keyword into a regular expression.

    The keyword is interpreted as a regular expression, as before.  When it
    is not a valid pattern (e.g. an unbalanced parenthesis) it is matched
    literally instead of crashing in the middle of the scrape.
    """
    try:
        return re.compile(arg)
    except re.error:
        return re.compile(re.escape(arg))


def fetch_soup(url):
    """Download *url* and return its parsed HTML.

    Raises requests.RequestException on connection errors, timeouts and
    HTTP error statuses.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def listing_tables(soup):
    """Return the tables inside 'Contenu' elements that hold a 'ListeTitre'."""
    tables = []
    known = set()
    for container in soup.find_all(class_='Contenu'):
        for table in container.find_all('table'):
            # Nested 'Contenu' elements would yield the same table object
            # more than once; keep each object a single time.
            if id(table) in known:
                continue
            if table.find_all(class_="ListeTitre"):
                known.add(id(table))
                tables.append(table)
    return tables


def offer_hrefs(table):
    """Yield the href of every offer link ('?id' in the href) in *table*."""
    for row in table.find_all("tr"):
        for cell in row.find_all("td"):
            for anchor in cell.find_all('a', href=True):
                href = str(anchor.attrs['href'])
                if '?id' in href:
                    yield href


def find_offers(pattern):
    """Walk the result pages and return the URLs of the matching offers.

    Pagination stops when a listing contains 'Aucune', when a page has no
    listing table at all, when a page brings no offer link that was not
    already seen, or when a result page cannot be downloaded.  Each offer
    URL is fetched and reported at most once.
    """
    offres = []
    seen = set()
    page_num = 0
    while True:
        try:
            soup = fetch_soup(search_url(page_num))
        except requests.RequestException as err:
            print("Cannot fetch page " + str(page_num) + ": " + str(err),
                  file=sys.stderr)
            break

        tables = listing_tables(soup)
        if not tables:
            # Previously this case looped forever on the same page.
            print("No offer listing found on page " + str(page_num)
                  + "; stopping.", file=sys.stderr)
            break

        print("Parsing page "+str(page_num)+"...")
        last_page = False
        new_links = 0
        for table in tables:
            if 'Aucune' in table.text:
                last_page = True
                continue
            for href in offer_hrefs(table):
                offer_url = url_prefix + href
                if offer_url in seen:
                    continue
                seen.add(offer_url)
                new_links += 1
                try:
                    offer_text = fetch_soup(offer_url).text
                except requests.RequestException as err:
                    print("Skipping " + offer_url + ": " + str(err),
                          file=sys.stderr)
                    continue
                if pattern.search(offer_text):
                    offres.append(offer_url)

        # A page without any unseen offer means the listing is exhausted
        # (or the server keeps serving the same page); stop instead of
        # spinning.
        if last_page or new_links == 0:
            break
        # Advance exactly once per page, however many tables matched.
        page_num += 1
    return offres


def main(argv=None):
    """Run the search and print the matching offer URLs."""
    if argv is None:
        argv = sys.argv
    # Exactly one argument is the keyword; anything else searches all offers.
    arg = argv[1] if len(argv) == 2 else ""

    offres = find_offers(keyword_pattern(arg))
    if offres:
        print("\nOffres trouvée: \n")
        for offre in offres:
            print(offre)
    else:
        print("Aucune offre à ce jour ne correspond à vos critères de recherche.")


if __name__ == "__main__":
    main()