1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
#!/bin/python
import os
import re
import sys
import urllib.request

import lxml
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
# The artist's user id is the single required command-line argument; it is
# used as the `query` value of the module-listing URLs requested below.
if len(sys.argv) < 2:
    print("Usage:")
    print(" {} <user_id>".format(sys.argv[0]))
    # sys.exit() rather than the bare exit() helper: the latter is injected by
    # the `site` module for interactive sessions and is not always available.
    sys.exit(1)
user = sys.argv[1]
class DownloadProgressBar(tqdm):
    """tqdm progress bar whose `update_to` method is passed as the
    `reporthook` of `urllib.request.urlretrieve` by `download_url`."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar to the position described by a progress report.

        b     -- multiplied by `bsize` to obtain the absolute position
        bsize -- multiplied by `b` to obtain the absolute position
        tsize -- when not None, stored as the bar's `total`
        """
        if tsize is not None:
            self.total = tsize
        # update() expects an increment, so subtract what is already counted.
        position = b * bsize
        self.update(position - self.n)
def download_url(url, out_path):
    """Retrieve `url` into the file `out_path`, displaying a progress bar.

    The bar is labelled with everything after the last '/' in `url`.
    """
    label = url.rsplit('/', 1)[-1]
    progress = DownloadProgressBar(unit='B', unit_scale=True,
                                   miniters=1, desc=label)
    with progress as bar:
        urllib.request.urlretrieve(
            url,
            filename=out_path,
            reporthook=bar.update_to,
        )
def _download_page_modules(soup):
    """Download every module linked from one parsed listing page.

    A link qualifies when its href matches "download". The output file name
    is the part of the href between the first and second '#'.
    """
    for link in soup.find_all('a', href=re.compile("download")):
        link_href = link.get("href")
        parts = link_href.split("#")
        # The name comes from remote HTML: keep only its final path component
        # so a crafted fragment cannot place the file outside the current
        # directory.
        link_name = os.path.basename(parts[1]) if len(parts) > 1 else ""
        if not link_name or link_name in (".", ".."):
            print("Can't get get file name... :/")
            continue
        print("Downloading {} module...".format(link_name))
        download_url(link_href, link_name)


def _next_page_number(soup):
    """Return the number of the page following the selected one, or None.

    Only pager links whose text is a plain decimal number are considered.
    None is returned when the following page is not among the links carrying
    the "pagination" class.
    """
    selected = 0
    linked = set()
    for link in soup.find_all('a', class_=re.compile(".*pagination.*")):
        label = link.text.strip()
        # Skip non-numeric labels instead of letting int() raise on them.
        if not re.fullmatch(r"[0-9]+", label):
            continue
        classes = link.get("class")
        if "pagination-selected" in classes:
            selected = int(label)
        if "pagination" in classes:
            linked.add(int(label))
    following = selected + 1
    return following if following in linked else None


def get_mods(url):
    """Download all modules listed at `url` and on the pages that follow it.

    Each page is fetched with `requests`, parsed with BeautifulSoup ("lxml"
    parser), its download links are retrieved via `download_url`, and the
    pager is inspected to find the next page. Follow-up page URLs are built
    from the module-level `user` id.

    Pages are walked in a loop rather than by recursion, and a URL that was
    already fetched is never requested again, so a pager without a selected
    entry cannot cause endless refetching of the same page.
    """
    visited = set()
    while url not in visited:
        visited.add(url)
        # A timeout keeps an unresponsive server from hanging the script.
        r = requests.get(url, timeout=30)
        soup = BeautifulSoup(r.text, "lxml")
        _download_page_modules(soup)
        next_page = _next_page_number(soup)
        if next_page is None:
            break
        url = ("https://modarchive.org/index.php?request=view_artist_modules&query="
               + user + "&page=" + str(next_page) + "#mods")
# Script entry: start at page 1 of the artist's module listing; get_mods()
# takes care of the pages after it.
url = "".join((
    "https://modarchive.org/index.php?request=view_artist_modules&query=",
    user,
    "&page=1#mods",
))
get_mods(url)
|