#!/usr/bin/env python3
"""Download every module linked from an artist's listing on modarchive.org.

Usage:
    <script> <user>

``<user>`` is passed as the ``query`` parameter of the site's
``view_artist_modules`` request.  Each listing page is fetched in turn, and
every link whose ``href`` contains "download" is saved to the current working
directory under the name found in the link's ``#fragment``.
"""
import os
import re
import sys
import urllib.parse
import urllib.request

import lxml  # noqa: F401 -- not referenced directly; BeautifulSoup is asked for the "lxml" parser
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Listing URL template; ``user`` must already be percent-quoted.
ARTIST_PAGE_URL = (
    "https://modarchive.org/index.php"
    "?request=view_artist_modules&query={user}&page={page}#mods"
)

# Seconds to wait for the listing page before giving up instead of hanging.
REQUEST_TIMEOUT = 30

# Patterns handed to BeautifulSoup's find_all() filters.
DOWNLOAD_HREF_RE = re.compile("download")
PAGINATION_CLASS_RE = re.compile(".*pagination.*")

# A pagination label counts as a page number only if it is all ASCII digits.
PAGE_LABEL_RE = re.compile(r"[0-9]+")


class DownloadProgressBar(tqdm):
    """tqdm bar that can be driven by ``urllib.request.urlretrieve``."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar to ``b * bsize`` bytes (``urlretrieve`` reporthook).

        Args:
            b: Number of blocks transferred so far.
            bsize: Size of one block, in bytes.
            tsize: Total size in bytes.  ``None`` or a negative value (what
                ``urlretrieve`` reports when the size is unknown) leaves the
                bar's total untouched.
        """
        if tsize is not None and tsize >= 0:
            self.total = tsize
        # update() takes an increment, so subtract what is already shown.
        self.update(b * bsize - self.n)


def download_url(url, out_path):
    """Download ``url`` to ``out_path`` while showing a progress bar.

    Args:
        url: Absolute URL to fetch.
        out_path: Path of the local file to write.

    Raises:
        OSError: Propagated from ``urlretrieve`` (``URLError`` and friends
            are ``OSError`` subclasses) or from writing the file.
    """
    with DownloadProgressBar(unit='B', unit_scale=True, miniters=1,
                             desc=url.split('/')[-1]) as t:
        urllib.request.urlretrieve(url, filename=out_path,
                                   reporthook=t.update_to)


def _file_name_from_href(href):
    """Return a safe local file name from the ``#fragment`` of ``href``.

    Returns ``None`` when the href carries no usable name.
    """
    parts = href.split("#")
    if len(parts) < 2:
        return None
    # The fragment comes from remote HTML: keep only the last path component
    # so it can never point outside the current directory.
    name = os.path.basename(parts[1].replace("\\", "/"))
    if name in ("", ".", ".."):
        return None
    return name


def _download_page_modules(soup, page_url):
    """Download every module linked from one parsed listing page."""
    for link in soup.find_all('a', href=DOWNLOAD_HREF_RE):
        href = link.get("href")
        name = _file_name_from_href(href)
        if name is None:
            print("Can't get file name... :/")
            continue
        print("Downloading {} module...".format(name))
        # urljoin leaves absolute hrefs untouched and resolves relative ones.
        download_url(urllib.parse.urljoin(page_url, href), name)


def _next_page(soup):
    """Return the page number after the selected one, or ``None``.

    A next page exists only when a link with the exact class "pagination"
    is labelled with the selected page number plus one.  With no selected
    link the current page is treated as 0, as the original script did.
    """
    selected = 0
    available = set()
    for link in soup.find_all('a', class_=PAGINATION_CLASS_RE):
        label = link.text.strip()
        if not PAGE_LABEL_RE.fullmatch(label):
            continue  # "next", "last", arrows, ... are not page numbers
        classes = link.get("class") or []
        if "pagination-selected" in classes:
            selected = int(label)
        if "pagination" in classes:
            available.add(int(label))
    candidate = selected + 1
    return candidate if candidate in available else None


def _with_page(url, page):
    """Return ``url`` with its ``page`` query parameter set to ``page``."""
    parts = urllib.parse.urlsplit(url)
    query = [
        (key, value)
        for key, value in urllib.parse.parse_qsl(parts.query,
                                                 keep_blank_values=True)
        if key != "page"
    ]
    query.append(("page", str(page)))
    return urllib.parse.urlunsplit(
        parts._replace(query=urllib.parse.urlencode(query)))


def get_mods(url):
    """Download all modules from the listing at ``url`` and following pages.

    Args:
        url: URL of a listing page.  Later pages are reached by replacing
            this URL's ``page`` query parameter.

    Raises:
        requests.RequestException: The listing page could not be fetched or
            the server answered with an HTTP error status.
        OSError: A module download failed.
    """
    requested = set()  # next-page numbers already followed, to break cycles
    while True:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "lxml")

        _download_page_modules(soup, url)

        page = _next_page(soup)
        if page is None or page in requested:
            return
        requested.add(page)
        url = _with_page(url, page)


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("Usage:")
        print("  {} <user>".format(argv[0]))
        return 1

    user = argv[1]
    url = ARTIST_PAGE_URL.format(user=urllib.parse.quote(user, safe=""),
                                 page=1)
    try:
        get_mods(url)
    except (requests.RequestException, OSError) as err:
        print("Download failed: {}".format(err), file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())