aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorneodarz <neodarz@neodarz.net>2019-07-07 12:00:59 +0200
committerneodarz <neodarz@neodarz.net>2019-07-07 12:00:59 +0200
commitb06edf3abc5bd6b957456bf1a988912ec585a0e6 (patch)
tree499685acfaa536123b0ec2a4aec012b7029c5706
parent6eed18304fe27b19ca5ec65ce712142fe0d78ae6 (diff)
downloadthasearch.sh-b06edf3abc5bd6b957456bf1a988912ec585a0e6.tar.xz
thasearch.sh-b06edf3abc5bd6b957456bf1a988912ec585a0e6.zip
Add script for downloading modules
-rw-r--r--modules-dl.py69
1 files changed, 69 insertions, 0 deletions
diff --git a/modules-dl.py b/modules-dl.py
new file mode 100644
index 0000000..b6ce2b1
--- /dev/null
+++ b/modules-dl.py
@@ -0,0 +1,69 @@
+#!/bin/python
+
+from bs4 import BeautifulSoup
+import requests
+import lxml
+import re
+
+from tqdm import tqdm
+import urllib.request
+
+
+import sys
+
# The artist/user id must be given as the first command-line argument.
if len(sys.argv) < 2:
    # Usage text goes to stderr so it never mixes with regular output.
    print("Usage:", file=sys.stderr)
    print(" {} <user_id>".format(sys.argv[0]), file=sys.stderr)
    # sys.exit() rather than the bare exit(): the latter is injected by the
    # `site` module for interactive sessions and is missing under `python -S`.
    sys.exit(1)

# Id that gets interpolated into the modarchive.org artist listing URLs.
user = sys.argv[1]
+
class DownloadProgressBar(tqdm):
    """tqdm bar exposing a callback usable as a ``urlretrieve`` reporthook."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar to the absolute position ``b * bsize``.

        Args:
            b: Number of blocks transferred so far.
            bsize: Size of one block.
            tsize: Total size; when not ``None`` it replaces ``self.total``.
        """
        if tsize is not None:
            self.total = tsize
        # tqdm.update() takes an increment, so subtract what is already shown.
        transferred = b * bsize
        self.update(transferred - self.n)
+
def download_url(url, out_path):
    """Fetch *url* into the file *out_path* while showing a progress bar.

    The bar is labelled with whatever follows the last ``/`` in *url*.
    """
    label = url.rsplit('/', 1)[-1]
    with DownloadProgressBar(
        unit='B',
        unit_scale=True,
        miniters=1,
        desc=label,
    ) as bar:
        urllib.request.urlretrieve(
            url,
            filename=out_path,
            reporthook=bar.update_to,
        )
+
def get_mods(url):
    """Download every module linked from an artist listing, page by page.

    Fetches *url*, downloads each ``<a>`` whose ``href`` contains
    "download" (the local file name is the part after ``#`` in the href),
    then follows the numbered pagination links until no next page exists.

    Follow-up page URLs are rebuilt from the module-level ``user`` id; they
    are not derived from *url*.

    Args:
        url: Address of the first listing page to scrape.

    Raises:
        requests.HTTPError: If a listing page answers with an error status.
    """
    import os
    from urllib.parse import urljoin

    download_link = re.compile("download")
    pagination_link = re.compile(".*pagination.*")
    # Page numbers already handled; stops a refetch loop when the page
    # cannot tell us which entry is currently selected.
    visited = set()

    while True:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "lxml")

        for link in soup.find_all('a', href=download_link):
            href = link.get("href")
            parts = href.split("#")
            # The fragment comes from remote HTML: keep only its final path
            # component so it cannot point outside the working directory.
            name = os.path.basename(parts[1]) if len(parts) > 1 else ""
            if name and name not in (".", ".."):
                print("Downloading {} module...".format(name))
                # Resolve relative hrefs against the page they came from.
                download_url(urljoin(url, href), name)
            else:
                print("Can't get file name... :/")

        current = 0
        pages = set()
        for link in soup.find_all('a', class_=pagination_link):
            label = link.text.strip()
            # Only purely numeric labels are page numbers; anything else
            # (arrows, "next", ...) would make int() fail.
            if not label.isdecimal():
                continue
            classes = link.get("class")
            if "pagination-selected" in classes:
                current = int(label)
            if "pagination" in classes:
                pages.add(int(label))

        visited.add(current)
        next_page = current + 1
        if next_page not in pages or next_page in visited:
            break
        visited.add(next_page)
        url = ("https://modarchive.org/index.php?request=view_artist_modules&query="
               + user + "&page=" + str(next_page) + "#mods")
+
+
# Start scraping at the first page of the requested artist's module list.
url = "".join((
    "https://modarchive.org/index.php?request=view_artist_modules&query=",
    user,
    "&page=1#mods",
))
get_mods(url)