aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore4
-rw-r--r--README.md30
-rw-r--r--extractors/__init__.py0
-rw-r--r--extractors/__pycache__/__init__.cpython-38.pycbin0 -> 155 bytes
-rw-r--r--extractors/__pycache__/bandcamp.cpython-38.pycbin0 -> 1220 bytes
-rw-r--r--extractors/__pycache__/common.cpython-38.pycbin0 -> 1726 bytes
-rw-r--r--extractors/__pycache__/job.cpython-38.pycbin0 -> 1831 bytes
-rw-r--r--extractors/bandcamp.py26
-rw-r--r--extractors/common.py42
-rw-r--r--extractors/job.py47
-rw-r--r--main.py55
-rw-r--r--requirements.txt3
-rw-r--r--utils.py21
13 files changed, 228 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7623b7b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+__pycache__
+
+.urls_cache.txt
+out
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..82b44e9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,30 @@
+Simple tool to download all albums from an artist
+
+# Install
+
+```
+pip install -r requirements.txt
+```
+
+## Dependencies
+
+- youtube-dl
+
+# Usage
+
+```
+usage: main.py [-h] [--url URL] [--update] [--file FILE] [--output OUTPUT]
+
+Custom album downloader tool
+
+optional arguments:
+ -h, --help show this help message and exit
+ --url URL link to the file to download
+ --update update all albums from cache
+ --file FILE read url from file
+ --output OUTPUT folder where to put downloaded albums. Default to: <app_installed_folder>/out/
+```
+
+# Supported websites
+
+- [x] Bandcamp
diff --git a/extractors/__init__.py b/extractors/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/extractors/__init__.py
diff --git a/extractors/__pycache__/__init__.cpython-38.pyc b/extractors/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..ae96411
--- /dev/null
+++ b/extractors/__pycache__/__init__.cpython-38.pyc
Binary files differ
diff --git a/extractors/__pycache__/bandcamp.cpython-38.pyc b/extractors/__pycache__/bandcamp.cpython-38.pyc
new file mode 100644
index 0000000..8ff4e9f
--- /dev/null
+++ b/extractors/__pycache__/bandcamp.cpython-38.pyc
Binary files differ
diff --git a/extractors/__pycache__/common.cpython-38.pyc b/extractors/__pycache__/common.cpython-38.pyc
new file mode 100644
index 0000000..641e251
--- /dev/null
+++ b/extractors/__pycache__/common.cpython-38.pyc
Binary files differ
diff --git a/extractors/__pycache__/job.cpython-38.pyc b/extractors/__pycache__/job.cpython-38.pyc
new file mode 100644
index 0000000..68dee63
--- /dev/null
+++ b/extractors/__pycache__/job.cpython-38.pyc
Binary files differ
diff --git a/extractors/bandcamp.py b/extractors/bandcamp.py
new file mode 100644
index 0000000..cc383ae
--- /dev/null
+++ b/extractors/bandcamp.py
@@ -0,0 +1,26 @@
+import re
+import logging
+import requests
+from bs4 import BeautifulSoup
+
+from .common import Extractor
+
+class bandcamp(Extractor):
+    """Extractor for the album links found on a Bandcamp page."""
+    # Group 1 is the site root: dots are literal and the host may not span "/".
+    pattern = re.compile(r'(https?:\/\/[^\/]*bandcamp\.com\/)')
+    filename_template = "%(artist)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"
+
+    def __init__(self, reg, url):
+        super().__init__(reg, url)
+
+    def get_albums(self):
+        """Fetch self.root and append each new album link to self._albums."""
+        from urllib.parse import urljoin  # handles relative and absolute hrefs
+        soup = BeautifulSoup(requests.get(self.root).text, 'html.parser')
+        for item in soup.select('a[href]'):
+            url = urljoin(self.root, item['href'])
+            if 'album' in item['href'] and url not in self._albums:
+                self._albums.append(url)
+        if not self._albums:
+            logging.warning(f"No albums found at {self.root} ????")
diff --git a/extractors/common.py b/extractors/common.py
new file mode 100644
index 0000000..db8f5c1
--- /dev/null
+++ b/extractors/common.py
@@ -0,0 +1,42 @@
+import os
+from pathlib import Path
+from sh import youtube_dl
+from utils import read_file, write_file
+
+
+class Extractor():
+    """Base extractor: remembers the site root, caches it, downloads albums."""
+
+    def __init__(self, reg, url):
+        self.root = reg.group(1)  # first capture group of the matched pattern
+        self._albums = []  # album URLs; nothing in this class fills the list
+        self.root_path = self._root_path()
+        self._update_cache(self.root)
+
+    def _root_path(self):
+        """Return the parent of the directory that contains this module."""
+        return Path(os.path.abspath(__file__)).parent.parent
+
+    def _update_cache(self, url):
+        """Append url to .urls_cache.txt unless a cached line starts with it."""
+        cache_file = Path(self.root_path, '.urls_cache.txt')
+        # A distinct loop name keeps the url parameter from being shadowed.
+        if not any(cached.startswith(url) for cached in read_file(cache_file)):
+            write_file(cache_file, url)
+
+    def _yt_wrapper(self, url, output):
+        """Run youtube_dl on url and print each line of its output."""
+        # NOTE(review): youtube-dl documents --audio-format as having no
+        # effect without -x; confirm whether extract_audio was intended.
+        for line in youtube_dl(
+                url, audio_format="mp3",
+                add_metadata=True,
+                o=output + self.filename_template,  # set by subclasses
+                _iter=True):
+            print(line.strip())
+
+    def download_albums(self, output):
+        """Download every URL in self._albums below the output prefix."""
+        for album in self._albums:
+            print("Parsing " + album + "...")
+            self._yt_wrapper(album, output)
diff --git a/extractors/job.py b/extractors/job.py
new file mode 100644
index 0000000..ce44e9a
--- /dev/null
+++ b/extractors/job.py
@@ -0,0 +1,47 @@
+import logging
+import re
+import importlib
+import sys
+
+extrs = [
+ 'bandcamp'
+]
+
+
+class DlJob():
+    """Download job: binds a URL to the first extractor whose pattern matches."""
+    def __init__(self, url, output):
+        self.output, self._albums = output, []
+        self.extr = self._find(url)
+        if not self.extr:
+            logging.error(url + " is not supported")
+            sys.exit(1)
+
+    def _find(self, url):
+        """Return an extractor built for url, or None when no pattern matches."""
+        for candidate in self._list_extractors():
+            hit = candidate.pattern.match(url)
+            if hit:
+                return candidate(hit, url)
+
+    def _list_extractors(self):
+        """Import each module named in extrs and yield its extractor classes."""
+        for name in extrs:
+            module = importlib.import_module('.' + name, __package__)
+            yield from self._add_module(module)
+
+    def _add_module(self, module):
+        """Compile the pattern of every class found in module; return them."""
+        found = self._get_classes(module)
+        for extractor in found:
+            extractor.pattern = re.compile(extractor.pattern)
+        return found
+
+    def _get_classes(self, module):
+        """List module members that have a pattern and were defined in module."""
+        return [c for c in vars(module).values()
+                if hasattr(c, "pattern") and c.__module__ == module.__name__]
+
+    def run(self):
+        self.extr.get_albums()  # fills the list that download_albums() iterates
+        self.extr.download_albums(self.output)
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..e1e16d1
--- /dev/null
+++ b/main.py
@@ -0,0 +1,55 @@
+#!/bin/python
+
+import os
+import argparse
+from pathlib import Path
+from extractors.job import DlJob
+from utils import read_file
+
+# Folder holding this script; the cache file and default output live in it.
+module_path = os.path.abspath(__file__)
+ROOT = Path(module_path).parent
+
+parser = argparse.ArgumentParser(description="Custom album downloader tool")
+parser.add_argument('--url', help="link to the file to download")
+parser.add_argument(
+    '--update', help='update all albums from cache', action="store_true")
+parser.add_argument('--file', help="read url from file")
+parser.add_argument(
+    '--output',
+    help="folder where to put downloaded albums. "
+    "Default to: " + str(ROOT) + "/out/",
+    default=str(ROOT) + "/out/")
+
+
+def download_all(urls, output):
+    """Run one DlJob per entry of urls, downloading below output.
+
+    Empty entries are skipped: read_file() returns '' for blank lines.
+    """
+    for url in urls:
+        if url:
+            DlJob(url, output).run()
+
+
+args = parser.parse_args()
+
+# The extractors append a file name template directly to this prefix.
+if not args.output.endswith("/"):
+    args.output = args.output + "/"
+
+if args.update:
+    print('Updating from cache...')
+    download_all(read_file(Path(ROOT, '.urls_cache.txt')), args.output)
+
+if args.url:
+    print('Downloading from url...')
+    download_all([args.url], args.output)
+
+if args.file:
+    print("Downloading from file...")
+    # read_file() calls Path methods, so wrap the string given by argparse.
+    download_all(read_file(Path(args.file)), args.output)
+
+if not args.url and not args.update and not args.file:
+    parser.print_help()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b4396b9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+bs4
+sh
+requests
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..2736206
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,21 @@
+import sys
+import logging
+
+
+def read_file(filename):
+    """Return the stripped lines of filename, given as a str or a Path.
+
+    A missing file is created empty; a folder is fatal and exits with 1."""
+    from pathlib import Path  # imported here so the module header is untouched
+    filename = Path(filename)  # a plain str has no is_dir()/is_file()/touch()
+    if filename.is_dir():
+        logging.fatal(f'{filename} is a folder instead of a file!')
+        sys.exit(1)
+    elif not filename.is_file():
+        filename.touch()
+    with open(filename) as filehandler:
+        return [line.strip() for line in filehandler]
+
+def write_file(filename, data):
+    with open(filename, mode='a') as sink:  # append mode keeps older lines
+        sink.write(data + '\n')