From c4fb7fda5b1b6bb22db1f517f71cf393f68c6a9b Mon Sep 17 00:00:00 2001 From: neodarz Date: Sat, 15 Aug 2020 14:07:15 +0200 Subject: Initial commit --- .gitignore | 4 ++ README.md | 30 ++++++++++++++ extractors/__init__.py | 0 extractors/__pycache__/__init__.cpython-38.pyc | Bin 0 -> 155 bytes extractors/__pycache__/bandcamp.cpython-38.pyc | Bin 0 -> 1220 bytes extractors/__pycache__/common.cpython-38.pyc | Bin 0 -> 1726 bytes extractors/__pycache__/job.cpython-38.pyc | Bin 0 -> 1831 bytes extractors/bandcamp.py | 26 ++++++++++++ extractors/common.py | 42 +++++++++++++++++++ extractors/job.py | 47 +++++++++++++++++++++ main.py | 55 +++++++++++++++++++++++++ requirements.txt | 3 ++ utils.py | 21 ++++++++++ 13 files changed, 228 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 extractors/__init__.py create mode 100644 extractors/__pycache__/__init__.cpython-38.pyc create mode 100644 extractors/__pycache__/bandcamp.cpython-38.pyc create mode 100644 extractors/__pycache__/common.cpython-38.pyc create mode 100644 extractors/__pycache__/job.cpython-38.pyc create mode 100644 extractors/bandcamp.py create mode 100644 extractors/common.py create mode 100644 extractors/job.py create mode 100644 main.py create mode 100644 requirements.txt create mode 100644 utils.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7623b7b --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +__pycache__ + +.urls_cache.txt +out diff --git a/README.md b/README.md new file mode 100644 index 0000000..82b44e9 --- /dev/null +++ b/README.md @@ -0,0 +1,30 @@ +Simple tool to download all albums from an artist + +# Install + +``` +pip install -r requirements.txt +``` + +## Dependencies + +- youtube-dl + +# Usage + +``` +usage: main.py [-h] [--url URL] [--update] [--file FILE] [--output OUTPUT] + +Custom album downloader tool + +optional arguments: + -h, --help show this help message and exit + --url URL link to the file to download + --update
update all albums from cache + --file FILE read url from file + --output OUTPUT folder where to put downloaded albums. Default to: /out/ +``` + +# Supported websites + +- [x] Bandcamp diff --git a/extractors/__init__.py b/extractors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/extractors/__pycache__/__init__.cpython-38.pyc b/extractors/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ae96411 Binary files /dev/null and b/extractors/__pycache__/__init__.cpython-38.pyc differ diff --git a/extractors/__pycache__/bandcamp.cpython-38.pyc b/extractors/__pycache__/bandcamp.cpython-38.pyc new file mode 100644 index 0000000..8ff4e9f Binary files /dev/null and b/extractors/__pycache__/bandcamp.cpython-38.pyc differ diff --git a/extractors/__pycache__/common.cpython-38.pyc b/extractors/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..641e251 Binary files /dev/null and b/extractors/__pycache__/common.cpython-38.pyc differ diff --git a/extractors/__pycache__/job.cpython-38.pyc b/extractors/__pycache__/job.cpython-38.pyc new file mode 100644 index 0000000..68dee63 Binary files /dev/null and b/extractors/__pycache__/job.cpython-38.pyc differ diff --git a/extractors/bandcamp.py b/extractors/bandcamp.py new file mode 100644 index 0000000..cc383ae --- /dev/null +++ b/extractors/bandcamp.py @@ -0,0 +1,26 @@ +import re +import logging +import requests +from bs4 import BeautifulSoup + +from .common import Extractor + +class bandcamp(Extractor): + pattern = re.compile(r'(http(?:s|):\/\/.*bandcamp.com\/)') + filename_template = "%(artist)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" + + def __init__(self, reg, url): + super().__init__(reg, url) + + def get_albums(self): + r = requests.get(self.root) + soup = BeautifulSoup(r.text, 'html.parser') + items = soup.select('a[href]') + for item in items: + if 'album' in item['href']: + url = self.root.rstrip('/') + item['href'] + if url not in self._albums: +
import os
from pathlib import Path


class Extractor:
    """Base class for site-specific album extractors.

    A subclass supplies:

    * ``filename_template`` -- a youtube-dl output template that is appended
      to the output folder when downloading;
    * ``get_albums()`` -- fills ``self._albums`` with album URLs.

    Creating an instance records the site root in ``.urls_cache.txt`` (in
    the project root) so that it can be revisited later.
    """

    def __init__(self, reg, url):
        """Store the site root taken from *reg* and register it in the cache.

        Args:
            reg: Regex match object; ``reg.group(1)`` is used as the site root.
            url: The URL the match was made against. It is accepted for the
                benefit of subclasses and is not used here.
        """
        self.root = reg.group(1)
        self._albums = []
        self.root_path = self._root_path()
        self._update_cache(self.root)

    def _root_path(self):
        """Return the directory two levels above this source file."""
        return Path(os.path.abspath(__file__)).parent.parent

    def _update_cache(self, url):
        """Append *url* to the URL cache unless an entry already covers it.

        An entry "covers" *url* when the cached line starts with it, so a
        root is never written twice.

        Args:
            url: Site root to remember.
        """
        # Imported here so this class can be loaded (and unit-tested)
        # without the project-level ``utils`` module being importable.
        from utils import read_file, write_file

        cache_file = Path(self.root_path, '.urls_cache.txt')
        if any(cached.startswith(url) for cached in read_file(cache_file)):
            return
        write_file(cache_file, url)

    def get_albums(self):
        """Populate ``self._albums``; must be provided by each subclass.

        Raises:
            NotImplementedError: Always, on the base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement get_albums()")

    def _yt_wrapper(self, url, output):
        """Run youtube-dl on *url* and echo its output line by line.

        Args:
            url: Album URL handed to youtube-dl.
            output: Folder that ``filename_template`` is resolved against.
        """
        # Deferred import: ``sh`` resolves the executable when the name is
        # imported, so importing lazily postpones a "command not found"
        # failure until a download is actually requested.
        from sh import youtube_dl

        # os.path.join gives the same result as plain concatenation when
        # *output* already ends with a separator, and still produces a valid
        # path when it does not.
        target = os.path.join(output, self.filename_template)
        for line in youtube_dl(
                url,
                audio_format="mp3",
                add_metadata=True,
                o=target,
                _iter=True):
            print(line.strip())

    def download_albums(self, output):
        """Download every URL collected in ``self._albums`` into *output*.

        Args:
            output: Destination folder passed through to ``_yt_wrapper``.
        """
        for album in self._albums:
            print("Parsing " + album + "...")
            self._yt_wrapper(album, output)
self._add_module(module) + + def _add_module(self, module): + classes = self._get_classes(module) + for cls in classes: + cls.pattern = re.compile(cls.pattern) + return classes + + def _get_classes(self, module): + return [ + cls for cls in module.__dict__.values() if ( + hasattr(cls, "pattern") and cls.__module__ == module.__name__ + ) + ] + + def run(self): + self.extr.get_albums() + self.extr.download_albums(self.output) diff --git a/main.py b/main.py new file mode 100644 index 0000000..e1e16d1 --- /dev/null +++ b/main.py @@ -0,0 +1,55 @@ +#!/bin/python + +import os +import argparse +from pathlib import Path +from extractors.job import DlJob +from utils import read_file + +module_path = os.path.abspath(__file__) +ROOT = Path(module_path).parent + +parser = argparse.ArgumentParser(description="Custom album downloader tool") +parser.add_argument('--url', help="link to the file to download") +parser.add_argument( + '--update', help='update all albums from cache', action="store_true") +parser.add_argument('--file', help="read url from file") +parser.add_argument( + '--output', + help="folder where to put downloaded albums. 
import logging
import sys
from pathlib import Path


def read_file(filename):
    """Return the lines of *filename* with surrounding whitespace stripped.

    A missing file is created empty, so the result is then an empty list.

    Args:
        filename: Location of the file, given either as a ``str`` or as a
            ``pathlib.Path``.

    Returns:
        list[str]: One entry per line of the file, in order.

    Exits:
        Terminates the process with status 1 when *filename* is a directory.
    """
    # Callers hand in both Path objects and plain strings (for instance a
    # value taken straight from the command line); normalise first so the
    # Path-only methods below work for either.
    path = Path(filename)

    if path.is_dir():
        logging.fatal(f'{filename} is a folder instead of a file!')
        sys.exit(1)
    if not path.is_file():
        path.touch()

    with open(path) as filehandler:
        return [line.strip() for line in filehandler]


def write_file(filename, data):
    """Append *data* followed by a newline to *filename*.

    Args:
        filename: File to append to; created if it does not exist.
        data: Text to write, without the trailing newline.
    """
    with open(filename, 'a') as filehandler:
        filehandler.write(data + '\n')