diff options
-rw-r--r-- | extractors/common.py | 11 | ||||
-rw-r--r-- | extractors/job.py | 19 | ||||
-rw-r--r-- | main.py | 29 | ||||
-rw-r--r-- | utils.py | 3 |
4 files changed, 51 insertions, 11 deletions
diff --git a/extractors/common.py b/extractors/common.py index db8f5c1..da3e59c 100644 --- a/extractors/common.py +++ b/extractors/common.py @@ -1,4 +1,5 @@ import os +import re from pathlib import Path from sh import youtube_dl from utils import read_file, write_file @@ -7,7 +8,13 @@ from utils import read_file, write_file class Extractor(): def __init__(self, reg, url): - self.root = reg.group(1) + self.root = None + if not reg: + r = re.search(r'(^http(?:s|):(?:\/\/.*?\/|\/\/.*))', url) + if r: + self.root = r.group(1) + if not self.root: + self.root = reg.group(1) self._albums = [] self.root_path = self._root_path() self._update_cache(self.root) @@ -26,7 +33,7 @@ class Extractor(): for url in urls_cache: if url.startswith(self.root): return - write_file(cache_file, self.root) + write_file(cache_file, self.root + "," + self.__class__.__name__) def _yt_wrapper(self, url, output): for line in youtube_dl( diff --git a/extractors/job.py b/extractors/job.py index 596a3ae..795617a 100644 --- a/extractors/job.py +++ b/extractors/job.py @@ -3,6 +3,8 @@ import re import importlib import sys +from utils import NoExtractorException + extrs = [ 'bandcamp' ] @@ -10,12 +12,16 @@ extrs = [ class DlJob(): - def __init__(self, url, output): - self.extr = self._find(url) + def __init__(self, url, output, extractor=None): + if extractor in extrs: + cls = self._get_class(extractor) + self.extr = cls(reg=None, url=url) + else: + self.extr = self._find(url) self.output = output self._albums = [] if not self.extr: - logging.error("No extractor found for " + url + ".") + raise NoExtractorException("No extractor found for " + url + ".") def _find(self, url): for cls in self._list_extractors(): @@ -41,6 +47,13 @@ class DlJob(): ) ] + def _get_class(self, extractor): + module = importlib.import_module('.' + extractor, __package__) + classes = self._get_classes(module) + for cls in classes: + if cls.__name__ == extractor: + return cls + def run(self): self.extr.get_albums() self.extr.download_albums(self.output) @@ -2,10 +2,13 @@ import os import argparse +import logging from pathlib import Path from extractors.job import DlJob from utils import read_file +from utils import NoExtractorException + module_path = os.path.abspath(__file__) ROOT = Path(module_path).parent @@ -19,6 +22,7 @@ parser.add_argument( help="folder where to put downloaded albums. " "Default to: " + str(ROOT) + "/out/", default=str(ROOT) + "/out/") +parser.add_argument('--extractor', help="name of the extractor") args = parser.parse_args() @@ -34,13 +38,23 @@ if args.update: urls_cache = read_file(cache_file) for url in urls_cache: - dl_job = DlJob(url, args.output) - dl_job.run() + try: + args.extractor = url.split(',')[1] + except IndexError: + pass + try: + dl_job = DlJob(url, args.output, args.extractor) + dl_job.run() + except NoExtractorException as exc: + logging.error(exc) if args.url: print('Downloading from url...') - dl_job = DlJob(args.url, args.output) - dl_job.run() + try: + dl_job = DlJob(args.url, args.output, args.extractor) + dl_job.run() + except NoExtractorException as exc: + logging.error(exc) if args.file: print("Downloading from file...") @@ -49,8 +63,11 @@ if args.file: for url in urls: if url: - dl_job = DlJob(url, args.output) - dl_job.run() + try: + dl_job = DlJob(url, args.output, args.extractor) + dl_job.run() + except NoExtractorException as exc: + logging.error(exc) if not args.url and not args.update and not args.file: parser.print_help() @@ -21,3 +21,6 @@ def read_file(filename): def write_file(filename, data): with open(filename, 'a') as filehandler: filehandler.write(data+'\n') + +class NoExtractorException(Exception): + pass |