From e2099a3e5c8f38c8bae88d862950b0c2bf5b1d13 Mon Sep 17 00:00:00 2001 From: neodarz Date: Sat, 15 Aug 2020 17:08:58 +0200 Subject: Add option to specify extractor --- extractors/common.py | 11 +++++++++-- extractors/job.py | 19 ++++++++++++++++--- main.py | 29 +++++++++++++++++++++++------ utils.py | 3 +++ 4 files changed, 51 insertions(+), 11 deletions(-) diff --git a/extractors/common.py b/extractors/common.py index db8f5c1..da3e59c 100644 --- a/extractors/common.py +++ b/extractors/common.py @@ -1,4 +1,5 @@ import os +import re from pathlib import Path from sh import youtube_dl from utils import read_file, write_file @@ -7,7 +8,13 @@ from utils import read_file, write_file class Extractor(): def __init__(self, reg, url): - self.root = reg.group(1) + self.root = None + if not reg: + r = re.search(r'(^http(?:s|):(?:\/\/.*?\/|\/\/.*))', url) + if r: + self.root = r.group(1) + if not self.root: + self.root = reg.group(1) self._albums = [] self.root_path = self._root_path() self._update_cache(self.root) @@ -26,7 +33,7 @@ class Extractor(): for url in urls_cache: if url.startswith(self.root): return - write_file(cache_file, self.root) + write_file(cache_file, self.root + "," + self.__class__.__name__) def _yt_wrapper(self, url, output): for line in youtube_dl( diff --git a/extractors/job.py b/extractors/job.py index 596a3ae..795617a 100644 --- a/extractors/job.py +++ b/extractors/job.py @@ -3,6 +3,8 @@ import re import importlib import sys +from utils import NoExtractorException + extrs = [ 'bandcamp' ] @@ -10,12 +12,16 @@ extrs = [ class DlJob(): - def __init__(self, url, output): - self.extr = self._find(url) + def __init__(self, url, output, extractor=None): + if extractor in extrs: + cls = self._get_class(extractor) + self.extr = cls(reg=None, url=url) + else: + self.extr = self._find(url) self.output = output self._albums = [] if not self.extr: - logging.error("No extractor found for " + url + ".") + raise NoExtractorException("No extractor found for " + url + 
".") def _find(self, url): for cls in self._list_extractors(): @@ -41,6 +47,13 @@ class DlJob(): ) ] + def _get_class(self, extractor): + module = importlib.import_module('.' + extractor, __package__) + classes = self._get_classes(module) + for cls in classes: + if cls.__name__ == extractor: + return cls + def run(self): self.extr.get_albums() self.extr.download_albums(self.output) diff --git a/main.py b/main.py index 3c41953..7cc352a 100644 --- a/main.py +++ b/main.py @@ -2,10 +2,13 @@ import os import argparse +import logging from pathlib import Path from extractors.job import DlJob from utils import read_file +from utils import NoExtractorException + module_path = os.path.abspath(__file__) ROOT = Path(module_path).parent @@ -19,6 +22,7 @@ parser.add_argument( help="folder where to put downloaded albums. " "Default to: " + str(ROOT) + "/out/", default=str(ROOT) + "/out/") +parser.add_argument('--extractor', help="name of the extractor") args = parser.parse_args() @@ -34,13 +38,23 @@ if args.update: urls_cache = read_file(cache_file) for url in urls_cache: - dl_job = DlJob(url, args.output) - dl_job.run() + try: + args.extractor = url.split(',')[1] + except IndexError: + pass + try: + dl_job = DlJob(url, args.output, args.extractor) + dl_job.run() + except NoExtractorException as exc: + logging.error(exc) if args.url: print('Downloading from url...') - dl_job = DlJob(args.url, args.output) - dl_job.run() + try: + dl_job = DlJob(args.url, args.output, args.extractor) + dl_job.run() + except NoExtractorException as exc: + logging.error(exc) if args.file: print("Downloading from file...") @@ -49,8 +63,11 @@ if args.file: for url in urls: if url: - dl_job = DlJob(url, args.output) - dl_job.run() + try: + dl_job = DlJob(url, args.output, args.extractor) + dl_job.run() + except NoExtractorException as exc: + logging.error(exc) if not args.url and not args.update and not args.file: parser.print_help() diff --git a/utils.py b/utils.py index 58188b1..972e92f 100644 --- 
a/utils.py +++ b/utils.py @@ -21,3 +21,6 @@ def read_file(filename): def write_file(filename, data): with open(filename, 'a') as filehandler: filehandler.write(data+'\n') + +class NoExtractorException(Exception): + pass -- cgit v1.2.1