From e2099a3e5c8f38c8bae88d862950b0c2bf5b1d13 Mon Sep 17 00:00:00 2001 From: neodarz Date: Sat, 15 Aug 2020 17:08:58 +0200 Subject: Add option to specifie extractor --- extractors/common.py | 11 +++++++++-- extractors/job.py | 19 ++++++++++++++++--- 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'extractors') diff --git a/extractors/common.py b/extractors/common.py index db8f5c1..da3e59c 100644 --- a/extractors/common.py +++ b/extractors/common.py @@ -1,4 +1,5 @@ import os +import re from pathlib import Path from sh import youtube_dl from utils import read_file, write_file @@ -7,7 +8,13 @@ from utils import read_file, write_file class Extractor(): def __init__(self, reg, url): - self.root = reg.group(1) + self.root = None + if not reg: + r = re.search(r'(^http(?:s|):(?:\/\/.*?\/|\/\/.*))', url) + if r: + self.root = r.group(1) + if not self.root: + self.root = reg.group(1) self._albums = [] self.root_path = self._root_path() self._update_cache(self.root) @@ -26,7 +33,7 @@ class Extractor(): for url in urls_cache: if url.startswith(self.root): return - write_file(cache_file, self.root) + write_file(cache_file, self.root + "," + self.__class__.__name__) def _yt_wrapper(self, url, output): for line in youtube_dl( diff --git a/extractors/job.py b/extractors/job.py index 596a3ae..795617a 100644 --- a/extractors/job.py +++ b/extractors/job.py @@ -3,6 +3,8 @@ import re import importlib import sys +from utils import NoExtractorException + extrs = [ 'bandcamp' ] @@ -10,12 +12,16 @@ extrs = [ class DlJob(): - def __init__(self, url, output): - self.extr = self._find(url) + def __init__(self, url, output, extractor=None): + if extractor in extrs: + cls = self._get_class(extractor) + self.extr = cls(reg=None, url=url) + else: + self.extr = self._find(url) self.output = output self._albums = [] if not self.extr: - logging.error("No extractor found for " + url + ".") + raise NoExtractorException("No extractor found for " + url + ".") def _find(self, url): for cls in self._list_extractors(): @@ -41,6 +47,13 @@ class DlJob(): ) ] + def _get_class(self, extractor): + module = importlib.import_module('.' + extractor, __package__) + classes = self._get_classes(module) + for cls in classes: + if cls.__name__ == extractor: + return cls + def run(self): self.extr.get_albums() self.extr.download_albums(self.output) -- cgit v1.2.1