about | summary | refs | log | tree | commit | diff
path: root/extractors
diff options
context:
space:
mode:
Diffstat (limited to 'extractors')
-rw-r--r-- extractors/common.py 11
-rw-r--r-- extractors/job.py 19
2 files changed, 25 insertions, 5 deletions
diff --git a/extractors/common.py b/extractors/common.py
index db8f5c1..da3e59c 100644
--- a/extractors/common.py
+++ b/extractors/common.py
@@ -1,4 +1,5 @@
import os
+import re
from pathlib import Path
from sh import youtube_dl
from utils import read_file, write_file
@@ -7,7 +8,13 @@ from utils import read_file, write_file
class Extractor():
def __init__(self, reg, url):
- self.root = reg.group(1)
+ self.root = None
+ if not reg:
+ r = re.search(r'(^http(?:s|):(?:\/\/.*?\/|\/\/.*))', url)
+ if r:
+ self.root = r.group(1)
+ if not self.root:
+ self.root = reg.group(1)
self._albums = []
self.root_path = self._root_path()
self._update_cache(self.root)
@@ -26,7 +33,7 @@ class Extractor():
for url in urls_cache:
if url.startswith(self.root):
return
- write_file(cache_file, self.root)
+ write_file(cache_file, self.root + "," + self.__class__.__name__)
def _yt_wrapper(self, url, output):
for line in youtube_dl(
diff --git a/extractors/job.py b/extractors/job.py
index 596a3ae..795617a 100644
--- a/extractors/job.py
+++ b/extractors/job.py
@@ -3,6 +3,8 @@ import re
import importlib
import sys
+from utils import NoExtractorException
+
extrs = [
'bandcamp'
]
@@ -10,12 +12,16 @@ extrs = [
class DlJob():
- def __init__(self, url, output):
- self.extr = self._find(url)
+ def __init__(self, url, output, extractor=None):
+ if extractor in extrs:
+ cls = self._get_class(extractor)
+ self.extr = cls(reg=None, url=url)
+ else:
+ self.extr = self._find(url)
self.output = output
self._albums = []
if not self.extr:
- logging.error("No extractor found for " + url + ".")
+ raise NoExtractorException("No extractor found for " + url + ".")
def _find(self, url):
for cls in self._list_extractors():
@@ -41,6 +47,13 @@ class DlJob():
)
]
+ def _get_class(self, extractor):
+ module = importlib.import_module('.' + extractor, __package__)
+ classes = self._get_classes(module)
+ for cls in classes:
+ if cls.__name__ == extractor:
+ return cls
+
def run(self):
self.extr.get_albums()
self.extr.download_albums(self.output)