about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  extractors/common.py  11
-rw-r--r--  extractors/job.py     19
-rw-r--r--  main.py               29
-rw-r--r--  utils.py               3
4 files changed, 51 insertions, 11 deletions
diff --git a/extractors/common.py b/extractors/common.py
index db8f5c1..da3e59c 100644
--- a/extractors/common.py
+++ b/extractors/common.py
@@ -1,4 +1,5 @@
import os
+import re
from pathlib import Path
from sh import youtube_dl
from utils import read_file, write_file
@@ -7,7 +8,13 @@ from utils import read_file, write_file
class Extractor():
def __init__(self, reg, url):
- self.root = reg.group(1)
+ self.root = None
+ if not reg:
+ r = re.search(r'(^http(?:s|):(?:\/\/.*?\/|\/\/.*))', url)
+ if r:
+ self.root = r.group(1)
+ if not self.root:
+ self.root = reg.group(1)
self._albums = []
self.root_path = self._root_path()
self._update_cache(self.root)
@@ -26,7 +33,7 @@ class Extractor():
for url in urls_cache:
if url.startswith(self.root):
return
- write_file(cache_file, self.root)
+ write_file(cache_file, self.root + "," + self.__class__.__name__)
def _yt_wrapper(self, url, output):
for line in youtube_dl(
diff --git a/extractors/job.py b/extractors/job.py
index 596a3ae..795617a 100644
--- a/extractors/job.py
+++ b/extractors/job.py
@@ -3,6 +3,8 @@ import re
import importlib
import sys
+from utils import NoExtractorException
+
extrs = [
'bandcamp'
]
@@ -10,12 +12,16 @@ extrs = [
class DlJob():
- def __init__(self, url, output):
- self.extr = self._find(url)
+ def __init__(self, url, output, extractor=None):
+ if extractor in extrs:
+ cls = self._get_class(extractor)
+ self.extr = cls(reg=None, url=url)
+ else:
+ self.extr = self._find(url)
self.output = output
self._albums = []
if not self.extr:
- logging.error("No extractor found for " + url + ".")
+ raise NoExtractorException("No extractor found for " + url + ".")
def _find(self, url):
for cls in self._list_extractors():
@@ -41,6 +47,13 @@ class DlJob():
)
]
+ def _get_class(self, extractor):
+ module = importlib.import_module('.' + extractor, __package__)
+ classes = self._get_classes(module)
+ for cls in classes:
+ if cls.__name__ == extractor:
+ return cls
+
def run(self):
self.extr.get_albums()
self.extr.download_albums(self.output)
diff --git a/main.py b/main.py
index 3c41953..7cc352a 100644
--- a/main.py
+++ b/main.py
@@ -2,10 +2,13 @@
import os
import argparse
+import logging
from pathlib import Path
from extractors.job import DlJob
from utils import read_file
+from utils import NoExtractorException
+
module_path = os.path.abspath(__file__)
ROOT = Path(module_path).parent
@@ -19,6 +22,7 @@ parser.add_argument(
help="folder where to put downloaded albums. "
"Default to: " + str(ROOT) + "/out/",
default=str(ROOT) + "/out/")
+parser.add_argument('--extractor', help="name of the extractor")
args = parser.parse_args()
@@ -34,13 +38,23 @@ if args.update:
urls_cache = read_file(cache_file)
for url in urls_cache:
- dl_job = DlJob(url, args.output)
- dl_job.run()
+ try:
+ args.extractor = url.split(',')[1]
+ except IndexError:
+ pass
+ try:
+ dl_job = DlJob(url, args.output, args.extractor)
+ dl_job.run()
+ except NoExtractorException as exc:
+ logging.error(exc)
if args.url:
print('Downloading from url...')
- dl_job = DlJob(args.url, args.output)
- dl_job.run()
+ try:
+ dl_job = DlJob(args.url, args.output, args.extractor)
+ dl_job.run()
+ except NoExtractorException as exc:
+ logging.error(exc)
if args.file:
print("Downloading from file...")
@@ -49,8 +63,11 @@ if args.file:
for url in urls:
if url:
- dl_job = DlJob(url, args.output)
- dl_job.run()
+ try:
+ dl_job = DlJob(url, args.output, args.extractor)
+ dl_job.run()
+ except NoExtractorException as exc:
+ logging.error(exc)
if not args.url and not args.update and not args.file:
parser.print_help()
diff --git a/utils.py b/utils.py
index 58188b1..972e92f 100644
--- a/utils.py
+++ b/utils.py
@@ -21,3 +21,6 @@ def read_file(filename):
def write_file(filename, data):
with open(filename, 'a') as filehandler:
filehandler.write(data+'\n')
+
+class NoExtractorException(Exception):
+ pass