Add option to specify the extractor

author: neodarz <neodarz@neodarz.net> 2020-08-15 17:08:58 +0200
committer: neodarz <neodarz@neodarz.net> 2020-08-15 17:08:58 +0200
commit: e2099a3e5c8f38c8bae88d862950b0c2bf5b1d13 (patch)
tree: 8bec0b584c05b4343ac6f1ea78682747402eb207
parent: e6097b7d5b6122eeecea6a358a8e74cff5a17bbb (diff)
download: music_downloader-e2099a3e5c8f38c8bae88d862950b0c2bf5b1d13.tar.xz
music_downloader-e2099a3e5c8f38c8bae88d862950b0c2bf5b1d13.zip
4 files changed, 51 insertions, 11 deletions
diff --git a/extractors/common.py b/extractors/common.py
index db8f5c1..da3e59c 100644
--- a/extractors/common.py
+++ b/extractors/common.py
@@ -1,4 +1,5 @@
 import os
+import re
 from pathlib import Path
 from sh import youtube_dl
 from utils import read_file, write_file
@@ -7,7 +8,13 @@ from utils import read_file, write_file
 class Extractor():
 
     def __init__(self, reg, url):
-        self.root = reg.group(1)
+        self.root = None
+        if not reg:
+            r = re.search(r'(^http(?:s|):(?:\/\/.*?\/|\/\/.*))', url)
+            if r:
+                self.root = r.group(1)
+        if not self.root:
+            self.root = reg.group(1)
         self._albums = []
         self.root_path = self._root_path()
         self._update_cache(self.root)
@@ -26,7 +33,7 @@ class Extractor():
         for url in urls_cache:
             if url.startswith(self.root):
                 return
-        write_file(cache_file, self.root)
+        write_file(cache_file, self.root + "," + self.__class__.__name__)
 
     def _yt_wrapper(self, url, output):
         for line in youtube_dl(
diff --git a/extractors/job.py b/extractors/job.py
index 596a3ae..795617a 100644
--- a/extractors/job.py
+++ b/extractors/job.py
@@ -3,6 +3,8 @@ import re
 import importlib
 import sys
 
+from utils import NoExtractorException
+
 extrs = [
     'bandcamp'
 ]
@@ -10,12 +12,16 @@ extrs = [
 
 class DlJob():
 
-    def __init__(self, url, output):
-        self.extr = self._find(url)
+    def __init__(self, url, output, extractor=None):
+        if extractor in extrs:
+            cls = self._get_class(extractor)
+            self.extr = cls(reg=None, url=url)
+        else:
+            self.extr = self._find(url)
         self.output = output
         self._albums = []
         if not self.extr:
-            logging.error("No extractor found for " + url + ".")
+            raise NoExtractorException("No extractor found for " + url + ".")
 
     def _find(self, url):
         for cls in self._list_extractors():
@@ -41,6 +47,13 @@ class DlJob():
             )
         ]
 
+    def _get_class(self, extractor):
+        module = importlib.import_module('.' + extractor, __package__)
+        classes = self._get_classes(module)
+        for cls in classes:
+            if cls.__name__ == extractor:
+                return cls
+
     def run(self):
         self.extr.get_albums()
         self.extr.download_albums(self.output)
diff --git a/main.py b/main.py
index 3c41953..7cc352a 100644
--- a/main.py
+++ b/main.py
@@ -2,10 +2,13 @@
 
 import os
 import argparse
+import logging
 from pathlib import Path
 from extractors.job import DlJob
 from utils import read_file
 
+from utils import NoExtractorException
+
 module_path = os.path.abspath(__file__)
 ROOT = Path(module_path).parent
 
@@ -19,6 +22,7 @@ parser.add_argument(
     help="folder where to put downloaded albums. "
     "Default to: " + str(ROOT) + "/out/",
     default=str(ROOT) + "/out/")
+parser.add_argument('--extractor', help="name of the extractor")
 
 args = parser.parse_args()
 
@@ -34,13 +38,23 @@ if args.update:
     urls_cache = read_file(cache_file)
 
     for url in urls_cache:
-        dl_job = DlJob(url, args.output)
-        dl_job.run()
+        try:
+            args.extractor = url.split(',')[1]
+        except IndexError:
+            pass
+        try:
+            dl_job = DlJob(url, args.output, args.extractor)
+            dl_job.run()
+        except NoExtractorException as exc:
+            logging.error(exc)
 
 if args.url:
     print('Downloading from url...')
-    dl_job = DlJob(args.url, args.output)
-    dl_job.run()
+    try:
+        dl_job = DlJob(args.url, args.output, args.extractor)
+        dl_job.run()
+    except NoExtractorException as exc:
+        logging.error(exc)
 
 if args.file:
     print("Downloading from file...")
@@ -49,8 +63,11 @@ if args.file:
 
     for url in urls:
         if url:
-            dl_job = DlJob(url, args.output)
-            dl_job.run()
+            try:
+                dl_job = DlJob(url, args.output, args.extractor)
+                dl_job.run()
+            except NoExtractorException as exc:
+                logging.error(exc)
 
 if not args.url and not args.update and not args.file:
     parser.print_help()
diff --git a/utils.py b/utils.py
index 58188b1..972e92f 100644
--- a/utils.py
+++ b/utils.py
@@ -21,3 +21,6 @@ def read_file(filename):
 def write_file(filename, data):
     with open(filename, 'a') as filehandler:
         filehandler.write(data+'\n')
+
+class NoExtractorException(Exception):
+    pass
author	neodarz <neodarz@neodarz.net>	2020-08-15 17:08:58 +0200
committer	neodarz <neodarz@neodarz.net>	2020-08-15 17:08:58 +0200
commit	e2099a3e5c8f38c8bae88d862950b0c2bf5b1d13 (patch)
tree	8bec0b584c05b4343ac6f1ea78682747402eb207
parent	e6097b7d5b6122eeecea6a358a8e74cff5a17bbb (diff)
download	music_downloader-e2099a3e5c8f38c8bae88d862950b0c2bf5b1d13.tar.xz music_downloader-e2099a3e5c8f38c8bae88d862950b0c2bf5b1d13.zip