Diffstat (limited to 'extractors')
-rw-r--r--  extractors/__init__.py                           |   0
-rw-r--r--  extractors/__pycache__/__init__.cpython-38.pyc   | bin 0 -> 155 bytes
-rw-r--r--  extractors/__pycache__/bandcamp.cpython-38.pyc   | bin 0 -> 1220 bytes
-rw-r--r--  extractors/__pycache__/common.cpython-38.pyc     | bin 0 -> 1726 bytes
-rw-r--r--  extractors/__pycache__/job.cpython-38.pyc        | bin 0 -> 1831 bytes
-rw-r--r--  extractors/bandcamp.py                           |  26
-rw-r--r--  extractors/common.py                             |  42
-rw-r--r--  extractors/job.py                                |  47
8 files changed, 115 insertions, 0 deletions
diff --git a/extractors/__init__.py b/extractors/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/extractors/__init__.py
diff --git a/extractors/__pycache__/__init__.cpython-38.pyc b/extractors/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..ae96411
--- /dev/null
+++ b/extractors/__pycache__/__init__.cpython-38.pyc
Binary files differ
diff --git a/extractors/__pycache__/bandcamp.cpython-38.pyc b/extractors/__pycache__/bandcamp.cpython-38.pyc
new file mode 100644
index 0000000..8ff4e9f
--- /dev/null
+++ b/extractors/__pycache__/bandcamp.cpython-38.pyc
Binary files differ
diff --git a/extractors/__pycache__/common.cpython-38.pyc b/extractors/__pycache__/common.cpython-38.pyc
new file mode 100644
index 0000000..641e251
--- /dev/null
+++ b/extractors/__pycache__/common.cpython-38.pyc
Binary files differ
diff --git a/extractors/__pycache__/job.cpython-38.pyc b/extractors/__pycache__/job.cpython-38.pyc
new file mode 100644
index 0000000..68dee63
--- /dev/null
+++ b/extractors/__pycache__/job.cpython-38.pyc
Binary files differ
diff --git a/extractors/bandcamp.py b/extractors/bandcamp.py
new file mode 100644
index 0000000..cc383ae
--- /dev/null
+++ b/extractors/bandcamp.py
@@ -0,0 +1,26 @@
+import re
+import logging
+import requests
+from bs4 import BeautifulSoup
+
+from .common import Extractor
+
+class bandcamp(Extractor):
+    pattern = re.compile(r'(https?://.*bandcamp\.com/)')
+    filename_template = "%(artist)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"
+
+    def __init__(self, reg, url):
+        super().__init__(reg, url)
+
+    def get_albums(self):
+        r = requests.get(self.root)
+        soup = BeautifulSoup(r.text, 'html.parser')
+        items = soup.select('a[href]')
+        for item in items:
+            if 'album' in item['href']:
+                url = self.root.rstrip('/') + item['href']
+                if url not in self._albums:
+                    self._albums.append(url)
+
+        if not self._albums:
+            logging.warning("No albums found at %s", self.root)
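
Note on the hunk above: get_albums() builds each album URL by string concatenation, which assumes Bandcamp links albums with relative hrefs such as /album/<name>. A minimal sketch of the same lookup using urllib.parse.urljoin, which also tolerates absolute hrefs; this is illustrative only and not part of the commit:

# Illustrative sketch (not in the commit): join scraped hrefs with urljoin
# instead of concatenating onto the root URL.
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

def find_album_urls(root):
    soup = BeautifulSoup(requests.get(root).text, 'html.parser')
    albums = []
    for link in soup.select('a[href]'):
        if 'album' in link['href']:
            url = urljoin(root, link['href'])  # handles relative and absolute hrefs
            if url not in albums:
                albums.append(url)
    return albums
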
diff --git a/extractors/common.py b/extractors/common.py
new file mode 100644
index 0000000..db8f5c1
--- /dev/null
+++ b/extractors/common.py
@@ -0,0 +1,42 @@
+import os
+from pathlib import Path
+from sh import youtube_dl
+from utils import read_file, write_file
+
+
+class Extractor():
+
+    def __init__(self, reg, url):
+        self.root = reg.group(1)
+        self._albums = []
+        self.root_path = self._root_path()
+        self._update_cache(self.root)
+
+    def _root_path(self):
+        file_path = os.path.abspath(__file__)
+        folder_path = Path(file_path).parent
+        root_path = Path(folder_path).parent
+        return root_path
+
+    def _update_cache(self, url):
+        urls_cache = []
+        cache_file = Path(self.root_path, '.urls_cache.txt')
+        urls_cache = read_file(cache_file)
+
+        for cached_url in urls_cache:
+            if cached_url.startswith(self.root):
+                return
+        write_file(cache_file, self.root)
+
+    def _yt_wrapper(self, url, output):
+        for line in youtube_dl(
+                url, extract_audio=True, audio_format="mp3",
+                add_metadata=True,
+                o=os.path.join(output, self.filename_template),
+                _iter=True):
+            print(line.strip())
+
+    def download_albums(self, output):
+        for album in self._albums:
+            print("Parsing " + album + "...")
+            self._yt_wrapper(album, output)
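
common.py imports read_file and write_file from a utils module that is not included in this diff. Judging only from how _update_cache() calls them (read the cached URLs as a list, append one URL per call), they are assumed to behave roughly like the sketch below; the real helpers may differ:

# Assumed behavior of the utils helpers used by Extractor._update_cache;
# inferred from the call sites above, not taken from this commit.
from pathlib import Path

def read_file(path):
    # Return the file's lines (without newlines), or an empty list if it is missing.
    path = Path(path)
    if not path.exists():
        return []
    return path.read_text().splitlines()

def write_file(path, line):
    # Append a single line, creating the file if necessary.
    with open(path, 'a') as f:
        f.write(line + '\n')
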
diff --git a/extractors/job.py b/extractors/job.py
new file mode 100644
index 0000000..ce44e9a
--- /dev/null
+++ b/extractors/job.py
@@ -0,0 +1,47 @@
+import logging
+import re
+import importlib
+import sys
+
+extrs = [
+    'bandcamp'
+]
+
+
+class DlJob():
+
+    def __init__(self, url, output):
+        self.extr = self._find(url)
+        self.output = output
+        self._albums = []
+        if not self.extr:
+            logging.error("%s is not supported", url)
+            sys.exit(1)
+
+    def _find(self, url):
+        for cls in self._list_extractors():
+            match = cls.pattern.match(url)
+            if match:
+                return cls(match, url)
+
+    def _list_extractors(self):
+        for extr in extrs:
+            module = importlib.import_module('.' + extr, __package__)
+            yield from self._add_module(module)
+
+    def _add_module(self, module):
+        classes = self._get_classes(module)
+        for cls in classes:
+            cls.pattern = re.compile(cls.pattern)
+        return classes
+
+    def _get_classes(self, module):
+        return [
+            cls for cls in module.__dict__.values() if (
+                hasattr(cls, "pattern") and cls.__module__ == module.__name__
+            )
+        ]
+
+    def run(self):
+        self.extr.get_albums()
+        self.extr.download_albums(self.output)
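
Taken together: DlJob matches the given URL against each extractor's pattern, instantiates the matching class, and run() scrapes the album links and hands them to youtube-dl. A minimal usage sketch, assuming the repository root is on sys.path so the extractors package is importable; the commit itself adds no entry point, and the URL below is only an example:

# Minimal usage sketch (not part of the commit): wiring DlJob together.
from extractors.job import DlJob

job = DlJob("https://someartist.bandcamp.com/", "downloads")
job.run()  # get_albums() scrapes album links, download_albums() runs youtube-dl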