aboutsummaryrefslogtreecommitdiff
path: root/extractors
diff options
context:
space:
mode:
author neodarz <neodarz@neodarz.net> 2020-08-15 14:07:15 +0200
committer neodarz <neodarz@neodarz.net> 2020-08-15 14:07:15 +0200
commit c4fb7fda5b1b6bb22db1f517f71cf393f68c6a9b (patch)
tree 746b1c9e7d5b2f49e338706fec9f9c0d74f9f7c3 /extractors
download music_downloader-c4fb7fda5b1b6bb22db1f517f71cf393f68c6a9b.tar.xz
music_downloader-c4fb7fda5b1b6bb22db1f517f71cf393f68c6a9b.zip
Initial commit
Diffstat (limited to 'extractors')
-rw-r--r-- extractors/__init__.py 0
-rw-r--r-- extractors/__pycache__/__init__.cpython-38.pyc bin 0 -> 155 bytes
-rw-r--r-- extractors/__pycache__/bandcamp.cpython-38.pyc bin 0 -> 1220 bytes
-rw-r--r-- extractors/__pycache__/common.cpython-38.pyc bin 0 -> 1726 bytes
-rw-r--r-- extractors/__pycache__/job.cpython-38.pyc bin 0 -> 1831 bytes
-rw-r--r-- extractors/bandcamp.py 26
-rw-r--r-- extractors/common.py 42
-rw-r--r-- extractors/job.py 47
8 files changed, 115 insertions, 0 deletions
diff --git a/extractors/__init__.py b/extractors/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/extractors/__init__.py
diff --git a/extractors/__pycache__/__init__.cpython-38.pyc b/extractors/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..ae96411
--- /dev/null
+++ b/extractors/__pycache__/__init__.cpython-38.pyc
Binary files differ
diff --git a/extractors/__pycache__/bandcamp.cpython-38.pyc b/extractors/__pycache__/bandcamp.cpython-38.pyc
new file mode 100644
index 0000000..8ff4e9f
--- /dev/null
+++ b/extractors/__pycache__/bandcamp.cpython-38.pyc
Binary files differ
diff --git a/extractors/__pycache__/common.cpython-38.pyc b/extractors/__pycache__/common.cpython-38.pyc
new file mode 100644
index 0000000..641e251
--- /dev/null
+++ b/extractors/__pycache__/common.cpython-38.pyc
Binary files differ
diff --git a/extractors/__pycache__/job.cpython-38.pyc b/extractors/__pycache__/job.cpython-38.pyc
new file mode 100644
index 0000000..68dee63
--- /dev/null
+++ b/extractors/__pycache__/job.cpython-38.pyc
Binary files differ
diff --git a/extractors/bandcamp.py b/extractors/bandcamp.py
new file mode 100644
index 0000000..cc383ae
--- /dev/null
+++ b/extractors/bandcamp.py
@@ -0,0 +1,26 @@
+import re
+import logging
+import requests
+from bs4 import BeautifulSoup
+
+from .common import Extractor
+
class bandcamp(Extractor):
    """Extractor that collects album URLs from a Bandcamp site root.

    ``pattern`` captures the site root (scheme, host and trailing slash) as
    group 1; the ``Extractor`` base class stores that group as ``self.root``.
    """

    # The dots are escaped and the host part may only contain hostname
    # characters, so the captured root always ends at the first '/' after the
    # host instead of greedily swallowing a later "bandcamp.com/" that happens
    # to appear in the path or query string.
    pattern = re.compile(r'(https?://(?:[\w.-]+\.)?bandcamp\.com/)')
    filename_template = "%(artist)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"

    def get_albums(self):
        """Fetch ``self.root`` and append every album link to ``self._albums``.

        Each ``<a href>`` on the page is resolved against ``self.root``; links
        whose path contains ``/album/`` are kept, without duplicates and in
        page order. A warning is logged when nothing was found.
        """
        # Local import keeps this block self-contained (stdlib only).
        from urllib.parse import urljoin, urlparse

        # A timeout prevents the job from hanging forever on a stalled server.
        r = requests.get(self.root, timeout=30)
        soup = BeautifulSoup(r.text, 'html.parser')
        for link in soup.select('a[href]'):
            # urljoin handles relative ("/album/x", "album/x") and absolute
            # hrefs alike; plain concatenation would mangle the latter two.
            url = urljoin(self.root, link['href'])
            if '/album/' not in urlparse(url).path:
                continue
            if url not in self._albums:
                self._albums.append(url)

        if not self._albums:
            logging.warning("No albums found at %s ????", self.root)
diff --git a/extractors/common.py b/extractors/common.py
new file mode 100644
index 0000000..db8f5c1
--- /dev/null
+++ b/extractors/common.py
@@ -0,0 +1,42 @@
+import os
+from pathlib import Path
+from sh import youtube_dl
+from utils import read_file, write_file
+
+
class Extractor:
    """Base class for site extractors.

    Subclasses are expected to provide a ``filename_template`` attribute
    (read by ``_yt_wrapper``) and to populate ``self._albums`` with the URLs
    that ``download_albums`` should fetch.
    """

    def __init__(self, reg, url):
        """Store the site root and record it in the URL cache file.

        Args:
            reg: Regex match object; group 1 is used as the site root URL.
            url: The URL the match was produced from (not used here).
        """
        self.root = reg.group(1)
        self._albums = []
        self.root_path = self._root_path()
        self._update_cache(self.root)

    def _root_path(self):
        """Return the parent of the directory that contains this file."""
        return Path(os.path.abspath(__file__)).parent.parent

    def _update_cache(self, url):
        """Add ``url`` to ``.urls_cache.txt`` unless an entry already covers it.

        An entry "covers" ``url`` when it starts with ``url``. The cache file
        lives in ``self.root_path``.

        Args:
            url: URL to record; ``__init__`` passes ``self.root``.
        """
        cache_file = Path(self.root_path, '.urls_cache.txt')
        # NOTE(review): read_file is assumed to return an iterable of cached
        # URL strings -- confirm against utils.read_file.
        cached_urls = read_file(cache_file)

        if any(cached.startswith(url) for cached in cached_urls):
            return
        write_file(cache_file, url)

    def _yt_wrapper(self, url, output):
        """Run youtube-dl on ``url`` and echo its output line by line.

        Args:
            url: URL handed to youtube-dl.
            output: Directory prefix for the output filename template.
        """
        # os.path.join inserts the separator when ``output`` lacks a trailing
        # one and also accepts path-like objects, unlike ``output + template``.
        template = os.path.join(output, self.filename_template)
        # NOTE(review): youtube-dl documents --audio-format as only taking
        # effect together with -x/--extract-audio -- confirm this is intended.
        for line in youtube_dl(
                url,
                audio_format="mp3",
                add_metadata=True,
                o=template,
                _iter=True):
            print(line.strip())

    def download_albums(self, output):
        """Download every URL in ``self._albums`` into ``output``."""
        for album in self._albums:
            print("Parsing " + album + "...")
            self._yt_wrapper(album, output)
diff --git a/extractors/job.py b/extractors/job.py
new file mode 100644
index 0000000..ce44e9a
--- /dev/null
+++ b/extractors/job.py
@@ -0,0 +1,47 @@
+import logging
+import re
+import importlib
+import sys
+
# Extractor module names, imported relative to this package by DlJob.
extrs = ['bandcamp']
+
+
class DlJob:
    """Download job: picks the extractor matching a URL and runs it."""

    def __init__(self, url, output):
        """Find an extractor for ``url``; exit the process if none matches.

        Args:
            url: URL to download from.
            output: Output location passed to the extractor's
                ``download_albums``.

        Raises:
            SystemExit: With status 1 when no extractor pattern matches.
        """
        self.extr = self._find(url)
        self.output = output
        self._albums = []
        if self.extr is None:
            logging.error(url + " is not supported")
            sys.exit(1)

    def _find(self, url):
        """Return an instance of the first extractor matching ``url``.

        Returns ``None`` when no extractor's ``pattern`` matches.
        """
        for cls in self._list_extractors():
            match = cls.pattern.match(url)
            if match:
                return cls(match, url)
        return None

    def _list_extractors(self):
        """Yield the extractor classes of every module named in ``extrs``."""
        for name in extrs:
            module = importlib.import_module('.' + name, __package__)
            yield from self._add_module(module)

    def _add_module(self, module):
        """Return the module's extractor classes with ``pattern`` compiled."""
        classes = self._get_classes(module)
        for cls in classes:
            # re.compile accepts both pattern strings and compiled patterns.
            cls.pattern = re.compile(cls.pattern)
        return classes

    def _get_classes(self, module):
        """Return the classes defined in ``module`` that have a ``pattern``.

        Only real classes are considered: other module-level objects that
        expose a ``pattern`` attribute (e.g. a compiled regex) may have no
        ``__module__`` attribute and would otherwise raise AttributeError.
        Classes merely imported into ``module`` are excluded by comparing
        ``__module__`` with the module's name.
        """
        return [
            value for value in vars(module).values()
            if isinstance(value, type)
            and hasattr(value, "pattern")
            and value.__module__ == module.__name__
        ]

    def run(self):
        """Collect the album URLs, then download them to ``self.output``."""
        self.extr.get_albums()
        self.extr.download_albums(self.output)