From c4fb7fda5b1b6bb22db1f517f71cf393f68c6a9b Mon Sep 17 00:00:00 2001
From: neodarz <neodarz@neodarz.net>
Date: Sat, 15 Aug 2020 14:07:15 +0200
Subject: Initial commit

---
 .gitignore                                     |   4 ++
 README.md                                      |  30 ++++++++++++++
 extractors/__init__.py                         |   0
 extractors/__pycache__/__init__.cpython-38.pyc | Bin 0 -> 155 bytes
 extractors/__pycache__/bandcamp.cpython-38.pyc | Bin 0 -> 1220 bytes
 extractors/__pycache__/common.cpython-38.pyc   | Bin 0 -> 1726 bytes
 extractors/__pycache__/job.cpython-38.pyc      | Bin 0 -> 1831 bytes
 extractors/bandcamp.py                         |  26 ++++++++++++
 extractors/common.py                           |  42 +++++++++++++++++++
 extractors/job.py                              |  47 +++++++++++++++++++++
 main.py                                        |  55 +++++++++++++++++++++++++
 requirements.txt                               |   3 ++
 utils.py                                       |  21 ++++++++++
 13 files changed, 228 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 extractors/__init__.py
 create mode 100644 extractors/__pycache__/__init__.cpython-38.pyc
 create mode 100644 extractors/__pycache__/bandcamp.cpython-38.pyc
 create mode 100644 extractors/__pycache__/common.cpython-38.pyc
 create mode 100644 extractors/__pycache__/job.cpython-38.pyc
 create mode 100644 extractors/bandcamp.py
 create mode 100644 extractors/common.py
 create mode 100644 extractors/job.py
 create mode 100644 main.py
 create mode 100644 requirements.txt
 create mode 100644 utils.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7623b7b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+__pycache__
+
+.urls_cache.txt
+out
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..82b44e9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,30 @@
+Simple tool to download all albums from an artist
+
+# Install
+
+```
+pip install -r requirements.txt
+```
+
+## Dependencies
+
+- youtube-dl
+
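+The `youtube-dl` executable is not installed by `requirements.txt`: the tool
+calls it as an external command (through the `sh` module), so it has to be
+available on your `PATH`. One way to install it, for example:
+
+```
+pip install youtube-dl
+```
+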
+# Usage
+
+```
+usage: main.py [-h] [--url URL] [--update] [--file FILE] [--output OUTPUT]
+
+Custom album downloader tool
+
+optional arguments:
+  -h, --help       show this help message and exit
+  --url URL        URL of the artist page to download
+  --update         update all albums from cache
+  --file FILE      read URLs from a file (one per line)
+  --output OUTPUT  folder where to put downloaded albums. Defaults to: <app_installed_folder>/out/
+```
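+
+For example, to download every album from one artist and then later refresh all
+cached artists (the artist URL below is only a placeholder):
+
+```
+python main.py --url https://someartist.bandcamp.com/
+python main.py --update
+```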
+
+# Supported websites
+
+- [x] Bandcamp
diff --git a/extractors/__init__.py b/extractors/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/extractors/__pycache__/__init__.cpython-38.pyc b/extractors/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..ae96411
Binary files /dev/null and b/extractors/__pycache__/__init__.cpython-38.pyc differ
diff --git a/extractors/__pycache__/bandcamp.cpython-38.pyc b/extractors/__pycache__/bandcamp.cpython-38.pyc
new file mode 100644
index 0000000..8ff4e9f
Binary files /dev/null and b/extractors/__pycache__/bandcamp.cpython-38.pyc differ
diff --git a/extractors/__pycache__/common.cpython-38.pyc b/extractors/__pycache__/common.cpython-38.pyc
new file mode 100644
index 0000000..641e251
Binary files /dev/null and b/extractors/__pycache__/common.cpython-38.pyc differ
diff --git a/extractors/__pycache__/job.cpython-38.pyc b/extractors/__pycache__/job.cpython-38.pyc
new file mode 100644
index 0000000..68dee63
Binary files /dev/null and b/extractors/__pycache__/job.cpython-38.pyc differ
diff --git a/extractors/bandcamp.py b/extractors/bandcamp.py
new file mode 100644
index 0000000..cc383ae
--- /dev/null
+++ b/extractors/bandcamp.py
@@ -0,0 +1,26 @@
+import re
+import logging
+import requests
+from bs4 import BeautifulSoup
+
+from .common import Extractor
+
+class bandcamp(Extractor):
+    pattern = re.compile(r'(https?://[^/]*bandcamp\.com/)')
+    filename_template = "%(artist)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"
+
+    def __init__(self, reg, url):
+        super().__init__(reg, url)
+
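+    # Fetch the artist's root page and collect every unique album URL built from links whose href contains 'album'.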
+    def get_albums(self):
+        r = requests.get(self.root)
+        soup = BeautifulSoup(r.text, 'html.parser')
+        items = soup.select('a[href]')
+        for item in items:
+            if 'album' in item['href']:
+                url = self.root.rstrip('/') + item['href']
+                if url not in self._albums:
+                    self._albums.append(url)
+
+        if not self._albums:
+            logging.warning(f"No albums found at {self.root}")
diff --git a/extractors/common.py b/extractors/common.py
new file mode 100644
index 0000000..db8f5c1
--- /dev/null
+++ b/extractors/common.py
@@ -0,0 +1,42 @@
+import os
+from pathlib import Path
+from sh import youtube_dl
+from utils import read_file, write_file
+
+
+class Extractor():
+
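+    # 'reg' is the match object from the extractor's URL pattern; group(1) is the artist's root URL.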
+    def __init__(self, reg, url):
+        self.root = reg.group(1)
+        self._albums = []
+        self.root_path = self._root_path()
+        self._update_cache(self.root)
+
+    def _root_path(self):
+        file_path = os.path.abspath(__file__)
+        folder_path = Path(file_path).parent
+        root_path = Path(folder_path).parent
+        return root_path
+
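+    # Record the artist's root URL in .urls_cache.txt (used by --update) unless it is already cached.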
+    def _update_cache(self, url):
+        cache_file = Path(self.root_path, '.urls_cache.txt')
+        urls_cache = read_file(cache_file)
+
+        for cached_url in urls_cache:
+            if cached_url.startswith(url):
+                return
+        write_file(cache_file, url)
+
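+    # Run youtube-dl on one album URL (mp3 audio, embedded metadata) and stream its output line by line.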
+    def _yt_wrapper(self, url, output):
+        for line in youtube_dl(
+                url, audio_format="mp3",
+                add_metadata=True,
+                o=output + self.filename_template,
+                _iter=True):
+            print(line.strip())
+
+    def download_albums(self, output):
+        for album in self._albums:
+            print("Parsing " + album + "...")
+            self._yt_wrapper(album, output)
diff --git a/extractors/job.py b/extractors/job.py
new file mode 100644
index 0000000..ce44e9a
--- /dev/null
+++ b/extractors/job.py
@@ -0,0 +1,47 @@
+import logging
+import re
+import importlib
+import sys
+
+extrs = [
+    'bandcamp'
+]
+
+
+class DlJob():
+
+    def __init__(self, url, output):
+        self.extr = self._find(url)
+        self.output = output
+        self._albums = []
+        if not self.extr:
+            logging.error(url + " is not supported")
+            sys.exit(1)
+
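+    # Return an instance of the first extractor whose URL pattern matches, or None if none does.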
+    def _find(self, url):
+        for cls in self._list_extractors():
+            match = cls.pattern.match(url)
+            if match:
+                return cls(match, url)
+
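+    # Import every module listed in 'extrs' and yield the extractor classes it defines.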
+    def _list_extractors(self):
+        for extr in iter(extrs):
+            module = importlib.import_module('.'+extr, __package__)
+            yield from self._add_module(module)
+
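+    # re.compile() is a no-op on an already compiled pattern, so extractors may define
+    # 'pattern' either as a raw string or as a compiled regex.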
+    def _add_module(self, module):
+        classes = self._get_classes(module)
+        for cls in classes:
+            cls.pattern = re.compile(cls.pattern)
+        return classes
+
+    def _get_classes(self, module):
+        return [
+            cls for cls in module.__dict__.values() if (
+                hasattr(cls, "pattern") and cls.__module__ == module.__name__
+            )
+        ]
+
+    def run(self):
+        self.extr.get_albums()
+        self.extr.download_albums(self.output)
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..e1e16d1
--- /dev/null
+++ b/main.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+import os
+import argparse
+from pathlib import Path
+from extractors.job import DlJob
+from utils import read_file
+
+module_path = os.path.abspath(__file__)
+ROOT = Path(module_path).parent
+
+parser = argparse.ArgumentParser(description="Custom album downloader tool")
+parser.add_argument('--url', help="URL of the artist page to download")
+parser.add_argument(
+        '--update', help='update all albums from cache', action="store_true")
+parser.add_argument('--file', help="read URLs from a file (one per line)")
+parser.add_argument(
+    '--output',
+    help="folder where to put downloaded albums. "
+    "Default to: " + str(ROOT) + "/out/",
+    default=str(ROOT) + "/out/")
+
+args = parser.parse_args()
+
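+# Make sure the output path ends with '/' so it can be prefixed directly to youtube-dl's filename template.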
+if not args.output.endswith("/"):
+    args.output = args.output + "/"
+
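+# --update re-downloads every artist URL previously recorded in .urls_cache.txt.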
+if args.update:
+    print('Updating from cache...')
+
+
+    cache_file = Path(ROOT, '.urls_cache.txt')
+
+    urls_cache = read_file(cache_file)
+
+    for url in urls_cache:
+        dl_job = DlJob(url, args.output)
+        dl_job.run()
+
+if args.url:
+    print('Downloading from url...')
+    dl_job = DlJob(args.url, args.output)
+    dl_job.run()
+
+if args.file:
+    print("Downloading from file...")
+
+    urls = read_file(Path(args.file))  # read_file expects a Path; --file arrives as a plain string
+
+    for url in urls:
+        dl_job = DlJob(url, args.output)
+        dl_job.run()
+
+if not args.url and not args.update and not args.file:
+    parser.print_help()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b4396b9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+bs4
+sh
+requests
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..2736206
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,21 @@
+import sys
+import logging
+
+
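+# Read a file into a list of stripped lines, creating it first if it does not exist (fatal if it is a directory).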
+def read_file(filename):
+    lines = []
+
+    if filename.is_dir():
+        logging.fatal(f'{filename} is a folder instead of a file!')
+        sys.exit(1)
+    elif not filename.is_file():
+        filename.touch()
+    with open(filename) as filehandler:
+        for line in filehandler.readlines():
+            lines.append(line.strip())
+
+    return lines
+
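+# Append 'data' as a new line at the end of the file.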
+def write_file(filename, data):
+    with open(filename, 'a') as filehandler:
+        filehandler.write(data+'\n')
-- 
cgit v1.2.1