# Provenance: reconstructed from the patch "Initial commit" by neodarz
# (commit c4fb7fda5b1b6bb22db1f517f71cf393f68c6a9b, Sat, 15 Aug 2020 14:07:15 +0200,
# exported by cgit v1.2.1), which creates extractors/bandcamp.py.
"""Bandcamp extractor: collects the album URLs linked from a Bandcamp page."""

import logging
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

from .common import Extractor

# Seconds to wait for the HTTP response; without a timeout ``requests`` may
# block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 30


class bandcamp(Extractor):
    """Extractor that lists the albums linked from a Bandcamp page.

    NOTE(review): ``self.root`` and ``self._albums`` are read here but never
    assigned in this file; presumably ``Extractor.__init__`` sets them from
    ``reg``/``url`` -- confirm against ``extractors/common.py``.
    """

    # Matches an http(s) URL up to and including "bandcamp.com/"; the single
    # capture group holds that whole prefix. The dot is escaped so that only a
    # literal "bandcamp.com" matches.
    pattern = re.compile(r'(https?://.*bandcamp\.com/)')
    filename_template = "%(artist)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"

    def __init__(self, reg, url):
        """Forward ``reg`` and ``url`` unchanged to the ``Extractor`` base class."""
        super().__init__(reg, url)

    def get_albums(self):
        """Fetch ``self.root`` and record every album link found on it.

        Each ``<a href>`` whose href contains ``'album'`` is resolved against
        ``self.root`` and appended to ``self._albums`` unless it is already
        present. A warning is logged when ``self._albums`` is still empty
        afterwards.

        Raises:
            requests.RequestException: propagated from ``requests.get`` (for
                example on connection failure or timeout).
        """
        response = requests.get(self.root, timeout=_REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, 'html.parser')

        for link in soup.select('a[href]'):
            href = link['href']
            if 'album' not in href:
                continue
            # urljoin handles root-relative ("/album/x"), relative ("album/x")
            # and absolute ("https://other.bandcamp.com/album/x") hrefs alike;
            # plain string concatenation only worked for the first form.
            album_url = urljoin(self.root, href)
            if album_url not in self._albums:
                self._albums.append(album_url)

        if not self._albums:
            logging.warning("No albums found at %s ????", self.root)