# extractors/bandcamp.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27

import re
import logging
import requests
from bs4 import BeautifulSoup

from .common import Extractor

class bandcamp(Extractor):
    """Extractor that collects album page URLs from a Bandcamp site.

    ``self.root`` and ``self._albums`` are read by this class but not defined
    in it; they are presumably initialised by ``Extractor.__init__``
    (TODO confirm against ``.common``).
    """

    # Captures everything from the scheme through "bandcamp.com". The dot is
    # escaped so only a literal "bandcamp.com" matches (not e.g. "bandcampXcom").
    pattern = re.compile(r'(https?://.*bandcamp\.com)')
    # Output path template built from %(name)s placeholders; how it is
    # consumed is not visible in this file.
    filename_template = "%(artist)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"
    # Seconds to wait on the HTTP request before giving up. Without a timeout
    # ``requests`` may block forever on an unresponsive server.
    request_timeout = 30

    def __init__(self, reg, url):
        """Forward ``reg`` and ``url`` unchanged to the base ``Extractor``."""
        super().__init__(reg, url)

    def get_albums(self):
        """Fetch ``self.root`` and append the album links found to ``self._albums``.

        Every ``<a href>`` on the page whose href contains ``/album`` and does
        not start with ``http`` is appended to ``self.root`` (trailing slash
        stripped) and recorded once in ``self._albums``. A warning is logged
        when ``self._albums`` is still empty afterwards.

        Returns:
            None. Results are accumulated in ``self._albums``.

        Raises:
            requests.exceptions.Timeout: if the server does not answer within
                ``request_timeout`` seconds. Other ``requests`` errors
                propagate unchanged.
        """
        response = requests.get(self.root, timeout=self.request_timeout)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Loop-invariant: the prefix every relative album link is joined to.
        base = self.root.rstrip('/')
        for anchor in soup.select('a[href]'):
            href = anchor['href']
            # Only site-relative album links are collected; hrefs that start
            # with "http" are skipped.
            if '/album' not in href or href.startswith("http"):
                continue
            url = base + href
            if url not in self._albums:
                self._albums.append(url)

        if not self._albums:
            logging.warning("No albums found at %s ????", self.root)