From 631679b041112615c24f665d32ae80466f10cef1 Mon Sep 17 00:00:00 2001 From: neodarz Date: Sat, 15 Aug 2020 16:02:14 +0200 Subject: Be more restrictive on url detection --- extractors/bandcamp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'extractors') diff --git a/extractors/bandcamp.py b/extractors/bandcamp.py index dd18f44..935a2d8 100644 --- a/extractors/bandcamp.py +++ b/extractors/bandcamp.py @@ -17,7 +17,8 @@ class bandcamp(Extractor): soup = BeautifulSoup(r.text, 'html.parser') items = soup.select('a[href]') for item in items: - if '/album' in item['href']: + if '/album' in item['href'] and \ + not item['href'].startswith("http"): url = self.root.rstrip('/') + item['href'] if url not in self._albums: self._albums.append(url) -- cgit v1.2.1