extractors/common.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

import os
import re
from pathlib import Path
from sh import youtube_dl, ErrorReturnCode_1
from utils import read_file, write_file


class Extractor():

    def __init__(self, reg, url):
        self.root = None
        self.path = None
        if not reg:
            r = re.search(r'(^http(?:s|):(?:\/\/.*?\/|\/\/.*))', url)
            if r:
                self.root = r.group(1)
                try:
                    self.path = r.group(2)
                except IndexError:
                    pass
        if not self.root:
            self.root = reg.group(1)
            try:
                self.path = reg.group(2)
            except IndexError:
                pass
        self._albums = []
        self.root_path = self._root_path()
        self._update_cache(self.root)

    def _root_path(self):
        file_path = os.path.abspath(__file__)
        folder_path = Path(file_path).parent
        root_path = Path(folder_path).parent
        return root_path

    def _update_cache(self, url):
        urls_cache = []
        cache_file = Path(self.root_path, '.urls_cache.txt')
        urls_cache = read_file(cache_file)

        for url in urls_cache:
            if url.startswith(self.root):
                return
        write_file(cache_file, self.root + "," + self.__class__.__name__)

    def _yt_wrapper(self, url, output):
        try:
            for line in youtube_dl(
                    url, audio_format="mp3",
                    add_metadata=True,
                    o=output + self.filename_template,
                    ignore_errors=True,
                    _iter=True):
                print(line.strip())
        except ErrorReturnCode_1:
            pass

    def download_albums(self, output):
        for album in self._albums:
            print("Parsing " + album + "...")
            self._yt_wrapper(album, output)