db.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

import os, re
import datetime, logging
from slugify import slugify
from pathlib import Path

# Patterns are compiled once at import time instead of once per bookmark.
# Raw strings keep the regex escapes (\[ \] \( \)) from being read as
# (invalid) Python string escapes.  The leading ".*" is greedy, so on the
# first line that contains a candidate, the last candidate on that line wins.
_HREF_RE = re.compile(r'.*HREF="(.*?)".*')
_ADD_DATE_RE = re.compile(r'.*ADD_DATE="(.*?)".*')
_PRIVATE_RE = re.compile(r'.*PRIVATE="(.*?)".*')
_TAGS_RE = re.compile(r'.*TAGS="(.*?)".*')
_TITLE_RE = re.compile(r'<A.*>(.*)</A>')
# Description following <DD>: the rest of that line plus up to ten further
# newline-separated pieces, none of which may start with "[Archive]".
_DATA_RE = re.compile(r'.*<DD>((?!\[Archive\]).*(\n*(?!\[Archive\]).*){10})')
# Target of a markdown-style "[Archive](...)" link.
_ARCHIVE_RE = re.compile(r'.*\[Archive\]\((.*)\)')


def _first_group(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, or ""."""
    match = pattern.search(text)
    return match.group(1) if match is not None else ""


def _format_add_date(raw):
    """Turn a Unix-timestamp string into a local-time ISO 8601 string.

    Returns "" when *raw* is empty or is not a usable timestamp, so a single
    malformed bookmark cannot abort the whole import.
    """
    if not raw:
        return ""
    try:
        moment = datetime.datetime.fromtimestamp(
            int(raw), tz=datetime.timezone.utc
        )
        # astimezone() without arguments converts to the system local zone,
        # giving the same "+HH:MM" suffix the offset arithmetic used to build.
        return moment.astimezone().isoformat()
    except (ValueError, OverflowError, OSError):
        logging.warning("Ignoring invalid ADD_DATE %r", raw)
        return ""


def _render_link_file(title, add_date, href, tags, archive, private, data):
    """Build the front-matter header followed by the bookmark description."""
    header = [
        "---",
        "title: {}".format(title),
        "published: {}".format(add_date),
        "updated: {}".format(add_date),
        "link: {}".format(href),
        "tags: {}".format(tags),
        "archive: {}".format(archive),
        "private: {}".format(private),
        "---",
    ]
    return "\n".join(header) + "\n" + "{}".format(data)


def import_bookmarks(bookmarks_file="bookmarks.html", force=False):
    """Import bookmarks from shaarli 0.11 database export.

    The export is split on ``<DT>``; every chunk that contains an
    ``<A ...>title</A>`` element is written to ``links/<slug-of-title>`` as a
    front-matter block (title, published, updated, link, tags, archive,
    private) followed by the bookmark description.

    Args:
        bookmarks_file: Path of the HTML export to read (UTF-8).
        force: When true, overwrite link files that already exist; otherwise
            existing files are kept and a warning is logged.

    Raises:
        OSError: If the export cannot be read, or the ``links`` directory or
            a link file cannot be created.
    """
    links_dir = Path("links")
    # exist_ok replaces the racy "check, then create" sequence.
    os.makedirs(links_dir, exist_ok=True)

    with open(bookmarks_file, "r", encoding="utf-8") as bookmarks:
        databookmarks = bookmarks.read()

    for bookmark in databookmarks.split("<DT>"):
        title = _first_group(_TITLE_RE, bookmark)
        if title == "":
            # Header, folder or trailer chunk: nothing to write, so do not
            # bother parsing (or failing on) its other attributes.
            continue

        slug = slugify(title, max_length=120)
        if not slug:
            # An empty slug would make the target path the directory itself.
            logging.warning("Cannot build a file name from title %r", title)
            continue

        final_file = links_dir / slug
        if final_file.is_file() and not force:
            logging.warning("File %s already exist!", final_file)
            continue

        logging.info("Import file %s", final_file)
        file_content = _render_link_file(
            title=title,
            add_date=_format_add_date(_first_group(_ADD_DATE_RE, bookmark)),
            href=_first_group(_HREF_RE, bookmark),
            tags=_first_group(_TAGS_RE, bookmark),
            archive=_first_group(_ARCHIVE_RE, bookmark),
            private=_first_group(_PRIVATE_RE, bookmark),
            data=_first_group(_DATA_RE, bookmark),
        )
        final_file.write_text(file_content, encoding="utf-8")