import datetime
import logging
import os
import re
from pathlib import Path

from slugify import slugify

# Directory (relative to the current working directory) that receives one
# file per imported bookmark.
_LINKS_DIR = Path("links")

# NOTE(review): the "<DT>", "<A ...>...</A>" and "<DD>" markers below follow
# the Netscape bookmark layout that the HREF/ADD_DATE/PRIVATE/TAGS attribute
# lookups imply -- confirm against a real Shaarli export before relying on it.
_ENTRY_SEPARATOR = "<DT>"

# Attribute patterns take the first NAME="value" occurrence in an entry; the
# attributes precede the link text, so the first hit is the tag's own.
_HREF_RE = re.compile(r'\bHREF="(.*?)"')
_ADD_DATE_RE = re.compile(r'\bADD_DATE="(.*?)"')
_PRIVATE_RE = re.compile(r'\bPRIVATE="(.*?)"')
_TAGS_RE = re.compile(r'\bTAGS="(.*?)"')

# Text between the opening anchor tag and its closing tag.
_TITLE_RE = re.compile(r'<A\s[^>]*>(.*?)</A>')

# Text following "<DD>": the rest of that line plus up to ten following
# lines, stopping before a line that starts with "[Archive]".
_DESCRIPTION_RE = re.compile(
    r'.*<DD>((?!\[Archive\]).*(\n*(?!\[Archive\]).*){10})'
)

# Target of a markdown-style "[Archive](target)" link.
_ARCHIVE_RE = re.compile(r'.*\[Archive\]\((.*)\)')

# Order in which the header fields are written to a link file.
_FRONT_MATTER_KEYS = (
    "title",
    "published",
    "updated",
    "link",
    "tags",
    "archive",
    "private",
)


def _first_group(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, or ""."""
    match = pattern.search(text)
    return match.group(1) if match else ""


def _to_local_isoformat(raw_timestamp):
    """Convert a Unix timestamp string to an ISO 8601 string in local time.

    An empty value yields ""; a non-numeric value is logged and returned
    unchanged so that one malformed entry does not abort the whole import.
    """
    if not raw_timestamp:
        return ""
    try:
        seconds = int(raw_timestamp)
    except ValueError:
        logging.warning("Unparsable ADD_DATE %r kept as-is", raw_timestamp)
        return raw_timestamp
    # Build an aware UTC datetime first, then shift it to the local zone, so
    # the UTC offset in the output is the one valid at that instant.
    moment = datetime.datetime.fromtimestamp(seconds, tz=datetime.timezone.utc)
    return moment.astimezone().isoformat()


def _parse_entry(entry):
    """Extract the bookmark fields from one chunk of the export file.

    Every field that cannot be found is returned as an empty string.
    """
    added = _to_local_isoformat(_first_group(_ADD_DATE_RE, entry))
    return {
        "title": _first_group(_TITLE_RE, entry),
        "published": added,
        # The export's ADD_DATE is used for both dates.
        "updated": added,
        "link": _first_group(_HREF_RE, entry),
        "tags": _first_group(_TAGS_RE, entry),
        "archive": _first_group(_ARCHIVE_RE, entry),
        "private": _first_group(_PRIVATE_RE, entry),
        "comment": _first_group(_DESCRIPTION_RE, entry),
    }


def _render_link_file(fields):
    """Return the text of a link file: a "---" delimited header, then the comment."""
    header = "".join(
        "{}: {}\n".format(key, fields[key]) for key in _FRONT_MATTER_KEYS
    )
    return "---\n" + header + "---\n" + "{}".format(fields["comment"])


def import_bookmarks(bookmarks_file="bookmarks.html", force=False):
    """
    Import bookmarks from shaarli 0.11 database export

    Each entry of *bookmarks_file* that has a title is written to
    ``links/<slug of the title>`` as a header block followed by the
    entry's description.

    :param bookmarks_file: path of the exported bookmarks file (UTF-8).
    :param force: when true, overwrite link files that already exist;
        otherwise existing files are left alone and a warning is logged.
    """
    _LINKS_DIR.mkdir(parents=True, exist_ok=True)

    with open(bookmarks_file, "r", encoding="utf-8") as export:
        entries = export.read().split(_ENTRY_SEPARATOR)

    for entry in entries:
        fields = _parse_entry(entry)
        if fields["title"] == "":
            # Chunks without a title (e.g. the file preamble) are not bookmarks.
            continue

        slug = slugify(fields["title"], max_length=120)
        if not slug:
            # An empty slug would make the target the links directory itself.
            logging.warning(
                "No file name can be derived from title %r, entry skipped",
                fields["title"],
            )
            continue

        target = _LINKS_DIR / slug
        if target.is_file() and not force:
            logging.warning("File %s already exist!", target)
            continue

        logging.info("Import file %s", target)
        with open(target, "w", encoding="utf-8") as link_file:
            link_file.write(_render_link_file(fields))


# Line-oriented patterns for the header fields of a link file.
title = re.compile(r'^title: (.*)$')
published = re.compile(r'^published: (.*)$')
updated = re.compile(r'^updated: (.*)$')
link = re.compile(r'^link: (.*)$')
tags = re.compile(r'^tags: (.*)$')
archive = re.compile(r'^archive: (.*)$')
private = re.compile(r'^private: (.*)$')
# Everything after the second "---" marker and the single character that follows it.
comment = re.compile(r'---[\s\S]*?---[\s\S]([\s\S]*)')


def bookmark(path):
    """Read the link file at *path* and return its fields as a dict.

    The result has the keys ``path``, ``title``, ``published``, ``updated``,
    ``link``, ``tags``, ``archive``, ``private`` and ``comment``. For each
    header field the first matching line wins; a field that is absent from
    the file is returned as an empty string.
    """
    with open(path, "r", encoding="utf-8") as handle:
        lines = handle.readlines()

    header_patterns = (
        ("title", title),
        ("published", published),
        ("updated", updated),
        ("link", link),
        ("tags", tags),
        ("archive", archive),
        ("private", private),
    )

    record = {"path": path}
    for key, pattern in header_patterns:
        record[key] = next(
            (found.group(1) for found in map(pattern.search, lines) if found),
            "",
        )

    body = comment.search("".join(lines))
    record["comment"] = body.group(1) if body else ""
    return record


def bookmarks(path):
    """Return the parsed bookmark of every file found under *path*.

    The tree is walked bottom-up; each file is located relative to the
    directory it was found in, so files in sub-directories are read from
    their real location.
    """
    return [
        bookmark(Path(root) / name)
        for root, _dirs, files in os.walk(path, topdown=False)
        for name in files
    ]