import datetime
import logging
import os
import re
from pathlib import Path

from slugify import slugify

# NOTE(review): in the previous revision the entry delimiters had degraded to
# raw line breaks inside string literals (a syntax error) and the title
# pattern was a bare "(.*)".  The <DT>, <A ...>...</A> and <DD> markers below
# are restored on the assumption that the input is a Netscape-style bookmark
# export (it carries HREF / ADD_DATE / PRIVATE / TAGS attributes) -- confirm
# against a real export file before relying on them.
_ENTRY_SEPARATOR = "<DT>"

# Attribute patterns; each captures the quoted attribute value in group 1.
_HREF_RE = re.compile(r'.*HREF="(.*?)".*')
_ADD_DATE_RE = re.compile(r'.*ADD_DATE="(.*?)".*')
_PRIVATE_RE = re.compile(r'.*PRIVATE="(.*?)".*')
_TAGS_RE = re.compile(r'.*TAGS="(.*?)".*')

# Text between the opening <A ...> tag and its closing </A>.
_TITLE_RE = re.compile(r'<A\s[^>]*>(.*?)</A>')

# Description text following <DD>.  Neither the first line nor any of the
# following repetitions may start at a "[Archive]" marker; the trailing group
# is repeated exactly 10 times, each repetition optionally consuming line
# breaks plus one more line (a repetition may also match nothing).
_DATA_RE = re.compile(r'.*<DD>((?!\[Archive\]).*(\n*(?!\[Archive\]).*){10})')

# Target of a Markdown-style "[Archive](target)" link.
_ARCHIVE_RE = re.compile(r'.*\[Archive\]\((.*)\)')


def _first_group(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, or "" if none."""
    match = pattern.search(text)
    return match.group(1) if match is not None else ""


def _to_local_isoformat(timestamp):
    """Convert a Unix timestamp string to ISO 8601 with the local UTC offset.

    Returns "" (after logging a warning) when the value is not a usable
    integer timestamp, so that one malformed entry does not abort the import.
    """
    try:
        moment = datetime.datetime.fromtimestamp(
            int(timestamp), tz=datetime.timezone.utc
        )
        # astimezone() with no argument converts to the system local zone,
        # yielding the same "+HH:MM" suffix the old
        # fromtimestamp() - utcfromtimestamp() arithmetic produced, without
        # the deprecated utcfromtimestamp() call.
        return moment.astimezone().isoformat()
    except (ValueError, OverflowError, OSError):
        logging.warning("Invalid ADD_DATE value %r, leaving date empty", timestamp)
        return ""


def import_bookmarks(bookmarks_file="bookmarks.html", force=False):
    """
    Import bookmarks from shaarli 0.11 database export

    Reads *bookmarks_file* (UTF-8), splits it into entries and writes one
    file per titled entry into the ``links`` directory (created relative to
    the current working directory if missing).  Each file is named after the
    slugified title (at most 120 characters) and contains a ``---`` delimited
    header (title, published, updated, link, tags, archive, private) followed
    by the entry's description text.

    Args:
        bookmarks_file: Path of the export file to read.
        force: When true, overwrite link files that already exist; otherwise
            existing files are left untouched and a warning is logged.

    Raises:
        OSError: If the export cannot be read or a link file cannot be
            written.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs("links", exist_ok=True)

    with open(bookmarks_file, "r", encoding="utf-8") as bookmarks:
        databookmarks = bookmarks.read()

    for bookmark in databookmarks.split(_ENTRY_SEPARATOR):
        title = _first_group(_TITLE_RE, bookmark)
        if title == "":
            # Chunks without a title (e.g. the file preamble) are not entries.
            continue

        slug = slugify(title, max_length=120)
        if not slug:
            # An empty slug would make the target path the "links" directory
            # itself, and opening that for writing fails.
            logging.warning("Cannot derive a file name from title %r, skipping", title)
            continue

        final_file = Path("links") / slug
        if final_file.is_file() and not force:
            logging.warning("File %s already exist!", final_file)
            continue

        href = _first_group(_HREF_RE, bookmark)
        private = _first_group(_PRIVATE_RE, bookmark)
        tags = _first_group(_TAGS_RE, bookmark)
        data = _first_group(_DATA_RE, bookmark)
        archive = _first_group(_ARCHIVE_RE, bookmark)

        # A missing ADD_DATE attribute leaves the date fields empty.
        add_date_match = _ADD_DATE_RE.search(bookmark)
        add_date = ""
        if add_date_match is not None:
            add_date = _to_local_isoformat(add_date_match.group(1))

        file_content = "".join(
            [
                "---\n",
                "title: {}\n".format(title),
                "published: {}\n".format(add_date),
                # The entry's creation date is written to both date fields.
                "updated: {}\n".format(add_date),
                "link: {}\n".format(href),
                "tags: {}\n".format(tags),
                "archive: {}\n".format(archive),
                "private: {}\n".format(private),
                "---\n",
                "{}".format(data),
            ]
        )

        logging.info("Import file %s", final_file)
        with open(final_file, "w", encoding="utf-8") as link:
            link.write(file_content)