import os, re
import datetime, logging
from slugify import slugify
from pathlib import Path
# Patterns applied to each record of the export. They are raw strings so
# that escapes such as ``\[`` reach the regex engine verbatim, and they are
# compiled once instead of on every loop iteration.
# NOTE(review): the title and description patterns, and the record delimiter
# used in import_bookmarks(), look like they may have lost surrounding markup
# at some point -- confirm them against a real export file.
_HREF_RE = re.compile(r'.*HREF="(.*?)".*')
_ADD_DATE_RE = re.compile(r'.*ADD_DATE="(.*?)".*')
_PRIVATE_RE = re.compile(r'.*PRIVATE="(.*?)".*')
_TAGS_RE = re.compile(r'.*TAGS="(.*?)".*')
_TITLE_RE = re.compile(r'(.*)')
_DATA_RE = re.compile(r'.*((?!\[Archive\]).*(\n*(?!\[Archive\]).*){10})')
_ARCHIVE_RE = re.compile(r'.*\[Archive\]\((.*)\)')


def _capture(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, or ""."""
    found = pattern.search(text)
    return found.group(1) if found is not None else ""


def _local_iso_timestamp(raw):
    """Turn an epoch-seconds string into a local ISO 8601 string with offset.

    Returns "" (after logging a warning) when *raw* is not a usable
    timestamp, so one bad record does not abort the whole import.
    """
    try:
        moment = datetime.datetime.fromtimestamp(
            int(raw), tz=datetime.timezone.utc
        )
        # astimezone() without arguments converts to the system local zone,
        # which yields the same "+HH:MM" suffix the offset arithmetic gave.
        return moment.astimezone().isoformat()
    except (ValueError, OverflowError, OSError):
        logging.warning("Invalid ADD_DATE value %r", raw)
        return ""


def import_bookmarks(bookmarks_file="bookmarks.html", force=False):
    """Import bookmarks from shaarli 0.11 database export.

    Reads *bookmarks_file*, splits it into records and writes one file per
    record with a non-empty title into the ``links`` directory (created if
    needed). The file name is the slugified title, limited to 120
    characters. Each file holds a ``---`` delimited header (title,
    published, updated, link, tags, archive, private) followed by the
    extracted description.

    Args:
        bookmarks_file: Path of the export file to read (UTF-8).
        force: When true, overwrite files that already exist; otherwise
            existing files are kept and a warning is logged.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("links", exist_ok=True)

    with open(bookmarks_file, "r", encoding="utf-8") as bookmarks:
        databookmarks = bookmarks.read()

    for record in databookmarks.split("\n"):
        title = _capture(_TITLE_RE, record)
        if title == "":
            # Nothing to name the file after.
            continue

        href = _capture(_HREF_RE, record)
        private = _capture(_PRIVATE_RE, record)
        tags = _capture(_TAGS_RE, record)
        data = _capture(_DATA_RE, record)
        archive = _capture(_ARCHIVE_RE, record)

        add_date = ""
        raw_date = _ADD_DATE_RE.search(record)
        if raw_date is not None:
            add_date = _local_iso_timestamp(raw_date.group(1))

        slug = slugify(title, max_length=120)
        if not slug:
            # An empty slug would make the target the "links" directory itself.
            logging.warning("Cannot build a file name from title %r", title)
            continue

        final_file = Path("links/" + slug)
        if final_file.is_file() and not force:
            logging.warning("File %s already exist!", final_file)
            continue

        logging.info("Import file %s", final_file)
        file_content = "".join([
            "---\n",
            "title: {}\n".format(title),
            "published: {}\n".format(add_date),
            "updated: {}\n".format(add_date),
            "link: {}\n".format(href),
            "tags: {}\n".format(tags),
            "archive: {}\n".format(archive),
            "private: {}\n".format(private),
            "---\n",
            "{}".format(data),
        ])
        with open(final_file, "w", encoding="utf-8") as link:
            link.write(file_content)
def _header_field(key):
    """Compile the pattern that captures the value of a ``key: value`` line."""
    return re.compile('^' + key + r': (.*)$')


# One pattern per header key of a stored bookmark file; each is meant to be
# applied to a single line and captures everything after "<key>: ".
title = _header_field("title")
published = _header_field("published")
updated = _header_field("updated")
link = _header_field("link")
tags = _header_field("tags")
archive = _header_field("archive")
private = _header_field("private")

# Applied to the whole file text: skips the first "--- ... ---" section and
# the single character following it, then captures all remaining text.
comment = re.compile(r'---[\s\S]*?---[\s\S]([\s\S]*)')
def bookmark(path):
    """Read one stored bookmark file and return its fields as a dict.

    The file is scanned line by line with the module-level field patterns;
    for each field the first matching line wins. The ``comment`` value is
    taken from the whole file text using the ``comment`` pattern.

    Args:
        path: Location of the bookmark file (read as UTF-8).

    Returns:
        A dict with the keys ``path``, ``title``, ``published``,
        ``updated``, ``link``, ``tags``, ``archive``, ``private`` and
        ``comment``. A field that is absent from the file is returned as an
        empty string (and a warning is logged) instead of raising
        ``IndexError``.
    """
    with open(path, 'r', encoding="utf-8") as f:
        lines = f.readlines()

    def first_value(field, pattern):
        # The generator stops at the first matching line instead of
        # collecting every match just to take element 0.
        value = next(
            (m.group(1) for m in map(pattern.search, lines) if m), None
        )
        if value is None:
            logging.warning("Field %s missing in %s", field, path)
            return ""
        return value

    fields = (
        ("title", title),
        ("published", published),
        ("updated", updated),
        ("link", link),
        ("tags", tags),
        ("archive", archive),
        ("private", private),
    )

    entry = {"path": path}
    for field, pattern in fields:
        entry[field] = first_value(field, pattern)

    body = comment.search("".join(lines))
    entry["comment"] = body.group(1) if body else ""
    return entry
def bookmarks(path):
    """Load every bookmark file found under *path*.

    The directory tree is walked bottom-up with ``os.walk`` and each file is
    parsed with ``bookmark()``.

    Args:
        path: Directory to scan (``str`` or ``Path``).

    Returns:
        A list with one parsed bookmark dict per file; empty when the
        directory has no files or does not exist.
    """
    entries = []
    for root, _dirs, files in os.walk(path, topdown=False):
        for name in files:
            # Join with the directory currently being walked, not the top
            # directory; otherwise files inside subdirectories resolve to a
            # wrong location.
            entries.append(bookmark(Path(root) / name))
    return entries