diff options
author | neodarz <neodarz@neodarz.net> | 2019-09-29 12:49:48 +0200 |
---|---|---|
committer | neodarz <neodarz@neodarz.net> | 2019-09-29 12:49:48 +0200 |
commit | 987e1f00222a1fd9a566a7f7e096b586867e5e2a (patch) | |
tree | 2e625758bf036c3336b62d2ca9325e9f99eb223f | |
parent | a6aed7b5009768e042c4fa2326fc69107f473723 (diff) | |
download | pyshaarli-987e1f00222a1fd9a566a7f7e096b586867e5e2a.tar.xz pyshaarli-987e1f00222a1fd9a566a7f7e096b586867e5e2a.zip |
Add import function from shaarli 0.11
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | db.py | 66 |
2 files changed, 67 insertions, 1 deletions
```diff
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ Fonctionalities:
  [ ] Add an article
  [ ] Delete an article
  [ ] Edit an article
- [ ] Import from shaarli 0.11
+ [X] Import from shaarli 0.11
  [ ] Search
  [ ] Indexation
  [ ] Cli
--- /dev/null
+++ b/db.py
@@ -0,0 +1,66 @@
+import os, re
+import datetime, logging
+from slugify import slugify
+from pathlib import Path
+
+def import_bookmarks(bookmarks_file="bookmarks.html", force=False):
+    """ Import bookmarks from shaarli 0.11 database export """
+
+    if not os.path.exists("links"):
+        os.makedirs("links")
+
+    with open(bookmarks_file, "r", encoding="utf-8") as bookmarks:
+        databookmarks = bookmarks.read()
+
+    for bookmark in databookmarks.split("<DT>"):
+        href = ""
+        add_date = ""
+        private = ""
+        tags = ""
+        title = ""
+        data = ""
+        archive = ""
+        _href = re.search('.*HREF=\"(.*?)\".*', bookmark)
+        if _href is not None:
+            href = _href.group(1)
+        _add_date = re.search('.*ADD_DATE=\"(.*?)\".*', bookmark)
+        if _add_date is not None:
+            add_date = _add_date.group(1)
+            tz = datetime.datetime.fromtimestamp(int(add_date)) - datetime.datetime.utcfromtimestamp(int(add_date))
+            add_date = datetime.datetime.fromtimestamp(int(add_date), tz=datetime.timezone(offset=tz)).isoformat()
+        _private = re.search('.*PRIVATE=\"(.*?)\".*', bookmark)
+        if _private is not None:
+            private = _private.group(1)
+        _tags = re.search('.*TAGS=\"(.*?)\".*', bookmark)
+        if _tags is not None:
+            tags = _tags.group(1)
+        _title = re.search('<A.*>(.*)</A>', bookmark)
+        if _title is not None:
+            title = _title.group(1)
+        _data = re.search('.*<DD>((?!\[Archive\]).*(\n*(?!\[Archive\]).*){10})', bookmark)
+        if _data is not None:
+            data = _data.group(1)
+        _archive = re.search('.*\[Archive\]\((.*)\)', bookmark)
+        if _archive is not None:
+            archive = _archive.group(1)
+
+
+        if title != "":
+            final_file = Path("links/"+slugify(title, max_length=120))
+            if not final_file.is_file() or force:
+                logging.info("Import file {}".format(final_file))
+                with open(final_file, "w", encoding="utf-8") as link:
+                    file_content = "---\n"
+                    file_content += "title: {}\n".format(title)
+                    file_content += "published: {}\n".format(add_date)
+                    file_content += "updated: {}\n".format(add_date)
+                    file_content += "link: {}\n".format(href)
+                    file_content += "tags: {}\n".format(tags)
+                    file_content += "archive: {}\n".format(archive)
+                    file_content += "private: {}\n".format(private)
+                    file_content += "---\n"
+                    file_content += "{}".format(data)
+                    link.write(file_content)
+            else:
+                logging.warning("File {} already exist!".format(final_file))
+
```