Add import function from shaarli 0.11

author: neodarz <neodarz@neodarz.net> 2019-09-29 12:49:48 +0200
committer: neodarz <neodarz@neodarz.net> 2019-09-29 12:49:48 +0200
commit: 987e1f00222a1fd9a566a7f7e096b586867e5e2a (patch)
tree: 2e625758bf036c3336b62d2ca9325e9f99eb223f
parent: a6aed7b5009768e042c4fa2326fc69107f473723 (diff)
download: pyshaarli-987e1f00222a1fd9a566a7f7e096b586867e5e2a.tar.xz
pyshaarli-987e1f00222a1fd9a566a7f7e096b586867e5e2a.zip
2 files changed, 67 insertions, 1 deletions
diff --git a/README.md b/README.md
index 443ddbd..4565ed3 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ Fonctionalities:
     [ ] Add an article
     [ ] Delete an article
     [ ] Edit an article
-    [ ] Import from shaarli 0.11
+    [X] Import from shaarli 0.11
 [ ] Search
     [ ] Indexation
 [ ] Cli
diff --git a/db.py b/db.py
new file mode 100644
index 0000000..b0895f5
--- /dev/null
+++ b/db.py
@@ -0,0 +1,91 @@
+import os, re
+import datetime, logging
+from slugify import slugify
+from pathlib import Path
+
+# Patterns for the attributes of one exported bookmark entry. They are the
+# same expressions as before, written as raw strings (no invalid "\[" escape)
+# and compiled once rather than on every loop iteration.
+_HREF_RE = re.compile(r'.*HREF="(.*?)".*')
+_ADD_DATE_RE = re.compile(r'.*ADD_DATE="(.*?)".*')
+_PRIVATE_RE = re.compile(r'.*PRIVATE="(.*?)".*')
+_TAGS_RE = re.compile(r'.*TAGS="(.*?)".*')
+_TITLE_RE = re.compile(r'<A.*>(.*)</A>')
+_DATA_RE = re.compile(r'.*<DD>((?!\[Archive\]).*(\n*(?!\[Archive\]).*){10})')
+_ARCHIVE_RE = re.compile(r'.*\[Archive\]\((.*)\)')
+
+
+def _first_group(pattern, text):
+    """Return group 1 of the first match of pattern in text, or "" if none."""
+    match = pattern.search(text)
+    return match.group(1) if match is not None else ""
+
+
+def _format_add_date(raw):
+    """Turn a Unix timestamp string into a local, offset-aware ISO 8601 date.
+
+    Returns "" and logs a warning when raw is not a usable timestamp.
+    """
+    try:
+        moment = datetime.datetime.fromtimestamp(int(raw), tz=datetime.timezone.utc)
+        # astimezone() without argument converts to the system local zone,
+        # which is the offset the import previously derived by hand.
+        return moment.astimezone().isoformat()
+    except (ValueError, OverflowError, OSError):
+        logging.warning("Invalid ADD_DATE %r, date left empty", raw)
+        return ""
+
+
+def import_bookmarks(bookmarks_file="bookmarks.html", force=False):
+    """Import bookmarks from a shaarli 0.11 database export.
+
+    The export is split on "<DT>"; each chunk containing an <A> title is
+    written to links/<slugified title> as a "---" delimited header followed
+    by the description text.
+
+    bookmarks_file -- path of the HTML export, read as UTF-8.
+    force -- when true, overwrite link files that already exist.
+    """
+    os.makedirs("links", exist_ok=True)
+
+    with open(bookmarks_file, "r", encoding="utf-8") as bookmarks:
+        databookmarks = bookmarks.read()
+
+    for bookmark in databookmarks.split("<DT>"):
+        title = _first_group(_TITLE_RE, bookmark)
+        if title == "":
+            # No <A>...</A> title found: nothing to import from this chunk.
+            continue
+
+        slug = slugify(title, max_length=120)
+        if not slug:
+            # A title made only of symbols can slugify to "", which would
+            # make the target the "links" directory itself.
+            logging.warning("No usable file name for title %r, skipped", title)
+            continue
+
+        final_file = Path("links") / slug
+        if final_file.is_file() and not force:
+            logging.warning("File %s already exist!", final_file)
+            continue
+
+        raw_date = _first_group(_ADD_DATE_RE, bookmark)
+        add_date = _format_add_date(raw_date) if raw_date else ""
+
+        file_content = (
+            "---\n"
+            f"title: {title}\n"
+            f"published: {add_date}\n"
+            f"updated: {add_date}\n"
+            f"link: {_first_group(_HREF_RE, bookmark)}\n"
+            f"tags: {_first_group(_TAGS_RE, bookmark)}\n"
+            f"archive: {_first_group(_ARCHIVE_RE, bookmark)}\n"
+            f"private: {_first_group(_PRIVATE_RE, bookmark)}\n"
+            "---\n"
+            f"{_first_group(_DATA_RE, bookmark)}"
+        )
+
+        logging.info("Import file %s", final_file)
+        with open(final_file, "w", encoding="utf-8") as link:
+            link.write(file_content)
+
author	neodarz <neodarz@neodarz.net>	2019-09-29 12:49:48 +0200
committer	neodarz <neodarz@neodarz.net>	2019-09-29 12:49:48 +0200
commit	987e1f00222a1fd9a566a7f7e096b586867e5e2a (patch)
tree	2e625758bf036c3336b62d2ca9325e9f99eb223f
parent	a6aed7b5009768e042c4fa2326fc69107f473723 (diff)
download	pyshaarli-987e1f00222a1fd9a566a7f7e096b586867e5e2a.tar.xz pyshaarli-987e1f00222a1fd9a566a7f7e096b586867e5e2a.zip