From ae28b77e0569fc847df48f7461660df52fe925cf Mon Sep 17 00:00:00 2001
From: neodarz <neodarz@neodarz.net>
Date: Sun, 26 May 2019 00:14:31 +0200
Subject: Move process_image_sizes function to external file

---
 utils/utils.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

(limited to 'utils')

diff --git a/utils/utils.py b/utils/utils.py
index a82c23e6..0e6ece8e 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -10,6 +10,8 @@ import dateutil.tz
 import bs4
 import urllib.parse
 
+import re
+
 
 @contextmanager
 def init_colorama():
@@ -52,3 +54,49 @@ def absolutify_links(soup, baseurl):
         tag["href"] = urllib.parse.urljoin(baseurl, tag["href"])
     for tag in soup.find_all(lambda tag: tag.has_attr("src")):
         tag["src"] = urllib.parse.urljoin(baseurl, tag["src"])
+
+# MARKDOWN EXTENSION!
+#
+# See docstring of process_image_sizes for documentation.
+
+# If matched, group 1 is the width, group 3 (optional) is the height, and
+# group 4 is the remaining text; group 2 merely wraps the "x" + height part.
+IMAGESIZE_EXTRACTOR = re.compile(r'\|(\d+)(x(\d+))?\|\s*(.*)')
+
+def process_image_sizes(soup):
+    """Process the image size Markdown extension, modifying soup in place.
+
+    Allows specifying image size in a Markdown image construct
+    ![](). The syntax is:
+
+        ![|width(xheight)?| alt](src)
+
+    where width and height are positive integers (xheight is optional),
+    and alt is the regular alt string (either plain or with some
+    Markdown formatting). alt string, as usual, is optional.
+
+    Examples:
+
+        ![|1920x1080| Hello, world!](http://example.com/hello.png)
+        ![|1920| *Hey!*](http://example.com/hey.png)
+        ![|1280x800|](http://example.com/noalt.png)
+
+    Only soup.article is examined; nothing happens if it is missing.
+    """
+    if not soup.article:
+        return
+    for img_tag in soup.article.find_all("img"):
+        match = IMAGESIZE_EXTRACTOR.match(img_tag.get("alt", ""))
+        if match:
+            width, _, height, realalt = match.groups()
+            img_tag["width"] = width
+            if height:
+                img_tag["height"] = height
+            img_tag["alt"] = realalt
+
+    # strip image specs from captions, if any (a caption may be empty)
+    for caption in soup.article.select(".figure .caption"):
+        if caption.contents and isinstance(caption.contents[0], str):
+            match = IMAGESIZE_EXTRACTOR.match(caption.contents[0])
+            if match:
+                caption.contents[0].replace_with(match.group(4))
-- 
cgit v1.2.1