aboutsummaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
Diffstat (limited to 'utils')
-rw-r--r--utils/utils.py48
1 files changed, 48 insertions, 0 deletions
diff --git a/utils/utils.py b/utils/utils.py
index a82c23e6..0e6ece8e 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -10,6 +10,8 @@ import dateutil.tz
import bs4
import urllib.parse
+import re
+
@contextmanager
def init_colorama():
@@ -52,3 +54,49 @@ def absolutify_links(soup, baseurl):
tag["href"] = urllib.parse.urljoin(baseurl, tag["href"])
for tag in soup.find_all(lambda tag: tag.has_attr("src")):
tag["src"] = urllib.parse.urljoin(baseurl, tag["src"])
+
+# MARKDOWN EXTENSION!
+#
+# See docstring of process_image_sizes for documentation.
+
+# Matches a leading "|width|" or "|widthxheight|" size spec.  If matched,
+# group 1 is the width, group 2 is the optional "xheight" suffix as a
+# whole, group 3 is the height (None when omitted), and group 4 is the
+# remaining text.  re.DOTALL lets group 4 span line breaks, so alt text
+# or captions containing newlines are not cut off at the first one.
+IMAGESIZE_EXTRACTOR = re.compile(r'\|(\d+)(x(\d+))?\|\s*(.*)', re.DOTALL)
+
+def process_image_sizes(soup):
+    """Process the image size Markdown extension.
+
+    Allows specifying image size in a Markdown image construct
+    ![](). The syntax is:
+
+        ![|width(xheight)?| alt](src)
+
+    where width and height are written as decimal digits (xheight is
+    optional), and alt is the regular alt string (either plain or with
+    some Markdown formatting). alt string, as usual, is optional.
+
+    Examples:
+
+        ![|1920x1080| Hello, world!](http://example.com/hello.png)
+        ![|1920| *Hey!*](http://example.com/hey.png)
+        ![|1280x800|](http://example.com/noalt.png)
+
+    The tree is modified in place and nothing is returned.  Only
+    content inside the document's <article> element is touched; if
+    there is no such element the function does nothing.
+
+    For every <img> whose alt attribute starts with a size spec, the
+    width (and height, when given) attributes are set and the spec is
+    removed from the alt text.  For every ".figure .caption" element
+    whose first child is a text node starting with a size spec, the
+    spec is removed from that text node.
+
+    """
+    # Look the article up once; each soup.article access is a search.
+    article = soup.article
+    if article is None:
+        return
+
+    for img_tag in article.find_all("img"):
+        if not img_tag.has_attr("alt"):
+            continue
+        match = IMAGESIZE_EXTRACTOR.match(img_tag["alt"])
+        if not match:
+            continue
+        width, _, height, realalt = match.groups()
+        img_tag["width"] = width
+        if height:
+            img_tag["height"] = height
+        img_tag["alt"] = realalt
+
+    # strip image specs from captions, if any
+    for caption in article.select(".figure .caption"):
+        contents = getattr(caption, "contents", None)
+        # An empty caption has no first child; indexing it would raise
+        # IndexError, so skip it.
+        if not contents:
+            continue
+        first = contents[0]
+        # Only a leading text node can carry the size spec.
+        if not isinstance(first, str):
+            continue
+        match = IMAGESIZE_EXTRACTOR.match(first)
+        if match:
+            first.replace_with(match.group(4))