Move process_image_sizes function to external file

author: neodarz <neodarz@neodarz.net> 2019-05-26 00:14:31 +0200
committer: neodarz <neodarz@neodarz.net> 2019-05-26 00:14:31 +0200
commit: ae28b77e0569fc847df48f7461660df52fe925cf (patch)
tree: 172a562a125fe99aec6403c46f31354d0848c16a
parent: dc39bccf756014ba5c4a1bb422bc9baed63b8a8e (diff)
download: my_new_personal_website-ae28b77e0569fc847df48f7461660df52fe925cf.tar.xz
my_new_personal_website-ae28b77e0569fc847df48f7461660df52fe925cf.zip
2 files changed, 49 insertions, 47 deletions
diff --git a/pyblog b/pyblog
index 2f87d0eb..038276f3 100755
--- a/pyblog
+++ b/pyblog
@@ -820,52 +820,6 @@ def _pre_tag_insert_line_numbers(soup, pre_tag):
         pre_tag.code.append(ln_tag)
 
 
-# MARKDOWN EXTENSION!
-#
-# See docstring of process_image_sizes for documentation.
-
-# If matched, 1st group is width, 3rd group (optional) is height, and
-# 4th group is actual text.
-IMAGESIZE_EXTRACTOR = re.compile(r'\|(\d+)(x(\d+))?\|\s*(.*)')
-
-def process_image_sizes(soup):
-    """Process the image size Markdown extension.
-
-    Allows specifying image size in a Markdown image construct
-    ![](). The syntax is:
-
-        ![|width(xheight)?| alt](src)
-
-    where width and height are positive integers (xheight is optional),
-    and alt is the regular alt string (either plain or with some
-    Markdown formatting). alt string, as usual, is optional.
-
-    Examples:
-
-        ![|1920x1080| Hello, world!](http://example.com/hello.png)
-        ![|1920| *Hey!*](http://example.com/hey.png)
-        ![|1280x800|](http://example.com/noalt.png)
-
-    """
-    if not soup.article:
-        return
-    for img_tag in soup.article.find_all("img"):
-        if img_tag.has_attr("alt"):
-            match = IMAGESIZE_EXTRACTOR.match(img_tag["alt"])
-            if match:
-                width, _, height, realalt = match.groups()
-                img_tag["width"] = width
-                if height:
-                    img_tag["height"] = height
-                img_tag["alt"] = realalt
-
-    # strip image specs from captions, if any
-    for caption in soup.article.select(".figure .caption"):
-        if hasattr(caption, "contents") and isinstance(caption.contents[0], str):
-            match = IMAGESIZE_EXTRACTOR.match(caption.contents[0])
-            if match:
-                caption.contents[0].replace_with(match.group(4))
-
 def link_img_tags(soup):
     """Convert each <img> tag in <article> to a link to its original."""
     if not soup.article:
@@ -894,7 +848,7 @@ def postprocess_html_file(htmlfilepath):
         soup = bs4.BeautifulSoup(htmlfileobj.read(), "lxml")
 
         # a series of postprocessing (extensible)
-        process_image_sizes(soup)
+        utils.process_image_sizes(soup)
         link_img_tags(soup)
         process_footnote_backlinks(soup)
 
diff --git a/utils/utils.py b/utils/utils.py
index a82c23e6..0e6ece8e 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -10,6 +10,8 @@ import dateutil.tz
 import bs4
 import urllib.parse
 
+import re
+
 
 @contextmanager
 def init_colorama():
@@ -52,3 +54,49 @@ def absolutify_links(soup, baseurl):
         tag["href"] = urllib.parse.urljoin(baseurl, tag["href"])
     for tag in soup.find_all(lambda tag: tag.has_attr("src")):
         tag["src"] = urllib.parse.urljoin(baseurl, tag["src"])
+
+# MARKDOWN EXTENSION!
+#
+# See docstring of process_image_sizes for documentation.
+
+# If matched, 1st group is width, 3rd group (optional) is height, and
+# 4th group is actual text.
+IMAGESIZE_EXTRACTOR = re.compile(r'\|(\d+)(x(\d+))?\|\s*(.*)')
+
+def process_image_sizes(soup):
+    """Process the image size Markdown extension.
+
+    Allows specifying image size in a Markdown image construct
+    ![](). The syntax is:
+
+        ![|width(xheight)?| alt](src)
+
+    where width and height are positive integers (xheight is optional),
+    and alt is the regular alt string (either plain or with some
+    Markdown formatting). alt string, as usual, is optional.
+
+    Examples:
+
+        ![|1920x1080| Hello, world!](http://example.com/hello.png)
+        ![|1920| *Hey!*](http://example.com/hey.png)
+        ![|1280x800|](http://example.com/noalt.png)
+
+    Modifies soup in place; does nothing if soup has no <article>.
+    """
+    if not soup.article:
+        return
+    for img_tag in soup.article.find_all("img"):
+        if img_tag.has_attr("alt"):
+            match = IMAGESIZE_EXTRACTOR.match(img_tag["alt"])
+            if match:
+                width, _, height, realalt = match.groups()
+                img_tag["width"] = width
+                if height:
+                    img_tag["height"] = height
+                img_tag["alt"] = realalt
+
+    # strip image specs from captions, if any (empty captions are skipped)
+    for caption in soup.article.select(".figure .caption"):
+        if caption.contents and isinstance(caption.contents[0], str):
+            match = IMAGESIZE_EXTRACTOR.match(caption.contents[0])
+            if match:
+                caption.contents[0].replace_with(match.group(4))
author	neodarz <neodarz@neodarz.net>	2019-05-26 00:14:31 +0200
committer	neodarz <neodarz@neodarz.net>	2019-05-26 00:14:31 +0200
commit	ae28b77e0569fc847df48f7461660df52fe925cf (patch)
tree	172a562a125fe99aec6403c46f31354d0848c16a
parent	dc39bccf756014ba5c4a1bb422bc9baed63b8a8e (diff)
download	my_new_personal_website-ae28b77e0569fc847df48f7461660df52fe925cf.tar.xz my_new_personal_website-ae28b77e0569fc847df48f7461660df52fe925cf.zip