about summary refs log tree commit diff
diff options
context:
space:
mode:
author neodarz <neodarz@neodarz.net> 2019-05-26 00:14:31 +0200
committer neodarz <neodarz@neodarz.net> 2019-05-26 00:14:31 +0200
commit ae28b77e0569fc847df48f7461660df52fe925cf (patch)
tree 172a562a125fe99aec6403c46f31354d0848c16a
parent dc39bccf756014ba5c4a1bb422bc9baed63b8a8e (diff)
download my_new_personal_website-ae28b77e0569fc847df48f7461660df52fe925cf.tar.xz
my_new_personal_website-ae28b77e0569fc847df48f7461660df52fe925cf.zip
Move process_image_sizes function to external file
-rwxr-xr-x pyblog 48
-rw-r--r-- utils/utils.py 48
2 files changed, 49 insertions, 47 deletions
diff --git a/pyblog b/pyblog
index 2f87d0eb..038276f3 100755
--- a/pyblog
+++ b/pyblog
@@ -820,52 +820,6 @@ def _pre_tag_insert_line_numbers(soup, pre_tag):
pre_tag.code.append(ln_tag)
-# MARKDOWN EXTENSION!
-#
-# See docstring of process_image_sizes for documentation.
-
-# If matched, 1st group is width, 3rd group (optional) is height, and
-# 4th group is actual text.
-IMAGESIZE_EXTRACTOR = re.compile(r'\|(\d+)(x(\d+))?\|\s*(.*)')
-
-def process_image_sizes(soup):
- """Process the image size Markdown extension.
-
- Allows specifying image size in a Markdown image construct
- ![](). The syntax is:
-
- ![|width(xheight)?| alt](src)
-
- where width and height are positive integers (xheight is optional),
- and alt is the regular alt string (either plain or with some
- Markdown formatting). alt string, as usual, is optional.
-
- Examples:
-
- ![|1920x1080| Hello, world!](http://example.com/hello.png)
- ![|1920| *Hey!*](http://example.com/hey.png)
- ![|1280x800|](http://example.com/noalt.png)
-
- """
- if not soup.article:
- return
- for img_tag in soup.article.find_all("img"):
- if img_tag.has_attr("alt"):
- match = IMAGESIZE_EXTRACTOR.match(img_tag["alt"])
- if match:
- width, _, height, realalt = match.groups()
- img_tag["width"] = width
- if height:
- img_tag["height"] = height
- img_tag["alt"] = realalt
-
- # strip image specs from captions, if any
- for caption in soup.article.select(".figure .caption"):
- if hasattr(caption, "contents") and isinstance(caption.contents[0], str):
- match = IMAGESIZE_EXTRACTOR.match(caption.contents[0])
- if match:
- caption.contents[0].replace_with(match.group(4))
-
def link_img_tags(soup):
"""Convert each <img> tag in <article> to a link to its original."""
if not soup.article:
@@ -894,7 +848,7 @@ def postprocess_html_file(htmlfilepath):
soup = bs4.BeautifulSoup(htmlfileobj.read(), "lxml")
# a series of postprocessing (extensible)
- process_image_sizes(soup)
+ utils.process_image_sizes(soup)
link_img_tags(soup)
process_footnote_backlinks(soup)
diff --git a/utils/utils.py b/utils/utils.py
index a82c23e6..0e6ece8e 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -10,6 +10,8 @@ import dateutil.tz
import bs4
import urllib.parse
+import re
+
@contextmanager
def init_colorama():
@@ -52,3 +54,49 @@ def absolutify_links(soup, baseurl):
tag["href"] = urllib.parse.urljoin(baseurl, tag["href"])
for tag in soup.find_all(lambda tag: tag.has_attr("src")):
tag["src"] = urllib.parse.urljoin(baseurl, tag["src"])
+
+# MARKDOWN EXTENSION!
+#
+# See docstring of process_image_sizes for documentation.
+
+# If matched, 1st group is width, 3rd group (optional) is height, and
+# 4th group is actual text.
+IMAGESIZE_EXTRACTOR = re.compile(r'\|(\d+)(x(\d+))?\|\s*(.*)')
+
+def process_image_sizes(soup):
+ """Process the image size Markdown extension.
+
+ Allows specifying image size in a Markdown image construct
+ ![](). The syntax is:
+
+ ![|width(xheight)?| alt](src)
+
+ where width and height are positive integers (xheight is optional),
+ and alt is the regular alt string (either plain or with some
+ Markdown formatting). alt string, as usual, is optional.
+
+ Examples:
+
+ ![|1920x1080| Hello, world!](http://example.com/hello.png)
+ ![|1920| *Hey!*](http://example.com/hey.png)
+ ![|1280x800|](http://example.com/noalt.png)
+
+ """
+ if not soup.article:
+ return
+ for img_tag in soup.article.find_all("img"):
+ if img_tag.has_attr("alt"):
+ match = IMAGESIZE_EXTRACTOR.match(img_tag["alt"])
+ if match:
+ width, _, height, realalt = match.groups()
+ img_tag["width"] = width
+ if height:
+ img_tag["height"] = height
+ img_tag["alt"] = realalt
+
+ # strip image specs from captions, if any
+ for caption in soup.article.select(".figure .caption"):
+ if hasattr(caption, "contents") and isinstance(caption.contents[0], str):
+ match = IMAGESIZE_EXTRACTOR.match(caption.contents[0])
+ if match:
+ caption.contents[0].replace_with(match.group(4))