diff options
Diffstat (limited to 'utils/utils.py')
-rw-r--r-- | utils/utils.py | 48 |
1 files changed, 48 insertions, 0 deletions
import re


# MARKDOWN EXTENSION!
#
# See docstring of process_image_sizes for documentation.

# Matches a leading size spec of the form ``|width|`` or ``|widthxheight|``.
# If matched, 1st group is width, 3rd group (optional) is height, and
# 4th group is the actual text following the spec.
# re.DOTALL lets the 4th group span line breaks; without it any text after
# the first newline would be dropped when the spec is stripped.
IMAGESIZE_EXTRACTOR = re.compile(r'\|(\d+)(x(\d+))?\|\s*(.*)', re.DOTALL)


def process_image_sizes(soup):
    """Process the image size Markdown extension.

    Allows specifying image size in a Markdown image construct
    ![](). The syntax is:

        ![|width(xheight)?| alt](src)

    where width and height are integers written in decimal digits
    (xheight is optional), and alt is the regular alt string (either
    plain or with some Markdown formatting). The alt string, as usual,
    is optional.

    Examples:

        ![|1920x1080| Hello, world!](http://example.com/hello.png)
        ![|1920| *Hey!*](http://example.com/hey.png)
        ![|1280x800|](http://example.com/noalt.png)

    The document is modified in place and nothing is returned:

    * every ``img`` inside ``soup.article`` whose ``alt`` starts with a
      size spec gets ``width`` (and ``height``, when given) attributes,
      and its ``alt`` is reduced to the text after the spec;
    * every element matched by ``.figure .caption`` whose first child is
      a string starting with a size spec has that spec stripped.

    Args:
        soup: parsed document exposing an ``article`` attribute
            (presumably a ``bs4.BeautifulSoup`` object -- confirm
            against callers). If ``soup.article`` is missing, the
            function does nothing.
    """
    article = soup.article
    if article is None:
        return

    for img_tag in article.find_all("img"):
        if not img_tag.has_attr("alt"):
            continue
        match = IMAGESIZE_EXTRACTOR.match(img_tag["alt"])
        if not match:
            continue
        # Group 2 is the whole "xheight" chunk; only the digits are needed.
        width, _, height, realalt = match.groups()
        img_tag["width"] = width
        if height:
            img_tag["height"] = height
        img_tag["alt"] = realalt

    # Strip image specs from captions, if any.
    for caption in article.select(".figure .caption"):
        children = getattr(caption, "contents", None)
        # An empty caption has no first child to inspect; indexing it
        # would raise IndexError.
        if not children:
            continue
        first = children[0]
        # Only a leading text node can carry the spec; skip nested tags.
        if not isinstance(first, str):
            continue
        match = IMAGESIZE_EXTRACTOR.match(first)
        if match:
            first.replace_with(match.group(4))