diff options
-rwxr-xr-x | pyblog | 48 | ||||
-rw-r--r-- | utils/utils.py | 48 |
2 files changed, 49 insertions, 47 deletions
@@ -820,52 +820,6 @@ def _pre_tag_insert_line_numbers(soup, pre_tag): pre_tag.code.append(ln_tag) -# MARKDOWN EXTENSION! -# -# See docstring of process_image_sizes for documentation. - -# If matched, 1st group is width, 3rd group (optional) is height, and -# 4th group is actual text. -IMAGESIZE_EXTRACTOR = re.compile(r'\|(\d+)(x(\d+))?\|\s*(.*)') - -def process_image_sizes(soup): - """Process the image size Markdown extension. - - Allows specifying image size in a Markdown image construct - ![](). The syntax is: - - ![|width(xheight)?| alt](src) - - where width and height are positive integers (xheight is optional), - and alt is the regular alt string (either plain or with some - Markdown formatting). alt string, as usual, is optional. - - Examples: - - ![|1920x1080| Hello, world!](http://example.com/hello.png) - ![|1920| *Hey!*](http://example.com/hey.png) - ![|1280x800|](http://example.com/noalt.png) - - """ - if not soup.article: - return - for img_tag in soup.article.find_all("img"): - if img_tag.has_attr("alt"): - match = IMAGESIZE_EXTRACTOR.match(img_tag["alt"]) - if match: - width, _, height, realalt = match.groups() - img_tag["width"] = width - if height: - img_tag["height"] = height - img_tag["alt"] = realalt - - # strip image specs from captions, if any - for caption in soup.article.select(".figure .caption"): - if hasattr(caption, "contents") and isinstance(caption.contents[0], str): - match = IMAGESIZE_EXTRACTOR.match(caption.contents[0]) - if match: - caption.contents[0].replace_with(match.group(4)) - def link_img_tags(soup): """Convert each <img> tag in <article> to a link to its original.""" if not soup.article: @@ -894,7 +848,7 @@ def postprocess_html_file(htmlfilepath): soup = bs4.BeautifulSoup(htmlfileobj.read(), "lxml") # a series of postprocessing (extensible) - process_image_sizes(soup) + utils.process_image_sizes(soup) link_img_tags(soup) process_footnote_backlinks(soup) diff --git a/utils/utils.py b/utils/utils.py index a82c23e6..0e6ece8e 100644 --- a/utils/utils.py +++ b/utils/utils.py @@ -10,6 +10,8 @@ import dateutil.tz import bs4 import urllib.parse +import re + @contextmanager def init_colorama(): @@ -52,3 +54,49 @@ def absolutify_links(soup, baseurl): tag["href"] = urllib.parse.urljoin(baseurl, tag["href"]) for tag in soup.find_all(lambda tag: tag.has_attr("src")): tag["src"] = urllib.parse.urljoin(baseurl, tag["src"]) + +# MARKDOWN EXTENSION! +# +# See docstring of process_image_sizes for documentation. + +# If matched, 1st group is width, 3rd group (optional) is height, and +# 4th group is actual text. +IMAGESIZE_EXTRACTOR = re.compile(r'\|(\d+)(x(\d+))?\|\s*(.*)') + +def process_image_sizes(soup): + """Process the image size Markdown extension. + + Allows specifying image size in a Markdown image construct + ![](). The syntax is: + + ![|width(xheight)?| alt](src) + + where width and height are positive integers (xheight is optional), + and alt is the regular alt string (either plain or with some + Markdown formatting). alt string, as usual, is optional. + + Examples: + + ![|1920x1080| Hello, world!](http://example.com/hello.png) + ![|1920| *Hey!*](http://example.com/hey.png) + ![|1280x800|](http://example.com/noalt.png) + + """ + if not soup.article: + return + for img_tag in soup.article.find_all("img"): + if img_tag.has_attr("alt"): + match = IMAGESIZE_EXTRACTOR.match(img_tag["alt"]) + if match: + width, _, height, realalt = match.groups() + img_tag["width"] = width + if height: + img_tag["height"] = height + img_tag["alt"] = realalt + + # strip image specs from captions, if any + for caption in soup.article.select(".figure .caption"): + if hasattr(caption, "contents") and isinstance(caption.contents[0], str): + match = IMAGESIZE_EXTRACTOR.match(caption.contents[0]) + if match: + caption.contents[0].replace_with(match.group(4)) |