diff options
author | neodarz <neodarz@neodarz.net> | 2019-05-25 23:40:31 +0200 |
---|---|---|
committer | neodarz <neodarz@neodarz.net> | 2019-05-25 23:40:31 +0200 |
commit | dc39bccf756014ba5c4a1bb422bc9baed63b8a8e (patch) | |
tree | 761cd85184909021b71cbd0f8373ca7a19b8cf6c | |
parent | 9335441fe7dc26bc76fe3aea363c76a218ae261b (diff) | |
download | my_new_personal_website-dc39bccf756014ba5c4a1bb422bc9baed63b8a8e.tar.xz my_new_personal_website-dc39bccf756014ba5c4a1bb422bc9baed63b8a8e.zip |
Move absolutify_links function to external file
-rwxr-xr-x | pyblog | 17 | ||||
-rw-r--r-- | utils/utils.py | 18 |
2 files changed, 19 insertions(+), 16 deletions(-)
@@ -638,21 +638,6 @@ def rewrite_title(): indexmd.write(line) -def absolutify_links(soup, baseurl): - """Make links in an article absolute. - - Parameters - ---------- - soup : bs4.BeautifulSoup - baseurl : str - - """ - for tag in soup.find_all(lambda tag: tag.has_attr("href")): - tag["href"] = urllib.parse.urljoin(baseurl, tag["href"]) - for tag in soup.find_all(lambda tag: tag.has_attr("src")): - tag["src"] = urllib.parse.urljoin(baseurl, tag["src"]) - - def generate_index_and_feed(): """Generate index.html and feeds (atom and rss).""" # pylint: disable=too-many-statements,attribute-defined-outside-init,invalid-name @@ -753,7 +738,7 @@ def generate_index_and_feed(): for script_tag in article.find_all("script"): tags_to_remove.append(script_tag) # make internal links absolute - absolutify_links(article, entry_url) + utils.absolutify_links(article, entry_url) # remove marked tags for tag in tags_to_remove: tag.extract() diff --git a/utils/utils.py b/utils/utils.py index bf3d4fbe..a82c23e6 100644 --- a/utils/utils.py +++ b/utils/utils.py @@ -7,6 +7,9 @@ import time import datetime import dateutil.tz +import bs4 +import urllib.parse + @contextmanager def init_colorama(): @@ -34,3 +37,18 @@ def current_datetime(): """ return datetime.datetime.fromtimestamp(round(time.time()), dateutil.tz.tzlocal()) + + +def absolutify_links(soup, baseurl): + """Make links in an article absolute. + + Parameters + ---------- + soup : bs4.BeautifulSoup + baseurl : str + + """ + for tag in soup.find_all(lambda tag: tag.has_attr("href")): + tag["href"] = urllib.parse.urljoin(baseurl, tag["href"]) + for tag in soup.find_all(lambda tag: tag.has_attr("src")): + tag["src"] = urllib.parse.urljoin(baseurl, tag["src"]) |