def absolutify_links(soup, baseurl):
    """Rewrite ``href`` and ``src`` attribute values as absolute URLs.

    Every tag carrying an ``href`` attribute is processed first, then every
    tag carrying a ``src`` attribute. Each value is resolved against
    ``baseurl`` with ``urllib.parse.urljoin`` and written back onto the tag,
    so ``soup`` is modified in place and nothing is returned.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
    baseurl : str

    """
    for attribute in ("href", "src"):

        def carries_attribute(tag, name=attribute):
            # ``name`` is bound as a default so the predicate keeps the
            # attribute of this iteration even if it is evaluated lazily.
            return tag.has_attr(name)

        for element in soup.find_all(carries_attribute):
            resolved = urllib.parse.urljoin(baseurl, element[attribute])
            element[attribute] = resolved