diff options
Diffstat (limited to 'utils')
-rw-r--r-- | utils/utils.py | 39 |
1 files changed, 39 insertions, 0 deletions
def make_sitemap_url_element(link, updated=None, changefreq=None, priority=None):
    """Make a sitemap <url> element.

    ``ET`` is the element-tree module imported at the top of this file
    (``lxml.etree``); "element" below means an object for which
    ``ET.iselement`` returns true.

    Parameters
    ----------
    link : str or element
        Either the URL itself, or an atom:link element whose ``href``
        attribute holds the URL, e.g.,
        <link href="http://zmwangx.github.io/"/>.
    updated : datetime or element, optional
        Either an object with an ``isoformat()`` method, or an
        atom:updated element whose text is used verbatim, e.g.,
        <updated>2015-05-05T22:38:42-07:00</updated>.
    changefreq : {"always", "hourly", "daily", "weekly", "monthly", "yearly", "never"}, optional
        Written as-is; the value is not validated here.
    priority : {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1}, optional
        Formatted with one decimal place; the value is not validated here.

    Returns
    -------
    element
        A new <url> element containing <loc> and, for each optional
        argument that is not None, <lastmod>, <changefreq> and <priority>
        children, in that order.

    Raises
    ------
    KeyError
        If `link` is an element without an ``href`` attribute.

    """
    urlelem = ET.Element("url")

    # Use the public ET.iselement() rather than the private ET._Element
    # class to tell elements apart from plain values.
    loc = ET.SubElement(urlelem, "loc")
    loc.text = link.attrib["href"] if ET.iselement(link) else link

    if updated is not None:
        lastmod = ET.SubElement(urlelem, "lastmod")
        lastmod.text = (updated.text if ET.iselement(updated)
                        else updated.isoformat())

    if changefreq is not None:
        changefreq_elem = ET.SubElement(urlelem, "changefreq")
        changefreq_elem.text = changefreq

    if priority is not None:
        priority_elem = ET.SubElement(urlelem, "priority")
        priority_elem.text = "%.1f" % priority

    return urlelem