diff options
author | Zhiming Wang <zmwangx@gmail.com> | 2015-05-13 12:49:40 -0700 |
---|---|---|
committer | Zhiming Wang <zmwangx@gmail.com> | 2015-05-13 12:50:54 -0700 |
commit | e337c56388eb0eaef3d73cd25598afab1c1233a0 (patch) | |
tree | 5384e56f82b79d8bc363818df76b2265b89cd9ef /pyblog | |
parent | 90f24ebea554d5cea2967674cb8e7248a7709283 (diff) | |
download | my_new_personal_website-e337c56388eb0eaef3d73cd25598afab1c1233a0.tar.xz my_new_personal_website-e337c56388eb0eaef3d73cd25598afab1c1233a0.zip |
add support for lfooter (updated) and top level pages (other than index.html).
Diffstat (limited to 'pyblog')
-rwxr-xr-x | pyblog | 56 |
1 file changed, 39 insertions, 17 deletions
@@ -259,33 +259,39 @@ def generate_index(feed): os.remove(tmppath) -def make_sitemap_url_element(atomlink, atomupdated, changefreq, priority): +def make_sitemap_url_element(link, updated=None, changefreq=None, priority=None): """Make a sitemap <url> element. Parameters ---------- - atomlink : xml.etree.ElementTree.Element - atom:link element, e.g., <link href="http://zmwangx.github.io/"/> - atomupdated : xml.etree.ElementTree.Element - atom:updated element, e.g., <updated>2015-05-05T22:38:42-07:00</updated> - changefreq : {"always", "hourly", "daily", "weekly", "monthly", "yearly", "never"} - priority : {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1} + link : str or xml.etree.ElementTree.Element + If using an xml.etree.ElementTree.Element element, then it shall + be an atom:link element, e.g., <link href="http://zmwangx.github.io/"/>. + updated : str or xml.etree.ElementTree.Element, optional + If using an xml.etree.ElementTree.Element element, then it shall + be an atom:updated element, e.g., + <updated>2015-05-05T22:38:42-07:00</updated>. 
+ changefreq : {"always", "hourly", "daily", "weekly", "monthly", "yearly", "never"}, optional + priority : {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1}, optional """ urlelem = ET.Element("url") loc = ET.Element("loc") - loc.text = atomlink.attrib["href"] - lastmod = ET.Element("lastmod") - lastmod.text = atomupdated.text - changefreq_elem = ET.Element("changefreq") - changefreq_elem.text = changefreq - priority_elem = ET.Element("priority") - priority_elem.text = "%.1f" % priority + loc.text = link.attrib["href"] if isinstance(link, ET.Element) else link urlelem.append(loc) - urlelem.append(lastmod) - urlelem.append(changefreq_elem) - urlelem.append(priority_elem) + if updated is not None: + lastmod = ET.Element("lastmod") + lastmod.text = updated.text if isinstance(updated, ET.Element) else updated + urlelem.append(lastmod) + if changefreq is not None: + changefreq_elem = ET.Element("changefreq") + changefreq_elem.text = changefreq + urlelem.append(changefreq_elem) + if priority is not None: + priority_elem = ET.Element("priority") + priority_elem.text = "%.1f" % priority + urlelem.append(priority_elem) return urlelem @@ -294,6 +300,22 @@ def generate_sitemap(feed): sitemap = ET.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9") # index sitemap.append(make_sitemap_url_element(BLOG_HOME, feed.updated, "daily", 1.0)) + # other top level pages + for name in os.listdir(BUILDDIR): + if not name.endswith(".html") or name == "index.html": + continue + link = "{home}/{path}".format(home=BLOG_HOME, path=name) + fullpath = os.path.join(BUILDDIR, name) + # try to extract updated time + updated = None + with open(fullpath, encoding="utf-8") as htmlobj: + soup = bs4.BeautifulSoup(htmlobj.read()) + if soup.article.footer is not None: + updated_tag = soup.article.footer.find(attrs={"class": "updated"}) + if updated_tag is not None: + updated = updated_tag.text + sitemap.append(make_sitemap_url_element(link, updated, "monthly", 0.9)) + # blog entries 
for entry in feed.entries: sitemap.append(make_sitemap_url_element(entry.link, entry.updated, "monthly", 0.9)) |