about | summary | refs | log | tree | commit | diff
path: root/pyblog
diff options
context:
space:
mode:
author: Zhiming Wang <zmwangx@gmail.com> 2015-05-13 12:49:40 -0700
committer: Zhiming Wang <zmwangx@gmail.com> 2015-05-13 12:50:54 -0700
commit: e337c56388eb0eaef3d73cd25598afab1c1233a0 (patch)
tree: 5384e56f82b79d8bc363818df76b2265b89cd9ef /pyblog
parent: 90f24ebea554d5cea2967674cb8e7248a7709283 (diff)
download: my_new_personal_website-e337c56388eb0eaef3d73cd25598afab1c1233a0.tar.xz
download: my_new_personal_website-e337c56388eb0eaef3d73cd25598afab1c1233a0.zip
add support for lfooter (updated) and top level pages (other than index.html).
Diffstat (limited to '')
-rwxr-xr-x pyblog 56
1 files changed, 39 insertions, 17 deletions
diff --git a/pyblog b/pyblog
index 411b261d..fe3b1f16 100755
--- a/pyblog
+++ b/pyblog
@@ -259,33 +259,39 @@ def generate_index(feed):
os.remove(tmppath)
-def make_sitemap_url_element(atomlink, atomupdated, changefreq, priority):
+def make_sitemap_url_element(link, updated=None, changefreq=None, priority=None):
"""Make a sitemap <url> element.
Parameters
----------
- atomlink : xml.etree.ElementTree.Element
- atom:link element, e.g., <link href="http://zmwangx.github.io/"/>
- atomupdated : xml.etree.ElementTree.Element
- atom:updated element, e.g., <updated>2015-05-05T22:38:42-07:00</updated>
- changefreq : {"always", "hourly", "daily", "weekly", "monthly", "yearly", "never"}
- priority : {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1}
+ link : str or xml.etree.ElementTree.Element
+ If using an xml.etree.ElementTree.Element element, then it shall
+ be an atom:link element, e.g., <link href="http://zmwangx.github.io/"/>.
+ updated : str or xml.etree.ElementTree.Element, optional
+ If using an xml.etree.ElementTree.Element element, then it shall
+ be an atom:updated element, e.g.,
+ <updated>2015-05-05T22:38:42-07:00</updated>.
+ changefreq : {"always", "hourly", "daily", "weekly", "monthly", "yearly", "never"}, optional
+ priority : {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1}, optional
"""
urlelem = ET.Element("url")
loc = ET.Element("loc")
- loc.text = atomlink.attrib["href"]
- lastmod = ET.Element("lastmod")
- lastmod.text = atomupdated.text
- changefreq_elem = ET.Element("changefreq")
- changefreq_elem.text = changefreq
- priority_elem = ET.Element("priority")
- priority_elem.text = "%.1f" % priority
+ loc.text = link.attrib["href"] if isinstance(link, ET.Element) else link
urlelem.append(loc)
- urlelem.append(lastmod)
- urlelem.append(changefreq_elem)
- urlelem.append(priority_elem)
+ if updated is not None:
+ lastmod = ET.Element("lastmod")
+ lastmod.text = updated.text if isinstance(updated, ET.Element) else updated
+ urlelem.append(lastmod)
+ if changefreq is not None:
+ changefreq_elem = ET.Element("changefreq")
+ changefreq_elem.text = changefreq
+ urlelem.append(changefreq_elem)
+ if priority is not None:
+ priority_elem = ET.Element("priority")
+ priority_elem.text = "%.1f" % priority
+ urlelem.append(priority_elem)
return urlelem
@@ -294,6 +300,22 @@ def generate_sitemap(feed):
sitemap = ET.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
# index
sitemap.append(make_sitemap_url_element(BLOG_HOME, feed.updated, "daily", 1.0))
+ # other top level pages
+ for name in os.listdir(BUILDDIR):
+ if not name.endswith(".html") or name == "index.html":
+ continue
+ link = "{home}/{path}".format(home=BLOG_HOME, path=name)
+ fullpath = os.path.join(BUILDDIR, name)
+ # try to extract updated time
+ updated = None
+ with open(fullpath, encoding="utf-8") as htmlobj:
+ soup = bs4.BeautifulSoup(htmlobj.read())
+ if soup.article.footer is not None:
+ updated_tag = soup.article.footer.find(attrs={"class": "updated"})
+ if updated_tag is not None:
+ updated = updated_tag.text
+ sitemap.append(make_sitemap_url_element(link, updated, "monthly", 0.9))
+
# blog entries
for entry in feed.entries:
sitemap.append(make_sitemap_url_element(entry.link, entry.updated, "monthly", 0.9))