diff options
author | Zhiming Wang <zmwangx@gmail.com> | 2015-05-05 00:08:53 -0700 |
---|---|---|
committer | Zhiming Wang <zmwangx@gmail.com> | 2015-05-05 00:08:53 -0700 |
commit | d14e9ac5b86c911cb255ab30425790488c20fb4d (patch) | |
tree | 7a14faa9dd1e513850f33ced91d14729c3cb36e7 | |
parent | 07bf43a314fe65ccd9c7cb663c3c6134a47cc269 (diff) | |
download | my_new_personal_website-d14e9ac5b86c911cb255ab30425790488c20fb4d.tar.xz my_new_personal_website-d14e9ac5b86c911cb255ab30425790488c20fb4d.zip |
a lot of work
Mainly generating feed and index.
Diffstat (limited to '')
-rwxr-xr-x | pyblog | 249 | ||||
-rw-r--r-- | source/css/theme.css | 14 | ||||
-rw-r--r-- | source/index.md | 4 | ||||
-rw-r--r-- | templates/template.html | 33 |
4 files changed, 274 insertions, 26 deletions
diff --git a/pyblog b/pyblog
--- a/pyblog
+++ b/pyblog
@@ -1,21 +1,138 @@
 #!/usr/bin/env python3
 
+# TODO: timestamp to ISO
+
 """A simple blog generator with Pandoc as backend."""
 
 import argparse
+import datetime
+import io
 import os
 import re
 import shutil
 import subprocess
 import sys
+import tempfile
+import time
+import xml.etree.ElementTree as ET
+
+import bs4
+import dateutil.parser
+import dateutil.tz
 
 
 ROOTDIR = os.path.dirname(os.path.realpath(__file__))
 SOURCEDIR = os.path.join(ROOTDIR, "source")
-INDEX = os.path.join(SOURCEDIR, "index.md")
+INDEXMD = os.path.join(SOURCEDIR, "index.md")
 TEMPLATEDIR = os.path.join(ROOTDIR, "templates")
 HTMLTEMPLATE = os.path.join(TEMPLATEDIR, "template.html")
 BUILDDIR = os.path.join(ROOTDIR, "build")
+ATOM = os.path.join(BUILDDIR, "atom.xml")
+INDEXHTML = os.path.join(BUILDDIR, "index.html")
+
+FEED_MAX_ENTRIES = 20
+
+
+# Hack ET to support CDATA.
+# XML suuuuuucks.
+# http://stackoverflow.com/a/30019607/1944784
+
+def CDATA(text=None):
+    element = ET.Element('![CDATA[')
+    element.text = text
+    return element
+
+ET._original_serialize_xml = ET._serialize_xml
+
+def _serialize_xml(write, elem, qnames, namespaces,short_empty_elements, **kwargs):
+
+    if elem.tag == '![CDATA[':
+        write("\n<{}{}]]>\n".format(elem.tag, elem.text))
+        if elem.tail:
+            write(ET._escape_cdata(elem.tail))
+    else:
+        return ET._original_serialize_xml(write, elem, qnames, namespaces,short_empty_elements, **kwargs)
+
+ET._serialize_xml = ET._serialize['xml'] = _serialize_xml
+
+
+class AtomFeed(object):
+    """Class for storing atom:feed data and metadata."""
+
+    def __init__(self):
+        """Define available attributes."""
+        self.author = None  # atom:author
+        self.generator = None  # atom:generator, optional
+        self.icon = None  # atom:icon, optional
+        self.logo = None  # atom:logo, optional
+        self.id_text = None  # atom:id, just use URI
+        self.id = None  # atom:id
+        self.links = []  # list of atom:link
+        self.title = None  # atom:title
+        self.updated_datetime = None  # update time as a datetime object
+        self.updated = None  # atom:updated
+        self.entries = []  # list of atom:entry, in reverse time order
+        self.feed = None  # atom:feed, assembled
+
+    def assemble_feed(self):
+        """Assemble atom:feed."""
+        self.feed = ET.Element("feed", xmlns="http://www.w3.org/2005/Atom")
+        self.feed.append(self.title)
+        for link in self.links:
+            self.feed.append(link)
+        self.feed.append(self.updated)
+        self.feed.append(self.id)
+        self.feed.append(self.author)
+        if self.icon is not None:
+            self.feed.append(self.icon)
+        if self.logo is not None:
+            self.feed.append(self.logo)
+        if self.generator is not None:
+            self.feed.append(self.generator)
+        # include at most FEED_MAX_ENTRIES entries in the feed
+        for entry in self.entries[:FEED_MAX_ENTRIES]:
+            self.feed.append(entry.entry)
+
+    def dump_feed(self):
+        """Dump atom:feed XML."""
+        if self.feed is None:
+            self.assemble_feed()
+        return ET.tostring(self.feed).decode('utf-8')
+
+
+class AtomEntry(object):
+    """Class for storing atom:entry data and metadata."""
+
+    def __init__(self):
+        """Define available attributes."""
+        self.author = None  # atom:author
+        self.id_text = None  # atom:id, just use URI
+        self.id = None  # atom:id
+        self.relpath = None  # HTML page path relative to home
+        self.link = None  # atom:link
+        self.title_text = None  # plain text title
+        self.title = None  # atom:title
+        self.updated_datetime = None  # update time as a datetime object
+        self.updated = None  # atom:updated
+        self.content_html = None  # content as HTML markup
+        self.content = None  # atom:content
+        self.entry = None  # atom:entry, assembled
+
+    def assemble_entry(self):
+        """Assemble atom:entry."""
+        self.entry = ET.Element("entry")
+        self.entry.append(self.title)
+        self.entry.append(self.link)
+        self.entry.append(self.updated)
+        self.entry.append(self.id)
+        self.entry.append(self.author)
+        self.entry.append(self.content)
+
+    def dump_entry(self):
+        """Dump atom:entry XML."""
+        if self.entry is None:
+            self.assemble_entry()
+        return ET.tostring(self.entry).decode('utf-8')
 
 
 # TODO:
@@ -23,9 +140,118 @@ def new_post():
     pass
 
 
-# TODO:
-def generate_index():
-    pass
+def generate_index(feed):
+    """Generate index.html from index.md and a TOC."""
+
+    sys.stderr.write("generating index.html\n")
+
+    # generate TOC
+    tocbuff = io.StringIO()
+    tocbuff.write('<div class="indextoc" id="toc">')
+    year = 10000  # will be larger than the latest year for quite a while
+    # recall that entries are in reverse chronological order
+    for entry in feed.entries:
+        date = entry.updated_datetime
+        if date.year < year:
+            # write a new <h2 class="toc"> tag with the smaller year
+            year = date.year
+            tocbuff.write(u'\n<h2 class="toc" id="{0}" datetime="{0}">{0}</h2>\n\n'.format(year))
+
+        # write a new <li> entry (<ul>) in Markdown, in the format:
+        #   * <time class="tocdate" datetime="2015-05-05T00:06:04-0700">May 5</time>
+        #     [Blah blah](/blog/2015-05-04-blah-blah.html)
+        monthday = date.strftime("%B %d")
+        tocbuff.write(u'* <time class="tocdate" datetime="%s">%s</time> [%s](%s)\n' %
+                      (date.isoformat(), monthday, entry.title_text, entry.relpath))
+    tocbuff.write('</div>')
+
+    # create tempfile with index.md and the TOC concatenated, and generate index.html from that
+    fd, tmppath = tempfile.mkstemp()
+    os.close(fd)
+    with open(tmppath, 'w', encoding='utf-8') as tmpfile:
+        if os.path.exists(INDEXMD):
+            with open(INDEXMD, 'r', encoding='utf-8') as indexmd:
+                tmpfile.write(u"%s\n\n<hr>\n\n" % indexmd.read())
+        tmpfile.write("%s\n" % tocbuff.getvalue())
+        tocbuff.close()
+
+    pandoc_args = [
+        "pandoc", tmppath,
+        "--template", HTMLTEMPLATE,
+        "--highlight-style=pygments",
+        "-o", INDEXHTML,
+    ]
+    try:
+        subprocess.check_call(pandoc_args)
+    except subprocess.CalledProcessError:  # failed_builds is local to generate(); only report here
+        sys.stderr.write("error: failed to generate index.html\n")
+    finally:
+        os.remove(tmppath)  # always clean up the temporary Markdown file
+
+
+def generate_index_and_feed():
+    """Generate index.html and atom feed."""
+    sys.stderr.write("generating atom feed\n")
+    # initialize feed
+    feed = AtomFeed()
+    # TODO: Put hard-coded values in a config file
+    feed.author = ET.fromstring('<author><name>Zhiming Wang</name><uri>https://github.com/zmwangx</uri><email>zmwangx@gmail.com</email></author>')
+    feed.generator = ET.Element("generator", uri="https://github.com/zmwangx/zmwangx.github.io")
+    feed.generator.text = "pyblog"
+    # TODO: feed.icon
+    feed.id_text = "http://zmwangx.github.io"
+    feed.id = ET.Element("id")
+    feed.id.text = feed.id_text
+    feed.links = [
+        ET.Element("link", href="http://zmwangx.github.io/atom.xml", rel="self"),
+        ET.Element("link", href="http://zmwangx.github.io/"),
+    ]
+    feed.title_text = "dl? cmplnts?"
+    feed.title = ET.fromstring("<title>%s</title>" % feed.title_text)
+    # update time will be set after everything finishes
+
+    postspath = os.path.join(BUILDDIR, "blog")
+    # traverse all posts in reverse time order
+    for name in sorted(os.listdir(postspath), reverse=True):
+        if re.match(r"^(\d{4})-(\d{2})-(\d{2}).*\.html", name):
+            htmlpath = os.path.join(postspath, name)
+            entry = AtomEntry()
+            with open(htmlpath, encoding="utf-8") as htmlfile:
+                soup = bs4.BeautifulSoup(htmlfile.read())
+                entry.author = feed.author  # assume it's always the same author
+                entry.id_text = "%s/blog/%s" % (feed.id_text, name)
+                entry.id = ET.Element("id")
+                entry.id.text = entry.id_text
+                entry.relpath = "/blog/%s" % name
+                entry.link = ET.Element("link", href=entry.id_text)
+                entry.title_text = soup.title.text
+                entry.title = ET.Element("title", type="html")
+                entry.title.text = entry.title_text
+                post_date = soup.find("meta", attrs={"name": "date"})["content"]
+                entry.updated_datetime = dateutil.parser.parse(post_date)
+                entry.updated = ET.Element("updated")
+                entry.updated.text = entry.updated_datetime.isoformat()
+                # extract the article content without header and footer
+                article = soup.article
+                article.header.extract()
+                article.footer.extract()
+                entry.content_html = ''.join([str(content)
+                                              for content in article.contents])
+                entry.content = ET.Element("content", type="html")
+                entry.content.append(CDATA(entry.content_html))
+                entry.assemble_entry()
+                feed.entries.append(entry)
+
+    generate_index(feed)
+
+    feed.updated_datetime = datetime.datetime.fromtimestamp(round(time.time()),
+                                                            dateutil.tz.tzlocal())
+    feed.updated = ET.Element("updated")
+    feed.updated.text = feed.updated_datetime.isoformat()
+
+    with open(ATOM, 'w', encoding='utf-8') as atom:
+        atom.write("%s\n" % feed.dump_feed())
+        sys.stderr.write("wrote atom.xml\n")
 
 
 def generate(fresh=False):
@@ -67,6 +293,8 @@ def generate(fresh=False):
                 os.remove(obj)
 
     failed_builds = 0
+    template_mtime = os.path.getmtime(HTMLTEMPLATE)
+    anything_modified = False
 
     for root, _, files in os.walk(SOURCEDIR):
         relroot = os.path.relpath(root, start=SOURCEDIR)
@@ -78,7 +306,7 @@ def generate(fresh=False):
 
         for name in files:
             extension = name.split(".")[-1]
-            if extension not in ["css", "md"]:
+            if extension not in ["css", "jpg", "md", "png", "svg"]:
                 continue
 
             relpath = os.path.join(relroot, name)
@@ -88,8 +316,13 @@ def generate(fresh=False):
             else:
                 dstpath = os.path.join(dstroot, name)
             if ((not os.path.exists(dstpath) or
-                 os.path.getmtime(dstpath) <= os.path.getmtime(srcpath))):
-                if extension == "css":
+                 os.path.getmtime(dstpath) <=
+                 max(template_mtime, os.path.getmtime(srcpath)))):
+                # new post or modified post
+                anything_modified = True
+                if srcpath == INDEXMD:
+                    continue  # index will be processed separately
+                if extension in ["css", "jpg", "png", "svg"]:
                     sys.stderr.write("copying %s\n" % relpath)
                     shutil.copy(srcpath, dstpath)
                 elif extension == "md":
@@ -106,6 +339,8 @@ def generate(fresh=False):
                         failed_builds += 1
                         sys.stderr.write("error: failed to generate %s" % relpath)
 
+    if anything_modified:
+        generate_index_and_feed()
     sys.stderr.write("build finished with %d errors\n" % failed_builds)
     return failed_builds
 
diff --git a/source/css/theme.css b/source/css/theme.css
index b06fe2a2..60308aac 100644
--- a/source/css/theme.css
+++ b/source/css/theme.css
@@ -34,6 +34,10 @@ h2.meta {
     font-style: italic;
 }
 
+h2.toc {
+    text-align: left;
+}
+
 h3 {
     font-size: 12pt;
 }
@@ -88,3 +92,13 @@ footer .cc-icon {
     background-size: 16px;
     vertical-align: middle;
 }
+
+div.indextoc ul {
+    list-style-type: none;
+    padding-left: 2em;
+}
+
+div.indextoc ul li time.tocdate {
+    float: left;
+    width: 8em;
+}
diff --git a/source/index.md b/source/index.md
index 0236fb77..b654ab88 100644
--- a/source/index.md
+++ b/source/index.md
@@ -5,6 +5,6 @@ title: dl? cmplnts?
 
 I am an undergrad at Stanford (junior as of May 2015) studying mathematics and theoretical physics. I enjoy coding in my spare time, for fun and profit (getting things done). I am lazy in general so I try to automate things as much as possible.
 
-My first programming language was Pascal and I consider C my mother tongue, but recently I write my code in Python or Bash (non-interactive) most of the time. My text editor is Emacs, and my interactive shell is Zsh. I use four-space indents. What else is there to tell?
+My first programming language was Pascal and I consider C my mother tongue, but recently I write my code in Python or Bash (non-interactive) most of the time. My operating system is OS X, my text editor is Emacs, my interactive shell is Zsh, my package manager is Homebrew, and my default browser (at the moment — it changes a lot) is Google Chrome. I use four-space indents. What else is there to tell?
 
-The blog could be about anything, but most of the stuff should be technical.
+This blog could be about anything, but most of the stuff should be technical.
diff --git a/templates/template.html b/templates/template.html
index 1b527e5e..ff6c5f5a 100644
--- a/templates/template.html
+++ b/templates/template.html
@@ -1,31 +1,30 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml"$if(lang)$ lang="$lang$" xml:lang="$lang$"$endif$>
+<!DOCTYPE html>
+<html>
 <head>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-  <meta http-equiv="Content-Style-Type" content="text/css" />
-  <meta name="generator" content="pandoc" />
-  <meta name="author" content="Zhiming Wang" />
+<meta charset="UTF-8">
+<meta name="generator" content="pandoc" />
+<meta name="author" content="Zhiming Wang" />
 $if(date)$
-  <meta name="date" content="$date$" />
+<meta name="date" content="$date$" />
 $endif$
-  <title>$pagetitle$ — dl? cmplnts?</title>
-  <style type="text/css">code{white-space: pre;}</style>
+<title>$pagetitle$</title>
+<style type="text/css">code{white-space: pre;}</style>
 $if(quotes)$
-  <style type="text/css">q { quotes: "“" "”" "‘" "’"; }</style>
+<style type="text/css">q { quotes: "“" "”" "‘" "’"; }</style>
 $endif$
 $if(highlighting-css)$
-  <style type="text/css">
+<style type="text/css">
 $highlighting-css$
-  </style>
+</style>
 $endif$
 $for(css)$
-  <link rel="stylesheet" href="$css$" $if(html5)$$else$type="text/css" $endif$/>
+<link rel="stylesheet" href="$css$" $if(html5)$$else$type="text/css" $endif$/>
 $endfor$
 $if(math)$
-  $math$
+$math$
 $endif$
 $for(header-includes)$
-  $header-includes$
+$header-includes$
 $endfor$
 <link href='/css/normalize.css' media="all" rel="stylesheet" type="text/css">
 <link href='/css/theme.css' media="all" rel="stylesheet" type="text/css">
@@ -41,7 +40,7 @@ $if(subtitle)$
 $endif$
 <h2 class="meta">
 $if(date-display)$
-<time class="timestamp" $if(date)$timestamp="$date$"$endif$>$date-display$,</time>
+<time class="timestamp" $if(date)$datetime="$date$"$endif$>$date-display$,</time>
 $endif$
 by <span class="author">Zhiming Wang</span>
 </h2>
@@ -53,8 +52,8 @@ $toc$
 </div>
 $endif$
 $body$
-<hr>
 <footer>
+<hr>
 <a class="cc-icon" href="https://creativecommons.org/licenses/by/4.0/" target="_blank" title="Released under the Creative Commons Attribution 4.0 International license.">
 <a href="https://github.com/zmwangx">Zhiming Wang</a>
 </footer>