aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhiming Wang <zmwangx@gmail.com>2015-05-05 22:08:54 -0700
committerZhiming Wang <zmwangx@gmail.com>2015-05-05 22:10:19 -0700
commit11f732c9f2644302136ee90176ad7816bd596895 (patch)
tree6e1a31127ef13e19889c693a24f9d71f8fafe1c5
parenta7a0b2073f30b1d0214c3152998d95e40a39b438 (diff)
downloadmy_new_personal_website-11f732c9f2644302136ee90176ad7816bd596895.tar.xz
my_new_personal_website-11f732c9f2644302136ee90176ad7816bd596895.zip
20150505 Graceful handling of SIGINT when using Python's multiprocessing.Process
Also implemented the "touch" action in pyblog and wrote a README.md for the source branch, along with some other minor patches.
-rw-r--r--README.md15
-rwxr-xr-xpyblog126
-rw-r--r--source/blog/2015-05-05-graceful-handling-of-sigint-when-using-pythons-multiprocessingprocess.md73
-rw-r--r--source/css/theme.css4
4 files changed, 197 insertions, 21 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..18b55ef7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+This blog can be built with the `pyblog` script. The `pyblog` script currently provides the following actions:
+
+* `new_post` (aliases: `n`, `new`);
+* `touch` (aliases: `t`, `tou`);
+* `generate` (aliases: `g`, `gen`);
+* `regenerate` (aliases: `r`, `regen`);
+* `preview` (aliases: `p`, `pre`);
+* `deploy` (aliases: `d`, `dep`);
+* `gen_deploy` (aliases: `gd`, `gendep`).
+
+Run `pyblog --help` for more information, and `pyblog <action> --help` for what the action does and how to perform the action.
+
+Note that `pyblog` is incompatible with Python 2.x, and has in fact only been tested on Python 3.4.
+
+The theme of this blog is largely based on that of [mort.ninja](http://mort.ninja) by [Mort Yao](https://github.com/soimort). See [this post](https://zmwangx.github.io/blog/2015-05-05-new-blog-new-start.html) for more details.
diff --git a/pyblog b/pyblog
index cab2972c..ce5c7986 100755
--- a/pyblog
+++ b/pyblog
@@ -5,6 +5,7 @@
import argparse
from contextlib import contextmanager
import datetime
+import fileinput
import io
import http.client
import http.server
@@ -96,6 +97,15 @@ def init_colorama():
colorama.deinit()
+def current_datetime():
+    """Return the present moment as a timezone-aware datetime.
+
+    The timestamp is rounded to the nearest whole second and is
+    expressed in the local timezone.
+    """
+    whole_seconds = round(time.time())
+    local_zone = dateutil.tz.tzlocal()
+    return datetime.datetime.fromtimestamp(whole_seconds, local_zone)
+
+
class AtomFeed(object):
"""Class for storing atom:feed date and metadata."""
@@ -256,9 +266,8 @@ def generate_index_and_feed():
# update time will be set after everything finishes
postspath = os.path.join(BUILDDIR, "blog")
- # traverse all posts in reverse time order
- for name in sorted(os.listdir(postspath), reverse=True):
- if re.match(r"^(\d{4})-(\d{2})-(\d{2}).*\.html", name):
+ for name in os.listdir(postspath):
+ if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html", name):
htmlpath = os.path.join(postspath, name)
entry = AtomEntry()
with open(htmlpath, encoding="utf-8") as htmlfile:
@@ -287,11 +296,12 @@ def generate_index_and_feed():
entry.content.append(cdata(entry.content_html))
entry.assemble_entry()
feed.entries.append(entry)
+ # sort entries by reverse chronological order
+ feed.entries.sort(key=lambda entry: entry.updated_datetime, reverse=True)
generate_index(feed)
- feed.updated_datetime = datetime.datetime.fromtimestamp(round(time.time()),
- dateutil.tz.tzlocal())
+ feed.updated_datetime = current_datetime()
feed.updated = ET.Element("updated")
feed.updated.text = feed.updated_datetime.isoformat()
@@ -440,8 +450,7 @@ def new_post(args):
"""
title = args.title
- date = datetime.datetime.fromtimestamp(round(time.time()),
- dateutil.tz.tzlocal())
+ date = current_datetime()
filename_date = date.strftime("%Y-%m-%d")
iso_date = date.isoformat()
display_date = "%s %d, %d" % (date.strftime("%B"), date.day, date.year)
@@ -464,6 +473,67 @@ def new_post(args):
return 0
+def touch(args):
+    """Update the timestamp of a post to the current time.
+
+    The ``date`` and ``date-display`` lines that appear before the
+    second ``---`` fence of the post are rewritten in place, and the
+    file is renamed when its date prefix no longer matches the current
+    date.  A summary of the changed lines is written to stderr.
+
+    ``args.filename`` may be a path or a bare basename; only the
+    basename is used, and it is looked up in SOURCEDIR/blog.
+
+    Returns 0 on success.  Returns 1 (leaving the post untouched) when
+    the post does not exist, when its name does not begin with a date,
+    or when renaming it would overwrite another existing post.
+    """
+    filename = os.path.basename(args.filename)
+    fullpath = os.path.join(SOURCEDIR, "blog", filename)
+    if not os.path.exists(fullpath):
+        sys.stderr.write("%serror: post %s not found %s\n" %
+                         (RED, fullpath, RESET))
+        return 1
+    filename_prefix_re = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}")
+    if not filename_prefix_re.match(filename):
+        sys.stderr.write(RED)
+        sys.stderr.write("error: post %s is not a valid post\n" % filename)
+        sys.stderr.write("error: the filename of a valid post begins with "
+                         "a date in the form xxxx-xx-xx\n")
+        sys.stderr.write(RESET)
+        return 1
+
+    date = current_datetime()
+    iso_date = date.isoformat()
+    display_date = "%s %d, %d" % (date.strftime("%B"), date.day, date.year)
+    filename_date = date.strftime("%Y-%m-%d")
+
+    # Work out the rename target before modifying anything: os.rename
+    # silently replaces an existing file on POSIX, so a name collision
+    # must abort the whole operation rather than clobber another post
+    # or leave this one half-updated.
+    new_filename = filename_prefix_re.sub(filename_date, filename)
+    new_fullpath = os.path.join(SOURCEDIR, "blog", new_filename)
+    if new_filename != filename and os.path.exists(new_fullpath):
+        sys.stderr.write("%serror: %s already exists, "
+                         "refusing to overwrite%s\n" %
+                         (RED, new_fullpath, RESET))
+        return 1
+
+    # update timestamp in the metadata section of the post
+    updated_fields = (
+        ("date: ", "date: %s\n" % iso_date),
+        ("date-display: ", "date-display: %s\n" % display_date),
+    )
+    changes = []
+    with fileinput.input(files=(fullpath,), inplace=True) as lines:
+        meta_fences = 0
+        for line in lines:
+            if line.startswith("---"):
+                meta_fences += 1
+            elif meta_fences < 2:
+                # still before the second fence: metadata fields may
+                # be replaced
+                for prefix, updated_line in updated_fields:
+                    if line.startswith(prefix):
+                        changes.append("-%s+%s\n" % (line, updated_line))
+                        line = updated_line
+                        break
+            # with inplace=True, stdout is redirected into the file
+            # being rewritten
+            sys.stdout.write(line)
+
+    sys.stderr.write("\nchangeset:\n\n%s" % "".join(changes))
+
+    # rename the file if its date prefix is out of date
+    if new_filename != filename:
+        os.rename(fullpath, new_fullpath)
+        sys.stderr.write("renamed to %s\n" % new_fullpath)
+    return 0
+
+
def deploy(args):
"""Deploys build directory to origin/master without regenerating.
@@ -505,7 +575,7 @@ def deploy(args):
sys.stderr.write("Please answer yes or no.\n")
if abort:
sys.stderr.write("%saborting deployment%s\n" % (RED, RESET))
- exit(1)
+ return 1
# extract latest commit on the source branch
source_commit = subprocess.check_output(
@@ -518,7 +588,7 @@ def deploy(args):
# extract updated time from atom.xml
if not os.path.exists("atom.xml"):
sys.stderr.write("atom.xml not found, cannot deploy\naborting\n")
- exit(1)
+ return 1
atomxml = ET.parse("atom.xml").getroot()
updated = atomxml.find('{http://www.w3.org/2005/Atom}updated').text
@@ -535,7 +605,7 @@ def deploy(args):
"--message=%s" % commit_message])
except subprocess.CalledProcessError:
sys.stderr.write("\n%serror: git commit failed%s\n" % (RED, RESET))
- exit(1)
+ return 1
# check dirty status
dirty = subprocess.check_output(["git", "status", "--porcelain"])
@@ -545,7 +615,7 @@ def deploy(args):
"build directory still dirty\n")
sys.stderr.write("error: please manually inspect what was left out\n")
sys.stderr.write(RESET)
- exit(1)
+ return 1
# push to origin/master
sys.stderr.write("%scommand: git push origin master%s\n" % (BLUE, RESET))
@@ -553,7 +623,7 @@ def deploy(args):
subprocess.check_call(["git", "push", "origin", "master"])
except subprocess.CalledProcessError:
sys.stderr.write("\n%serror: git push failed%s\n" % (RED, RESET))
- exit(1)
+ return 1
return 0
@@ -641,6 +711,26 @@ def main():
parser = argparse.ArgumentParser(description=description)
subparsers = parser.add_subparsers()
+ parser_new_post = subparsers.add_parser(
+ "new_post", aliases=["n", "new"],
+ description="Create a new post with metadata pre-filled.")
+ parser_new_post.add_argument("title", help="title of the new post")
+ parser_new_post.set_defaults(func=new_post)
+
+ parser_new_post = subparsers.add_parser(
+ "touch", aliases=["t", "tou"],
+ description="""Touch an existing post, i.e., update its
+ timestamp to current time. Why is this ever useful? Well, the
+ timestamp filled in by new_post is the time of creation, but one
+ might spend several hours after the creation of the file to
+ finish the post. Sometimes the post is even created on one day
+ and finished on another (say created at 11pm and finished at
+ 1am). Therefore, one may want to retouch the timestamp before
+ publishing.""")
+ parser_new_post.add_argument("filename",
+ help="path or basename of the source file, e.g., 2015-05-05-new-blog-new-start.md")
+ parser_new_post.set_defaults(func=touch)
+
parser_generate = subparsers.add_parser(
"generate", aliases=["g", "gen"],
description="Generate new or changed objects.")
@@ -652,10 +742,9 @@ def main():
parser_regenerate.set_defaults(func=regenerate)
parser_new_post = subparsers.add_parser(
- "new_post", aliases=["n", "new"],
- description="Create a new post with metadata pre-filled.")
- parser_new_post.add_argument("title", help="title of the new post")
- parser_new_post.set_defaults(func=new_post)
+ "preview", aliases=["p", "pre"],
+ description="Serve the blog locally and auto regenerate upon changes.")
+ parser_new_post.set_defaults(func=preview)
parser_new_post = subparsers.add_parser(
"deploy", aliases=["d", "dep"],
@@ -667,11 +756,6 @@ def main():
description="Rebuild entire blog and deploy build/ to origin/master.")
parser_new_post.set_defaults(func=gen_deploy)
- parser_new_post = subparsers.add_parser(
- "preview", aliases=["p", "pre"],
- description="Serve the blog locally and auto regenerate upon changes.")
- parser_new_post.set_defaults(func=preview)
-
with init_colorama():
args = parser.parse_args()
returncode = args.func(args)
diff --git a/source/blog/2015-05-05-graceful-handling-of-sigint-when-using-pythons-multiprocessingprocess.md b/source/blog/2015-05-05-graceful-handling-of-sigint-when-using-pythons-multiprocessingprocess.md
new file mode 100644
index 00000000..c289dece
--- /dev/null
+++ b/source/blog/2015-05-05-graceful-handling-of-sigint-when-using-pythons-multiprocessingprocess.md
@@ -0,0 +1,73 @@
+---
+title: "Graceful handling of <code>SIGINT</code> when using Python's <code>multiprocessing.Process</code>"
+date: 2015-05-05T22:03:39-07:00
+date-display: May 5, 2015
+---
+Today I learned something about Python's (at least CPython's) multiprocessing and signal handling, and I would like to share it here. Basically my situation was such (when developing [`pyblog`](https://github.com/zmwangx/zmwangx.github.io/blob/source/pyblog) that powers this blog):
+
+* I would like to serve the blog with an HTTP server while auto-regenerating for changes;
+* The auto-regeneration is handled in the main process with a while loop, whereas the HTTP server (requiring little human intervention) is put in a `multiprocessing.Process` and launched with `http.server.HTTPServer.serve_forever()`;
+* Upon sending `SIGINT`, both processes need to clean up and quit; in particular, the server needs to exit its `serve_forever()` loop (which can be done via `shutdown()`, but how to invoke the method is a problem, since `serve_forever()` blocks);
+* Handling of `SIGINT` must be graceful in the main process — there might be an ongoing build that must not be interrupted until finishing.
+
+Given this context, I learned the following two critical concepts (at least true in the current version of CPython) through trial and error:
+
+1. **A user-triggered `SIGINT` is sent to both processes** — the main process and the `multiprocessing.Process` instance;
+2. **Except for the defined interfaces, a `multiprocessing.Process` instance is almost completely separated from the main process, sharing as few resources as possible**; by "defined interfaces" I mean the defined attributes and methods of a `Process` instance, as well as defined communication channels like `multiprocessing.Pipe` or `multiprocessing.Queue`. And to expand on resource sharing: yes, the two processes have their own copies of global variables, so using global variables as state registers is a no-go.
+
+Both concepts can be used to one's benefit or detriment. Below is how I solved my problem, using the two concepts. Observe that without a custom handler, Python translates a `SIGINT` to a `KeyboardInterrupt` exception; therefore, I use the default `KeyboardInterrupt` to interrupt the HTTP server in its own process (through handling the exception and calling `shutdown()`), but instead install a custom `SIGINT` handler in the main process that translates `SIGINT` to setting a `sigint_raised` flag that can be picked up by the while loop once the current build (if any) is finished. The proof of concept script is as follows (the production code is [here](https://github.com/zmwangx/zmwangx.github.io/blob/a7a0b2073f30b1d0214c3152998d95e40a39b438/pyblog#L567-L635)):
+
+```python
+#!/usr/bin/env python3
+
+import http.server
+import multiprocessing
+import signal
+import sys
+import time
+
+class HTTPServerProcess(multiprocessing.Process):
+    """Child process that serves HTTP on port 8000 until interrupted."""
+
+    def run(self):
+        httpd = http.server.HTTPServer(
+            ("", 8000), http.server.SimpleHTTPRequestHandler)
+        try:
+            httpd.serve_forever()
+        except KeyboardInterrupt:
+            # this process is started before the custom handler below is
+            # installed, so SIGINT surfaces here as KeyboardInterrupt
+            httpd.shutdown()
+
+def do_things():
+    """Write one dot per second to stderr, ten times, then a newline."""
+    for i in range(10):
+        sys.stderr.write(".")
+        sys.stderr.flush()
+        time.sleep(1)
+    sys.stderr.write("\n")
+
+def main():
+    """Start the server process, then repeat do_things() until SIGINT."""
+    server_process = HTTPServerProcess()
+    server_process.start()
+
+    # define and install custom SIGINT handler
+    sigint_raised = False
+
+    def sigint_mitigator(signum, frame):
+        # only record the signal; the loop below checks the flag between
+        # calls to do_things()
+        nonlocal sigint_raised
+        sigint_raised = True
+
+    signal.signal(signal.SIGINT, sigint_mitigator)
+
+    while not sigint_raised:
+        do_things()
+
+    # reached only after the flag has been set; wait for the server
+    # process to exit
+    server_process.join()
+
+if __name__ == "__main__":
+    main()
+```
+
+Beware that with this solution, if there are external programs or OS level operations happening in the main process, then the operation at the time of `SIGINT` will still be interrupted[^OS-specific] (for example, in the script above, the `time.sleep(1)` at the exact point of `SIGINT` is still interrupted, but otherwise `do_things` is carried on to its completion). I'm not sure how to explain this — maybe the handler isn't capturing the signal fast enough?[^naive] Anyway, one single early interruption is at least more acceptable than a completely corrupted build[^interruption], and certainly more graceful.
+
+[^naive]: That's awfully naive and layman-sounding, I know, but I *am* almost a layman when it comes to system-level programming.
+
+[^OS-specific]: [CPython's `multiprocessing` is written in C](https://hg.python.org/cpython/file/1320ec1b24af/Modules/_multiprocessing), so the behavior might depend on the OS. I'm talking about OS X here. I haven't inspected and won't inspect the C source code.
+
+[^interruption]: That's assuming your build isn't so interdependent that any single failure corrupts everything. In that case, what can we do? I honestly see no way of injecting signal handling in `subprocess.Popen`.
diff --git a/source/css/theme.css b/source/css/theme.css
index 69017387..220d8e50 100644
--- a/source/css/theme.css
+++ b/source/css/theme.css
@@ -27,6 +27,10 @@ h1.title {
font-size: 18pt;
}
+/* Inline code inside the page title uses the same 18pt size as h1.title. */
+h1.title code {
+ font-size: 18pt;
+}
+
h2 {
text-align: center;
font-size: 14pt;