diff options
author | Zhiming Wang <zmwangx@gmail.com> | 2015-05-05 22:08:54 -0700 |
---|---|---|
committer | Zhiming Wang <zmwangx@gmail.com> | 2015-05-05 22:10:19 -0700 |
commit | 11f732c9f2644302136ee90176ad7816bd596895 (patch) | |
tree | 6e1a31127ef13e19889c693a24f9d71f8fafe1c5 | |
parent | a7a0b2073f30b1d0214c3152998d95e40a39b438 (diff) | |
download | my_new_personal_website-11f732c9f2644302136ee90176ad7816bd596895.tar.xz my_new_personal_website-11f732c9f2644302136ee90176ad7816bd596895.zip |
20150505 Graceful handling of SIGINT when using Python's multiprocessing.Process
Also implemented the "touch" action in pyblog and wrote a
README.md for the source branch, along with some other minor patching.
Diffstat (limited to '')
-rw-r--r-- | README.md | 15 | ||||
-rwxr-xr-x | pyblog | 126 | ||||
-rw-r--r-- | source/blog/2015-05-05-graceful-handling-of-sigint-when-using-pythons-multiprocessingprocess.md | 73 | ||||
-rw-r--r-- | source/css/theme.css | 4 |
4 files changed, 197 insertions, 21 deletions
diff --git a/README.md b/README.md new file mode 100644 index 00000000..18b55ef7 --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +This blog can be built with the `pyblog` script. The `pyblog` script currently provides the following actions: + +* `new_post` (aliases: `n`, `new`); +* `touch` (aliases: `t`, `tou`); +* `generate` (aliases: `g`, `gen`); +* `regenerate` (aliases: `r`, `regen`); +* `preview` (aliases: `p`, `pre`). +* `deploy` (aliases: `d`, `dep`); +* `gen_deploy` (aliases: `gd`, `gendep`); + +Run `pyblog --help` for more information, and `pyblog <action> --help` for what the action does and how to perform the action. + +Note that `pyblog` is incompatible with Python 2.x, and in fact only tested on Python 3.4. + +The theme of this blog is largely based on that of [mort.ninja](http://mort.ninja) by [Mort Yao](https://github.com/soimort). See [this post](https://zmwangx.github.io/blog/2015-05-05-new-blog-new-start.html) for more details. @@ -5,6 +5,7 @@ import argparse from contextlib import contextmanager import datetime +import fileinput import io import http.client import http.server @@ -96,6 +97,15 @@ def init_colorama(): colorama.deinit() +def current_datetime(): + """Return the current datetime, complete with tzinfo. + + Precision is one second. Timezone is the local timezone. 
+ """ + return datetime.datetime.fromtimestamp(round(time.time()), + dateutil.tz.tzlocal()) + + class AtomFeed(object): """Class for storing atom:feed date and metadata.""" @@ -256,9 +266,8 @@ def generate_index_and_feed(): # update time will be set after everthing finishes postspath = os.path.join(BUILDDIR, "blog") - # traverse all posts in reverse time order - for name in sorted(os.listdir(postspath), reverse=True): - if re.match(r"^(\d{4})-(\d{2})-(\d{2}).*\.html", name): + for name in os.listdir(postspath): + if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html", name): htmlpath = os.path.join(postspath, name) entry = AtomEntry() with open(htmlpath, encoding="utf-8") as htmlfile: @@ -287,11 +296,12 @@ def generate_index_and_feed(): entry.content.append(cdata(entry.content_html)) entry.assemble_entry() feed.entries.append(entry) + # sort entries by reverse chronological order + feed.entries.sort(key=lambda entry: entry.updated_datetime, reverse=True) generate_index(feed) - feed.updated_datetime = datetime.datetime.fromtimestamp(round(time.time()), - dateutil.tz.tzlocal()) + feed.updated_datetime = current_datetime() feed.updated = ET.Element("updated") feed.updated.text = feed.updated_datetime.isoformat() @@ -440,8 +450,7 @@ def new_post(args): """ title = args.title - date = datetime.datetime.fromtimestamp(round(time.time()), - dateutil.tz.tzlocal()) + date = current_datetime() filename_date = date.strftime("%Y-%m-%d") iso_date = date.isoformat() display_date = "%s %d, %d" % (date.strftime("%B"), date.day, date.year) @@ -464,6 +473,67 @@ def new_post(args): return 0 +def touch(args): + """Update the timestamp of a post to the current time.""" + filename = os.path.basename(args.filename) + fullpath = os.path.join(SOURCEDIR, "blog", filename) + if not os.path.exists(fullpath): + sys.stderr.write("%serror: post %s not found %s\n" % + (RED, fullpath, RESET)) + return 1 + filename_prefix_re = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}") + if not 
filename_prefix_re.match(filename): + sys.stderr.write(RED) + sys.stderr.write("error: post %s is not a valid post\n" % filename) + sys.stderr.write("error: the filename of a valid post begins with " + "a date in the form xxxx-xx-xx\n") + sys.stderr.write(RESET) + return 1 + + # update timestamp in the metadata section of the post + whatchanged = io.StringIO() + date = current_datetime() + iso_date = date.isoformat() + display_date = "%s %d, %d" % (date.strftime("%B"), date.day, date.year) + filename_date = date.strftime("%Y-%m-%d") + with fileinput.input(files=(fullpath), inplace=True) as lines: + meta_fences = 0 + for line in lines: + if line.startswith("---"): + meta_fences += 1 + sys.stdout.write(line) + continue + if meta_fences >= 2: + # already went past the metadata section + sys.stdout.write(line) + continue + + if line.startswith("date: "): + updated_line = "date: %s\n" % iso_date + sys.stdout.write(updated_line) + whatchanged.write("-%s+%s\n" % (line, updated_line)) + continue + + if line.startswith("date-display: "): + updated_line = "date-display: %s\n" % display_date + sys.stdout.write(updated_line) + whatchanged.write("-%s+%s\n" % (line, updated_line)) + continue + + sys.stdout.write(line) + + sys.stderr.write("\nchangeset:\n\n%s" % whatchanged.getvalue()) + whatchanged.close() + + # check if the file needs to be renamed + new_filename = filename_prefix_re.sub(filename_date, filename) + if new_filename != filename: + new_fullpath = os.path.join(SOURCEDIR, "blog", new_filename) + os.rename(fullpath, new_fullpath) + sys.stderr.write("renamed to %s\n" % new_fullpath) + return 0 + + def deploy(args): """Deploys build directory to origin/master without regenerating. 
@@ -505,7 +575,7 @@ def deploy(args): sys.stderr.write("Please answer yes or no.\n") if abort: sys.stderr.write("%saborting deployment%s\n" % (RED, RESET)) - exit(1) + return 1 # extract latest commit on the source branch source_commit = subprocess.check_output( @@ -518,7 +588,7 @@ def deploy(args): # extract updated time from atom.xml if not os.path.exists("atom.xml"): sys.stderr.write("atom.xml not found, cannot deploy\naborting\n") - exit(1) + return 1 atomxml = ET.parse("atom.xml").getroot() updated = atomxml.find('{http://www.w3.org/2005/Atom}updated').text @@ -535,7 +605,7 @@ def deploy(args): "--message=%s" % commit_message]) except subprocess.CalledProcessError: sys.stderr.write("\n%serror: git commit failed%s\n" % (RED, RESET)) - exit(1) + return 1 # check dirty status dirty = subprocess.check_output(["git", "status", "--porcelain"]) @@ -545,7 +615,7 @@ def deploy(args): "build directory still dirty\n") sys.stderr.write("error: please manually inspect what was left out\n") sys.stderr.write(RESET) - exit(1) + return 1 # push to origin/master sys.stderr.write("%scommand: git push origin master%s\n" % (BLUE, RESET)) @@ -553,7 +623,7 @@ def deploy(args): subprocess.check_call(["git", "push", "origin", "master"]) except subprocess.CalledProcessError: sys.stderr.write("\n%serror: git push failed%s\n" % (RED, RESET)) - exit(1) + return 1 return 0 @@ -641,6 +711,26 @@ def main(): parser = argparse.ArgumentParser(description=description) subparsers = parser.add_subparsers() + parser_new_post = subparsers.add_parser( + "new_post", aliases=["n", "new"], + description="Create a new post with metadata pre-filled.") + parser_new_post.add_argument("title", help="title of the new post") + parser_new_post.set_defaults(func=new_post) + + parser_new_post = subparsers.add_parser( + "touch", aliases=["t", "tou"], + description="""Touch an existing post, i.e., update its + timestamp to current time. Why is this ever useful? 
Well, the + timestamp filled in by new_post is the time of creation, but one + might spend several hours after the creation of the file to + finish the post. Sometimes the post is even created on one day + and finished on another (say created at 11pm and finished at + 1am). Therefore, one may want to retouch the timestamp before + publishing.""") + parser_new_post.add_argument("filename", + help="path or basename of the source file, e.g., 2015-05-05-new-blog-new-start.md") + parser_new_post.set_defaults(func=touch) + parser_generate = subparsers.add_parser( "generate", aliases=["g", "gen"], description="Generate new or changed objects.") @@ -652,10 +742,9 @@ def main(): parser_regenerate.set_defaults(func=regenerate) parser_new_post = subparsers.add_parser( - "new_post", aliases=["n", "new"], - description="Create a new post with metadata pre-filled.") - parser_new_post.add_argument("title", help="title of the new post") - parser_new_post.set_defaults(func=new_post) + "preview", aliases=["p", "pre"], + description="Serve the blog locally and auto regenerate upon changes.") + parser_new_post.set_defaults(func=preview) parser_new_post = subparsers.add_parser( "deploy", aliases=["d", "dep"], @@ -667,11 +756,6 @@ def main(): description="Rebuild entire blog and deploy build/ to origin/master.") parser_new_post.set_defaults(func=gen_deploy) - parser_new_post = subparsers.add_parser( - "preview", aliases=["p", "pre"], - description="Serve the blog locally and auto regenerate upon changes.") - parser_new_post.set_defaults(func=preview) - with init_colorama(): args = parser.parse_args() returncode = args.func(args) diff --git a/source/blog/2015-05-05-graceful-handling-of-sigint-when-using-pythons-multiprocessingprocess.md b/source/blog/2015-05-05-graceful-handling-of-sigint-when-using-pythons-multiprocessingprocess.md new file mode 100644 index 00000000..c289dece --- /dev/null +++ 
b/source/blog/2015-05-05-graceful-handling-of-sigint-when-using-pythons-multiprocessingprocess.md @@ -0,0 +1,73 @@ +--- +title: "Graceful handling of <code>SIGINT</code> when using Python's <code>multiprocessing.Process</code>" +date: 2015-05-05T22:03:39-07:00 +date-display: May 5, 2015 +--- +Today I learned something about Python's (at least CPython's) multiprocessing and signal handling, and I would like to share it here. Basically, my situation was as follows (when developing [`pyblog`](https://github.com/zmwangx/zmwangx.github.io/blob/source/pyblog) that powers this blog): + +* I would like to serve the blog with an HTTP server while auto-regenerating for changes; +* The auto-regeneration is handled in the main process with a while loop, whereas the HTTP server (requiring little human intervention) is put in a `multiprocessing.Process` and launched with `http.server.HTTPServer.serve_forever()`; +* Upon sending `SIGINT`, both processes need to clean up and quit; in particular, the server needs to exit its `serve_forever()` loop (which can be done via `shutdown()`, but how to invoke the method is a problem, since `serve_forever()` blocks); +* Handling of `SIGINT` must be graceful in the main process — there might be an ongoing build that must not be interrupted until it finishes. + +Given this context, I learned the following two critical concepts (at least true in the current version of CPython) through trial and error: + +1. **A user-triggered `SIGINT` is sent to both processes** — the main process and the `multiprocessing.Process` instance; +2. **Except for the defined interfaces, a `multiprocessing.Process` instance is almost completely separated from the main process, sharing as few resources as possible**; by "defined interfaces" I mean the defined attributes and methods of a `Process` instance, as well as defined communication channels like `multiprocessing.Pipe` or `multiprocessing.Queue`. 
And to expand on resource sharing: yes, the two processes have their own copies of global variables, so using global variables as state registers is a no-go. + +Both concepts can be used to one's benefit or detriment. Below is how I solved my problem, using the two concepts. Observe that without a custom handler, Python translates a `SIGINT` to a `KeyboardInterrupt` exception; therefore, I use the default `KeyboardInterrupt` to interrupt the HTTP server in its own process (through handling the exception and calling `shutdown()`), but instead install a custom `SIGINT` handler in the main process that translates `SIGINT` to setting a `sigint_raised` flag that can be picked up by the while loop once the current build (if any) is finished. The proof of concept script is as follows (the production code is [here](https://github.com/zmwangx/zmwangx.github.io/blob/a7a0b2073f30b1d0214c3152998d95e40a39b438/pyblog#L567-L635)): + +```python +#!/usr/bin/env python3 + +import http.server +import multiprocessing +import signal +import sys +import time + +class HTTPServerProcess(multiprocessing.Process): + def run(self): + httpd = http.server.HTTPServer( + ("", 8000), http.server.SimpleHTTPRequestHandler) + try: + httpd.serve_forever() + except KeyboardInterrupt: + httpd.shutdown() + +def do_things(): + for i in range(10): + sys.stderr.write(".") + sys.stderr.flush() + time.sleep(1) + sys.stderr.write("\n") + +def main(): + server_process = HTTPServerProcess() + server_process.start() + + # define and install custom SIGINT handler + sigint_raised = False + + def sigint_mitigator(signum, frame): + nonlocal sigint_raised + sigint_raised = True + + signal.signal(signal.SIGINT, sigint_mitigator) + + while not sigint_raised: + do_things() + + server_process.join() + +if __name__ == "__main__": + main() +``` + +Beware that with this solution, if there are external programs or OS level operations happening in the main process, then the operation at the time of `SIGINT` will still be 
interrupted[^OS-specific] (for example, in the script above, the `time.sleep(1)` at the exact point of `SIGINT` is still interrupted, but otherwise `do_things` carries on to completion). I'm not sure how to explain this — maybe the handler isn't capturing the signal fast enough?[^naive] Anyway, one single early interruption is at least more acceptable than a completely corrupted build[^interruption], and certainly more graceful. + +[^naive]: That's awfully naive and layman-sounding, I know, but I *am* almost a layman when it comes to system-level programming. + +[^OS-specific]: [CPython's `multiprocessing` is written in C](https://hg.python.org/cpython/file/1320ec1b24af/Modules/_multiprocessing), so the behavior might depend on the OS. I'm talking about OS X here. I haven't inspected and won't inspect the C source code. + +[^interruption]: That's assuming your build isn't so interdependent that any single failure corrupts everything. In that case, what can we do? I honestly see no way of injecting signal handling in `subprocess.Popen`. diff --git a/source/css/theme.css b/source/css/theme.css index 69017387..220d8e50 100644 --- a/source/css/theme.css +++ b/source/css/theme.css @@ -27,6 +27,10 @@ h1.title { font-size: 18pt; } +h1.title code { + font-size: 18pt; +} + h2 { text-align: center; font-size: 14pt; |