to a link to its original.""" if not soup.article: return for img_tag in soup.article.find_all("img"): a_tag = soup.new_tag("a", href=img_tag["src"], target="_blank") a_tag.insert(0, copy.copy(img_tag)) img_tag.replace_with(a_tag) def _pre_tag_insert_line_numbers(soup, pre_tag): """Insert line numbers to a pre tag.""" num_lines = len(pre_tag.text.split("\n")) for line_number in range(1, num_lines + 1): # line number divs will look like: # # ln_tag = soup.new_tag("span") ln_tag["class"] = "line-number" ln_tag["data-line"] = line_number ln_tag["style"] = "top: %.2fem" % ((line_number - 1) * 1.35) # add a comment to the content of the span to suppress tidy5 # empty tag warning ln_tag.append(soup.new_string("", bs4.Comment)) pre_tag.code.append(ln_tag) def process_footnote_backlinks(soup): """Add class attribute "footnotes-backlink" to each footnote backlink.""" for footnotes in soup.find_all("div", attrs={"class": "footnotes"}): for fn_a_tag in footnotes.find_all(lambda tag: tag.name == "a" and tag.has_attr("href") and tag["href"].startswith("#fnref") and tag.string == "\u21A9"): # U+21A9: LEFTWARDS ARROW WITH HOOK fn_a_tag["class"] = "footnotes-backlink" fn_a_tag.string = "\u21A9\uFE0E" # U+FE0E: VARIATION SELECTOR-15 def postprocess_html_file(htmlfilepath): """Perform a series of postprocessing to an HTML file.""" with open(htmlfilepath, "r+", encoding="utf-8") as htmlfileobj: soup = bs4.BeautifulSoup(htmlfileobj.read(), "lxml") # a series of postprocessing (extensible) process_image_sizes(soup) link_img_tags(soup) process_footnote_backlinks(soup) # write back htmlfileobj.seek(0) htmlfileobj.write(str(soup)) htmlfileobj.truncate() def static_vars(**kwargs): def decorate(func): for k in kwargs: setattr(func, k, kwargs[k]) return func return decorate def sanitize(string): """Sanitize string (title) for URI consumption.""" if isinstance(string, bytes): string = string.decode('utf-8') # to lowercase string = string.lower() # strip all non-word, non-hyphen and non-whitespace characters string = re.sub(r"[^\w\s-]", "", string) # replace consecutive whitespaces with a single hyphen string = re.sub(r"\s+", "-", string) # percent encode the result return urllib.parse.quote(string) class HTTPServerProcess(multiprocessing.Process): """This class can be used to run an HTTP server.""" def __init__(self, rootdir): """Initialize the HTTPServerProcess class. Parameters ---------- rootdir : str The root directory to serve from. """ super().__init__() self.rootdir = rootdir def run(self): """Create an HTTP server and serve forever. Runs on localhost. The default port is 8000; if it is not available, a random port is used instead. """ os.chdir(self.rootdir) # pylint: disable=invalid-name HandlerClass = http.server.SimpleHTTPRequestHandler try: httpd = http.server.HTTPServer(("", 8001), HandlerClass) except OSError: httpd = http.server.HTTPServer(("", 0), HandlerClass) _, portnumber = httpd.socket.getsockname() sys.stderr.write("server serving on http://localhost:%d\n" % portnumber) try: httpd.serve_forever() except KeyboardInterrupt: httpd.shutdown() def list_posts(): """List all posts, with date, title, and path to source file. This function only lists posts that has been built (since it reads metadata from HTML rather than Markdown). Returns ------- posts : list A list of posts, in reverse chronological order, where each element is a tuple of (date, title, path to source file). """ posts = [] for name in os.listdir(os.path.join(BUILDDIR, "blog")): if not re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html", name): continue htmlpath = os.path.join(BUILDDIR, "blog", name) entry = AtomEntry() item = RssItem() try: with open(htmlpath, encoding="utf-8") as htmlfile: soup = bs4.BeautifulSoup(htmlfile.read(), "lxml") title = soup.title.text date = dateutil.parser.parse(soup.find("meta", attrs={"name": "date"})["content"]) source_path = os.path.join(POSTSDIR, re.sub(r'.html$', '.md', name)) posts.append((date, title, source_path)) except Exception: sys.stderr.write("error: failed to read metadata from HTML file %s\n" % name) with open(htmlpath, encoding="utf-8") as htmlfile: sys.stderr.write("dumping HTML:%s\n\n" % htmlfile.read()) raise posts.sort(key=lambda post: post[0], reverse=True) return posts class PostSelector: def __init__(self, term, posts): self._term = term self.posts_per_page = term.height - 2 self.pages = [posts[i:i+self.posts_per_page] for i in range(0, len(posts), self.posts_per_page)] self.num_pages = len(self.pages) self.pagepos = 0 self.postpos = 0 self.inserting = False # True if in the middle of inserting a post #, False otherwise term.enter_fullscreen() print(term.clear(), end="") sys.stdout.flush() self.selection = "" self.quit = False self.display_page() def _clear_to_eol(self): term = self._term print(term.clear_eol, end="") sys.stdout.flush() def _print_line(self, line, linenum, highlight=False): term = self._term width = term.width with term.location(0, linenum): if highlight: print(term.reverse(line[:width]), end="") else: print(line[:width], end="") self._clear_to_eol() def _print_post(self, page, pos, highlight=False): if pos >= len(page): # if position out of range, just clear the line self._print_line("", pos + 1, highlight) else: date, title, path = page[pos] line = "%3d: %s %s" % (pos, date.strftime("%m/%d/%y"), title) self._print_line(line, pos + 1, highlight) def display_page(self): term = self._term page = self.pages[self.pagepos] with term.hidden_cursor(): topline = " PAGE %d/%d POST %d" % (self.pagepos + 1, self.num_pages, self.postpos) if self.inserting: topline += term.blink("_") self._print_line(topline, 0, highlight=True) for i in range(self.posts_per_page): self._print_post(page, i) # highlight selected post self._print_post(page, self.postpos, highlight=True) bottomline = " Press h for help." self._print_line(bottomline, term.height - 1, highlight=True) def dispatch(self, key): term = self._term if key in string.digits: # insert if self.inserting: newpostpos = 10 * self.postpos + int(key) if newpostpos < len(self.pages[self.pagepos]): self.postpos = newpostpos else: self.postpos = int(key) self.inserting = True elif key.name == "KEY_DELETE": self.postpos //= 10 self.inserting = True else: self.inserting = False if key.name == "KEY_ENTER": self.selection = self.pages[self.pagepos][self.postpos][2] if key in {"q", "Q"}: self.quit = True elif key.name == "KEY_DOWN" or key in {"n", "N"}: if self.postpos + 1 < len(self.pages[self.pagepos]): self.postpos += 1 elif key.name == "KEY_UP" or key in {"p", "P"}: if self.postpos > 0: self.postpos -= 1 elif key.name == "KEY_RIGHT" or key in {".", ">"}: if self.pagepos + 1 < self.num_pages: self.pagepos += 1 self.postpos = 0 elif key.name == "KEY_LEFT" or key in {",", "<"}: if self.pagepos > 0: self.pagepos -= 1 self.postpos = 0 elif key in {"h", "H"}: print(term.clear_eol, end="") sys.stdout.flush() help_text_lines = [ "Next post: n or ", "Previous post: p or ", "Next page: . or > or ", "Previous page: , or < or ", "Select post: or ", "Select by number: type number as shown (delete or backspace to edit)", "Get help: h", "Quit program: q", ] for i in range(term.height - 1): self._print_line(help_text_lines[i] if i < len(help_text_lines) else "", i) bottomline = " Press any key to continue." self._print_line(bottomline, term.height - 1, highlight=True) with term.raw(): term.inkey() def restore(self): term = self._term term.exit_fullscreen() print(term.clear(), end="") sys.stdout.flush() def select(self): term = self._term try: while True: with term.raw(): self.dispatch(term.inkey()) if self.selection or self.quit: break self.display_page() except Exception: raise finally: self.restore() return self.selection