{title}

-- table --', '', line)) with open("build"+document_file, 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open("build"+document_file, 'w', encoding='utf-8') as indexmd: for line in lines: if (re.match('^\$.*', line) and first_pr == 1): line_edited='' indexmd.write(re.sub(r'^\$.*', line_edited, line)) first_pr = 0 first_comp = 1 elif (re.match('^\$.*', line)): if (tr_class == "odd"): tr_class = "even" else: tr_class = "odd" line_edited='' indexmd.write(re.sub(r'^\$.*', line_edited, line)) else: indexmd.write(line) with open("build"+document_file, 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open("build"+document_file, 'w', encoding='utf-8') as indexmd: for line in lines: if (re.match('^ \$.*\$$', line)): indexmd.write(re.sub(r'^ \$.*\$$', "

"+line.split("$")[1]+'

', line)) first_comp = 1 elif (re.match('^ \$.*[^\$]$', line)): if first_comp == 1: indexmd.write(re.sub(r'^ \$.*[^\$]$', "", line)) else: indexmd.write(re.sub(r'^ \$.*', "", line)) with open("build"+document_file, 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open("build"+document_file, 'w', encoding='utf-8') as indexmd: for line in lines: indexmd.write(re.sub(r"-- end table --", "

Compétence	Activité	Justification
"+line.split("$")[1]+' ', line)) first_comp = 0 else: indexmd.write(re.sub(r'^ \$.[^\$]$', " "+line.split("$")[1]+' ', line)) else: indexmd.write(line) with open("build"+document_file, 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open("build"+document_file, 'w', encoding='utf-8') as indexmd: for line in lines: if (re.match('^ \$.', line)): indexmd.write(re.sub(r'^ \$.*', "	"+line.split("$")[1]+"	"+line+"

", line)) os.remove(tmppath) def generate_blog_list(feed): """"Generate blog list """ sys.stderr.write("generating blog list\n") html_fileList = [] for root, dirs, files in os.walk(BUILDDIR): for name in files: if re.search(r'blog',root): if name.endswith(".html"): try: html_fileList.append(os.path.join(root.split('blog/')[1], name)) except IndexError: html_fileList.append(name) # generate TOC for html_file in html_fileList: div_blog_list = u'

\n\n' year = 10000 # will be larger than the latest year for quite a while # recall that entries are in reverse chronological order table_opened = False for entry in feed.entries: date = entry.updated_datetime if date.year < year: # close the previous table if there is one if table_opened: div_blog_list += u'\n' # write a new

tag with the smaller year year = date.year div_blog_list += u'\n

.:{0}:.

\n\n'.format(year) div_blog_list += u'\n' table_opened = True # write a new table row entry in Markdown, in the format: # # # # # monthday = date.strftime("%b %d") div_blog_list += (u'' '\n' % (date.isoformat(), monthday, entry.relpath, entry.title_text)) if table_opened: div_blog_list += u'

May 5	[Blah blah](/blog/2015-05-04-blah-blah.html)
%s	%s

\n' div_blog_list += u'

' fd, tmppath = tempfile.mkstemp() os.close(fd) with open(tmppath, 'w', encoding='utf-8') as tmpfile: if os.path.exists("build/blog/index.html"): with open("build/blog/index.html", 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open("build/blog/index.html", 'w', encoding='utf-8') as indexmd: for line in lines: indexmd.write(re.sub(r'{% generate blog_list here %}', div_blog_list, line)) def generate_notes_list(): """"Generate notes list """ sys.stderr.write("generating notes list\n") html_fileList = [] for root, dirs, files in os.walk(BUILDDIR): for name in files: if re.search(r'notes',root): if name.endswith(".html"): try: html_fileList.append(os.path.join(root.split('notes/')[1], name)) except IndexError: html_fileList.append(name) div_notes_list = u'

\n\n' year = 10000 # will be larger than the latest year for quite a while # recall that entries are in reverse chronological order table_opened = False for name in list(reversed(sorted(os.listdir(os.path.join(BUILDDIR, "notes"))))): if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html", name): htmlpath = os.path.join(BUILDDIR, "notes", name) #tentry = AtomEntry() #item = RssItem() try: with open(htmlpath, encoding="utf-8") as htmlfile: soup = bs4.BeautifulSoup(htmlfile.read(), "lxml") # generate atom entry #entry.author = copy.deepcopy(feed.author) # assume it's always the same author #entry_url = urllib.parse.urljoin(BLOG_HOME, "blog/%s" % name) #entry.id_text = entry_url #entry.id = ET.Element("id") #entry.id.text = entry_url relpath = "/notes/%s" % name #entry.link = ET.Element("link", href=entry_url) title_text = soup.title.text #entry.title = ET.Element("title", type="html") #entry.title.text = entry.title_text post_date = soup.find("meta", attrs={"name": "date"})["content"] updated_datetime = dateutil.parser.parse(post_date) date = updated_datetime if date.year < year: # close the previous table if there is one if table_opened: div_notes_list += u'\n' # write a new

tag with the smaller year year = date.year div_notes_list += u'\n

.:{0}:.

\n\n'.format(year) div_notes_list += u'\n' table_opened = True # write a new table row entry in Markdown, in the format: # # # # # monthday = date.strftime("%b %d") div_notes_list += (u'' '\n' % (date.isoformat(), monthday, relpath, title_text)) except Exception: sys.stderr.write("error: failed to generate feed entry from %s\n" % name) with open(htmlpath, encoding="utf-8") as htmlfile: sys.stderr.write("dumping HTML:%s\n\n" % htmlfile.read()) raise if table_opened: div_notes_list += u'

May 5	[Blah blah](/blog/2015-05-04-blah-blah.html)
%s	%s

\n' div_notes_list += u'

' fd, tmppath = tempfile.mkstemp() os.close(fd) with open(tmppath, 'w', encoding='utf-8') as tmpfile: if os.path.exists("build/notes/index.html"): with open("build/notes/index.html", 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open("build/notes/index.html", 'w', encoding='utf-8') as indexmd: for line in lines: indexmd.write(re.sub(r'{% generate notes_list here %}', div_notes_list, line)) def generate_index(feed): """Generate index.html from index.md and a TOC.""" for language in LANGUAGES: if language != DEFAULTLANG: indexmdl = re.sub(r'index.md$', "index-{}.md".format(language), INDEXMD) indexhtmll = re.sub(r'index.html$', "index-{}.html".format(language), INDEXHTML) else: indexmdl = INDEXMD indexhtmll = INDEXHTML sys.stderr.write("generating index-{}.html\n".format(language)) # generate TOC tocbuff = io.StringIO() tocbuff.write('

') year = 10000 # will be larger than the latest year for quite a while # recall that entries are in reverse chronological order table_opened = False for entry in feed.entries: date = entry.updated_datetime if date.year < year: # close the previous table if there is one if table_opened: tocbuff.write(u'\n') # write a new

tag with the smaller year year = date.year tocbuff.write(u'\n

.:{0}:.

\n\n'.format(year)) tocbuff.write(u'\n') table_opened = True # write a new table row entry in Markdown, in the format: # # # # # monthday = date.strftime("%b %d") tocbuff.write(u'' '\n' % (date.isoformat(), monthday, entry.title_text, entry.relpath)) if table_opened: tocbuff.write(u'

May 5	[Blah blah](/blog/2015-05-04-blah-blah.html)
%s	[%s](%s)

\n') tocbuff.write('

') # create tempfile with index.md and the TOC concatenated, and generate index.html from that # pylint: disable=invalid-name fd, tmppath = tempfile.mkstemp() os.close(fd) with open(tmppath, 'w', encoding='utf-8') as tmpfile: if os.path.exists(indexmdl): with open(indexmdl, 'r', encoding='utf-8') as indexmd: tmpfile.write(u"%s\n\n

\n\n" % indexmd.read()) tmpfile.write("%s\n" % tocbuff.getvalue()) tocbuff.close() pandoc_args = [ "pandoc", tmppath, "--template", HTMLTEMPLATE, "--highlight-style=pygments", "-o", indexhtmll, ] try: subprocess.check_call(pandoc_args) except subprocess.CalledProcessError: sys.stderr.write("error: failed to generate index.html\n") os.remove(tmppath) def generate_sitemap(feed): """Generate sitemap.xml.""" sitemap = ET.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9") # index sitemap.append(utils.make_sitemap_url_element(BLOG_HOME, feed.updated, "daily", 1.0)) # other top level pages for name in os.listdir(BUILDDIR): if (not name.endswith(".html") or re.match("index.*\.html", name) or re.match("google[a-z0-9]+\.html", name)): # exclude Google's site ownership verification file continue link = urllib.parse.urljoin(BLOG_HOME, name) fullpath = os.path.join(BUILDDIR, name) # try to extract updated time updated = None with open(fullpath, encoding="utf-8") as htmlobj: soup = bs4.BeautifulSoup(htmlobj.read(), "lxml") if soup.footer is not None: updated_tag = soup.footer.find(attrs={"class": "updated"}) if updated_tag is not None: updated = dateutil.parser.parse(updated_tag.text) sitemap.append(utils.make_sitemap_url_element(link, updated, "monthly", 0.9)) # blog entries for entry in feed.entries: sitemap.append(utils.make_sitemap_url_element(entry.link, entry.updated, "monthly", 0.9)) sitemappath = os.path.join(BUILDDIR, "sitemap.xml") with open(sitemappath, "w", encoding="utf-8") as sitemapfile: sitemapfile.write('\n%s\n' % ET.tostring(sitemap).decode('utf-8')) sys.stderr.write("wrote sitemap.xml\n") def rewrite_title(): """Override the title of some page for a better render""" sys.stderr.write("Overriding some titles\n") filenames =['build/index.html', 'build/blog/index.html', 'build/notes/index.html'] for root, dirs, files in os.walk(BUILDDIR): for name in files: if re.search(r'website($)',root): if name.endswith(".html"): try: filenames.append("build"+os.path.join(root.split(BUILDDIR)[1], name)) except IndexError: filenames.append(name) if re.search(r'Documents($)',root): if name.endswith(".html"): try: filenames.append("build"+os.path.join(root.split(BUILDDIR)[1], name)) except IndexError: filenames.append(name) if re.search(r'notes($)',root): if name.endswith(".html"): try: filenames.append("build"+os.path.join(root.split(BUILDDIR)[1], name)) except IndexError: filenames.append(name) if re.search(r'blog($)',root): if name.endswith(".html"): try: filenames.append("build"+os.path.join(root.split(BUILDDIR)[1], name)) except IndexError: filenames.append(name) h1_titles_list = [] h1_title = [] h2_titles_list = [] h2_title = [] fd, tmppath = tempfile.mkstemp() os.close(fd) for filename in filenames: soup = bs4.BeautifulSoup(open(filename), "lxml") for myh1 in soup.find_all("h1"): if re.match("^(?!.*article-title).*$", str(myh1)): h1_id = myh1['id'] h1_name = myh1.string h1_title.append(str(myh1)) myh1['class'] = "h1" myh1.string = "" h1_span_left = soup.new_tag("span") h1_span_left['class'] = "left-h1" h1_span_left.string = "█▓▒░" h1_span_title = soup.new_tag("span") h1_span_title['class'] = "title-h1" h1_span_title.string = "「"+h1_name+"」" h1_span_right = soup.new_tag("span") h1_span_right['class'] = "right-h1" h1_span_right.string = "░▒▓█" myh1.string.insert_before(h1_span_left) myh1.span.insert_after(h1_span_right) myh1.span.insert_after(h1_span_title) h1_title.append(myh1) h1_title.append(h1_name) h1_titles_list.append(h1_title) h1_title = [] for myh2 in soup.find_all("h2"): if re.match("^(?!.*blog-index-year-title).*$", str(myh2)): h2_id = myh2['id'] h2_name = myh2.string h2_title.append(str(myh2)) myh2['class'] = "h2" myh2.string = "" h2_span_left = soup.new_tag("span") h2_span_left['class'] = "left-h2" h2_span_left.string = ".: " h2_span_title = soup.new_tag("span") h2_span_title['class'] = "title-h2" h2_span_title.string = h2_name h2_span_right = soup.new_tag("span") h2_span_right['class'] = "right-h2" h2_span_right.string = " :." myh2.string.insert_before(h2_span_left) myh2.span.insert_after(h2_span_right) myh2.span.insert_after(h2_span_title) h2_title.append(myh2) h2_title.append(h2_name) h2_titles_list.append(h2_title) h2_title = [] tested_title_list = [] tested_title = [] for filename in filenames: soup = bs4.BeautifulSoup(open(filename), "lxml") if os.path.exists(filename): with open(filename, 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open(filename, 'w', encoding='utf-8') as indexmd: for line in lines: string = "" for title in h1_titles_list: if re.match(".*"+title[0]+".*", line): string = str(title[1]) for title in h2_titles_list: if re.match(".*"+title[0]+".*", line): string = str(title[1]) if string != "": indexmd.write(re.sub(line, string, line)) else: indexmd.write(line) def generate_index_and_feed(): """Generate index.html and feeds (atom and rss).""" # pylint: disable=too-many-statements,attribute-defined-outside-init,invalid-name sys.stderr.write("generating atom and rss feeds\n") # initialize atom feed feed = AtomFeed() feed.author = ET.fromstring( "" "{author}" "{home}" "{email}" "".format(author=AUTHOR, home=BLOG_HOME, email=AUTHOR_EMAIL)) feed.generator = ET.Element("generator", uri=GENERATOR_HOME_PAGE) feed.generator.text = GENERATOR_NAME if ATOM_ICON_PATH is not None: feed.icon = ET.Element("icon") feed.icon.text = urllib.parse.urljoin(BLOG_HOME, ATOM_ICON_PATH) feed.id_text = BLOG_HOME feed.id = ET.Element("id") feed.id.text = feed.id_text feed.links = [ ET.Element("link", href=urllib.parse.urljoin(BLOG_HOME, "atom.xml"), rel="self", type="application/atom+xml"), ET.Element("link", href=BLOG_HOME, rel="alternate", type="text/html"), ] feed.title_text = BLOG_TITLE feed.title = ET.fromstring("{title}".format(title=BLOG_TITLE)) feed.subtitle_text = BLOG_DESCRIPTION feed.subtitle = ET.fromstring("{subtitle}" .format(subtitle=BLOG_DESCRIPTION)) # initialize rss feed rss = RssFeed() rss.rssurl = urllib.parse.urljoin(BLOG_HOME, "rss.xml") rss.title = ET.Element("title") rss.title.text = BLOG_TITLE rss.link = ET.Element("link") rss.link.text = BLOG_HOME rss.description = ET.Element("description") rss.description.text = BLOG_DESCRIPTION rss.language = ET.Element("language") rss.language.text = LANGUAGE rss.author_text = "{email} ({name})".format(email=AUTHOR_EMAIL, name=AUTHOR) rss.managingEditor = ET.Element("managingEditor") rss.managingEditor.text = rss.author_text rss.webMaster = ET.Element("webMaster") rss.webMaster.text = rss.author_text rss.generator = ET.Element("generator") rss.generator.text = "{generator} ({url})".format(generator=GENERATOR_NAME, url=GENERATOR_HOME_PAGE) rss.image = ET.Element("image") if RSS_ICON_PATH is not None: ET.SubElement(rss.image, "url").text = urllib.parse.urljoin(BLOG_HOME, RSS_ICON_PATH) rss.image.append(copy.deepcopy(rss.title)) rss.image.append(copy.deepcopy(rss.link)) ET.SubElement(rss.image, "width").text = str(RSS_ICON_WIDTH) ET.SubElement(rss.image, "height").text = str(RSS_ICON_HEIGHT) # update times will be set after everthing finishes for name in os.listdir(os.path.join(BUILDDIR, "blog")): if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html", name): htmlpath = os.path.join(BUILDDIR, "blog", name) entry = AtomEntry() item = RssItem() try: with open(htmlpath, encoding="utf-8") as htmlfile: soup = bs4.BeautifulSoup(htmlfile.read(), "lxml") # generate atom entry entry.author = copy.deepcopy(feed.author) # assume it's always the same author entry_url = urllib.parse.urljoin(BLOG_HOME, "blog/%s" % name) entry.id_text = entry_url entry.id = ET.Element("id") entry.id.text = entry_url entry.relpath = "/blog/%s" % name entry.link = ET.Element("link", href=entry_url) entry.title_text = soup.title.text entry.title = ET.Element("title", type="html") entry.title.text = entry.title_text post_date = soup.find("meta", attrs={"name": "date"})["content"] entry.updated_datetime = dateutil.parser.parse(post_date) entry.updated = ET.Element("updated") # pylint: disable=no-member entry.updated.text = entry.updated_datetime.isoformat() # process content tags_to_remove = [] # mark header and footer for removal article = soup.article if article.header is not None: tags_to_remove.append(article.header) # mark line numbers for removal for line_number_span in article.find_all("span", attrs={"class": "line-number"}): tags_to_remove.append(line_number_span) # mark script tags for removal for script_tag in article.find_all("script"): tags_to_remove.append(script_tag) # make internal links absolute utils.absolutify_links(article, entry_url) # remove marked tags for tag in tags_to_remove: tag.extract() entry.content_html = ''.join([str(content) for content in article.contents]) entry.content = ET.Element("content", type="html") entry.content.text = ET.CDATA(entry.content_html) entry.assemble_entry() feed.entries.append(entry) # generate rss item item.title = ET.Element("title") item.title.text = entry.title_text item.link = ET.Element("link") item.link.text = entry_url item.description = ET.Element("description") item.description.text = ET.CDATA(entry.content_html) item.author = ET.Element("author") item.author.text = rss.author_text item.guid = ET.Element("guid", isPermaLink="true") item.guid.text = item.link.text item.timestamp = entry.updated_datetime.timestamp() item.pubDate = ET.Element("pubDate") item.pubDate.text = email.utils.formatdate(item.timestamp, usegmt=True) item.assemble_item() rss.items.append(item) except Exception: sys.stderr.write("error: failed to generate feed entry from %s\n" % name) with open(htmlpath, encoding="utf-8") as htmlfile: sys.stderr.write("dumping HTML:%s\n\n" % htmlfile.read()) raise # sort entries by reverse chronological order feed.entries.sort(key=lambda entry: entry.updated_datetime, reverse=True) rss.items.sort(key=lambda item: item.timestamp, reverse=True) generate_index(feed) generate_lang_switch() generate_menu() generate_table() generate_blog_list(feed) generate_notes_list() rewrite_title() feed.updated_datetime = utils.current_datetime() feed.updated = ET.Element("updated") feed.updated.text = feed.updated_datetime.isoformat() rss.update_timestamp = time.time() rss.pubDate = ET.Element("pubDate") rss.pubDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True) rss.lastBuildDate = ET.Element("lastBuildDate") rss.lastBuildDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True) with open(ATOM, "w", encoding="utf-8") as atom: atom.write("%s\n" % feed.dump_feed(FEED_MAX_ENTRIES)) sys.stderr.write("wrote atom.xml\n") with open(RSS, "w", encoding="utf-8") as rssxml: rssxml.write("%s\n" % rss.dump_rss(FEED_MAX_ENTRIES)) sys.stderr.write("wrote rss.xml\n") generate_sitemap(feed) # exclude_list is only inialized once to avoid constant disk IO @utils.static_vars(exclude_list=None) def generate_blog(fresh=False, report_total_errors=True): """Generate the blog in BUILDDIR. Parameters ---------- fresh : bool If True, remove all existing build artifects and start afresh; otherwise, only copy or build new or modified files. Default is False. report_total_errors : bool If True, a line will be printed to stderr at the end of build (assuming the function doesn't raise early) reporting the total number of errors, e.g., "build finished with 0 errors". This is turned on by default, but pass False to turn it off, which will result in a completely silent session if nothing changed. This is useful for auto-regen, for instance. Returns ------- failed_builds : int Number of build failures. """ # pylint: disable=too-many-branches,too-many-locals,too-many-statements if not os.path.isdir(SOURCEDIR): raise OSError("source directory %s does not exist" % SOURCEDIR) if not os.path.exists(HTMLTEMPLATE): raise OSError("HTML template %s not found" % HTMLTEMPLATE) if not os.path.isdir(BUILDDIR): if os.path.exists(BUILDDIR): os.remove(BUILDDIR) os.mkdir(BUILDDIR, mode=0o755) if fresh: for name in os.listdir(BUILDDIR): if name == ".git": continue obj = os.path.join(BUILDDIR, name) if os.path.isdir(obj): shutil.rmtree(obj) else: os.remove(obj) # nojekyll: https://help.github.com/articles/files-that-start-with-an-underscore-are-missing/ if not os.path.exists(os.path.join(BUILDDIR, ".nojekyll")): with open(os.path.join(BUILDDIR, ".nojekyll"), "w") as fileobj: pass if CUSTOM_DOMAIN and not os.path.exists(os.path.join(BUILDDIR, "CNAME")): with open(os.path.join(BUILDDIR, "CNAME"), "w") as fileobj: fileobj.write(CUSTOM_DOMAIN) failed_builds = 0 generator_mtime = os.path.getmtime(GENERATORSOURCE) template_mtime = os.path.getmtime(HTMLTEMPLATE) fundamental_mtime = max(generator_mtime, template_mtime) anything_modified = False exclude_list = generate_blog.exclude_list # get value of static variable if exclude_list is None: try: with open(EXCLUDELIST) as fp: exclude_list = [os.path.abspath(os.path.join(SOURCEDIR, line.rstrip())) for line in list(fp) if line.strip() != "" and not line.startswith('#')] except OSError: exclude_list = [] generate_blog.exclude_list = exclude_list # assign to static variable for the future for root, dirs, files in os.walk(SOURCEDIR): # If root is in exclude list, skip all files and remove all subdirs from traversal list. if root in exclude_list: dirs[:] = [] continue relroot = os.path.relpath(root, start=SOURCEDIR) dstroot = os.path.join(BUILDDIR, relroot) if not os.path.isdir(dstroot): if os.path.exists(dstroot): os.remove(dstroot) os.mkdir(dstroot, mode=0o755) for name in files: if name.startswith('.') or os.path.join(root, name) in exclude_list: continue extension = name.split(".")[-1] if extension not in ["css", "js", "asc", "html", "jpg", "md", "png", "svg", "ico", "txt", "eot", "ttf", "woff", "woff2"]: continue relpath = os.path.join(relroot, name) srcpath = os.path.join(root, name) if extension == "md": dstpath = os.path.join(dstroot, re.sub(r'\.md$', '.html', name)) else: dstpath = os.path.join(dstroot, name) if ((not os.path.exists(dstpath) or os.path.getmtime(dstpath) <= max(fundamental_mtime, os.path.getmtime(srcpath)))): # new post or modified post anything_modified = True indexmdlist = [] for language in LANGUAGES: if language != DEFAULTLANG: indexmdlist.append("index-{}.md".format(language)) else: indexmdlist.append("index.md") if srcpath in indexmdlist: continue # index will be processed separately if extension in ["css", "js", "asc", "html", "jpg", "png", "svg", "ico", "txt", "eot", "ttf", "woff", "woff2"]: sys.stderr.write("copying %s\n" % relpath) shutil.copy(srcpath, dstpath) elif extension == "md": sys.stderr.write("compiling %s\n" % relpath) pandoc_args = [ "pandoc", srcpath, "--template", HTMLTEMPLATE, "--highlight-style=pygments", "-o", dstpath, ] try: subprocess.check_call(pandoc_args) except subprocess.CalledProcessError: failed_builds += 1 sys.stderr.write("error: failed to generate %s" % relpath) # postprocess generated HTML file utils.postprocess_html_file(dstpath) if anything_modified: generate_index_and_feed() sys.stderr.write("done\n") if report_total_errors: sys.stderr.write("build finished with %d errors\n" % failed_builds) return failed_builds