#!/bin/python3 import os import sys import tempfile import re import bs4 import dateutil import io import subprocess import lxml.etree as ET import urllib.parse import copy import email.utils import time import shutil from rss import * from config.config import * from utils import utils def generate_lang_switch(): """Generate language switcher""" sys.stderr.write("generating langugage switcher\n") fd, tmppath = tempfile.mkstemp() os.close(fd) # Put in a list the pages where the menu will be written html_fileList = [] for root, dirs, files in os.walk(BUILDDIR): for name in files: if name.endswith(".html"): try: html_fileList.append(os.path.join(root.split('build/')[1], name)) except IndexError: html_fileList.append(name) # Writing the menu in all pages contained in the variable in place of the -- generate menu here -- for html_file in html_fileList: with open(tmppath, 'w', encoding='utf-8') as tmpfile: trad_files = [] html_file_name = re.sub(r"(.*)-[a-z]{2}(\.html)", r"\1\2",html_file) for language in LANGUAGES: if language != DEFAULTLANG: trad_file = "{}-{}.html".format( html_file_name.rsplit('.', 1)[0], language) if os.path.exists("build/"+trad_file): trad_files.append({"file":trad_file, "lang":language}) else: trad_file = "{}.html".format(html_file_name.rsplit('.', 1)[0]) if os.path.exists("build/"+trad_file): trad_files.append({"file":trad_file, "lang":language}) if len(trad_files) > 1 and os.path.exists("build/"+html_file): with open("build/"+html_file, 'r', encoding='utf-8') as f: htmly_website_page = "" lines = f.readlines() with open("build/"+html_file, 'w', encoding='utf-8') as f: for line in lines: f.write(re.sub(r'-- generate LANGSWITCH --', htmly_website_page, line)) else: with open("build/"+html_file, 'r', encoding='utf-8') as f: lines = f.readlines() with open("build/"+html_file, 'w', encoding='utf-8') as f: for line in lines: f.write(re.sub(r'-- generate LANGSWITCH --', "", line)) os.remove(tmppath) def generate_menu(): """Generate menu.""" sys.stderr.write("generating menu\n") fd, tmppath = tempfile.mkstemp() os.close(fd) # Put in a list the pages where the menu will be written html_fileList = [] for root, dirs, files in os.walk(BUILDDIR): for name in files: if name.endswith(".html"): try: html_fileList.append(os.path.join(root.split('build/')[1], name)) except IndexError: html_fileList.append(name) # Generate the string who contain the links of the menu htmly_website_page = "" # Writing the menu in all pages contained in the variable in place of the -- generate menu here -- for html_file in html_fileList: with open(tmppath, 'w', encoding='utf-8') as tmpfile: if os.path.exists("build/"+html_file): with open("build/"+html_file, 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open("build/"+html_file, 'w', encoding='utf-8') as indexmd: for line in lines: indexmd.write(re.sub(r'-- generate menu here --', htmly_website_page, line)) os.remove(tmppath) def generate_table(): """Generate table.""" first_comp = 1 first_pr = 1 tr_class = "odd" documents_fileList = [] documents_fileList.append("/website/bts-sio.html") fd, tmppath = tempfile.mkstemp() os.close(fd) htmly_website_page = "" if os.path.exists(BUILDDIR+"/website/bts-sio.html"): sys.stderr.write("generating table\n") # Put in a list the pages where the menu will be written #for root, dirs, files in os.walk(BUILDDIR+"/website/Documents/Situation2"): # for name in files: # if name.endswith(".html"): # try: # documents_fileList.append(os.path.join(root.split('build')[1], name)) # except IndexError: # documents_fileList.append(name) # Generate the string who contain the links of the menu #htmly_website_page = "" # Writing the menu in all pages contained in the variable in place of the -- generate submenu here -- for document_file in documents_fileList: with open(tmppath, 'w', encoding='utf-8') as tmpfile: if os.path.exists("build"+document_file): with open("build"+document_file, 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open("build"+document_file, 'w', encoding='utf-8') as indexmd: for line in lines: indexmd.write(re.sub(r'
-- table --', '', line))
                with open("build"+document_file, 'r', encoding='utf-8') as indexmd:
                    lines = indexmd.readlines()
                    with open("build"+document_file, 'w', encoding='utf-8') as indexmd:
                        for line in lines:
                            if (re.match('^\$.*', line) and first_pr == 1):
                                line_edited=''
                                indexmd.write(re.sub(r'^\$.*', line_edited, line))
                                first_pr = 0
                                first_comp = 1
                            elif (re.match('^\$.*', line)):
                                if (tr_class == "odd"):
                                    tr_class = "even"
                                else:
                                    tr_class = "odd"
                                line_edited=''
                                indexmd.write(re.sub(r'^\$.*', line_edited, line))
                            else:
                                indexmd.write(line)
                with open("build"+document_file, 'r', encoding='utf-8') as indexmd:
                    lines = indexmd.readlines()
                    with open("build"+document_file, 'w', encoding='utf-8') as indexmd:
                        for line in lines:

                            if (re.match('^    \$.*\$$', line)):
                                indexmd.write(re.sub(r'^    \$.*\$$', "
  • "+line.split("$")[1]+'
  • ', line)) first_comp = 1 elif (re.match('^ \$.*[^\$]$', line)): if first_comp == 1: indexmd.write(re.sub(r'^ \$.*[^\$]$', "", line)) else: indexmd.write(re.sub(r'^ \$.*', "", line)) with open("build"+document_file, 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open("build"+document_file, 'w', encoding='utf-8') as indexmd: for line in lines: indexmd.write(re.sub(r"-- end table --", "
    CompétenceActivitéJustification
    • "+line.split("$")[1]+'
    • ', line)) first_comp = 0 else: indexmd.write(re.sub(r'^ \$.*[^\$]$', "
    • "+line.split("$")[1]+'
    • ', line)) else: indexmd.write(line) with open("build"+document_file, 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open("build"+document_file, 'w', encoding='utf-8') as indexmd: for line in lines: if (re.match('^ \$.*', line)): indexmd.write(re.sub(r'^ \$.*', "
    • "+line.split("$")[1]+"
    • "+line+"
    ", line)) os.remove(tmppath) def generate_blog_list(feed): """"Generate blog list """ sys.stderr.write("generating blog list\n") html_fileList = [] for root, dirs, files in os.walk(BUILDDIR): for name in files: if re.search(r'blog',root): if name.endswith(".html"): try: html_fileList.append(os.path.join(root.split('blog/')[1], name)) except IndexError: html_fileList.append(name) # generate TOC for html_file in html_fileList: div_blog_list = u'
    \n\n' year = 10000 # will be larger than the latest year for quite a while # recall that entries are in reverse chronological order table_opened = False for entry in feed.entries: date = entry.updated_datetime if date.year < year: # close the previous table if there is one if table_opened: div_blog_list += u'\n' # write a new

    tag with the smaller year year = date.year div_blog_list += u'\n

    .:{0}:.

    \n\n'.format(year) div_blog_list += u'\n' table_opened = True # write a new table row entry in Markdown, in the format: # # # # # monthday = date.strftime("%b %d") div_blog_list += (u'' '\n' % (date.isoformat(), monthday, entry.relpath, entry.title_text)) if table_opened: div_blog_list += u'
    [Blah blah](/blog/2015-05-04-blah-blah.html)
    %s
    \n' div_blog_list += u'
    ' fd, tmppath = tempfile.mkstemp() os.close(fd) with open(tmppath, 'w', encoding='utf-8') as tmpfile: if os.path.exists("build/blog/index.html"): with open("build/blog/index.html", 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open("build/blog/index.html", 'w', encoding='utf-8') as indexmd: for line in lines: indexmd.write(re.sub(r'{% generate blog_list here %}', div_blog_list, line)) def generate_notes_list(): """"Generate notes list """ sys.stderr.write("generating notes list\n") html_fileList = [] for root, dirs, files in os.walk(BUILDDIR): for name in files: if re.search(r'notes',root): if name.endswith(".html"): try: html_fileList.append(os.path.join(root.split('notes/')[1], name)) except IndexError: html_fileList.append(name) div_notes_list = u'
    \n\n' year = 10000 # will be larger than the latest year for quite a while # recall that entries are in reverse chronological order table_opened = False for name in list(reversed(sorted(os.listdir(os.path.join(BUILDDIR, "notes"))))): if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html", name): htmlpath = os.path.join(BUILDDIR, "notes", name) #tentry = AtomEntry() #item = RssItem() try: with open(htmlpath, encoding="utf-8") as htmlfile: soup = bs4.BeautifulSoup(htmlfile.read(), "lxml") # generate atom entry #entry.author = copy.deepcopy(feed.author) # assume it's always the same author #entry_url = urllib.parse.urljoin(BLOG_HOME, "blog/%s" % name) #entry.id_text = entry_url #entry.id = ET.Element("id") #entry.id.text = entry_url relpath = "/notes/%s" % name #entry.link = ET.Element("link", href=entry_url) title_text = soup.title.text #entry.title = ET.Element("title", type="html") #entry.title.text = entry.title_text post_date = soup.find("meta", attrs={"name": "date"})["content"] updated_datetime = dateutil.parser.parse(post_date) date = updated_datetime if date.year < year: # close the previous table if there is one if table_opened: div_notes_list += u'\n' # write a new

    tag with the smaller year year = date.year div_notes_list += u'\n

    .:{0}:.

    \n\n'.format(year) div_notes_list += u'\n' table_opened = True # write a new table row entry in Markdown, in the format: # # # # # monthday = date.strftime("%b %d") div_notes_list += (u'' '\n' % (date.isoformat(), monthday, relpath, title_text)) except Exception: sys.stderr.write("error: failed to generate feed entry from %s\n" % name) with open(htmlpath, encoding="utf-8") as htmlfile: sys.stderr.write("dumping HTML:%s\n\n" % htmlfile.read()) raise if table_opened: div_notes_list += u'
    [Blah blah](/blog/2015-05-04-blah-blah.html)
    %s
    \n' div_notes_list += u'
    ' fd, tmppath = tempfile.mkstemp() os.close(fd) with open(tmppath, 'w', encoding='utf-8') as tmpfile: if os.path.exists("build/notes/index.html"): with open("build/notes/index.html", 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open("build/notes/index.html", 'w', encoding='utf-8') as indexmd: for line in lines: indexmd.write(re.sub(r'{% generate notes_list here %}', div_notes_list, line)) def generate_index(feed): """Generate index.html from index.md and a TOC.""" for language in LANGUAGES: if language != DEFAULTLANG: indexmdl = re.sub(r'index.md$', "index-{}.md".format(language), INDEXMD) indexhtmll = re.sub(r'index.html$', "index-{}.html".format(language), INDEXHTML) else: indexmdl = INDEXMD indexhtmll = INDEXHTML sys.stderr.write("generating index-{}.html\n".format(language)) # generate TOC tocbuff = io.StringIO() tocbuff.write('
    ') year = 10000 # will be larger than the latest year for quite a while # recall that entries are in reverse chronological order table_opened = False for entry in feed.entries: date = entry.updated_datetime if date.year < year: # close the previous table if there is one if table_opened: tocbuff.write(u'\n') # write a new

    tag with the smaller year year = date.year tocbuff.write(u'\n

    .:{0}:.

    \n\n'.format(year)) tocbuff.write(u'\n') table_opened = True # write a new table row entry in Markdown, in the format: # # # # # monthday = date.strftime("%b %d") tocbuff.write(u'' '\n' % (date.isoformat(), monthday, entry.title_text, entry.relpath)) if table_opened: tocbuff.write(u'
    [Blah blah](/blog/2015-05-04-blah-blah.html)
    [%s](%s)
    \n') tocbuff.write('
    ') # create tempfile with index.md and the TOC concatenated, and generate index.html from that # pylint: disable=invalid-name fd, tmppath = tempfile.mkstemp() os.close(fd) with open(tmppath, 'w', encoding='utf-8') as tmpfile: if os.path.exists(indexmdl): with open(indexmdl, 'r', encoding='utf-8') as indexmd: tmpfile.write(u"%s\n\n
    \n\n" % indexmd.read()) tmpfile.write("%s\n" % tocbuff.getvalue()) tocbuff.close() pandoc_args = [ "pandoc", tmppath, "--template", HTMLTEMPLATE, "--highlight-style=pygments", "-o", indexhtmll, ] try: subprocess.check_call(pandoc_args) except subprocess.CalledProcessError: sys.stderr.write("error: failed to generate index.html\n") os.remove(tmppath) def generate_sitemap(feed): """Generate sitemap.xml.""" sitemap = ET.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9") # index sitemap.append(utils.make_sitemap_url_element(BLOG_HOME, feed.updated, "daily", 1.0)) # other top level pages for name in os.listdir(BUILDDIR): if (not name.endswith(".html") or re.match("index.*\.html", name) or re.match("google[a-z0-9]+\.html", name)): # exclude Google's site ownership verification file continue link = urllib.parse.urljoin(BLOG_HOME, name) fullpath = os.path.join(BUILDDIR, name) # try to extract updated time updated = None with open(fullpath, encoding="utf-8") as htmlobj: soup = bs4.BeautifulSoup(htmlobj.read(), "lxml") if soup.footer is not None: updated_tag = soup.footer.find(attrs={"class": "updated"}) if updated_tag is not None: updated = dateutil.parser.parse(updated_tag.text) sitemap.append(utils.make_sitemap_url_element(link, updated, "monthly", 0.9)) # blog entries for entry in feed.entries: sitemap.append(utils.make_sitemap_url_element(entry.link, entry.updated, "monthly", 0.9)) sitemappath = os.path.join(BUILDDIR, "sitemap.xml") with open(sitemappath, "w", encoding="utf-8") as sitemapfile: sitemapfile.write('\n%s\n' % ET.tostring(sitemap).decode('utf-8')) sys.stderr.write("wrote sitemap.xml\n") def rewrite_title(): """Override the title of some page for a better render""" sys.stderr.write("Overriding some titles\n") filenames =['build/index.html', 'build/blog/index.html', 'build/notes/index.html'] for root, dirs, files in os.walk(BUILDDIR): for name in files: if re.search(r'website($)',root): if name.endswith(".html"): try: filenames.append("build"+os.path.join(root.split(BUILDDIR)[1], name)) except IndexError: filenames.append(name) if re.search(r'Documents($)',root): if name.endswith(".html"): try: filenames.append("build"+os.path.join(root.split(BUILDDIR)[1], name)) except IndexError: filenames.append(name) if re.search(r'notes($)',root): if name.endswith(".html"): try: filenames.append("build"+os.path.join(root.split(BUILDDIR)[1], name)) except IndexError: filenames.append(name) if re.search(r'blog($)',root): if name.endswith(".html"): try: filenames.append("build"+os.path.join(root.split(BUILDDIR)[1], name)) except IndexError: filenames.append(name) h1_titles_list = [] h1_title = [] h2_titles_list = [] h2_title = [] fd, tmppath = tempfile.mkstemp() os.close(fd) for filename in filenames: soup = bs4.BeautifulSoup(open(filename), "lxml") for myh1 in soup.find_all("h1"): if re.match("^(?!.*article-title).*$", str(myh1)): h1_id = myh1['id'] h1_name = myh1.string h1_title.append(str(myh1)) myh1['class'] = "h1" myh1.string = "" h1_span_left = soup.new_tag("span") h1_span_left['class'] = "left-h1" h1_span_left.string = "█▓▒░" h1_span_title = soup.new_tag("span") h1_span_title['class'] = "title-h1" h1_span_title.string = "「"+h1_name+"」" h1_span_right = soup.new_tag("span") h1_span_right['class'] = "right-h1" h1_span_right.string = "░▒▓█" myh1.string.insert_before(h1_span_left) myh1.span.insert_after(h1_span_right) myh1.span.insert_after(h1_span_title) h1_title.append(myh1) h1_title.append(h1_name) h1_titles_list.append(h1_title) h1_title = [] for myh2 in soup.find_all("h2"): if re.match("^(?!.*blog-index-year-title).*$", str(myh2)): h2_id = myh2['id'] h2_name = myh2.string h2_title.append(str(myh2)) myh2['class'] = "h2" myh2.string = "" h2_span_left = soup.new_tag("span") h2_span_left['class'] = "left-h2" h2_span_left.string = ".: " h2_span_title = soup.new_tag("span") h2_span_title['class'] = "title-h2" h2_span_title.string = h2_name h2_span_right = soup.new_tag("span") h2_span_right['class'] = "right-h2" h2_span_right.string = " :." myh2.string.insert_before(h2_span_left) myh2.span.insert_after(h2_span_right) myh2.span.insert_after(h2_span_title) h2_title.append(myh2) h2_title.append(h2_name) h2_titles_list.append(h2_title) h2_title = [] tested_title_list = [] tested_title = [] for filename in filenames: soup = bs4.BeautifulSoup(open(filename), "lxml") if os.path.exists(filename): with open(filename, 'r', encoding='utf-8') as indexmd: lines = indexmd.readlines() with open(filename, 'w', encoding='utf-8') as indexmd: for line in lines: string = "" for title in h1_titles_list: if re.match(".*"+title[0]+".*", line): string = str(title[1]) for title in h2_titles_list: if re.match(".*"+title[0]+".*", line): string = str(title[1]) if string != "": indexmd.write(re.sub(line, string, line)) else: indexmd.write(line) def generate_index_and_feed(): """Generate index.html and feeds (atom and rss).""" # pylint: disable=too-many-statements,attribute-defined-outside-init,invalid-name sys.stderr.write("generating atom and rss feeds\n") # initialize atom feed feed = AtomFeed() feed.author = ET.fromstring( "" "{author}" "{home}" "{email}" "".format(author=AUTHOR, home=BLOG_HOME, email=AUTHOR_EMAIL)) feed.generator = ET.Element("generator", uri=GENERATOR_HOME_PAGE) feed.generator.text = GENERATOR_NAME if ATOM_ICON_PATH is not None: feed.icon = ET.Element("icon") feed.icon.text = urllib.parse.urljoin(BLOG_HOME, ATOM_ICON_PATH) feed.id_text = BLOG_HOME feed.id = ET.Element("id") feed.id.text = feed.id_text feed.links = [ ET.Element("link", href=urllib.parse.urljoin(BLOG_HOME, "atom.xml"), rel="self", type="application/atom+xml"), ET.Element("link", href=BLOG_HOME, rel="alternate", type="text/html"), ] feed.title_text = BLOG_TITLE feed.title = ET.fromstring("{title}".format(title=BLOG_TITLE)) feed.subtitle_text = BLOG_DESCRIPTION feed.subtitle = ET.fromstring("{subtitle}" .format(subtitle=BLOG_DESCRIPTION)) # initialize rss feed rss = RssFeed() rss.rssurl = urllib.parse.urljoin(BLOG_HOME, "rss.xml") rss.title = ET.Element("title") rss.title.text = BLOG_TITLE rss.link = ET.Element("link") rss.link.text = BLOG_HOME rss.description = ET.Element("description") rss.description.text = BLOG_DESCRIPTION rss.language = ET.Element("language") rss.language.text = LANGUAGE rss.author_text = "{email} ({name})".format(email=AUTHOR_EMAIL, name=AUTHOR) rss.managingEditor = ET.Element("managingEditor") rss.managingEditor.text = rss.author_text rss.webMaster = ET.Element("webMaster") rss.webMaster.text = rss.author_text rss.generator = ET.Element("generator") rss.generator.text = "{generator} ({url})".format(generator=GENERATOR_NAME, url=GENERATOR_HOME_PAGE) rss.image = ET.Element("image") if RSS_ICON_PATH is not None: ET.SubElement(rss.image, "url").text = urllib.parse.urljoin(BLOG_HOME, RSS_ICON_PATH) rss.image.append(copy.deepcopy(rss.title)) rss.image.append(copy.deepcopy(rss.link)) ET.SubElement(rss.image, "width").text = str(RSS_ICON_WIDTH) ET.SubElement(rss.image, "height").text = str(RSS_ICON_HEIGHT) # update times will be set after everthing finishes for name in os.listdir(os.path.join(BUILDDIR, "blog")): if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html", name): htmlpath = os.path.join(BUILDDIR, "blog", name) entry = AtomEntry() item = RssItem() try: with open(htmlpath, encoding="utf-8") as htmlfile: soup = bs4.BeautifulSoup(htmlfile.read(), "lxml") # generate atom entry entry.author = copy.deepcopy(feed.author) # assume it's always the same author entry_url = urllib.parse.urljoin(BLOG_HOME, "blog/%s" % name) entry.id_text = entry_url entry.id = ET.Element("id") entry.id.text = entry_url entry.relpath = "/blog/%s" % name entry.link = ET.Element("link", href=entry_url) entry.title_text = soup.title.text entry.title = ET.Element("title", type="html") entry.title.text = entry.title_text post_date = soup.find("meta", attrs={"name": "date"})["content"] entry.updated_datetime = dateutil.parser.parse(post_date) entry.updated = ET.Element("updated") # pylint: disable=no-member entry.updated.text = entry.updated_datetime.isoformat() # process content tags_to_remove = [] # mark header and footer for removal article = soup.article if article.header is not None: tags_to_remove.append(article.header) # mark line numbers for removal for line_number_span in article.find_all("span", attrs={"class": "line-number"}): tags_to_remove.append(line_number_span) # mark script tags for removal for script_tag in article.find_all("script"): tags_to_remove.append(script_tag) # make internal links absolute utils.absolutify_links(article, entry_url) # remove marked tags for tag in tags_to_remove: tag.extract() entry.content_html = ''.join([str(content) for content in article.contents]) entry.content = ET.Element("content", type="html") entry.content.text = ET.CDATA(entry.content_html) entry.assemble_entry() feed.entries.append(entry) # generate rss item item.title = ET.Element("title") item.title.text = entry.title_text item.link = ET.Element("link") item.link.text = entry_url item.description = ET.Element("description") item.description.text = ET.CDATA(entry.content_html) item.author = ET.Element("author") item.author.text = rss.author_text item.guid = ET.Element("guid", isPermaLink="true") item.guid.text = item.link.text item.timestamp = entry.updated_datetime.timestamp() item.pubDate = ET.Element("pubDate") item.pubDate.text = email.utils.formatdate(item.timestamp, usegmt=True) item.assemble_item() rss.items.append(item) except Exception: sys.stderr.write("error: failed to generate feed entry from %s\n" % name) with open(htmlpath, encoding="utf-8") as htmlfile: sys.stderr.write("dumping HTML:%s\n\n" % htmlfile.read()) raise # sort entries by reverse chronological order feed.entries.sort(key=lambda entry: entry.updated_datetime, reverse=True) rss.items.sort(key=lambda item: item.timestamp, reverse=True) generate_index(feed) generate_lang_switch() generate_menu() generate_table() generate_blog_list(feed) generate_notes_list() rewrite_title() feed.updated_datetime = utils.current_datetime() feed.updated = ET.Element("updated") feed.updated.text = feed.updated_datetime.isoformat() rss.update_timestamp = time.time() rss.pubDate = ET.Element("pubDate") rss.pubDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True) rss.lastBuildDate = ET.Element("lastBuildDate") rss.lastBuildDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True) with open(ATOM, "w", encoding="utf-8") as atom: atom.write("%s\n" % feed.dump_feed(FEED_MAX_ENTRIES)) sys.stderr.write("wrote atom.xml\n") with open(RSS, "w", encoding="utf-8") as rssxml: rssxml.write("%s\n" % rss.dump_rss(FEED_MAX_ENTRIES)) sys.stderr.write("wrote rss.xml\n") generate_sitemap(feed) # exclude_list is only inialized once to avoid constant disk IO @utils.static_vars(exclude_list=None) def generate_blog(fresh=False, report_total_errors=True): """Generate the blog in BUILDDIR. Parameters ---------- fresh : bool If True, remove all existing build artifects and start afresh; otherwise, only copy or build new or modified files. Default is False. report_total_errors : bool If True, a line will be printed to stderr at the end of build (assuming the function doesn't raise early) reporting the total number of errors, e.g., "build finished with 0 errors". This is turned on by default, but pass False to turn it off, which will result in a completely silent session if nothing changed. This is useful for auto-regen, for instance. Returns ------- failed_builds : int Number of build failures. """ # pylint: disable=too-many-branches,too-many-locals,too-many-statements if not os.path.isdir(SOURCEDIR): raise OSError("source directory %s does not exist" % SOURCEDIR) if not os.path.exists(HTMLTEMPLATE): raise OSError("HTML template %s not found" % HTMLTEMPLATE) if not os.path.isdir(BUILDDIR): if os.path.exists(BUILDDIR): os.remove(BUILDDIR) os.mkdir(BUILDDIR, mode=0o755) if fresh: for name in os.listdir(BUILDDIR): if name == ".git": continue obj = os.path.join(BUILDDIR, name) if os.path.isdir(obj): shutil.rmtree(obj) else: os.remove(obj) # nojekyll: https://help.github.com/articles/files-that-start-with-an-underscore-are-missing/ if not os.path.exists(os.path.join(BUILDDIR, ".nojekyll")): with open(os.path.join(BUILDDIR, ".nojekyll"), "w") as fileobj: pass if CUSTOM_DOMAIN and not os.path.exists(os.path.join(BUILDDIR, "CNAME")): with open(os.path.join(BUILDDIR, "CNAME"), "w") as fileobj: fileobj.write(CUSTOM_DOMAIN) failed_builds = 0 generator_mtime = os.path.getmtime(GENERATORSOURCE) template_mtime = os.path.getmtime(HTMLTEMPLATE) fundamental_mtime = max(generator_mtime, template_mtime) anything_modified = False exclude_list = generate_blog.exclude_list # get value of static variable if exclude_list is None: try: with open(EXCLUDELIST) as fp: exclude_list = [os.path.abspath(os.path.join(SOURCEDIR, line.rstrip())) for line in list(fp) if line.strip() != "" and not line.startswith('#')] except OSError: exclude_list = [] generate_blog.exclude_list = exclude_list # assign to static variable for the future for root, dirs, files in os.walk(SOURCEDIR): # If root is in exclude list, skip all files and remove all subdirs from traversal list. if root in exclude_list: dirs[:] = [] continue relroot = os.path.relpath(root, start=SOURCEDIR) dstroot = os.path.join(BUILDDIR, relroot) if not os.path.isdir(dstroot): if os.path.exists(dstroot): os.remove(dstroot) os.mkdir(dstroot, mode=0o755) for name in files: if name.startswith('.') or os.path.join(root, name) in exclude_list: continue extension = name.split(".")[-1] if extension not in ["css", "js", "asc", "html", "jpg", "md", "png", "svg", "ico", "txt", "eot", "ttf", "woff", "woff2"]: continue relpath = os.path.join(relroot, name) srcpath = os.path.join(root, name) if extension == "md": dstpath = os.path.join(dstroot, re.sub(r'\.md$', '.html', name)) else: dstpath = os.path.join(dstroot, name) if ((not os.path.exists(dstpath) or os.path.getmtime(dstpath) <= max(fundamental_mtime, os.path.getmtime(srcpath)))): # new post or modified post anything_modified = True indexmdlist = [] for language in LANGUAGES: if language != DEFAULTLANG: indexmdlist.append("index-{}.md".format(language)) else: indexmdlist.append("index.md") if srcpath in indexmdlist: continue # index will be processed separately if extension in ["css", "js", "asc", "html", "jpg", "png", "svg", "ico", "txt", "eot", "ttf", "woff", "woff2"]: sys.stderr.write("copying %s\n" % relpath) shutil.copy(srcpath, dstpath) elif extension == "md": sys.stderr.write("compiling %s\n" % relpath) pandoc_args = [ "pandoc", srcpath, "--template", HTMLTEMPLATE, "--highlight-style=pygments", "-o", dstpath, ] try: subprocess.check_call(pandoc_args) except subprocess.CalledProcessError: failed_builds += 1 sys.stderr.write("error: failed to generate %s" % relpath) # postprocess generated HTML file utils.postprocess_html_file(dstpath) if anything_modified: generate_index_and_feed() sys.stderr.write("done\n") if report_total_errors: sys.stderr.write("build finished with %d errors\n" % failed_builds) return failed_builds