#!/bin/python3

import copy
import email.utils
import html
import io
import os
import re
import shutil
import subprocess
import sys
import tempfile
import time
import urllib.parse

import bs4
import dateutil
import dateutil.parser
import lxml.etree as ET

from rss import *

from config.config import *

from utils import utils


def generate_menu():
    """Insert the website navigation menu into every built HTML page.

    A ``<ul>`` of links is built from the entries of ``BUILDDIR/website``
    (sorted by name, skipping ``Documents``) and substituted for every
    occurrence of the literal marker ``-- generate menu here --`` in each
    ``.html`` file found under BUILDDIR.  Pages that do not contain the
    marker are left untouched.

    Raises
    ------
    OSError
        If ``BUILDDIR/website`` cannot be listed, or a page cannot be read
        or written.

    """
    marker = "-- generate menu here --"

    sys.stderr.write("generating menu\n")

    # One link per entry of the website directory; the visible label is the
    # entry name up to its first ".html".
    item_template = ("<a href='/website/{name}' class='lia'><li>"
                     "<span class='left-lia'></span>"
                     "<span class='center-lia'>{label}</span>"
                     "<span class='right-lia'></span></li></a>")
    items = [item_template.format(name=name, label=name.split('.html')[0])
             for name in sorted(os.listdir(os.path.join(BUILDDIR, "website")))
             if name != "Documents"]
    menu_html = "<ul>" + "".join(items) + "</ul>"

    for root, _dirs, files in os.walk(BUILDDIR):
        for name in files:
            if not name.endswith(".html"):
                continue
            # Use the walked path directly instead of re-deriving it from a
            # hard-coded "build/" prefix.
            page_path = os.path.join(root, name)
            if not os.path.exists(page_path):  # e.g. dangling symlink
                continue
            with open(page_path, 'r', encoding='utf-8') as page:
                content = page.read()
            if marker not in content:
                continue
            # Plain string replacement: the menu is literal text, not a regex
            # replacement template, so backslashes in names stay intact.
            with open(page_path, 'w', encoding='utf-8') as page:
                page.write(content.replace(marker, menu_html))


# Markup substituted for the "<pre>-- table --" marker.
_TABLE_HEADER_HTML = (
    '<table><colgroup><col width="18%"></col><col width="16%"></col>'
    '<col width="23%"></col></colgroup><thead><tr class="header">'
    '<th>Compétence</th><th>Activité</th><th>Justification</th></tr></thead>'
    '<tbody class="skill-table">')

_TABLE_ROW_RE = re.compile(r'^\$.*')
_TABLE_LAST_ITEM_RE = re.compile(r'^    \$.*\$$')
_TABLE_ITEM_RE = re.compile(r'^    \$.*[^\$]$')
_TABLE_CELL_RE = re.compile(r'^  \$.*')


def _resplit_lines(lines):
    """Return *lines* re-split on newlines, as re-reading the text would."""
    return io.StringIO("".join(lines)).readlines()


def _convert_table_markup(lines):
    """Convert the ``$``-based table notation in *lines* to HTML.

    The conversion is a sequence of passes, each one working on the output
    of the previous one:

    1. ``<pre>-- table --`` becomes the table header.
    2. A line starting with ``$`` opens a new ``<tr>`` (closing the previous
       one), with classes alternating ``odd``/``even``.
    3. A line starting with four spaces and ``$`` becomes a ``<li>``; the
       first item of a group also opens ``<td><ul>``, and an item ending
       with ``$`` ends the group.
    4. A line starting with two spaces and ``$`` closes the item list and
       becomes a single-item cell of its own.
    5. ``-- end table --`` closes the table.

    In every pass the inserted text is the part of the line between the
    first and the second ``$`` (or the end of the line).
    """
    # Pass 1: table header.
    lines = [line.replace('<pre>-- table --', _TABLE_HEADER_HTML)
             for line in lines]

    # Pass 2: row openers.
    converted = []
    row_class = None
    for line in lines:
        match = _TABLE_ROW_RE.match(line)
        if match is None:
            converted.append(line)
            continue
        if row_class is None:
            row_class = "odd"
            opener = '<tr class="odd">'
        else:
            row_class = "even" if row_class == "odd" else "odd"
            opener = '</tr><tr class="%s">' % row_class
        converted.append(opener + line[match.end():])
    lines = converted

    # Pass 3: list items.
    converted = []
    group_open = False
    for line in lines:
        match = _TABLE_LAST_ITEM_RE.match(line)
        if match is not None:
            converted.append("<li>" + line.split("$")[1] + "</li>"
                             + line[match.end():])
            group_open = False
            continue
        match = _TABLE_ITEM_RE.match(line)
        if match is not None:
            prefix = "<li>" if group_open else "<td><ul><li>"
            converted.append(prefix + line.split("$")[1] + "</li>"
                             + line[match.end():])
            group_open = True
            continue
        converted.append(line)
    # NOTE: for an item without a closing "$", "[^\$]" matches the trailing
    # newline, so the newline ends up inside the <li> and the next line is
    # glued behind "</li>".  The following pass must see the physical lines
    # this produces, hence the re-split.
    lines = _resplit_lines(converted)

    # Pass 4: standalone cells.  The item list is closed inside-out
    # (</ul> before </td>).
    converted = []
    for line in lines:
        match = _TABLE_CELL_RE.match(line)
        if match is None:
            converted.append(line)
        else:
            converted.append("</ul></td><td><ul><li>" + line.split("$")[1]
                             + "</li></ul></td>" + line[match.end():])

    # Pass 5: table footer.
    return [line.replace("-- end table --", "</tbody></table>")
            for line in converted]


def generate_table():
    """Expand the ``$``-based skill table of ``website/bts-sio.html``.

    Does nothing when ``BUILDDIR/website/bts-sio.html`` does not exist.
    Otherwise the page is read once, converted in memory (see
    ``_convert_table_markup``) and written back once.
    """
    page_path = os.path.join(BUILDDIR, "website", "bts-sio.html")
    if not os.path.exists(page_path):
        return

    sys.stderr.write("generating table\n")

    with open(page_path, 'r', encoding='utf-8') as page:
        lines = page.readlines()
    # Convert before reopening for writing so that a failure cannot leave
    # the page truncated.
    converted = _convert_table_markup(lines)
    with open(page_path, 'w', encoding='utf-8') as page:
        page.writelines(converted)


def _render_post_index(posts):
    """Return the HTML of a per-year index of *posts*.

    *posts* is an iterable of ``(date, relpath, title)`` tuples, expected in
    reverse chronological order: a new year heading and table are started
    each time the year of ``date`` drops below the current one.  ``relpath``
    and ``title`` are plain text and are HTML-escaped here.
    """
    parts = ['<div class="blog-index" id="toc">\n']
    year = None
    for date, relpath, title in posts:
        if year is None or date.year < year:
            # close the previous table if there is one
            if year is not None:
                parts.append('</table>\n')
            year = date.year
            parts.append(
                '\n<h2 class="blog-index-year-title" id="{0}">'
                '<span class="left-h2">.:</span>'
                '<span class="title-h2">{0}</span>'
                '<span class="right-h2">:.</span></h2>\n\n'.format(year))
            parts.append('<table class="blog-index-yearly-index">\n')
        parts.append(
            '<tr><td class="blog-index-post-date">'
            '<time class="date" datetime="%s">%s</time></td>'
            '<td class="blog-index-post-title"><a href="%s">%s</a></td></tr>\n' %
            (date.isoformat(), date.strftime("%b %d"),
             html.escape(relpath), html.escape(title, quote=False)))
    if year is not None:
        parts.append('</table>\n')
    parts.append('</div>')
    return "".join(parts)


def _fill_placeholder(page_path, placeholder, replacement):
    """Replace every *placeholder* in the file *page_path* by *replacement*.

    Nothing happens when the file does not exist or does not contain the
    placeholder.  The replacement is inserted literally.
    """
    if not os.path.exists(page_path):
        return
    with open(page_path, 'r', encoding='utf-8') as page:
        content = page.read()
    if placeholder not in content:
        return
    with open(page_path, 'w', encoding='utf-8') as page:
        page.write(content.replace(placeholder, replacement))


def generate_blog_list(feed):
    """Insert the per-year list of blog posts into ``blog/index.html``.

    The list is built from ``feed.entries`` (using each entry's
    ``updated_datetime``, ``relpath`` and ``title_text``) and replaces the
    ``{% generate blog_list here %}`` placeholder of
    ``BUILDDIR/blog/index.html``, if that file exists.
    """
    sys.stderr.write("generating blog list\n")

    posts = [(entry.updated_datetime, entry.relpath, entry.title_text)
             for entry in feed.entries]
    _fill_placeholder(os.path.join(BUILDDIR, "blog", "index.html"),
                      '{% generate blog_list here %}',
                      _render_post_index(posts))


def generate_notes_list():
    """Insert the per-year list of notes into ``notes/index.html``.

    Every file of ``BUILDDIR/notes`` whose name starts with a
    ``YYYY-MM-DD`` date and contains ``.html`` is parsed for its
    ``<title>`` and its ``<meta name="date">``; files are taken in
    descending name order.  The resulting list replaces the
    ``{% generate notes_list here %}`` placeholder of
    ``BUILDDIR/notes/index.html``, if that file exists.

    Raises
    ------
    Exception
        Whatever parsing a note raised; the offending file name and its
        HTML are written to stderr before the exception is re-raised.

    """
    sys.stderr.write("generating notes list\n")

    notes_dir = os.path.join(BUILDDIR, "notes")
    posts = []
    for name in sorted(os.listdir(notes_dir), reverse=True):
        if not re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html", name):
            continue
        htmlpath = os.path.join(notes_dir, name)
        try:
            with open(htmlpath, encoding="utf-8") as htmlfile:
                soup = bs4.BeautifulSoup(htmlfile.read(), "lxml")
            title_text = soup.title.text
            post_date = soup.find("meta", attrs={"name": "date"})["content"]
            posts.append((dateutil.parser.parse(post_date),
                          "/notes/%s" % name, title_text))
        except Exception:
            sys.stderr.write("error: failed to generate notes list entry from %s\n" % name)
            with open(htmlpath, encoding="utf-8") as htmlfile:
                sys.stderr.write("dumping HTML:%s\n\n" % htmlfile.read())
            raise

    _fill_placeholder(os.path.join(notes_dir, "index.html"),
                      '{% generate notes_list here %}',
                      _render_post_index(posts))


def generate_index(feed):
    """Generate index.html from index.md and a TOC.

    The content of INDEXMD (when that file exists) followed by a per-year
    table of ``feed.entries`` is written to a temporary file, which pandoc
    then converts to INDEXHTML using HTMLTEMPLATE.  A failing pandoc run is
    reported on stderr and otherwise ignored.  The temporary file is always
    removed, whatever happens.
    """

    sys.stderr.write("generating index.html\n")

    # generate TOC
    toc = ['<div class="blog-index" id="toc">']
    year = 10000  # will be larger than the latest year for quite a while
    # recall that entries are in reverse chronological order
    table_opened = False
    for entry in feed.entries:
        date = entry.updated_datetime
        if date.year < year:
            # close the previous table if there is one
            if table_opened:
                toc.append('</table>\n')
            # start a new year section with the smaller year
            year = date.year
            toc.append('\n<h2 class="blog-index-year-title" id="{0}"><span class="left-h2">.:</span><span class="title-h2">{0}</span><span class="right-h2">:.</span></h2>\n\n'.format(year))
            toc.append('<table class="blog-index-yearly-index">\n')
            table_opened = True

        # The row is HTML but the link itself is left in Markdown syntax,
        # since the result is fed to pandoc:
        #
        #   <tr>
        #     <td class="blog-index-post-date"><time class="date" datetime="2015-05-05T00:06:04-0700">May 5</time></td>
        #     <td class="blog-index-post-title">[Blah blah](/blog/2015-05-04-blah-blah.html)</td>
        #   </tr>
        monthday = date.strftime("%b %d")
        toc.append('<tr><td class="blog-index-post-date"><time class="date" datetime="%s">%s</time></td>'
                   '<td class="blog-index-post-title">[%s](%s)</td></tr>\n' %
                   (date.isoformat(), monthday, entry.title_text, entry.relpath))
    if table_opened:
        toc.append('</table>\n')
    toc.append('</div>')

    # create tempfile with index.md and the TOC concatenated, and generate index.html from that
    # pylint: disable=invalid-name
    fd, tmppath = tempfile.mkstemp()
    os.close(fd)
    try:
        with open(tmppath, 'w', encoding='utf-8') as tmpfile:
            if os.path.exists(INDEXMD):
                with open(INDEXMD, 'r', encoding='utf-8') as indexmd:
                    tmpfile.write("%s\n\n<hr>\n\n" % indexmd.read())
            tmpfile.write("%s\n" % "".join(toc))

        pandoc_args = [
            "pandoc", tmppath,
            "--template", HTMLTEMPLATE,
            "--highlight-style=pygments",
            "-o", INDEXHTML,
        ]
        subprocess.check_call(pandoc_args)
    except subprocess.CalledProcessError:
        sys.stderr.write("error: failed to generate index.html\n")
    finally:
        # Also reached when pandoc is missing (OSError) or INDEXMD cannot be
        # read, so the temporary file never outlives the call.
        os.remove(tmppath)


def generate_sitemap(feed):
    """Generate sitemap.xml.

    The sitemap written to ``BUILDDIR/sitemap.xml`` lists, in this order:
    BLOG_HOME, every top-level ``.html`` page of BUILDDIR other than
    ``index.html`` and Google's site ownership verification file (in name
    order, so that the output is stable between runs), and every entry of
    ``feed.entries``.

    For a top-level page, the update time is parsed from the text of the
    element with class ``updated`` inside its ``<footer>``; when there is no
    such element, None is passed on to ``utils.make_sitemap_url_element``.
    """
    sitemap = ET.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
    # index
    sitemap.append(utils.make_sitemap_url_element(BLOG_HOME, feed.updated, "daily", 1.0))
    # other top level pages
    for name in sorted(os.listdir(BUILDDIR)):
        if not name.endswith(".html") or name == "index.html":
            continue
        # exclude Google's site ownership verification file
        if re.match(r"google[a-z0-9]+\.html", name):
            continue
        link = urllib.parse.urljoin(BLOG_HOME, name)
        with open(os.path.join(BUILDDIR, name), encoding="utf-8") as htmlobj:
            soup = bs4.BeautifulSoup(htmlobj.read(), "lxml")
        # try to extract updated time
        updated = None
        if soup.footer is not None:
            updated_tag = soup.footer.find(attrs={"class": "updated"})
            if updated_tag is not None:
                updated = dateutil.parser.parse(updated_tag.text)
        sitemap.append(utils.make_sitemap_url_element(link, updated, "monthly", 0.9))

    # blog entries
    for entry in feed.entries:
        sitemap.append(utils.make_sitemap_url_element(entry.link, entry.updated, "monthly", 0.9))

    sitemappath = os.path.join(BUILDDIR, "sitemap.xml")
    with open(sitemappath, "w", encoding="utf-8") as sitemapfile:
        sitemapfile.write('<?xml version="1.0" encoding="UTF-8"?>\n%s\n' %
                          ET.tostring(sitemap).decode('utf-8'))
    sys.stderr.write("wrote sitemap.xml\n")


# One entry per decorated heading level:
# (tag name, substring marking headings to leave alone,
#  left ornament, format of the title text, right ornament)
_HEADING_STYLES = (
    ("h1", "article-title", "█▓▒░", "「%s」", "░▒▓█"),
    ("h2", "blog-index-year-title", ".: ", "%s", " :."),
)


def _decorate_heading(soup, heading, level, left, title, right):
    """Replace the content of *heading* by three ornament/title spans.

    The heading gets the class *level* (``h1`` or ``h2``) and its content
    becomes ``<span class="left-LEVEL">``, ``<span class="title-LEVEL">``
    and ``<span class="right-LEVEL">``, holding *left*, *title* and *right*.
    """
    heading['class'] = level
    heading.clear()
    for role, text in (("left", left), ("title", title), ("right", right)):
        span = soup.new_tag("span")
        span['class'] = "%s-%s" % (role, level)
        span.string = text
        heading.append(span)


def rewrite_title():
    """Override the title of some page for a better render.

    The pages handled are ``index.html``, ``blog/index.html`` and
    ``notes/index.html`` of BUILDDIR, plus every ``.html`` file located in a
    directory under BUILDDIR whose path ends with ``website``,
    ``Documents``, ``notes`` or ``blog``.  Missing pages are skipped.

    In each page, every ``<h1>`` (except those mentioning ``article-title``)
    and every ``<h2>`` (except those mentioning ``blog-index-year-title``)
    is surrounded by ornaments.  A heading is left alone when its content
    is not a single piece of text (which includes headings that were
    already decorated) or when its markup spans several lines.

    The markup of the heading, as serialized by BeautifulSoup, is looked up
    literally in the page text and only that markup is substituted; a
    heading whose source text differs from its serialized form is therefore
    left unchanged.
    """
    sys.stderr.write("Overriding some titles\n")

    candidates = [os.path.join(BUILDDIR, "index.html"),
                  os.path.join(BUILDDIR, "blog", "index.html"),
                  os.path.join(BUILDDIR, "notes", "index.html")]
    for root, _dirs, files in os.walk(BUILDDIR):
        if re.search(r'(website|Documents|notes|blog)$', root):
            candidates.extend(os.path.join(root, name)
                              for name in files if name.endswith(".html"))

    seen = set()
    for filename in candidates:
        # The index pages are listed explicitly and found again by the walk:
        # handle each page once.
        key = os.path.normpath(filename)
        if key in seen or not os.path.exists(filename):
            continue
        seen.add(key)

        with open(filename, 'r', encoding='utf-8') as page:
            content = page.read()
        soup = bs4.BeautifulSoup(content, "lxml")

        rewritten = content
        for level, skip_marker, left, title_format, right in _HEADING_STYLES:
            for heading in soup.find_all(level):
                original = str(heading)
                text = heading.string
                if skip_marker in original or "\n" in original or text is None:
                    continue
                _decorate_heading(soup, heading, level,
                                  left, title_format % text, right)
                # Literal substitution: heading text is not a regex.
                rewritten = rewritten.replace(original, str(heading))

        if rewritten != content:
            with open(filename, 'w', encoding='utf-8') as page:
                page.write(rewritten)


def generate_index_and_feed():
    """Generate index.html and feeds (atom and rss).

    Steps, in order:

    1. Build an AtomFeed and an RssFeed carrying the site-wide metadata
       taken from the configuration constants.
    2. Turn every file of ``BUILDDIR/blog`` whose name starts with a
       ``YYYY-MM-DD`` date and contains ``.html`` into one atom entry and
       one rss item, then sort both lists newest first.
    3. Run generate_index, generate_menu, generate_table,
       generate_blog_list, generate_notes_list and rewrite_title.
    4. Stamp both feeds with the current time, write them to ATOM and RSS,
       and call generate_sitemap.

    If a blog page cannot be processed, its name and its full HTML are
    written to stderr and the exception is re-raised.
    """
    # pylint: disable=too-many-statements,attribute-defined-outside-init,invalid-name
    sys.stderr.write("generating atom and rss feeds\n")
    # initialize atom feed
    feed = AtomFeed()
    # NOTE(review): the configuration values are interpolated into XML source
    # text here (and for title/subtitle below), so they have to be
    # well-formed XML content, e.g. a bare "&" would make parsing fail.
    feed.author = ET.fromstring(
        "<author>"
        "<name>{author}</name>"
        "<uri>{home}</uri>"
        "<email>{email}</email>"
        "</author>".format(author=AUTHOR, home=BLOG_HOME, email=AUTHOR_EMAIL))
    feed.generator = ET.Element("generator", uri=GENERATOR_HOME_PAGE)
    feed.generator.text = GENERATOR_NAME
    if ATOM_ICON_PATH is not None:
        feed.icon = ET.Element("icon")
        feed.icon.text = urllib.parse.urljoin(BLOG_HOME, ATOM_ICON_PATH)
    feed.id_text = BLOG_HOME
    feed.id = ET.Element("id")
    feed.id.text = feed.id_text
    feed.links = [
        ET.Element("link", href=urllib.parse.urljoin(BLOG_HOME, "atom.xml"), rel="self",
                   type="application/atom+xml"),
        ET.Element("link", href=BLOG_HOME, rel="alternate",
                   type="text/html"),
    ]
    feed.title_text = BLOG_TITLE
    feed.title = ET.fromstring("<title>{title}</title>".format(title=BLOG_TITLE))
    feed.subtitle_text = BLOG_DESCRIPTION
    feed.subtitle = ET.fromstring("<subtitle>{subtitle}</subtitle>"
                                  .format(subtitle=BLOG_DESCRIPTION))
    # initialize rss feed
    rss = RssFeed()
    rss.rssurl = urllib.parse.urljoin(BLOG_HOME, "rss.xml")
    rss.title = ET.Element("title")
    rss.title.text = BLOG_TITLE
    rss.link = ET.Element("link")
    rss.link.text = BLOG_HOME
    rss.description = ET.Element("description")
    rss.description.text = BLOG_DESCRIPTION
    rss.language = ET.Element("language")
    rss.language.text = LANGUAGE
    rss.author_text = "{email} ({name})".format(email=AUTHOR_EMAIL, name=AUTHOR)
    rss.managingEditor = ET.Element("managingEditor")
    rss.managingEditor.text = rss.author_text
    rss.webMaster = ET.Element("webMaster")
    rss.webMaster.text = rss.author_text
    rss.generator = ET.Element("generator")
    rss.generator.text = "{generator} ({url})".format(generator=GENERATOR_NAME,
                                                      url=GENERATOR_HOME_PAGE)
    # The <image> element itself is always created; it only gets children
    # when an icon is configured.
    rss.image = ET.Element("image")
    if RSS_ICON_PATH is not None:
        ET.SubElement(rss.image, "url").text = urllib.parse.urljoin(BLOG_HOME, RSS_ICON_PATH)
        rss.image.append(copy.deepcopy(rss.title))
        rss.image.append(copy.deepcopy(rss.link))
        ET.SubElement(rss.image, "width").text = str(RSS_ICON_WIDTH)
        ET.SubElement(rss.image, "height").text = str(RSS_ICON_HEIGHT)

    # update times will be set after everything finishes

    for name in os.listdir(os.path.join(BUILDDIR, "blog")):
        if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html", name):
            htmlpath = os.path.join(BUILDDIR, "blog", name)
            entry = AtomEntry()
            item = RssItem()
            try:
                with open(htmlpath, encoding="utf-8") as htmlfile:
                    soup = bs4.BeautifulSoup(htmlfile.read(), "lxml")

                    # generate atom entry
                    entry.author = copy.deepcopy(feed.author)  # assume it's always the same author
                    entry_url = urllib.parse.urljoin(BLOG_HOME, "blog/%s" % name)
                    entry.id_text = entry_url
                    entry.id = ET.Element("id")
                    entry.id.text = entry_url
                    entry.relpath = "/blog/%s" % name
                    entry.link = ET.Element("link", href=entry_url)
                    entry.title_text = soup.title.text
                    entry.title = ET.Element("title", type="html")
                    entry.title.text = entry.title_text
                    # the post date comes from the page's <meta name="date"> tag
                    post_date = soup.find("meta", attrs={"name": "date"})["content"]
                    entry.updated_datetime = dateutil.parser.parse(post_date)
                    entry.updated = ET.Element("updated")
                    # pylint: disable=no-member
                    entry.updated.text = entry.updated_datetime.isoformat()

                    # process content
                    tags_to_remove = []
                    # mark the article header for removal (a footer, if any, is kept)
                    article = soup.article
                    if article.header is not None:
                        tags_to_remove.append(article.header)
                    # mark line numbers for removal
                    for line_number_span in article.find_all("span",
                                                             attrs={"class": "line-number"}):
                        tags_to_remove.append(line_number_span)
                    # mark script tags for removal
                    for script_tag in article.find_all("script"):
                        tags_to_remove.append(script_tag)
                    # make internal links absolute
                    utils.absolutify_links(article, entry_url)
                    # remove marked tags
                    for tag in tags_to_remove:
                        tag.extract()

                    # what remains of the article is the content of both the
                    # atom entry and the rss item
                    entry.content_html = ''.join([str(content)
                                                  for content in article.contents])
                    entry.content = ET.Element("content", type="html")
                    entry.content.text = ET.CDATA(entry.content_html)
                    entry.assemble_entry()
                    feed.entries.append(entry)

                    # generate rss item
                    item.title = ET.Element("title")
                    item.title.text = entry.title_text
                    item.link = ET.Element("link")
                    item.link.text = entry_url
                    item.description = ET.Element("description")
                    item.description.text = ET.CDATA(entry.content_html)
                    item.author = ET.Element("author")
                    item.author.text = rss.author_text
                    item.guid = ET.Element("guid", isPermaLink="true")
                    item.guid.text = item.link.text
                    item.timestamp = entry.updated_datetime.timestamp()
                    item.pubDate = ET.Element("pubDate")
                    item.pubDate.text = email.utils.formatdate(item.timestamp, usegmt=True)
                    item.assemble_item()
                    rss.items.append(item)
            except Exception:
                # report which page failed, dump it, and let the error propagate
                sys.stderr.write("error: failed to generate feed entry from %s\n" % name)
                with open(htmlpath, encoding="utf-8") as htmlfile:
                    sys.stderr.write("dumping HTML:%s\n\n" % htmlfile.read())
                raise
    # sort entries by reverse chronological order
    feed.entries.sort(key=lambda entry: entry.updated_datetime, reverse=True)
    rss.items.sort(key=lambda item: item.timestamp, reverse=True)

    # generate the pages derived from the entries, then post-process the built pages
    generate_index(feed)
    generate_menu()
    generate_table()
    generate_blog_list(feed)
    generate_notes_list()
    rewrite_title()

    # stamp both feeds with the current time
    feed.updated_datetime = utils.current_datetime()
    feed.updated = ET.Element("updated")
    feed.updated.text = feed.updated_datetime.isoformat()

    rss.update_timestamp = time.time()
    rss.pubDate = ET.Element("pubDate")
    rss.pubDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True)
    rss.lastBuildDate = ET.Element("lastBuildDate")
    rss.lastBuildDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True)

    # FEED_MAX_ENTRIES presumably caps the number of entries dumped -- see rss module
    with open(ATOM, "w", encoding="utf-8") as atom:
        atom.write("%s\n" % feed.dump_feed(FEED_MAX_ENTRIES))
        sys.stderr.write("wrote atom.xml\n")

    with open(RSS, "w", encoding="utf-8") as rssxml:
        rssxml.write("%s\n" % rss.dump_rss(FEED_MAX_ENTRIES))
        sys.stderr.write("wrote rss.xml\n")

    # the sitemap uses feed.updated, so it has to come after the stamping above
    generate_sitemap(feed)


# exclude_list is only initialized once to avoid constant disk I/O
@utils.static_vars(exclude_list=None)
def generate_blog(fresh=False, report_total_errors=True):
    """Generate the blog in BUILDDIR.

    Walks SOURCEDIR, copies static assets and compiles Markdown files
    to HTML with pandoc, then regenerates the index and feeds if
    anything was new or modified.

    Parameters
    ----------
    fresh : bool
        If True, remove all existing build artifacts (everything in
        BUILDDIR except ``.git``) and start afresh; otherwise, only
        copy or build new or modified files. Default is False.
    report_total_errors : bool
        If True, a line will be printed to stderr at the end of build
        (assuming the function doesn't raise early) reporting the total
        number of errors, e.g., "build finished with 0 errors". This is
        turned on by default, but pass False to turn it off, which will
        result in a completely silent session if nothing changed. This
        is useful for auto-regen, for instance.

    Returns
    -------
    failed_builds : int
        Number of build failures (pandoc invocations that exited with
        a non-zero status).

    Raises
    ------
    OSError
        If SOURCEDIR is not a directory or HTMLTEMPLATE does not exist.

    """

    # pylint: disable=too-many-branches,too-many-locals,too-many-statements

    if not os.path.isdir(SOURCEDIR):
        raise OSError("source directory %s does not exist" % SOURCEDIR)
    if not os.path.exists(HTMLTEMPLATE):
        raise OSError("HTML template %s not found" % HTMLTEMPLATE)

    if not os.path.isdir(BUILDDIR):
        # a non-directory object is squatting on the build path; replace it
        if os.path.exists(BUILDDIR):
            os.remove(BUILDDIR)
        os.mkdir(BUILDDIR, mode=0o755)
    if fresh:
        for name in os.listdir(BUILDDIR):
            if name == ".git":
                continue
            obj = os.path.join(BUILDDIR, name)
            if os.path.isdir(obj):
                shutil.rmtree(obj)
            else:
                os.remove(obj)

    # nojekyll: https://help.github.com/articles/files-that-start-with-an-underscore-are-missing/
    nojekyll_path = os.path.join(BUILDDIR, ".nojekyll")
    if not os.path.exists(nojekyll_path):
        # create an empty marker file
        with open(nojekyll_path, "w"):
            pass

    cname_path = os.path.join(BUILDDIR, "CNAME")
    if CUSTOM_DOMAIN and not os.path.exists(cname_path):
        with open(cname_path, "w") as fileobj:
            fileobj.write(CUSTOM_DOMAIN)

    failed_builds = 0
    # A change to the generator itself or to the HTML template invalidates
    # every build product, so both mtimes take part in the staleness check.
    generator_mtime = os.path.getmtime(GENERATORSOURCE)
    template_mtime = os.path.getmtime(HTMLTEMPLATE)
    fundamental_mtime = max(generator_mtime, template_mtime)
    anything_modified = False

    exclude_list = generate_blog.exclude_list  # get value of static variable
    if exclude_list is None:
        try:
            with open(EXCLUDELIST) as fp:
                # one path per line, relative to SOURCEDIR; blank lines and
                # lines starting with '#' are ignored
                exclude_list = [os.path.abspath(os.path.join(SOURCEDIR, line.rstrip()))
                                for line in fp
                                if line.strip() != "" and not line.startswith('#')]
        except OSError:
            exclude_list = []
        generate_blog.exclude_list = exclude_list  # assign to static variable for the future

    # Extensions that are copied verbatim; "md" is the only compiled type.
    # Files with any other extension are ignored.
    copied_extensions = ("css", "js", "asc", "html", "jpg", "png", "svg", "ico", "txt",
                         "eot", "ttf", "woff", "woff2")

    for root, dirs, files in os.walk(SOURCEDIR):
        # If root is in exclude list, skip all files and remove all subdirs from traversal list.
        if root in exclude_list:
            dirs[:] = []
            continue

        relroot = os.path.relpath(root, start=SOURCEDIR)
        dstroot = os.path.join(BUILDDIR, relroot)
        if not os.path.isdir(dstroot):
            if os.path.exists(dstroot):
                os.remove(dstroot)
            os.mkdir(dstroot, mode=0o755)

        for name in files:
            if name.startswith('.') or os.path.join(root, name) in exclude_list:
                continue

            extension = name.split(".")[-1]
            is_markdown = extension == "md"
            if not is_markdown and extension not in copied_extensions:
                continue

            relpath = os.path.join(relroot, name)
            srcpath = os.path.join(root, name)
            if is_markdown:
                dstpath = os.path.join(dstroot, re.sub(r'\.md$', '.html', name))
            else:
                dstpath = os.path.join(dstroot, name)

            up_to_date = (os.path.exists(dstpath) and
                          os.path.getmtime(dstpath) >
                          max(fundamental_mtime, os.path.getmtime(srcpath)))
            if up_to_date:
                continue

            # new post or modified post
            anything_modified = True
            if srcpath == INDEXMD:
                continue  # index will be processed separately

            if not is_markdown:
                sys.stderr.write("copying %s\n" % relpath)
                shutil.copy(srcpath, dstpath)
                continue

            sys.stderr.write("compiling %s\n" % relpath)
            pandoc_args = [
                "pandoc", srcpath,
                "--template", HTMLTEMPLATE,
                "--highlight-style=pygments",
                "-o", dstpath,
            ]
            try:
                subprocess.check_call(pandoc_args)
            except subprocess.CalledProcessError:
                failed_builds += 1
                sys.stderr.write("error: failed to generate %s\n" % relpath)
            else:
                # postprocess generated HTML file; only when pandoc succeeded,
                # since otherwise dstpath may be missing or stale
                utils.postprocess_html_file(dstpath)

    if anything_modified:
        generate_index_and_feed()
        sys.stderr.write("done\n")

    if report_total_errors:
        sys.stderr.write("build finished with %d errors\n" % failed_builds)
    return failed_builds