#!/bin/python3
"""Generators for the static site: menu, tables, listings, index, feeds, sitemap.

NOTE(review): the copy of this module that was reviewed had lost the HTML
markup inside its string literals and comments.  Every literal that contains
markup below was reconstructed from the surrounding logic; the literals that
could not be confirmed carry their own ``NOTE(review)`` and must be checked
against the real templates/CSS before this is deployed.
"""

import copy
import email.utils
import io
import os
import re
import subprocess
import sys
import tempfile
import time
import urllib.parse
from html import escape as _escape_html

import bs4
import dateutil
import dateutil.parser  # ``import dateutil`` alone does not load the parser submodule
import lxml.etree as ET

from rss import *
from config.config import *
from utils import utils

# File names of dated posts/notes, e.g. ``2015-05-04-blah-blah.html``.
_DATED_HTML_RE = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html")

# NOTE(review): reconstructed markup -- class names and ids are assumptions,
# except "blog-index-year-title", which rewrite_title() relies on.
_INDEX_CLOSE = '</div>'
_YEAR_HEADING = '\n<h2 class="blog-index-year-title" id="{0}">.:{0}:.</h2>\n\n'
_YEAR_TABLE_OPEN = '<table class="blog-index-yearly-index">\n'
_YEAR_TABLE_CLOSE = '</table>\n'
_INDEX_ROW = ('<tr><td class="blog-index-post-date">'
              '<time class="date" datetime="%s">%s</time></td>'
              '<td class="blog-index-post-title">%s</td></tr>\n')

# NOTE(review): reconstructed markup for generate_table().  Only the three
# column titles, the use of list items and the odd/even row class are visible
# in the reviewed source; the exact tags are a best guess -- TODO confirm.
_TABLE_OPEN = ('<table><tr><th>Compétence</th><th>Activité</th>'
               '<th>Justification</th></tr>')
_TABLE_CLOSE = '</tr></table>'


def _read_lines(path):
    """Return the lines of the UTF-8 text file ``path`` (line ends kept)."""
    with open(path, 'r', encoding='utf-8') as handle:
        return handle.readlines()


def _write_lines(path, lines):
    """Overwrite the UTF-8 text file ``path`` with ``lines``."""
    with open(path, 'w', encoding='utf-8') as handle:
        handle.writelines(lines)


def _apply_line_pass(path, line_pass):
    """Rewrite ``path`` in place, mapping every line through ``line_pass``."""
    lines = _read_lines(path)
    _write_lines(path, [line_pass(line) for line in lines])


def _substitute_marker(path, marker, replacement):
    """Replace the literal ``marker`` by ``replacement`` in ``path``.

    Does nothing when ``path`` does not exist.  The replacement is inserted
    verbatim (``str.replace``), so backslashes in generated HTML or in post
    titles cannot be misread as regex escapes.
    """
    if os.path.exists(path):
        _apply_line_pass(path, lambda line: line.replace(marker, replacement))


def _html_title_cell(title, relpath):
    """Return an HTML link to ``relpath`` labelled ``title`` (both escaped)."""
    return '<a href="%s">%s</a>' % (_escape_html(relpath), _escape_html(title))


def _markdown_title_cell(title, relpath):
    """Return a Markdown link to ``relpath`` labelled ``title``."""
    return '[%s](%s)' % (title, relpath)


def _render_dated_index(rows, title_cell, opening):
    """Render ``rows`` as one table per year and return the markup.

    ``rows`` is an iterable of ``(date, title, relpath)`` tuples, expected in
    reverse chronological order: a new year heading and table are started
    each time the year decreases.  ``title_cell(title, relpath)`` renders the
    content of the title column; ``opening`` is the opening markup of the
    surrounding container.
    """
    parts = [opening]
    year = None
    table_opened = False
    for date, title, relpath in rows:
        if year is None or date.year < year:
            # close the previous table if there is one
            if table_opened:
                parts.append(_YEAR_TABLE_CLOSE)
            year = date.year
            parts.append(_YEAR_HEADING.format(year))
            parts.append(_YEAR_TABLE_OPEN)
            table_opened = True
        parts.append(_INDEX_ROW % (date.isoformat(), date.strftime("%b %d"),
                                   title_cell(title, relpath)))
    if table_opened:
        parts.append(_YEAR_TABLE_CLOSE)
    parts.append(_INDEX_CLOSE)
    return ''.join(parts)


def generate_menu():
    """Replace the ``-- generate menu here --`` marker in every built page."""
    sys.stderr.write("generating menu\n")

    # Collect the pages where the menu will be written, relative to build/.
    html_files = []
    for root, _dirs, files in os.walk(BUILDDIR):
        for name in files:
            if not name.endswith(".html"):
                continue
            pieces = root.split('build/')
            if len(pieces) > 1:
                html_files.append(os.path.join(pieces[1], name))
            else:
                # root is the build directory itself
                html_files.append(name)

    # NOTE(review): the menu markup is an empty string in the reviewed source;
    # it may have been lost with the rest of the HTML -- TODO confirm.
    menu_html = ""

    for html_file in html_files:
        _substitute_marker("build/" + html_file, '-- generate menu here --', menu_html)


def generate_table():
    """Turn the ``$``-prefixed lines of ``website/bts-sio.html`` into a table.

    The page is rewritten in several passes: the ``-- table --`` marker opens
    the table, lines starting with ``$`` start a row (alternating the
    ``odd``/``even`` class), indented ``$`` lines become list items inside
    cells, and ``-- end table --`` closes the table.

    NOTE(review): every markup string in this function is reconstructed, and
    runs of spaces inside the patterns written with a single space may have
    been collapsed in the reviewed source -- verify against the real page.
    """
    documents = ["/website/bts-sio.html"]
    if not os.path.exists(BUILDDIR + "/website/bts-sio.html"):
        return
    sys.stderr.write("generating table\n")

    # Shared across passes and documents, as in the original implementation.
    state = {"first_row": True, "first_item": True, "row_class": "odd"}

    def open_rows(line):
        """Replace a line starting with ``$`` by a row opening tag."""
        if not re.match(r'^\$.*', line):
            return line
        if state["first_row"]:
            state["first_row"] = False
            state["first_item"] = True
            markup = '<tr class="%s">' % state["row_class"]
        else:
            state["row_class"] = "even" if state["row_class"] == "odd" else "odd"
            markup = '</tr><tr class="%s">' % state["row_class"]
        return re.sub(r'^\$.*', lambda _match: markup, line)

    def fill_cells(line):
        """Turn indented ``$text$`` / ``$text`` lines into cells and items."""
        if re.match(r'^    \$.*\$$', line):
            state["first_item"] = True
            markup = "<td><ul><li>" + line.split("$")[1] + "</li></ul></td>"
            return re.sub(r'^    \$.*\$$', lambda _match: markup, line)
        if re.match(r'^ \$.*[^\$]$', line):
            if state["first_item"]:
                # the first item of a group also opens the cell and the list
                state["first_item"] = False
                markup = "<td><ul><li>" + line.split("$")[1] + "</li>"
            else:
                markup = "<li>" + line.split("$")[1] + "</li>"
            return re.sub(r'^ \$.*[^\$]$', lambda _match: markup, line)
        return line

    def close_cells(line):
        """Convert the remaining indented ``$`` lines and close the cell."""
        if not re.match(r'^ \$.*', line):
            return line
        # NOTE(review): the reviewed source really does embed the whole raw
        # ``line`` as a second item here -- confirm this is intended.
        markup = ("<li>" + line.split("$")[1] + "</li><li>" + line
                  + "</li></ul></td>")
        return re.sub(r'^ \$.*', lambda _match: markup, line)

    for document_file in documents:
        path = "build" + document_file
        if not os.path.exists(path):
            continue
        # Each pass re-reads the file so that newlines introduced by an
        # earlier pass are split into separate lines for the next one.
        _substitute_marker(path, '-- table --', _TABLE_OPEN)
        _apply_line_pass(path, open_rows)
        _apply_line_pass(path, fill_cells)
        _apply_line_pass(path, close_cells)
        _substitute_marker(path, '-- end table --', _TABLE_CLOSE)


def generate_blog_list(feed):
    """Write the per-year list of blog posts into ``build/blog/index.html``.

    ``feed.entries`` must already be sorted in reverse chronological order.
    The list replaces the ``{% generate blog_list here %}`` marker; nothing
    happens when the blog index page does not exist.
    """
    sys.stderr.write("generating blog list\n")
    rows = [(entry.updated_datetime, entry.title_text, entry.relpath)
            for entry in feed.entries]
    # NOTE(review): container markup reconstructed -- TODO confirm.
    listing = _render_dated_index(rows, _html_title_cell,
                                  '<div class="blog-index" id="toc">\n\n')
    _substitute_marker("build/blog/index.html", '{% generate blog_list here %}', listing)


def generate_notes_list():
    """Write the per-year list of notes into ``build/notes/index.html``.

    Notes are the dated HTML files of ``BUILDDIR/notes``, visited in reverse
    file-name order; title and date are read from each page's ``<title>`` and
    ``<meta name="date">``.  The list replaces the
    ``{% generate notes_list here %}`` marker.

    Raises:
        Exception: whatever parsing a note raised, after the offending page
            has been dumped to stderr.
    """
    sys.stderr.write("generating notes list\n")
    notes_dir = os.path.join(BUILDDIR, "notes")
    # A site without notes is not an error.
    names = sorted(os.listdir(notes_dir), reverse=True) if os.path.isdir(notes_dir) else []

    rows = []
    for name in names:
        if not _DATED_HTML_RE.match(name):
            continue
        htmlpath = os.path.join(notes_dir, name)
        try:
            with open(htmlpath, encoding="utf-8") as htmlfile:
                soup = bs4.BeautifulSoup(htmlfile.read(), "lxml")
            post_date = soup.find("meta", attrs={"name": "date"})["content"]
            rows.append((dateutil.parser.parse(post_date), soup.title.text,
                         "/notes/%s" % name))
        except Exception:
            sys.stderr.write("error: failed to generate feed entry from %s\n" % name)
            with open(htmlpath, encoding="utf-8") as htmlfile:
                sys.stderr.write("dumping HTML:%s\n\n" % htmlfile.read())
            raise

    # NOTE(review): container markup reconstructed -- TODO confirm.
    listing = _render_dated_index(rows, _html_title_cell,
                                  '<div class="blog-index" id="toc">\n\n')
    _substitute_marker("build/notes/index.html", '{% generate notes_list here %}', listing)


def generate_index(feed):
    """Generate index.html from index.md and a TOC.

    The TOC (one table per year, built from ``feed.entries`` in reverse
    chronological order) is appended to the content of ``INDEXMD`` in a
    temporary file, which pandoc renders to ``INDEXHTML`` with
    ``HTMLTEMPLATE``.  A pandoc failure is reported on stderr, not raised.
    """
    sys.stderr.write("generating index.html\n")

    rows = [(entry.updated_datetime, entry.title_text, entry.relpath)
            for entry in feed.entries]
    # Titles are Markdown links here because the result goes through pandoc.
    toc = _render_dated_index(rows, _markdown_title_cell,
                              '<div class="blog-index" id="toc">')

    # create tempfile with index.md and the TOC concatenated, and generate index.html from that
    # pylint: disable=invalid-name
    fd, tmppath = tempfile.mkstemp()
    os.close(fd)
    try:
        with open(tmppath, 'w', encoding='utf-8') as tmpfile:
            if os.path.exists(INDEXMD):
                with open(INDEXMD, 'r', encoding='utf-8') as indexmd:
                    # NOTE(review): separator tag reconstructed -- TODO confirm.
                    tmpfile.write(u"%s\n\n<hr>\n\n" % indexmd.read())
            tmpfile.write("%s\n" % toc)

        pandoc_args = [
            "pandoc", tmppath,
            "--template", HTMLTEMPLATE,
            "--highlight-style=pygments",
            "-o", INDEXHTML,
        ]
        try:
            subprocess.check_call(pandoc_args)
        except subprocess.CalledProcessError:
            sys.stderr.write("error: failed to generate index.html\n")
    finally:
        # also runs when pandoc cannot be started at all
        os.remove(tmppath)


def generate_sitemap(feed):
    """Generate sitemap.xml.

    Lists the home page, every top-level HTML page of ``BUILDDIR`` (except
    ``index.html`` and Google's site ownership verification file) and every
    feed entry.
    """
    sitemap = ET.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")

    # index
    sitemap.append(utils.make_sitemap_url_element(BLOG_HOME, feed.updated, "daily", 1.0))

    # other top level pages
    for name in os.listdir(BUILDDIR):
        if (not name.endswith(".html") or name == "index.html" or
                re.match(r"google[a-z0-9]+\.html", name)):
            # exclude Google's site ownership verification file
            continue
        link = urllib.parse.urljoin(BLOG_HOME, name)
        fullpath = os.path.join(BUILDDIR, name)
        # try to extract updated time
        updated = None
        with open(fullpath, encoding="utf-8") as htmlobj:
            soup = bs4.BeautifulSoup(htmlobj.read(), "lxml")
        if soup.footer is not None:
            updated_tag = soup.footer.find(attrs={"class": "updated"})
            if updated_tag is not None:
                updated = dateutil.parser.parse(updated_tag.text)
        sitemap.append(utils.make_sitemap_url_element(link, updated, "monthly", 0.9))

    # blog entries
    for entry in feed.entries:
        sitemap.append(utils.make_sitemap_url_element(entry.link, entry.updated, "monthly", 0.9))

    sitemappath = os.path.join(BUILDDIR, "sitemap.xml")
    with open(sitemappath, "w", encoding="utf-8") as sitemapfile:
        # NOTE(review): XML declaration reconstructed -- TODO confirm.
        sitemapfile.write('<?xml version="1.0" encoding="UTF-8"?>\n%s\n' %
                          ET.tostring(sitemap).decode('utf-8'))
    sys.stderr.write("wrote sitemap.xml\n")


def _decorate_headings(soup, tag_name, skip_marker, left_text, right_text, title_format):
    """Wrap the text of each ``tag_name`` heading of ``soup`` in three spans.

    Headings whose markup contains ``skip_marker``, spans several lines, or
    has no single text child are left alone.  Returns a list of
    ``(original_markup, decorated_markup)`` pairs.
    """
    replacements = []
    for heading in soup.find_all(tag_name):
        original_markup = str(heading)
        if skip_marker in original_markup or "\n" in original_markup:
            continue
        if heading.string is None:
            # empty heading or nested markup: no plain title to decorate
            continue
        title = str(heading.string)
        heading['class'] = tag_name
        heading.clear()
        for css_class, text in (("left-" + tag_name, left_text),
                                ("title-" + tag_name, title_format % title),
                                ("right-" + tag_name, right_text)):
            span = soup.new_tag("span")
            span['class'] = css_class
            span.string = text
            heading.append(span)
        replacements.append((original_markup, str(heading)))
    return replacements


def rewrite_title():
    """Override the title of some page for a better render.

    Decorates the ``h1``/``h2`` headings of the index pages and of every page
    found directly in a ``website``, ``Documents``, ``notes`` or ``blog``
    directory of ``BUILDDIR``.  Any line containing the original markup of a
    collected heading is replaced, as a whole, by the decorated heading.
    """
    sys.stderr.write("Overriding some titles\n")

    filenames = ['build/index.html', 'build/blog/index.html', 'build/notes/index.html']
    section_root = re.compile(r'(?:website|Documents|notes|blog)$')
    for root, _dirs, files in os.walk(BUILDDIR):
        if not section_root.search(root):
            continue
        for name in files:
            if not name.endswith(".html"):
                continue
            pieces = root.split(BUILDDIR)
            if len(pieces) > 1:
                filenames.append("build" + os.path.join(pieces[1], name))
            else:
                filenames.append(name)

    # Keep the first occurrence of each existing file: the index pages are
    # listed explicitly above and found again by the walk.
    seen = set()
    existing = []
    for filename in filenames:
        if filename not in seen and os.path.exists(filename):
            seen.add(filename)
            existing.append(filename)

    h1_replacements = []
    h2_replacements = []
    for filename in existing:
        with open(filename, encoding='utf-8') as handle:
            soup = bs4.BeautifulSoup(handle.read(), "lxml")
        h1_replacements.extend(_decorate_headings(
            soup, "h1", "article-title", "█▓▒░", "░▒▓█", "「%s」"))
        h2_replacements.extend(_decorate_headings(
            soup, "h2", "blog-index-year-title", ".: ", " :.", "%s"))

    # h2 matches take precedence over h1 matches, the last match wins.
    replacements = h1_replacements + h2_replacements
    for filename in existing:
        rewritten = []
        for line in _read_lines(filename):
            decorated = ""
            for original_markup, new_markup in replacements:
                if original_markup in line:
                    decorated = new_markup
            # The decorated heading replaces the whole line, line end
            # included, exactly as the previous regex substitution did.
            rewritten.append(decorated if decorated != "" else line)
        _write_lines(filename, rewritten)


def _new_atom_feed():
    """Return an ``AtomFeed`` initialised from the site configuration."""
    # pylint: disable=attribute-defined-outside-init
    feed = AtomFeed()
    # NOTE(review): element names reconstructed (Atom person construct).
    feed.author = ET.fromstring(
        "<author>"
        "<name>{author}</name>"
        "<uri>{home}</uri>"
        "<email>{email}</email>"
        "</author>".format(author=AUTHOR, home=BLOG_HOME, email=AUTHOR_EMAIL))
    feed.generator = ET.Element("generator", uri=GENERATOR_HOME_PAGE)
    feed.generator.text = GENERATOR_NAME
    if ATOM_ICON_PATH is not None:
        feed.icon = ET.Element("icon")
        feed.icon.text = urllib.parse.urljoin(BLOG_HOME, ATOM_ICON_PATH)
    feed.id_text = BLOG_HOME
    feed.id = ET.Element("id")
    feed.id.text = feed.id_text
    feed.links = [
        ET.Element("link", href=urllib.parse.urljoin(BLOG_HOME, "atom.xml"),
                   rel="self", type="application/atom+xml"),
        ET.Element("link", href=BLOG_HOME, rel="alternate", type="text/html"),
    ]
    feed.title_text = BLOG_TITLE
    # NOTE(review): element names/attributes reconstructed -- TODO confirm.
    feed.title = ET.fromstring("<title type='html'>{title}</title>".format(title=BLOG_TITLE))
    feed.subtitle_text = BLOG_DESCRIPTION
    feed.subtitle = ET.fromstring("<subtitle type='html'>{subtitle}</subtitle>"
                                  .format(subtitle=BLOG_DESCRIPTION))
    return feed


def _new_rss_feed():
    """Return an ``RssFeed`` initialised from the site configuration."""
    # pylint: disable=attribute-defined-outside-init,invalid-name
    rss = RssFeed()
    rss.rssurl = urllib.parse.urljoin(BLOG_HOME, "rss.xml")
    rss.title = ET.Element("title")
    rss.title.text = BLOG_TITLE
    rss.link = ET.Element("link")
    rss.link.text = BLOG_HOME
    rss.description = ET.Element("description")
    rss.description.text = BLOG_DESCRIPTION
    rss.language = ET.Element("language")
    rss.language.text = LANGUAGE
    rss.author_text = "{email} ({name})".format(email=AUTHOR_EMAIL, name=AUTHOR)
    rss.managingEditor = ET.Element("managingEditor")
    rss.managingEditor.text = rss.author_text
    rss.webMaster = ET.Element("webMaster")
    rss.webMaster.text = rss.author_text
    rss.generator = ET.Element("generator")
    rss.generator.text = "{generator} ({url})".format(generator=GENERATOR_NAME,
                                                      url=GENERATOR_HOME_PAGE)
    rss.image = ET.Element("image")
    if RSS_ICON_PATH is not None:
        ET.SubElement(rss.image, "url").text = urllib.parse.urljoin(BLOG_HOME, RSS_ICON_PATH)
        rss.image.append(copy.deepcopy(rss.title))
        rss.image.append(copy.deepcopy(rss.link))
        ET.SubElement(rss.image, "width").text = str(RSS_ICON_WIDTH)
        ET.SubElement(rss.image, "height").text = str(RSS_ICON_HEIGHT)
    return rss


def _build_feed_entry(name, htmlpath, feed, rss):
    """Build the atom entry and rss item of post ``name`` read from ``htmlpath``.

    Returns an ``(entry, item)`` pair; neither is added to ``feed``/``rss``.
    """
    # pylint: disable=attribute-defined-outside-init,invalid-name
    entry = AtomEntry()
    item = RssItem()
    with open(htmlpath, encoding="utf-8") as htmlfile:
        soup = bs4.BeautifulSoup(htmlfile.read(), "lxml")

    # generate atom entry
    entry.author = copy.deepcopy(feed.author)  # assume it's always the same author
    entry_url = urllib.parse.urljoin(BLOG_HOME, "blog/%s" % name)
    entry.id_text = entry_url
    entry.id = ET.Element("id")
    entry.id.text = entry_url
    entry.relpath = "/blog/%s" % name
    entry.link = ET.Element("link", href=entry_url)
    entry.title_text = soup.title.text
    entry.title = ET.Element("title", type="html")
    entry.title.text = entry.title_text
    post_date = soup.find("meta", attrs={"name": "date"})["content"]
    entry.updated_datetime = dateutil.parser.parse(post_date)
    entry.updated = ET.Element("updated")
    # pylint: disable=no-member
    entry.updated.text = entry.updated_datetime.isoformat()

    # process content: the article header, line numbers and scripts are
    # removed, but only after internal links have been made absolute
    article = soup.article
    tags_to_remove = []
    if article.header is not None:
        tags_to_remove.append(article.header)
    tags_to_remove.extend(article.find_all("span", attrs={"class": "line-number"}))
    tags_to_remove.extend(article.find_all("script"))
    utils.absolutify_links(article, entry_url)
    for tag in tags_to_remove:
        tag.extract()
    entry.content_html = ''.join([str(content) for content in article.contents])
    entry.content = ET.Element("content", type="html")
    entry.content.text = ET.CDATA(entry.content_html)
    entry.assemble_entry()

    # generate rss item
    item.title = ET.Element("title")
    item.title.text = entry.title_text
    item.link = ET.Element("link")
    item.link.text = entry_url
    item.description = ET.Element("description")
    item.description.text = ET.CDATA(entry.content_html)
    item.author = ET.Element("author")
    item.author.text = rss.author_text
    item.guid = ET.Element("guid", isPermaLink="true")
    item.guid.text = item.link.text
    item.timestamp = entry.updated_datetime.timestamp()
    item.pubDate = ET.Element("pubDate")
    item.pubDate.text = email.utils.formatdate(item.timestamp, usegmt=True)
    item.assemble_item()
    return entry, item


def generate_index_and_feed():
    """Generate index.html and feeds (atom and rss).

    Builds one atom entry and one rss item per dated page of
    ``BUILDDIR/blog``, then runs the page generators (index, menu, table,
    blog and notes lists, title rewriting) and finally writes ``ATOM``,
    ``RSS`` and the sitemap.

    Raises:
        Exception: whatever building a feed entry raised, after the offending
            page has been dumped to stderr.
    """
    # pylint: disable=too-many-statements,attribute-defined-outside-init,invalid-name
    sys.stderr.write("generating atom and rss feeds\n")
    feed = _new_atom_feed()
    rss = _new_rss_feed()

    # update times will be set after everything finishes
    blog_dir = os.path.join(BUILDDIR, "blog")
    for name in os.listdir(blog_dir):
        if not _DATED_HTML_RE.match(name):
            continue
        htmlpath = os.path.join(blog_dir, name)
        try:
            entry, item = _build_feed_entry(name, htmlpath, feed, rss)
        except Exception:
            sys.stderr.write("error: failed to generate feed entry from %s\n" % name)
            with open(htmlpath, encoding="utf-8") as htmlfile:
                sys.stderr.write("dumping HTML:%s\n\n" % htmlfile.read())
            raise
        feed.entries.append(entry)
        rss.items.append(item)

    # sort entries by reverse chronological order
    feed.entries.sort(key=lambda entry: entry.updated_datetime, reverse=True)
    rss.items.sort(key=lambda item: item.timestamp, reverse=True)

    generate_index(feed)
    generate_menu()
    generate_table()
    generate_blog_list(feed)
    generate_notes_list()
    rewrite_title()

    feed.updated_datetime = utils.current_datetime()
    feed.updated = ET.Element("updated")
    feed.updated.text = feed.updated_datetime.isoformat()

    rss.update_timestamp = time.time()
    rss.pubDate = ET.Element("pubDate")
    rss.pubDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True)
    rss.lastBuildDate = ET.Element("lastBuildDate")
    rss.lastBuildDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True)

    with open(ATOM, "w", encoding="utf-8") as atom:
        atom.write("%s\n" % feed.dump_feed(FEED_MAX_ENTRIES))
        sys.stderr.write("wrote atom.xml\n")

    with open(RSS, "w", encoding="utf-8") as rssxml:
        rssxml.write("%s\n" % rss.dump_rss(FEED_MAX_ENTRIES))
        sys.stderr.write("wrote rss.xml\n")

    generate_sitemap(feed)