aboutsummaryrefslogtreecommitdiff
path: root/generators/generators.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--generators/generators.py172
1 files changed, 172 insertions, 0 deletions
diff --git a/generators/generators.py b/generators/generators.py
index 9ad773c2..adfe819a 100644
--- a/generators/generators.py
+++ b/generators/generators.py
@@ -13,6 +13,13 @@ import subprocess
import lxml.etree as ET
+import urllib.parse
+import copy
+import email.utils
+import time
+
+from rss import *
+
from config.config import *
from utils import utils
@@ -517,3 +524,168 @@ def rewrite_title():
indexmd.write(re.sub(line, string, line))
else:
indexmd.write(line)
+
+
+def generate_index_and_feed():
+ """Generate index.html and feeds (atom and rss)."""
+ # pylint: disable=too-many-statements,attribute-defined-outside-init,invalid-name
+ sys.stderr.write("generating atom and rss feeds\n")
+ # initialize atom feed metadata: author, generator, optional icon, id, links, title, subtitle
+ feed = AtomFeed()
+ feed.author = ET.fromstring(
+ "<author>"
+ "<name>{author}</name>"
+ "<uri>{home}</uri>"
+ "<email>{email}</email>"
+ "</author>".format(author=AUTHOR, home=BLOG_HOME, email=AUTHOR_EMAIL))
+ feed.generator = ET.Element("generator", uri=GENERATOR_HOME_PAGE)
+ feed.generator.text = GENERATOR_NAME
+ if ATOM_ICON_PATH is not None:
+ feed.icon = ET.Element("icon")
+ feed.icon.text = urllib.parse.urljoin(BLOG_HOME, ATOM_ICON_PATH)
+ feed.id_text = BLOG_HOME
+ feed.id = ET.Element("id")
+ feed.id.text = feed.id_text
+ feed.links = [
+ ET.Element("link", href=urllib.parse.urljoin(BLOG_HOME, "atom.xml"), rel="self",
+ type="application/atom+xml"),
+ ET.Element("link", href=BLOG_HOME, rel="alternate",
+ type="text/html"),
+ ]
+ feed.title_text = BLOG_TITLE
+ feed.title = ET.fromstring("<title>{title}</title>".format(title=BLOG_TITLE))
+ feed.subtitle_text = BLOG_DESCRIPTION
+ feed.subtitle = ET.fromstring("<subtitle>{subtitle}</subtitle>"
+ .format(subtitle=BLOG_DESCRIPTION))
+ # initialize rss feed metadata: title, link, description, language, editors, generator, image
+ rss = RssFeed()
+ rss.rssurl = urllib.parse.urljoin(BLOG_HOME, "rss.xml")
+ rss.title = ET.Element("title")
+ rss.title.text = BLOG_TITLE
+ rss.link = ET.Element("link")
+ rss.link.text = BLOG_HOME
+ rss.description = ET.Element("description")
+ rss.description.text = BLOG_DESCRIPTION
+ rss.language = ET.Element("language")
+ rss.language.text = LANGUAGE
+ rss.author_text = "{email} ({name})".format(email=AUTHOR_EMAIL, name=AUTHOR)
+ rss.managingEditor = ET.Element("managingEditor")
+ rss.managingEditor.text = rss.author_text
+ rss.webMaster = ET.Element("webMaster")
+ rss.webMaster.text = rss.author_text
+ rss.generator = ET.Element("generator")
+ rss.generator.text = "{generator} ({url})".format(generator=GENERATOR_NAME,
+ url=GENERATOR_HOME_PAGE)
+ rss.image = ET.Element("image")
+ if RSS_ICON_PATH is not None:
+ ET.SubElement(rss.image, "url").text = urllib.parse.urljoin(BLOG_HOME, RSS_ICON_PATH)
+ rss.image.append(copy.deepcopy(rss.title))
+ rss.image.append(copy.deepcopy(rss.link))
+ ET.SubElement(rss.image, "width").text = str(RSS_ICON_WIDTH)
+ ET.SubElement(rss.image, "height").text = str(RSS_ICON_HEIGHT)
+
+ # feed-level update times are set near the end of this function, after everything else finishes
+
+ for name in os.listdir(os.path.join(BUILDDIR, "blog")):
+ if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html", name):  # only names starting with YYYY-MM-DD
+ htmlpath = os.path.join(BUILDDIR, "blog", name)
+ entry = AtomEntry()
+ item = RssItem()
+ try:
+ with open(htmlpath, encoding="utf-8") as htmlfile:
+ soup = bs4.BeautifulSoup(htmlfile.read(), "lxml")
+
+ # generate atom entry: id and link come from the post URL, title and date from the HTML
+ entry.author = copy.deepcopy(feed.author) # assume it's always the same author as the feed
+ entry_url = urllib.parse.urljoin(BLOG_HOME, "blog/%s" % name)
+ entry.id_text = entry_url
+ entry.id = ET.Element("id")
+ entry.id.text = entry_url
+ entry.relpath = "/blog/%s" % name
+ entry.link = ET.Element("link", href=entry_url)
+ entry.title_text = soup.title.text
+ entry.title = ET.Element("title", type="html")
+ entry.title.text = entry.title_text
+ post_date = soup.find("meta", attrs={"name": "date"})["content"]
+ entry.updated_datetime = dateutil.parser.parse(post_date)
+ entry.updated = ET.Element("updated")
+ # pylint: disable=no-member
+ entry.updated.text = entry.updated_datetime.isoformat()
+
+ # process content: collect unwanted tags inside <article>, fix links, then drop the tags
+ tags_to_remove = []
+ # mark the article header for removal (NOTE(review): a footer, if any, is not marked here)
+ article = soup.article
+ if article.header is not None:
+ tags_to_remove.append(article.header)
+ # mark line-number spans (class "line-number") for removal
+ for line_number_span in article.find_all("span",
+ attrs={"class": "line-number"}):
+ tags_to_remove.append(line_number_span)
+ # mark script tags for removal
+ for script_tag in article.find_all("script"):
+ tags_to_remove.append(script_tag)
+ # make internal links absolute (entry_url is passed, presumably as the base; confirm in utils)
+ utils.absolutify_links(article, entry_url)
+ # remove the marked tags from the parsed tree
+ for tag in tags_to_remove:
+ tag.extract()
+
+ entry.content_html = ''.join([str(content)
+ for content in article.contents])
+ entry.content = ET.Element("content", type="html")
+ entry.content.text = ET.CDATA(entry.content_html)  # HTML is embedded as a CDATA section
+ entry.assemble_entry()
+ feed.entries.append(entry)
+
+ # generate rss item, reusing the title, URL, content and timestamp of the atom entry
+ item.title = ET.Element("title")
+ item.title.text = entry.title_text
+ item.link = ET.Element("link")
+ item.link.text = entry_url
+ item.description = ET.Element("description")
+ item.description.text = ET.CDATA(entry.content_html)
+ item.author = ET.Element("author")
+ item.author.text = rss.author_text
+ item.guid = ET.Element("guid", isPermaLink="true")
+ item.guid.text = item.link.text
+ item.timestamp = entry.updated_datetime.timestamp()
+ item.pubDate = ET.Element("pubDate")
+ item.pubDate.text = email.utils.formatdate(item.timestamp, usegmt=True)
+ item.assemble_item()
+ rss.items.append(item)
+ except Exception:
+ sys.stderr.write("error: failed to generate feed entry from %s\n" % name)
+ with open(htmlpath, encoding="utf-8") as htmlfile:
+ sys.stderr.write("dumping HTML:%s\n\n" % htmlfile.read())
+ raise  # re-raise the original exception after dumping the offending HTML
+ # sort entries and items in reverse chronological order (newest first)
+ feed.entries.sort(key=lambda entry: entry.updated_datetime, reverse=True)
+ rss.items.sort(key=lambda item: item.timestamp, reverse=True)
+
+ generate_index(feed)  # the other generators below run only after the entries are sorted
+ generate_menu()
+ generate_table()
+ generate_blog_list(feed)
+ generate_notes_list()
+ rewrite_title()
+
+ feed.updated_datetime = utils.current_datetime()
+ feed.updated = ET.Element("updated")
+ feed.updated.text = feed.updated_datetime.isoformat()
+
+ rss.update_timestamp = time.time()
+ rss.pubDate = ET.Element("pubDate")
+ rss.pubDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True)
+ rss.lastBuildDate = ET.Element("lastBuildDate")
+ rss.lastBuildDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True)
+
+ with open(ATOM, "w", encoding="utf-8") as atom:
+ atom.write("%s\n" % feed.dump_feed(FEED_MAX_ENTRIES))
+ sys.stderr.write("wrote atom.xml\n")
+
+ with open(RSS, "w", encoding="utf-8") as rssxml:
+ rssxml.write("%s\n" % rss.dump_rss(FEED_MAX_ENTRIES))
+ sys.stderr.write("wrote rss.xml\n")
+
+ generate_sitemap(feed)