aboutsummaryrefslogtreecommitdiff
path: root/pyblog
diff options
context:
space:
mode:
Diffstat (limited to 'pyblog')
-rwxr-xr-x pyblog 168
1 files changed, 1 insertions, 167 deletions
diff --git a/pyblog b/pyblog
index 11ea868b..3d443482 100755
--- a/pyblog
+++ b/pyblog
@@ -51,172 +51,6 @@ from config.config import *
from generators import generators
-
-def generate_index_and_feed():
- """Generate index.html and feeds (atom and rss)."""
- # pylint: disable=too-many-statements,attribute-defined-outside-init,invalid-name
- sys.stderr.write("generating atom and rss feeds\n")
- # initialize atom feed
- feed = AtomFeed()
- feed.author = ET.fromstring(
- "<author>"
- "<name>{author}</name>"
- "<uri>{home}</uri>"
- "<email>{email}</email>"
- "</author>".format(author=AUTHOR, home=BLOG_HOME, email=AUTHOR_EMAIL))
- feed.generator = ET.Element("generator", uri=GENERATOR_HOME_PAGE)
- feed.generator.text = GENERATOR_NAME
- if ATOM_ICON_PATH is not None:
- feed.icon = ET.Element("icon")
- feed.icon.text = urllib.parse.urljoin(BLOG_HOME, ATOM_ICON_PATH)
- feed.id_text = BLOG_HOME
- feed.id = ET.Element("id")
- feed.id.text = feed.id_text
- feed.links = [
- ET.Element("link", href=urllib.parse.urljoin(BLOG_HOME, "atom.xml"), rel="self",
- type="application/atom+xml"),
- ET.Element("link", href=BLOG_HOME, rel="alternate",
- type="text/html"),
- ]
- feed.title_text = BLOG_TITLE
- feed.title = ET.fromstring("<title>{title}</title>".format(title=BLOG_TITLE))
- feed.subtitle_text = BLOG_DESCRIPTION
- feed.subtitle = ET.fromstring("<subtitle>{subtitle}</subtitle>"
- .format(subtitle=BLOG_DESCRIPTION))
- # initialize rss feed
- rss = RssFeed()
- rss.rssurl = urllib.parse.urljoin(BLOG_HOME, "rss.xml")
- rss.title = ET.Element("title")
- rss.title.text = BLOG_TITLE
- rss.link = ET.Element("link")
- rss.link.text = BLOG_HOME
- rss.description = ET.Element("description")
- rss.description.text = BLOG_DESCRIPTION
- rss.language = ET.Element("language")
- rss.language.text = LANGUAGE
- rss.author_text = "{email} ({name})".format(email=AUTHOR_EMAIL, name=AUTHOR)
- rss.managingEditor = ET.Element("managingEditor")
- rss.managingEditor.text = rss.author_text
- rss.webMaster = ET.Element("webMaster")
- rss.webMaster.text = rss.author_text
- rss.generator = ET.Element("generator")
- rss.generator.text = "{generator} ({url})".format(generator=GENERATOR_NAME,
- url=GENERATOR_HOME_PAGE)
- rss.image = ET.Element("image")
- if RSS_ICON_PATH is not None:
- ET.SubElement(rss.image, "url").text = urllib.parse.urljoin(BLOG_HOME, RSS_ICON_PATH)
- rss.image.append(copy.deepcopy(rss.title))
- rss.image.append(copy.deepcopy(rss.link))
- ET.SubElement(rss.image, "width").text = str(RSS_ICON_WIDTH)
- ET.SubElement(rss.image, "height").text = str(RSS_ICON_HEIGHT)
-
- # update times will be set after everything finishes
-
- for name in os.listdir(os.path.join(BUILDDIR, "blog")):
- if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html", name):
- htmlpath = os.path.join(BUILDDIR, "blog", name)
- entry = AtomEntry()
- item = RssItem()
- try:
- with open(htmlpath, encoding="utf-8") as htmlfile:
- soup = bs4.BeautifulSoup(htmlfile.read(), "lxml")
-
- # generate atom entry
- entry.author = copy.deepcopy(feed.author) # assume it's always the same author
- entry_url = urllib.parse.urljoin(BLOG_HOME, "blog/%s" % name)
- entry.id_text = entry_url
- entry.id = ET.Element("id")
- entry.id.text = entry_url
- entry.relpath = "/blog/%s" % name
- entry.link = ET.Element("link", href=entry_url)
- entry.title_text = soup.title.text
- entry.title = ET.Element("title", type="html")
- entry.title.text = entry.title_text
- post_date = soup.find("meta", attrs={"name": "date"})["content"]
- entry.updated_datetime = dateutil.parser.parse(post_date)
- entry.updated = ET.Element("updated")
- # pylint: disable=no-member
- entry.updated.text = entry.updated_datetime.isoformat()
-
- # process content
- tags_to_remove = []
- # mark header and footer for removal
- article = soup.article
- if article.header is not None:
- tags_to_remove.append(article.header)
- # mark line numbers for removal
- for line_number_span in article.find_all("span",
- attrs={"class": "line-number"}):
- tags_to_remove.append(line_number_span)
- # mark script tags for removal
- for script_tag in article.find_all("script"):
- tags_to_remove.append(script_tag)
- # make internal links absolute
- utils.absolutify_links(article, entry_url)
- # remove marked tags
- for tag in tags_to_remove:
- tag.extract()
-
- entry.content_html = ''.join([str(content)
- for content in article.contents])
- entry.content = ET.Element("content", type="html")
- entry.content.text = ET.CDATA(entry.content_html)
- entry.assemble_entry()
- feed.entries.append(entry)
-
- # generate rss item
- item.title = ET.Element("title")
- item.title.text = entry.title_text
- item.link = ET.Element("link")
- item.link.text = entry_url
- item.description = ET.Element("description")
- item.description.text = ET.CDATA(entry.content_html)
- item.author = ET.Element("author")
- item.author.text = rss.author_text
- item.guid = ET.Element("guid", isPermaLink="true")
- item.guid.text = item.link.text
- item.timestamp = entry.updated_datetime.timestamp()
- item.pubDate = ET.Element("pubDate")
- item.pubDate.text = email.utils.formatdate(item.timestamp, usegmt=True)
- item.assemble_item()
- rss.items.append(item)
- except Exception:
- sys.stderr.write("error: failed to generate feed entry from %s\n" % name)
- with open(htmlpath, encoding="utf-8") as htmlfile:
- sys.stderr.write("dumping HTML:%s\n\n" % htmlfile.read())
- raise
- # sort entries by reverse chronological order
- feed.entries.sort(key=lambda entry: entry.updated_datetime, reverse=True)
- rss.items.sort(key=lambda item: item.timestamp, reverse=True)
-
- generators.generate_index(feed)
- generators.generate_menu()
- generators.generate_table()
- generators.generate_blog_list(feed)
- generators.generate_notes_list()
- generators.rewrite_title()
-
- feed.updated_datetime = utils.current_datetime()
- feed.updated = ET.Element("updated")
- feed.updated.text = feed.updated_datetime.isoformat()
-
- rss.update_timestamp = time.time()
- rss.pubDate = ET.Element("pubDate")
- rss.pubDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True)
- rss.lastBuildDate = ET.Element("lastBuildDate")
- rss.lastBuildDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True)
-
- with open(ATOM, "w", encoding="utf-8") as atom:
- atom.write("%s\n" % feed.dump_feed(FEED_MAX_ENTRIES))
- sys.stderr.write("wrote atom.xml\n")
-
- with open(RSS, "w", encoding="utf-8") as rssxml:
- rssxml.write("%s\n" % rss.dump_rss(FEED_MAX_ENTRIES))
- sys.stderr.write("wrote rss.xml\n")
-
- generators.generate_sitemap(feed)
-
-
# exclude_list is only initialized once to avoid constant disk IO
@utils.static_vars(exclude_list=None)
def generate_blog(fresh=False, report_total_errors=True):
@@ -347,7 +181,7 @@ def generate_blog(fresh=False, report_total_errors=True):
utils.postprocess_html_file(dstpath)
if anything_modified:
- generate_index_and_feed()
+ generators.generate_index_and_feed()
sys.stderr.write("done\n")
if report_total_errors: