From dbbe906685b86706b9af03dd3f9799a67613e40d Mon Sep 17 00:00:00 2001 From: neodarz Date: Sun, 26 May 2019 16:15:42 +0200 Subject: Move generate_index_and_feed to external file --- pyblog | 168 +---------------------------------------------------------------- 1 file changed, 1 insertion(+), 167 deletions(-) (limited to 'pyblog') diff --git a/pyblog b/pyblog index 11ea868b..3d443482 100755 --- a/pyblog +++ b/pyblog @@ -51,172 +51,6 @@ from config.config import * from generators import generators - -def generate_index_and_feed(): - """Generate index.html and feeds (atom and rss).""" - # pylint: disable=too-many-statements,attribute-defined-outside-init,invalid-name - sys.stderr.write("generating atom and rss feeds\n") - # initialize atom feed - feed = AtomFeed() - feed.author = ET.fromstring( - "" - "{author}" - "{home}" - "{email}" - "".format(author=AUTHOR, home=BLOG_HOME, email=AUTHOR_EMAIL)) - feed.generator = ET.Element("generator", uri=GENERATOR_HOME_PAGE) - feed.generator.text = GENERATOR_NAME - if ATOM_ICON_PATH is not None: - feed.icon = ET.Element("icon") - feed.icon.text = urllib.parse.urljoin(BLOG_HOME, ATOM_ICON_PATH) - feed.id_text = BLOG_HOME - feed.id = ET.Element("id") - feed.id.text = feed.id_text - feed.links = [ - ET.Element("link", href=urllib.parse.urljoin(BLOG_HOME, "atom.xml"), rel="self", - type="application/atom+xml"), - ET.Element("link", href=BLOG_HOME, rel="alternate", - type="text/html"), - ] - feed.title_text = BLOG_TITLE - feed.title = ET.fromstring("{title}".format(title=BLOG_TITLE)) - feed.subtitle_text = BLOG_DESCRIPTION - feed.subtitle = ET.fromstring("{subtitle}" - .format(subtitle=BLOG_DESCRIPTION)) - # initialize rss feed - rss = RssFeed() - rss.rssurl = urllib.parse.urljoin(BLOG_HOME, "rss.xml") - rss.title = ET.Element("title") - rss.title.text = BLOG_TITLE - rss.link = ET.Element("link") - rss.link.text = BLOG_HOME - rss.description = ET.Element("description") - rss.description.text = BLOG_DESCRIPTION - rss.language = ET.Element("language") - rss.language.text = LANGUAGE - rss.author_text = "{email} ({name})".format(email=AUTHOR_EMAIL, name=AUTHOR) - rss.managingEditor = ET.Element("managingEditor") - rss.managingEditor.text = rss.author_text - rss.webMaster = ET.Element("webMaster") - rss.webMaster.text = rss.author_text - rss.generator = ET.Element("generator") - rss.generator.text = "{generator} ({url})".format(generator=GENERATOR_NAME, - url=GENERATOR_HOME_PAGE) - rss.image = ET.Element("image") - if RSS_ICON_PATH is not None: - ET.SubElement(rss.image, "url").text = urllib.parse.urljoin(BLOG_HOME, RSS_ICON_PATH) - rss.image.append(copy.deepcopy(rss.title)) - rss.image.append(copy.deepcopy(rss.link)) - ET.SubElement(rss.image, "width").text = str(RSS_ICON_WIDTH) - ET.SubElement(rss.image, "height").text = str(RSS_ICON_HEIGHT) - - # update times will be set after everthing finishes - - for name in os.listdir(os.path.join(BUILDDIR, "blog")): - if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}.*\.html", name): - htmlpath = os.path.join(BUILDDIR, "blog", name) - entry = AtomEntry() - item = RssItem() - try: - with open(htmlpath, encoding="utf-8") as htmlfile: - soup = bs4.BeautifulSoup(htmlfile.read(), "lxml") - - # generate atom entry - entry.author = copy.deepcopy(feed.author) # assume it's always the same author - entry_url = urllib.parse.urljoin(BLOG_HOME, "blog/%s" % name) - entry.id_text = entry_url - entry.id = ET.Element("id") - entry.id.text = entry_url - entry.relpath = "/blog/%s" % name - entry.link = ET.Element("link", href=entry_url) - entry.title_text = soup.title.text - entry.title = ET.Element("title", type="html") - entry.title.text = entry.title_text - post_date = soup.find("meta", attrs={"name": "date"})["content"] - entry.updated_datetime = dateutil.parser.parse(post_date) - entry.updated = ET.Element("updated") - # pylint: disable=no-member - entry.updated.text = entry.updated_datetime.isoformat() - - # process content - tags_to_remove = [] - # mark header and footer for removal - article = soup.article - if article.header is not None: - tags_to_remove.append(article.header) - # mark line numbers for removal - for line_number_span in article.find_all("span", - attrs={"class": "line-number"}): - tags_to_remove.append(line_number_span) - # mark script tags for removal - for script_tag in article.find_all("script"): - tags_to_remove.append(script_tag) - # make internal links absolute - utils.absolutify_links(article, entry_url) - # remove marked tags - for tag in tags_to_remove: - tag.extract() - - entry.content_html = ''.join([str(content) - for content in article.contents]) - entry.content = ET.Element("content", type="html") - entry.content.text = ET.CDATA(entry.content_html) - entry.assemble_entry() - feed.entries.append(entry) - - # generate rss item - item.title = ET.Element("title") - item.title.text = entry.title_text - item.link = ET.Element("link") - item.link.text = entry_url - item.description = ET.Element("description") - item.description.text = ET.CDATA(entry.content_html) - item.author = ET.Element("author") - item.author.text = rss.author_text - item.guid = ET.Element("guid", isPermaLink="true") - item.guid.text = item.link.text - item.timestamp = entry.updated_datetime.timestamp() - item.pubDate = ET.Element("pubDate") - item.pubDate.text = email.utils.formatdate(item.timestamp, usegmt=True) - item.assemble_item() - rss.items.append(item) - except Exception: - sys.stderr.write("error: failed to generate feed entry from %s\n" % name) - with open(htmlpath, encoding="utf-8") as htmlfile: - sys.stderr.write("dumping HTML:%s\n\n" % htmlfile.read()) - raise - # sort entries by reverse chronological order - feed.entries.sort(key=lambda entry: entry.updated_datetime, reverse=True) - rss.items.sort(key=lambda item: item.timestamp, reverse=True) - - generators.generate_index(feed) - generators.generate_menu() - generators.generate_table() - generators.generate_blog_list(feed) - generators.generate_notes_list() - generators.rewrite_title() - - feed.updated_datetime = utils.current_datetime() - feed.updated = ET.Element("updated") - feed.updated.text = feed.updated_datetime.isoformat() - - rss.update_timestamp = time.time() - rss.pubDate = ET.Element("pubDate") - rss.pubDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True) - rss.lastBuildDate = ET.Element("lastBuildDate") - rss.lastBuildDate.text = email.utils.formatdate(rss.update_timestamp, usegmt=True) - - with open(ATOM, "w", encoding="utf-8") as atom: - atom.write("%s\n" % feed.dump_feed(FEED_MAX_ENTRIES)) - sys.stderr.write("wrote atom.xml\n") - - with open(RSS, "w", encoding="utf-8") as rssxml: - rssxml.write("%s\n" % rss.dump_rss(FEED_MAX_ENTRIES)) - sys.stderr.write("wrote rss.xml\n") - - generators.generate_sitemap(feed) - - # exclude_list is only inialized once to avoid constant disk IO @utils.static_vars(exclude_list=None) def generate_blog(fresh=False, report_total_errors=True): @@ -347,7 +181,7 @@ def generate_blog(fresh=False, report_total_errors=True): utils.postprocess_html_file(dstpath) if anything_modified: - generate_index_and_feed() + generators.generate_index_and_feed() sys.stderr.write("done\n") if report_total_errors: -- cgit v1.2.1