From 8e8d884d1c78641fbb967479bd4ddcb320da150e Mon Sep 17 00:00:00 2001
From: neodarz
Date: Mon, 30 Jul 2018 20:37:26 +0200
Subject: Move rss code to a package

AtomFeed.assemble_feed now appends atom:logo when a logo is set; it
previously appended atom:icon a second time and never emitted the logo.

---
 rss/__init__.py   |  4 ++++
 rss/atom_entry.py | 49 ++++++++++++++++++++++++++++++++++++++++
 rss/atom_feed.py  | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 rss/rss_feed.py   | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 rss/rss_item.py   | 34 ++++++++++++++++++++++++++++
 5 files changed, 218 insertions(+)
 create mode 100644 rss/__init__.py
 create mode 100644 rss/atom_entry.py
 create mode 100644 rss/atom_feed.py
 create mode 100644 rss/rss_feed.py
 create mode 100644 rss/rss_item.py

(limited to 'rss')

diff --git a/rss/__init__.py b/rss/__init__.py
new file mode 100644
index 00000000..c5d54da5
--- /dev/null
+++ b/rss/__init__.py
@@ -0,0 +1,4 @@
+from rss.atom_feed import AtomFeed
+from rss.atom_entry import AtomEntry
+from rss.rss_feed import RssFeed
+from rss.rss_item import RssItem
diff --git a/rss/atom_entry.py b/rss/atom_entry.py
new file mode 100644
index 00000000..4b6e116a
--- /dev/null
+++ b/rss/atom_entry.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+
+import lxml.etree as ET
+
+class AtomEntry(object):
+    """Class for storing atom:entry data and metadata.
+
+    The attributes commented as atom:* below are the child elements that
+    assemble_entry() appends; the other attributes hold related metadata.
+
+    """
+
+    # pylint: disable=invalid-name,too-many-instance-attributes
+
+    def __init__(self):
+        """Define available attributes."""
+        self.author = None  # atom:author
+        self.id_text = None  # atom:id, just use URI
+        self.id = None  # atom:id
+        self.relpath = None  # HTML page path relative to home
+        self.link = None  # atom:link
+        self.title_text = None  # plain text title
+        self.title = None  # atom:title
+        self.updated_datetime = None  # update time as a datetime object
+        self.updated = None  # atom:updated
+        self.content_html = None  # content as HTML markup
+        self.content = None  # atom:content
+        self.entry = None  # atom:entry, assembled
+
+    def assemble_entry(self):
+        """Assemble atom:entry and store it in self.entry.
+
+        The children are appended in the order title, link, updated, id,
+        author, content.
+
+        """
+        self.entry = ET.Element("entry")
+        self.entry.append(self.title)
+        self.entry.append(self.link)
+        self.entry.append(self.updated)
+        self.entry.append(self.id)
+        self.entry.append(self.author)
+        self.entry.append(self.content)
+
+    def dump_entry(self):
+        """Return atom:entry as an XML string; assemble it first if needed."""
+        if self.entry is None:
+            self.assemble_entry()
+        return ET.tostring(self.entry).decode("utf-8")
diff --git a/rss/atom_feed.py b/rss/atom_feed.py
new file mode 100644
index 00000000..622ecc08
--- /dev/null
+++ b/rss/atom_feed.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+
+import lxml.etree as ET
+
+class AtomFeed(object):
+    """Class for storing atom:feed data and metadata.
+
+    https://tools.ietf.org/html/rfc4287.
+
+    """
+
+    # pylint: disable=invalid-name,too-many-instance-attributes
+
+    def __init__(self):
+        """Define available attributes."""
+        self.author = None  # atom:author
+        self.generator = None  # atom:generator, optional
+        self.icon = None  # atom:icon, optional
+        self.logo = None  # atom:logo, optional
+        self.id_text = None  # atom:id, just use URI
+        self.id = None  # atom:id
+        self.links = []  # list of atom:link
+        self.title_text = None  # the text of atom:title
+        self.title = None  # atom:title
+        self.subtitle_text = None  # the text of atom:subtitle
+        self.subtitle = None  # atom:subtitle
+        self.updated_datetime = None  # update time as a datetime object
+        self.updated = None  # atom:updated
+        self.entries = []  # list of atom:entry, in reverse time order
+        self.feed = None  # atom:feed, assembled
+
+    def assemble_feed(self, FEED_MAX_ENTRIES):
+        """Assemble atom:feed and store it in self.feed.
+
+        FEED_MAX_ENTRIES is the maximum number of entries, taken from the
+        front of self.entries, to include in the feed.
+
+        """
+        # pylint: disable=multiple-statements
+        self.feed = ET.Element("feed", xmlns="http://www.w3.org/2005/Atom")
+        self.feed.append(self.title)
+        if self.subtitle is not None: self.feed.append(self.subtitle)
+        for link in self.links:
+            self.feed.append(link)
+        self.feed.append(self.updated)
+        self.feed.append(self.id)
+        self.feed.append(self.author)
+        if self.icon is not None: self.feed.append(self.icon)
+        if self.logo is not None: self.feed.append(self.logo)
+        if self.generator is not None: self.feed.append(self.generator)
+        # include at most FEED_MAX_ENTRIES entries in the feed
+        for entry in self.entries[:FEED_MAX_ENTRIES]:
+            self.feed.append(entry.entry)
+
+    def dump_feed(self, FEED_MAX_ENTRIES):
+        """Return atom:feed as an XML string.
+
+        The feed is assembled on first use only; once self.feed is set,
+        FEED_MAX_ENTRIES is ignored.
+
+        """
+        if self.feed is None:
+            self.assemble_feed(FEED_MAX_ENTRIES)
+        return ET.tostring(self.feed).decode("utf-8")
diff --git a/rss/rss_feed.py b/rss/rss_feed.py
new file mode 100644
index 00000000..5ed6226d
--- /dev/null
+++ b/rss/rss_feed.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+
+import lxml.etree as ET
+
+class RssFeed(object):
+    """Class for storing an RSS 2.0 feed.
+
+    https://validator.w3.org/feed/docs/rss2.html.
+
+    """
+
+    # pylint: disable=too-many-instance-attributes
+
+    REQUIRED_ELEMENTS = ["title", "link", "description"]
+    OPTIONAL_ELEMENTS = ["language", "copyright", "managingEditor", "webMaster",
+                         "pubDate", "lastBuildDate", "category", "generator",
+                         "docs", "cloud", "ttl", "image", "textInput",
+                         "skipHours", "skipDays"]
+
+    def __init__(self):
+        """Define available attributes."""
+        self.rssurl = None  # the URL of the rss feed
+        self.atomlink = None  # atom:link rel="self", created by assemble_rss
+        for element in self.REQUIRED_ELEMENTS:
+            setattr(self, element, None)
+        for element in self.OPTIONAL_ELEMENTS:
+            setattr(self, element, None)
+        self.docs = ET.Element("docs")
+        self.docs.text = "https://validator.w3.org/feed/docs/rss2.html"
+        self.author_text = None
+        self.update_timestamp = None
+        self.items = []
+        self.rss = None
+        self.channel = None
+
+    def assemble_rss(self, FEED_MAX_ENTRIES):
+        """Assemble RSS 2.0 feed and store it in self.rss.
+
+        FEED_MAX_ENTRIES is the maximum number of items, taken from the
+        front of self.items, to include in the channel.
+
+        """
+        self.rss = ET.Element("rss", version="2.0", nsmap={"atom": "http://www.w3.org/2005/Atom"})
+        self.channel = ET.SubElement(self.rss, "channel")
+        # https://validator.w3.org/feed/docs/warning/MissingAtomSelfLink.html
+        self.atomlink = ET.SubElement(self.channel, "{http://www.w3.org/2005/Atom}link",
+                                      href=self.rssurl, rel="self", type="application/rss+xml")
+        for element in self.REQUIRED_ELEMENTS:
+            self.channel.append(getattr(self, element))
+        for element in self.OPTIONAL_ELEMENTS:
+            attr = getattr(self, element)
+            if attr is not None:
+                self.channel.append(attr)
+        # include at most FEED_MAX_ENTRIES items in the RSS feed
+        for item in self.items[:FEED_MAX_ENTRIES]:
+            self.channel.append(item.item)
+
+    def dump_rss(self, FEED_MAX_ENTRIES):
+        """Return the RSS feed as an XML string.
+
+        The feed is assembled on first use only; once self.rss is set,
+        FEED_MAX_ENTRIES is ignored.
+
+        """
+        if self.rss is None:
+            self.assemble_rss(FEED_MAX_ENTRIES)
+        return ET.tostring(self.rss).decode("utf-8")
diff --git a/rss/rss_item.py b/rss/rss_item.py
new file mode 100644
index 00000000..8ead20e6
--- /dev/null
+++ b/rss/rss_item.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+
+import lxml.etree as ET
+
+class RssItem(object):
+    """Class for storing an RSS 2.0 item."""
+
+    ELEMENTS = ["title", "link", "description", "author", "category", "comments",
+                "enclosure", "guid", "pubDate", "source"]
+
+    def __init__(self):
+        """Define available attributes."""
+        for element in self.ELEMENTS:
+            setattr(self, element, None)
+        self.timestamp = None
+        self.item = None
+
+    def assemble_item(self):
+        """Assemble an RSS 2.0 item and store it in self.item.
+
+        Elements listed in ELEMENTS that are still None are skipped.
+
+        """
+        self.item = ET.Element("item")
+        for element in self.ELEMENTS:
+            attr = getattr(self, element)
+            if attr is not None:
+                self.item.append(attr)
+
+    def dump_item(self):
+        """Return the RSS item as an XML string; assemble it first if needed."""
+        if self.item is None:
+            self.assemble_item()
+        return ET.tostring(self.item).decode("utf-8")
-- 
cgit v1.2.1