mirror of
				https://github.com/samuelclay/NewsBlur.git
				synced 2025-09-18 21:50:56 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			443 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			443 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """PyRSS2Gen - A Python library for generating RSS 2.0 feeds."""
 | |
| 
 | |
# Package identity.  Note that assigning __name__ here replaces the name
# the import system gave this module.
__name__ = "PyRSS2Gen"
__version__ = (1, 0, 0)
__author__ = "Andrew Dalke <dalke@dalkescientific.com>"

# Default value for the channel's <generator> element: the package name
# followed by the dotted version, e.g. "PyRSS2Gen-1.0.0".
_generator_name = "%s-%s" % (
    __name__, ".".join([str(part) for part in __version__]))
 | |
| 
 | |
| import datetime
 | |
| 
 | |
| # Could make this the base class; will need to add 'publish'
 | |
class WriteXmlMixin:
    """Mixin that serialises an object to XML through a SAX generator.

    The host object must provide a ``publish(handler)`` method; it is
    called with an ``xml.sax.saxutils.XMLGenerator`` between the
    ``startDocument`` and ``endDocument`` events.
    """

    def write_xml(self, outfile, encoding = "iso-8859-1"):
        """Write the complete XML document to ``outfile``.

        outfile  -- writable file-like object handed to XMLGenerator
        encoding -- encoding name handed to XMLGenerator
        """
        from xml.sax import saxutils
        handler = saxutils.XMLGenerator(outfile, encoding)
        handler.startDocument()
        self.publish(handler)
        handler.endDocument()

    def to_xml(self, encoding = "iso-8859-1"):
        """Return the complete XML document as a string.

        The document is rendered into an in-memory buffer with
        ``write_xml`` and the buffer's contents are returned.
        """
        # Python 2 ships cStringIO / StringIO; neither module exists on
        # Python 3, where io.StringIO is the in-memory text buffer.
        try:
            from cStringIO import StringIO
        except ImportError:
            try:
                from StringIO import StringIO
            except ImportError:
                from io import StringIO
        buf = StringIO()
        self.write_xml(buf, encoding)
        return buf.getvalue()
 | |
| 
 | |
| 
 | |
def _element(handler, name, obj, d = None):
    """Emit one XML element for ``obj`` on the SAX ``handler``.

    handler -- object providing startElement/characters/endElement
    name    -- tag name used when ``obj`` is a string or None
    obj     -- a string (written as character data), None (an element
               with no content), or any object with a ``publish(handler)``
               method, which is then responsible for its own output
    d       -- attribute mapping for the element; only used in the
               string/None case.  Defaults to no attributes.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # string special case also works on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Build a fresh dict per call rather than sharing one default object.
    if d is None:
        d = {}

    if obj is None or isinstance(obj, string_types):
        # special-case handling to make the API easier
        # to use for the common case.
        handler.startElement(name, d)
        if obj is not None:
            handler.characters(obj)
        handler.endElement(name)
    else:
        # It better know how to emit the correct XML.
        obj.publish(handler)
 | |
| 
 | |
def _opt_element(handler, name, obj):
    """Emit ``obj`` via ``_element`` unless it is None.

    A value of None means the optional element is absent, so nothing
    is written to ``handler``.
    """
    if obj is not None:
        _element(handler, name, obj)
 | |
| 
 | |
| 
 | |
def _format_date(dt):
    """Render a datetime as an RFC 822 style date string.

    The result looks like ``Sat, 07 Sep 2002 00:00:01 GMT``.  The "GMT"
    suffix is always appended, so the input must already be in GMT.
    """
    # The day and month names are spelled out here rather than taken from
    # strftime, because strftime output depends on the current locale.
    day_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

    weekday = day_names[dt.weekday()]
    month = month_names[dt.month - 1]
    fields = (weekday, dt.day, month, dt.year,
              dt.hour, dt.minute, dt.second)
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % fields
 | |
| 
 | |
|         
 | |
| ##
 | |
| # A couple simple wrapper objects for the fields which
 | |
| # take a simple value other than a string.
 | |
class IntElement:
    """Publishable wrapper for an integer value.

    Constructed with the tag name and the value.  The value is converted
    with str() when published, so any object with a suitable str() form
    can be used.
    """

    # Attributes written on the element's start tag.
    element_attrs = {}

    def __init__(self, name, val):
        self.name, self.val = name, val

    def publish(self, handler):
        """Write <name>str(val)</name> to the SAX ``handler``."""
        tag = self.name
        handler.startElement(tag, self.element_attrs)
        handler.characters(str(self.val))
        handler.endElement(tag)
 | |
| 
 | |
class DateElement:
    """Publishable wrapper for a datetime.datetime value.

    Constructed with the tag name and the datetime.  On publish the
    datetime is formatted by ``_format_date`` (four-digit year, "GMT"
    suffix) and written as the element's text.
    """

    def __init__(self, name, dt):
        self.name, self.dt = name, dt

    def publish(self, handler):
        """Write the formatted date as a ``name`` element."""
        stamp = _format_date(self.dt)
        _element(handler, self.name, stamp)
 | |
| ####
 | |
| 
 | |
class Category:
    """A <category> element with an optional ``domain`` attribute."""

    def __init__(self, category, domain = None):
        self.category, self.domain = category, domain

    def publish(self, handler):
        """Write the category; ``domain`` is only emitted when set."""
        attrs = {} if self.domain is None else {"domain": self.domain}
        _element(handler, "category", self.category, attrs)
 | |
| 
 | |
class Cloud:
    """A <cloud> element; all of its data is carried in attributes."""

    def __init__(self, domain, port, path,
                 registerProcedure, protocol):
        self.domain, self.port, self.path = domain, port, path
        self.registerProcedure = registerProcedure
        self.protocol = protocol

    def publish(self, handler):
        """Write an empty <cloud> element with the five attributes."""
        attrs = {}
        attrs["domain"] = self.domain
        attrs["port"] = str(self.port)  # attribute values are text
        attrs["path"] = self.path
        attrs["registerProcedure"] = self.registerProcedure
        attrs["protocol"] = self.protocol
        _element(handler, "cloud", None, attrs)
 | |
| 
 | |
class Image:
    """A channel <image> element.

    ``url``, ``title`` and ``link`` are always written; ``width``,
    ``height`` and ``description`` are written only when not None.
    """

    # Attributes written on the <image> start tag.
    element_attrs = {}

    def __init__(self, url, title, link,
                 width = None, height = None, description = None):
        self.url, self.title, self.link = url, title, link
        self.width, self.height = width, height
        self.description = description

    def publish(self, handler):
        """Write the <image> element and its children to ``handler``."""
        handler.startElement("image", self.element_attrs)

        required = (("url", self.url),
                    ("title", self.title),
                    ("link", self.link))
        for tag, value in required:
            _element(handler, tag, value)

        # Integer sizes are wrapped so they are published via str();
        # any other non-None value is passed through unchanged.
        for tag, size in (("width", self.width), ("height", self.height)):
            if isinstance(size, int):
                size = IntElement(tag, size)
            _opt_element(handler, tag, size)

        _opt_element(handler, "description", self.description)

        handler.endElement("image")
 | |
| 
 | |
class Guid:
    """A <guid> element.

    ``isPermaLink`` defaults to true, which is also what readers assume
    when the attribute is omitted; hence plain strings are always
    permalinks.
    """

    def __init__(self, guid, isPermaLink = 1):
        self.guid, self.isPermaLink = guid, isPermaLink

    def publish(self, handler):
        """Write the guid with an explicit isPermaLink attribute."""
        flag = "true" if self.isPermaLink else "false"
        _element(handler, "guid", self.guid, {"isPermaLink": flag})
 | |
| 
 | |
class TextInput:
    """A <textInput> element.

    Apparently this is rarely used.
    """

    # Attributes written on the <textInput> start tag.
    element_attrs = {}

    def __init__(self, title, description, name, link):
        self.title, self.description = title, description
        self.name, self.link = name, link

    def publish(self, handler):
        """Write <textInput> with its four child elements, in order."""
        handler.startElement("textInput", self.element_attrs)
        for field in ("title", "description", "name", "link"):
            _element(handler, field, getattr(self, field))
        handler.endElement("textInput")
 | |
|         
 | |
| 
 | |
class Enclosure:
    """An <enclosure> element; all of its data is carried in attributes."""

    def __init__(self, url, length, type):
        self.url, self.length, self.type = url, length, type

    def publish(self, handler):
        """Write an empty <enclosure> with url, length and type."""
        attrs = {}
        attrs["url"] = self.url
        attrs["length"] = str(self.length)  # attribute values are text
        attrs["type"] = self.type
        _element(handler, "enclosure", None, attrs)
 | |
| 
 | |
class Source:
    """The item's original source (<source>), used by aggregators."""

    def __init__(self, name, url):
        self.name, self.url = name, url

    def publish(self, handler):
        """Write <source url="...">name</source>."""
        attrs = {"url": self.url}
        _element(handler, "source", self.name, attrs)
 | |
| 
 | |
class SkipHours:
    """A <skipHours> element.

    Takes a list of hours, as integers.
    """

    # Attributes written on the <skipHours> start tag.
    element_attrs = {}

    def __init__(self, hours):
        self.hours = hours

    def publish(self, handler):
        """Write one <hour> per entry; write nothing for an empty list."""
        if not self.hours:
            return
        handler.startElement("skipHours", self.element_attrs)
        for value in self.hours:
            _element(handler, "hour", str(value))
        handler.endElement("skipHours")
 | |
| 
 | |
class SkipDays:
    """A <skipDays> element.

    Takes a list of days, as strings.
    """

    # Attributes written on the <skipDays> start tag.
    element_attrs = {}

    def __init__(self, days):
        self.days = days

    def publish(self, handler):
        """Write one <day> per entry; write nothing for an empty list."""
        if not self.days:
            return
        handler.startElement("skipDays", self.element_attrs)
        for name in self.days:
            _element(handler, "day", name)
        handler.endElement("skipDays")
 | |
| 
 | |
class RSS2(WriteXmlMixin):
    """The main RSS class.

    Stores the channel attributes, with the "category" elements under
    ".categories" and the RSS items under ".items".

    ``title``, ``link`` and ``description`` are always written; every
    other field is written only when it is not None.
    """

    # Attributes written on the <rss> and <channel> start tags.
    rss_attrs = {"version": "2.0"}
    element_attrs = {}

    def __init__(self,
                 title,
                 link,
                 description,

                 language = None,
                 copyright = None,
                 managingEditor = None,
                 webMaster = None,
                 pubDate = None,  # a datetime, *in* *GMT*
                 lastBuildDate = None, # a datetime

                 categories = None, # list of strings or Category
                 generator = _generator_name,
                 docs = "http://blogs.law.harvard.edu/tech/rss",
                 cloud = None,    # a Cloud
                 ttl = None,      # integer number of minutes

                 image = None,     # an Image
                 rating = None,    # a string; I don't know how it's used
                 textInput = None, # a TextInput
                 skipHours = None, # a SkipHours with a list of integers
                 skipDays = None,  # a SkipDays with a list of strings

                 items = None,     # list of RSSItems
                 ):
        self.title = title
        self.link = link
        self.description = description
        self.language = language
        self.copyright = copyright
        self.managingEditor = managingEditor

        self.webMaster = webMaster
        self.pubDate = pubDate
        self.lastBuildDate = lastBuildDate

        # Give each instance its own list when none is supplied.
        if categories is None:
            categories = []
        self.categories = categories
        self.generator = generator
        self.docs = docs
        self.cloud = cloud
        self.ttl = ttl
        self.image = image
        self.rating = rating
        self.textInput = textInput
        self.skipHours = skipHours
        self.skipDays = skipDays

        if items is None:
            items = []
        self.items = items

    def publish(self, handler):
        """Write the whole <rss><channel>...</channel></rss> tree.

        handler -- SAX-style object providing startElement, characters
                   and endElement
        """
        # ``basestring`` only exists on Python 2; fall back to ``str`` so
        # plain-string categories are also recognised on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        handler.startElement("rss", self.rss_attrs)
        handler.startElement("channel", self.element_attrs)
        _element(handler, "title", self.title)
        _element(handler, "link", self.link)
        _element(handler, "description", self.description)

        self.publish_extensions(handler)

        _opt_element(handler, "language", self.language)
        _opt_element(handler, "copyright", self.copyright)
        _opt_element(handler, "managingEditor", self.managingEditor)
        _opt_element(handler, "webMaster", self.webMaster)

        # datetimes are wrapped so they are formatted on output; any
        # other non-None value is passed through unchanged.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement("pubDate", pubDate)
        _opt_element(handler, "pubDate", pubDate)

        lastBuildDate = self.lastBuildDate
        if isinstance(lastBuildDate, datetime.datetime):
            lastBuildDate = DateElement("lastBuildDate", lastBuildDate)
        _opt_element(handler, "lastBuildDate", lastBuildDate)

        for category in self.categories:
            if isinstance(category, string_types):
                category = Category(category)
            category.publish(handler)

        _opt_element(handler, "generator", self.generator)
        _opt_element(handler, "docs", self.docs)

        if self.cloud is not None:
            self.cloud.publish(handler)

        ttl = self.ttl
        if isinstance(ttl, int):
            ttl = IntElement("ttl", ttl)
        # The tag name matters when ttl is given as a string: it must be
        # "ttl" (this previously produced a bogus <tt> element).
        _opt_element(handler, "ttl", ttl)

        if self.image is not None:
            self.image.publish(handler)

        _opt_element(handler, "rating", self.rating)
        if self.textInput is not None:
            self.textInput.publish(handler)
        if self.skipHours is not None:
            self.skipHours.publish(handler)
        if self.skipDays is not None:
            self.skipDays.publish(handler)

        for item in self.items:
            item.publish(handler)

        handler.endElement("channel")
        handler.endElement("rss")

    def publish_extensions(self, handler):
        """Hook for subclasses; does nothing by default."""
        # Derived classes can hook into this to insert
        # output after the three required fields.
        pass
 | |
| 
 | |
|     
 | |
|     
 | |
class RSSItem(WriteXmlMixin):
    """Publish an RSS Item.

    At least one of ``title`` or ``description`` must be given.  Every
    field is written only when it is not None.
    """

    # Attributes written on the <item> start tag.
    element_attrs = {}

    def __init__(self,
                 title = None,  # string
                 link = None,   # url as string
                 description = None, # string
                 author = None,      # email address as string
                 categories = None,  # list of string or Category
                 comments = None,  # url as string
                 enclosure = None, # an Enclosure
                 guid = None,    # a unique string
                 pubDate = None, # a datetime
                 source = None,  # a Source
                 ):
        """Store the item fields.

        Raises TypeError when both ``title`` and ``description`` are None.
        """
        if title is None and description is None:
            raise TypeError(
                "must define at least one of 'title' or 'description'")
        self.title = title
        self.link = link
        self.description = description
        self.author = author
        # Give each instance its own list when none is supplied.
        if categories is None:
            categories = []
        self.categories = categories
        self.comments = comments
        self.enclosure = enclosure
        self.guid = guid
        self.pubDate = pubDate
        self.source = source
        # It sure does get tedious typing these names three times...

    def publish(self, handler):
        """Write the <item> element and its children to ``handler``.

        handler -- SAX-style object providing startElement, characters
                   and endElement
        """
        # ``basestring`` only exists on Python 2; fall back to ``str`` so
        # plain-string categories are also recognised on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        handler.startElement("item", self.element_attrs)
        _opt_element(handler, "title", self.title)
        _opt_element(handler, "link", self.link)
        self.publish_extensions(handler)
        _opt_element(handler, "description", self.description)
        _opt_element(handler, "author", self.author)

        for category in self.categories:
            if isinstance(category, string_types):
                category = Category(category)
            category.publish(handler)

        _opt_element(handler, "comments", self.comments)
        if self.enclosure is not None:
            self.enclosure.publish(handler)
        _opt_element(handler, "guid", self.guid)

        # A datetime is wrapped so it is formatted on output; any other
        # non-None value is passed through unchanged.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement("pubDate", pubDate)
        _opt_element(handler, "pubDate", pubDate)

        if self.source is not None:
            self.source.publish(handler)

        handler.endElement("item")

    def publish_extensions(self, handler):
        """Hook for subclasses; does nothing by default."""
        # Derived classes can hook into this to insert
        # output after the title and link elements
        pass
 | 
