app/django/utils/feedgenerator.py
changeset 323 ff1a9aa48cfd
parent 54 03e267d67478
equal deleted inserted replaced
322:6641e941ef1e 323:ff1a9aa48cfd
    17 
    17 
    18 For definitions of the different versions of RSS, see:
    18 For definitions of the different versions of RSS, see:
    19 http://diveintomark.org/archives/2004/02/04/incompatible-rss
    19 http://diveintomark.org/archives/2004/02/04/incompatible-rss
    20 """
    20 """
    21 
    21 
       
    22 import re
       
    23 import datetime
    22 from django.utils.xmlutils import SimplerXMLGenerator
    24 from django.utils.xmlutils import SimplerXMLGenerator
    23 from django.utils.encoding import force_unicode, iri_to_uri
    25 from django.utils.encoding import force_unicode, iri_to_uri
    24 import datetime, re, time
       
    25 import email.Utils
       
    26 
    26 
    27 def rfc2822_date(date):
    27 def rfc2822_date(date):
    28     return email.Utils.formatdate(time.mktime(date.timetuple()))
    28     # We do this ourselves to be timezone aware, email.Utils is not tz aware.
       
    29     if date.tzinfo:
       
    30         time_str = date.strftime('%a, %d %b %Y %H:%M:%S ')
       
    31         offset = date.tzinfo.utcoffset(date)
       
    32         timezone = (offset.days * 24 * 60) + (offset.seconds / 60)
       
    33         hour, minute = divmod(timezone, 60)
       
    34         return time_str + "%+03d%02d" % (hour, minute)
       
    35     else:
       
    36         return date.strftime('%a, %d %b %Y %H:%M:%S -0000')
    29 
    37 
    30 def rfc3339_date(date):
    38 def rfc3339_date(date):
    31     if date.tzinfo:
    39     if date.tzinfo:
    32         return date.strftime('%Y-%m-%dT%H:%M:%S%z')
    40         time_str = date.strftime('%Y-%m-%dT%H:%M:%S')
       
    41         offset = date.tzinfo.utcoffset(date)
       
    42         timezone = (offset.days * 24 * 60) + (offset.seconds / 60)
       
    43         hour, minute = divmod(timezone, 60)
       
    44         return time_str + "%+03d:%02d" % (hour, minute)
    33     else:
    45     else:
    34         return date.strftime('%Y-%m-%dT%H:%M:%SZ')
    46         return date.strftime('%Y-%m-%dT%H:%M:%SZ')
    35 
    47 
    36 def get_tag_uri(url, date):
    48 def get_tag_uri(url, date):
    37     "Creates a TagURI. See http://diveintomark.org/archives/2004/05/28/howto-atom-id"
    49     "Creates a TagURI. See http://diveintomark.org/archives/2004/05/28/howto-atom-id"
    43 
    55 
    44 class SyndicationFeed(object):
    56 class SyndicationFeed(object):
    45     "Base class for all syndication feeds. Subclasses should provide write()"
    57     "Base class for all syndication feeds. Subclasses should provide write()"
    46     def __init__(self, title, link, description, language=None, author_email=None,
    58     def __init__(self, title, link, description, language=None, author_email=None,
    47             author_name=None, author_link=None, subtitle=None, categories=None,
    59             author_name=None, author_link=None, subtitle=None, categories=None,
    48             feed_url=None, feed_copyright=None, feed_guid=None, ttl=None):
    60             feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
    49         to_unicode = lambda s: force_unicode(s, strings_only=True)
    61         to_unicode = lambda s: force_unicode(s, strings_only=True)
    50         if categories:
    62         if categories:
    51             categories = [force_unicode(c) for c in categories]
    63             categories = [force_unicode(c) for c in categories]
    52         self.feed = {
    64         self.feed = {
    53             'title': to_unicode(title),
    65             'title': to_unicode(title),
    62             'feed_url': iri_to_uri(feed_url),
    74             'feed_url': iri_to_uri(feed_url),
    63             'feed_copyright': to_unicode(feed_copyright),
    75             'feed_copyright': to_unicode(feed_copyright),
    64             'id': feed_guid or link,
    76             'id': feed_guid or link,
    65             'ttl': ttl,
    77             'ttl': ttl,
    66         }
    78         }
       
    79         self.feed.update(kwargs)
    67         self.items = []
    80         self.items = []
    68 
    81 
    69     def add_item(self, title, link, description, author_email=None,
    82     def add_item(self, title, link, description, author_email=None,
    70         author_name=None, author_link=None, pubdate=None, comments=None,
    83         author_name=None, author_link=None, pubdate=None, comments=None,
    71         unique_id=None, enclosure=None, categories=(), item_copyright=None, ttl=None):
    84         unique_id=None, enclosure=None, categories=(), item_copyright=None,
       
    85         ttl=None, **kwargs):
    72         """
    86         """
    73         Adds an item to the feed. All args are expected to be Python Unicode
    87         Adds an item to the feed. All args are expected to be Python Unicode
    74         objects except pubdate, which is a datetime.datetime object, and
    88         objects except pubdate, which is a datetime.datetime object, and
    75         enclosure, which is an instance of the Enclosure class.
    89         enclosure, which is an instance of the Enclosure class.
    76         """
    90         """
    77         to_unicode = lambda s: force_unicode(s, strings_only=True)
    91         to_unicode = lambda s: force_unicode(s, strings_only=True)
    78         if categories:
    92         if categories:
    79             categories = [to_unicode(c) for c in categories]
    93             categories = [to_unicode(c) for c in categories]
    80         self.items.append({
    94         item = {
    81             'title': to_unicode(title),
    95             'title': to_unicode(title),
    82             'link': iri_to_uri(link),
    96             'link': iri_to_uri(link),
    83             'description': to_unicode(description),
    97             'description': to_unicode(description),
    84             'author_email': to_unicode(author_email),
    98             'author_email': to_unicode(author_email),
    85             'author_name': to_unicode(author_name),
    99             'author_name': to_unicode(author_name),
    89             'unique_id': to_unicode(unique_id),
   103             'unique_id': to_unicode(unique_id),
    90             'enclosure': enclosure,
   104             'enclosure': enclosure,
    91             'categories': categories or (),
   105             'categories': categories or (),
    92             'item_copyright': to_unicode(item_copyright),
   106             'item_copyright': to_unicode(item_copyright),
    93             'ttl': ttl,
   107             'ttl': ttl,
    94         })
   108         }
       
   109         item.update(kwargs)
       
   110         self.items.append(item)
    95 
   111 
    96     def num_items(self):
   112     def num_items(self):
    97         return len(self.items)
   113         return len(self.items)
       
   114 
       
   115     def root_attributes(self):
       
   116         """
       
   117         Return extra attributes to place on the root (i.e. feed/channel) element.
       
   118         Called from write().
       
   119         """
       
   120         return {}
       
   121 
       
   122     def add_root_elements(self, handler):
       
   123         """
       
   124         Add elements in the root (i.e. feed/channel) element. Called
       
   125         from write().
       
   126         """
       
   127         pass
       
   128 
       
   129     def item_attributes(self, item):
       
   130         """
       
   131         Return extra attributes to place on each item (i.e. item/entry) element.
       
   132         """
       
   133         return {}
       
   134 
       
   135     def add_item_elements(self, handler, item):
       
   136         """
       
   137         Add elements on each item (i.e. item/entry) element.
       
   138         """
       
   139         pass
    98 
   140 
    99     def write(self, outfile, encoding):
   141     def write(self, outfile, encoding):
   100         """
   142         """
   101         Outputs the feed in the given encoding to outfile, which is a file-like
   143         Outputs the feed in the given encoding to outfile, which is a file-like
   102         object. Subclasses should override this.
   144         object. Subclasses should override this.
   134 class RssFeed(SyndicationFeed):
   176 class RssFeed(SyndicationFeed):
   135     mime_type = 'application/rss+xml'
   177     mime_type = 'application/rss+xml'
   136     def write(self, outfile, encoding):
   178     def write(self, outfile, encoding):
   137         handler = SimplerXMLGenerator(outfile, encoding)
   179         handler = SimplerXMLGenerator(outfile, encoding)
   138         handler.startDocument()
   180         handler.startDocument()
   139         handler.startElement(u"rss", {u"version": self._version})
   181         handler.startElement(u"rss", self.rss_attributes())
   140         handler.startElement(u"channel", {})
   182         handler.startElement(u"channel", self.root_attributes())
       
   183         self.add_root_elements(handler)
       
   184         self.write_items(handler)
       
   185         self.endChannelElement(handler)
       
   186         handler.endElement(u"rss")
       
   187 
       
   188     def rss_attributes(self):
       
   189         return {u"version": self._version}
       
   190 
       
   191     def write_items(self, handler):
       
   192         for item in self.items:
       
   193             handler.startElement(u'item', self.item_attributes(item))
       
   194             self.add_item_elements(handler, item)
       
   195             handler.endElement(u"item")
       
   196 
       
   197     def add_root_elements(self, handler):
   141         handler.addQuickElement(u"title", self.feed['title'])
   198         handler.addQuickElement(u"title", self.feed['title'])
   142         handler.addQuickElement(u"link", self.feed['link'])
   199         handler.addQuickElement(u"link", self.feed['link'])
   143         handler.addQuickElement(u"description", self.feed['description'])
   200         handler.addQuickElement(u"description", self.feed['description'])
   144         if self.feed['language'] is not None:
   201         if self.feed['language'] is not None:
   145             handler.addQuickElement(u"language", self.feed['language'])
   202             handler.addQuickElement(u"language", self.feed['language'])
   146         for cat in self.feed['categories']:
   203         for cat in self.feed['categories']:
   147             handler.addQuickElement(u"category", cat)
   204             handler.addQuickElement(u"category", cat)
   148         if self.feed['feed_copyright'] is not None:
   205         if self.feed['feed_copyright'] is not None:
   149             handler.addQuickElement(u"copyright", self.feed['feed_copyright'])
   206             handler.addQuickElement(u"copyright", self.feed['feed_copyright'])
   150         handler.addQuickElement(u"lastBuildDate", rfc2822_date(self.latest_post_date()).decode('ascii'))
   207         handler.addQuickElement(u"lastBuildDate", rfc2822_date(self.latest_post_date()).decode('utf-8'))
   151         if self.feed['ttl'] is not None:
   208         if self.feed['ttl'] is not None:
   152             handler.addQuickElement(u"ttl", self.feed['ttl'])
   209             handler.addQuickElement(u"ttl", self.feed['ttl'])
   153         self.write_items(handler)
       
   154         self.endChannelElement(handler)
       
   155         handler.endElement(u"rss")
       
   156 
   210 
   157     def endChannelElement(self, handler):
   211     def endChannelElement(self, handler):
   158         handler.endElement(u"channel")
   212         handler.endElement(u"channel")
   159 
   213 
   160 class RssUserland091Feed(RssFeed):
   214 class RssUserland091Feed(RssFeed):
   161     _version = u"0.91"
   215     _version = u"0.91"
   162     def write_items(self, handler):
   216     def add_item_elements(self, handler, item):
   163         for item in self.items:
   217         handler.addQuickElement(u"title", item['title'])
   164             handler.startElement(u"item", {})
   218         handler.addQuickElement(u"link", item['link'])
   165             handler.addQuickElement(u"title", item['title'])
   219         if item['description'] is not None:
   166             handler.addQuickElement(u"link", item['link'])
   220             handler.addQuickElement(u"description", item['description'])
   167             if item['description'] is not None:
       
   168                 handler.addQuickElement(u"description", item['description'])
       
   169             handler.endElement(u"item")
       
   170 
   221 
   171 class Rss201rev2Feed(RssFeed):
   222 class Rss201rev2Feed(RssFeed):
   172     # Spec: http://blogs.law.harvard.edu/tech/rss
   223     # Spec: http://blogs.law.harvard.edu/tech/rss
   173     _version = u"2.0"
   224     _version = u"2.0"
   174     def write_items(self, handler):
   225     def add_item_elements(self, handler, item):
   175         for item in self.items:
   226         handler.addQuickElement(u"title", item['title'])
   176             handler.startElement(u"item", {})
   227         handler.addQuickElement(u"link", item['link'])
   177             handler.addQuickElement(u"title", item['title'])
   228         if item['description'] is not None:
   178             handler.addQuickElement(u"link", item['link'])
   229             handler.addQuickElement(u"description", item['description'])
   179             if item['description'] is not None:
   230 
   180                 handler.addQuickElement(u"description", item['description'])
   231         # Author information.
   181 
   232         if item["author_name"] and item["author_email"]:
   182             # Author information.
   233             handler.addQuickElement(u"author", "%s (%s)" % \
   183             if item["author_name"] and item["author_email"]:
   234                 (item['author_email'], item['author_name']))
   184                 handler.addQuickElement(u"author", "%s (%s)" % \
   235         elif item["author_email"]:
   185                     (item['author_email'], item['author_name']))
   236             handler.addQuickElement(u"author", item["author_email"])
   186             elif item["author_email"]:
   237         elif item["author_name"]:
   187                 handler.addQuickElement(u"author", item["author_email"])
   238             handler.addQuickElement(u"dc:creator", item["author_name"], {"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})
   188             elif item["author_name"]:
   239 
   189                 handler.addQuickElement(u"dc:creator", item["author_name"], {"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})
   240         if item['pubdate'] is not None:
   190 
   241             handler.addQuickElement(u"pubDate", rfc2822_date(item['pubdate']).decode('utf-8'))
   191             if item['pubdate'] is not None:
   242         if item['comments'] is not None:
   192                 handler.addQuickElement(u"pubDate", rfc2822_date(item['pubdate']).decode('ascii'))
   243             handler.addQuickElement(u"comments", item['comments'])
   193             if item['comments'] is not None:
   244         if item['unique_id'] is not None:
   194                 handler.addQuickElement(u"comments", item['comments'])
   245             handler.addQuickElement(u"guid", item['unique_id'])
   195             if item['unique_id'] is not None:
   246         if item['ttl'] is not None:
   196                 handler.addQuickElement(u"guid", item['unique_id'])
   247             handler.addQuickElement(u"ttl", item['ttl'])
   197             if item['ttl'] is not None:
   248 
   198                 handler.addQuickElement(u"ttl", item['ttl'])
   249         # Enclosure.
   199 
   250         if item['enclosure'] is not None:
   200             # Enclosure.
   251             handler.addQuickElement(u"enclosure", '',
   201             if item['enclosure'] is not None:
   252                 {u"url": item['enclosure'].url, u"length": item['enclosure'].length,
   202                 handler.addQuickElement(u"enclosure", '',
   253                     u"type": item['enclosure'].mime_type})
   203                     {u"url": item['enclosure'].url, u"length": item['enclosure'].length,
   254 
   204                         u"type": item['enclosure'].mime_type})
   255         # Categories.
   205 
   256         for cat in item['categories']:
   206             # Categories.
   257             handler.addQuickElement(u"category", cat)
   207             for cat in item['categories']:
       
   208                 handler.addQuickElement(u"category", cat)
       
   209 
       
   210             handler.endElement(u"item")
       
   211 
   258 
   212 class Atom1Feed(SyndicationFeed):
   259 class Atom1Feed(SyndicationFeed):
   213     # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
   260     # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
   214     mime_type = 'application/atom+xml'
   261     mime_type = 'application/atom+xml'
   215     ns = u"http://www.w3.org/2005/Atom"
   262     ns = u"http://www.w3.org/2005/Atom"
       
   263 
   216     def write(self, outfile, encoding):
   264     def write(self, outfile, encoding):
   217         handler = SimplerXMLGenerator(outfile, encoding)
   265         handler = SimplerXMLGenerator(outfile, encoding)
   218         handler.startDocument()
   266         handler.startDocument()
       
   267         handler.startElement(u'feed', self.root_attributes())
       
   268         self.add_root_elements(handler)
       
   269         self.write_items(handler)
       
   270         handler.endElement(u"feed")
       
   271 
       
   272     def root_attributes(self):
   219         if self.feed['language'] is not None:
   273         if self.feed['language'] is not None:
   220             handler.startElement(u"feed", {u"xmlns": self.ns, u"xml:lang": self.feed['language']})
   274             return {u"xmlns": self.ns, u"xml:lang": self.feed['language']}
   221         else:
   275         else:
   222             handler.startElement(u"feed", {u"xmlns": self.ns})
   276             return {u"xmlns": self.ns}
       
   277 
       
   278     def add_root_elements(self, handler):
   223         handler.addQuickElement(u"title", self.feed['title'])
   279         handler.addQuickElement(u"title", self.feed['title'])
   224         handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": self.feed['link']})
   280         handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": self.feed['link']})
   225         if self.feed['feed_url'] is not None:
   281         if self.feed['feed_url'] is not None:
   226             handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": self.feed['feed_url']})
   282             handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": self.feed['feed_url']})
   227         handler.addQuickElement(u"id", self.feed['id'])
   283         handler.addQuickElement(u"id", self.feed['id'])
   228         handler.addQuickElement(u"updated", rfc3339_date(self.latest_post_date()).decode('ascii'))
   284         handler.addQuickElement(u"updated", rfc3339_date(self.latest_post_date()).decode('utf-8'))
   229         if self.feed['author_name'] is not None:
   285         if self.feed['author_name'] is not None:
   230             handler.startElement(u"author", {})
   286             handler.startElement(u"author", {})
   231             handler.addQuickElement(u"name", self.feed['author_name'])
   287             handler.addQuickElement(u"name", self.feed['author_name'])
   232             if self.feed['author_email'] is not None:
   288             if self.feed['author_email'] is not None:
   233                 handler.addQuickElement(u"email", self.feed['author_email'])
   289                 handler.addQuickElement(u"email", self.feed['author_email'])
   238             handler.addQuickElement(u"subtitle", self.feed['subtitle'])
   294             handler.addQuickElement(u"subtitle", self.feed['subtitle'])
   239         for cat in self.feed['categories']:
   295         for cat in self.feed['categories']:
   240             handler.addQuickElement(u"category", "", {u"term": cat})
   296             handler.addQuickElement(u"category", "", {u"term": cat})
   241         if self.feed['feed_copyright'] is not None:
   297         if self.feed['feed_copyright'] is not None:
   242             handler.addQuickElement(u"rights", self.feed['feed_copyright'])
   298             handler.addQuickElement(u"rights", self.feed['feed_copyright'])
   243         self.write_items(handler)
       
   244         handler.endElement(u"feed")
       
   245 
   299 
   246     def write_items(self, handler):
   300     def write_items(self, handler):
   247         for item in self.items:
   301         for item in self.items:
   248             handler.startElement(u"entry", {})
   302             handler.startElement(u"entry", self.item_attributes(item))
   249             handler.addQuickElement(u"title", item['title'])
   303             self.add_item_elements(handler, item)
   250             handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
       
   251             if item['pubdate'] is not None:
       
   252                 handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('ascii'))
       
   253 
       
   254             # Author information.
       
   255             if item['author_name'] is not None:
       
   256                 handler.startElement(u"author", {})
       
   257                 handler.addQuickElement(u"name", item['author_name'])
       
   258                 if item['author_email'] is not None:
       
   259                     handler.addQuickElement(u"email", item['author_email'])
       
   260                 if item['author_link'] is not None:
       
   261                     handler.addQuickElement(u"uri", item['author_link'])
       
   262                 handler.endElement(u"author")
       
   263 
       
   264             # Unique ID.
       
   265             if item['unique_id'] is not None:
       
   266                 unique_id = item['unique_id']
       
   267             else:
       
   268                 unique_id = get_tag_uri(item['link'], item['pubdate'])
       
   269             handler.addQuickElement(u"id", unique_id)
       
   270 
       
   271             # Summary.
       
   272             if item['description'] is not None:
       
   273                 handler.addQuickElement(u"summary", item['description'], {u"type": u"html"})
       
   274 
       
   275             # Enclosure.
       
   276             if item['enclosure'] is not None:
       
   277                 handler.addQuickElement(u"link", '',
       
   278                     {u"rel": u"enclosure",
       
   279                      u"href": item['enclosure'].url,
       
   280                      u"length": item['enclosure'].length,
       
   281                      u"type": item['enclosure'].mime_type})
       
   282 
       
   283             # Categories.
       
   284             for cat in item['categories']:
       
   285                 handler.addQuickElement(u"category", u"", {u"term": cat})
       
   286 
       
   287             # Rights.
       
   288             if item['item_copyright'] is not None:
       
   289                 handler.addQuickElement(u"rights", item['item_copyright'])
       
   290 
       
   291             handler.endElement(u"entry")
   304             handler.endElement(u"entry")
       
   305 
       
   306     def add_item_elements(self, handler, item):
       
   307         handler.addQuickElement(u"title", item['title'])
       
   308         handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
       
   309         if item['pubdate'] is not None:
       
   310             handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
       
   311 
       
   312         # Author information.
       
   313         if item['author_name'] is not None:
       
   314             handler.startElement(u"author", {})
       
   315             handler.addQuickElement(u"name", item['author_name'])
       
   316             if item['author_email'] is not None:
       
   317                 handler.addQuickElement(u"email", item['author_email'])
       
   318             if item['author_link'] is not None:
       
   319                 handler.addQuickElement(u"uri", item['author_link'])
       
   320             handler.endElement(u"author")
       
   321 
       
   322         # Unique ID.
       
   323         if item['unique_id'] is not None:
       
   324             unique_id = item['unique_id']
       
   325         else:
       
   326             unique_id = get_tag_uri(item['link'], item['pubdate'])
       
   327         handler.addQuickElement(u"id", unique_id)
       
   328 
       
   329         # Summary.
       
   330         if item['description'] is not None:
       
   331             handler.addQuickElement(u"summary", item['description'], {u"type": u"html"})
       
   332 
       
   333         # Enclosure.
       
   334         if item['enclosure'] is not None:
       
   335             handler.addQuickElement(u"link", '',
       
   336                 {u"rel": u"enclosure",
       
   337                  u"href": item['enclosure'].url,
       
   338                  u"length": item['enclosure'].length,
       
   339                  u"type": item['enclosure'].mime_type})
       
   340 
       
   341         # Categories.
       
   342         for cat in item['categories']:
       
   343             handler.addQuickElement(u"category", u"", {u"term": cat})
       
   344 
       
   345         # Rights.
       
   346         if item['item_copyright'] is not None:
       
   347             handler.addQuickElement(u"rights", item['item_copyright'])
   292 
   348 
   293 # This isolates the decision of what the system default is, so calling code can
   349 # This isolates the decision of what the system default is, so calling code can
   294 # do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
   350 # do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
   295 DefaultFeed = Rss201rev2Feed
   351 DefaultFeed = Rss201rev2Feed