|
1 """ |
|
2 Syndication feed generation library -- used for generating RSS, etc. |
|
3 |
|
4 Sample usage: |
|
5 |
|
6 >>> from django.utils import feedgenerator |
|
7 >>> feed = feedgenerator.Rss201rev2Feed( |
|
8 ... title=u"Poynter E-Media Tidbits", |
|
9 ... link=u"http://www.poynter.org/column.asp?id=31", |
|
10 ... description=u"A group weblog by the sharpest minds in online media/journalism/publishing.", |
|
11 ... language=u"en", |
|
12 ... ) |
|
13 >>> feed.add_item(title="Hello", link=u"http://www.holovaty.com/test/", description="Testing.") |
|
14 >>> fp = open('test.rss', 'w') |
|
15 >>> feed.write(fp, 'utf-8') |
|
16 >>> fp.close() |
|
17 |
|
18 For definitions of the different versions of RSS, see: |
|
19 http://diveintomark.org/archives/2004/02/04/incompatible-rss |
|
20 """ |
|
21 |
|
22 from django.utils.xmlutils import SimplerXMLGenerator |
|
23 from django.utils.encoding import force_unicode, iri_to_uri |
|
24 import datetime, re, time |
|
25 import email.Utils |
|
26 |
|
27 def rfc2822_date(date): |
|
28 return email.Utils.formatdate(time.mktime(date.timetuple())) |
|
29 |
|
def rfc3339_date(date):
    """
    Formats a datetime as an RFC 3339 timestamp.

    Values with a known UTC offset get a numeric "+HH:MM" / "-HH:MM" suffix
    (strftime's %z omits the colon that RFC 3339 requires). Values without a
    usable offset keep the historical behaviour of being suffixed with "Z".
    """
    stamp = date.strftime('%Y-%m-%dT%H:%M:%S')
    offset = None
    if getattr(date, 'tzinfo', None) is not None:
        offset = date.utcoffset()
    if offset is None:
        return stamp + 'Z'
    # timedelta normalises negative offsets to negative days plus positive
    # seconds, so fold both into signed minutes before splitting.
    total_minutes = (offset.days * 24 * 60) + (offset.seconds // 60)
    sign = '+'
    if total_minutes < 0:
        sign = '-'
        total_minutes = -total_minutes
    hours, minutes = divmod(total_minutes, 60)
    return '%s%s%02d:%02d' % (stamp, sign, hours, minutes)
|
35 |
|
def get_tag_uri(url, date):
    """
    Creates a TagURI. See http://diveintomark.org/archives/2004/05/28/howto-atom-id

    The URL scheme is dropped, the date (when given) is inserted as
    ",YYYY-MM-DD:" before the first slash, and every "#" becomes "/".
    The result is prefixed with "tag:".
    """
    # Strip https as well as http; otherwise the "https:" prefix survives and
    # the date lands in the middle of "https://".
    tag = re.sub('^https?://', '', url)
    if date is not None:
        tag = tag.replace('/', ',%s:/' % date.strftime('%Y-%m-%d'), 1)
    tag = tag.replace('#', '/')
    return u'tag:' + tag
|
43 |
|
class SyndicationFeed(object):
    "Base class for all syndication feeds. Subclasses should provide write()"

    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None, feed_copyright=None, feed_guid=None, ttl=None):
        """
        Records the feed-level metadata in self.feed and starts with an empty
        item list. Text fields go through force_unicode(strings_only=True),
        links through iri_to_uri; ttl and the id are stored as given.
        """
        def text(value):
            return force_unicode(value, strings_only=True)

        feed_categories = ()
        if categories:
            feed_categories = [force_unicode(c) for c in categories] or ()
        self.feed = {
            'title': text(title),
            'link': iri_to_uri(link),
            'description': text(description),
            'language': text(language),
            'author_email': text(author_email),
            'author_name': text(author_name),
            'author_link': iri_to_uri(author_link),
            'subtitle': text(subtitle),
            'categories': feed_categories,
            'feed_url': iri_to_uri(feed_url),
            'feed_copyright': text(feed_copyright),
            # Fall back to the feed link when no explicit guid is supplied.
            'id': feed_guid or link,
            'ttl': ttl,
        }
        self.items = []

    def add_item(self, title, link, description, author_email=None,
            author_name=None, author_link=None, pubdate=None, comments=None,
            unique_id=None, enclosure=None, categories=(), item_copyright=None, ttl=None):
        """
        Appends one item to the feed. Every argument should be a Python
        Unicode object, apart from pubdate (a datetime.datetime object) and
        enclosure (an instance of the Enclosure class).
        """
        def text(value):
            return force_unicode(value, strings_only=True)

        item_categories = ()
        if categories:
            item_categories = [text(c) for c in categories] or ()
        entry = {
            'title': text(title),
            'link': iri_to_uri(link),
            'description': text(description),
            'author_email': text(author_email),
            'author_name': text(author_name),
            'author_link': iri_to_uri(author_link),
            'pubdate': pubdate,
            'comments': text(comments),
            'unique_id': text(unique_id),
            'enclosure': enclosure,
            'categories': item_categories,
            'item_copyright': text(item_copyright),
            'ttl': ttl,
        }
        self.items.append(entry)

    def num_items(self):
        "Returns how many items have been added to the feed."
        return len(self.items)

    def write(self, outfile, encoding):
        """
        Writes the feed to outfile (a file-like object) using the given
        encoding. Concrete feed classes are expected to override this.
        """
        raise NotImplementedError

    def writeString(self, encoding):
        """
        Renders the feed in the given encoding and returns it as a string.
        """
        from StringIO import StringIO
        stream = StringIO()
        self.write(stream, encoding)
        return stream.getvalue()

    def latest_post_date(self):
        """
        Returns the most recent pubdate among the items, or the current
        date/time when no item carries a pubdate.
        """
        dates = [entry['pubdate'] for entry in self.items
                 if entry['pubdate'] is not None]
        if not dates:
            return datetime.datetime.now()
        dates.sort()
        return dates[-1]
|
126 |
|
class Enclosure(object):
    "Holds the url, length and mime type of an RSS enclosure"

    def __init__(self, url, length, mime_type):
        "Callers should pass Python Unicode objects for every argument"
        self.length = length
        self.mime_type = mime_type
        # The url is the only field normalised; it goes through iri_to_uri.
        self.url = iri_to_uri(url)
|
133 |
|
class RssFeed(SyndicationFeed):
    """
    Shared writer for the RSS flavours. Emits the <rss>/<channel> envelope and
    the channel-level elements; subclasses must supply ``_version`` and
    ``write_items(handler)``, both of which write() relies on.
    """
    mime_type = 'application/rss+xml'

    def write(self, outfile, encoding):
        """
        Writes the complete RSS document to outfile (a file-like object) in
        the given encoding.
        """
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()
        handler.startElement(u"rss", {u"version": self._version})
        handler.startElement(u"channel", {})
        handler.addQuickElement(u"title", self.feed['title'])
        handler.addQuickElement(u"link", self.feed['link'])
        handler.addQuickElement(u"description", self.feed['description'])
        if self.feed['language'] is not None:
            handler.addQuickElement(u"language", self.feed['language'])
        for cat in self.feed['categories']:
            handler.addQuickElement(u"category", cat)
        if self.feed['feed_copyright'] is not None:
            handler.addQuickElement(u"copyright", self.feed['feed_copyright'])
        handler.addQuickElement(u"lastBuildDate", rfc2822_date(self.latest_post_date()).decode('ascii'))
        if self.feed['ttl'] is not None:
            # ttl is stored exactly as the caller passed it and is naturally
            # an integer (minutes); element text must be a string, so coerce
            # it here rather than handing a number to the XML writer.
            handler.addQuickElement(u"ttl", force_unicode(self.feed['ttl']))
        self.write_items(handler)
        self.endChannelElement(handler)
        handler.endElement(u"rss")

    def endChannelElement(self, handler):
        "Closes the <channel> element; kept separate so it can be overridden."
        handler.endElement(u"channel")
|
159 |
|
class RssUserland091Feed(RssFeed):
    "RSS 0.91 feed: items carry a title, a link and an optional description."
    _version = u"0.91"

    def write_items(self, handler):
        "Writes one <item> element per feed item to handler."
        add_element = handler.addQuickElement
        for entry in self.items:
            handler.startElement(u"item", {})
            add_element(u"title", entry['title'])
            add_element(u"link", entry['link'])
            description = entry['description']
            if description is not None:
                add_element(u"description", description)
            handler.endElement(u"item")
|
170 |
|
class Rss201rev2Feed(RssFeed):
    "RSS 2.0 feed: items may carry author, date, guid, enclosure and categories."
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def write_items(self, handler):
        "Writes one <item> element per feed item to handler."
        for item in self.items:
            handler.startElement(u"item", {})
            handler.addQuickElement(u"title", item['title'])
            handler.addQuickElement(u"link", item['link'])
            if item['description'] is not None:
                handler.addQuickElement(u"description", item['description'])

            # Author information. <author> is only written when an e-mail
            # address is known; a bare name goes out as <dc:creator> instead.
            if item["author_name"] and item["author_email"]:
                handler.addQuickElement(u"author", "%s (%s)" % \
                    (item['author_email'], item['author_name']))
            elif item["author_email"]:
                handler.addQuickElement(u"author", item["author_email"])
            elif item["author_name"]:
                handler.addQuickElement(u"dc:creator", item["author_name"], {"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})

            if item['pubdate'] is not None:
                handler.addQuickElement(u"pubDate", rfc2822_date(item['pubdate']).decode('ascii'))
            if item['comments'] is not None:
                handler.addQuickElement(u"comments", item['comments'])
            if item['unique_id'] is not None:
                handler.addQuickElement(u"guid", item['unique_id'])
            if item['ttl'] is not None:
                # add_item() stores ttl untouched, so it may be an integer;
                # element text must be a string, hence the coercion.
                handler.addQuickElement(u"ttl", force_unicode(item['ttl']))

            # Enclosure.
            if item['enclosure'] is not None:
                handler.addQuickElement(u"enclosure", '',
                    {u"url": item['enclosure'].url, u"length": item['enclosure'].length,
                        u"type": item['enclosure'].mime_type})

            # Categories.
            for cat in item['categories']:
                handler.addQuickElement(u"category", cat)

            handler.endElement(u"item")
|
211 |
|
class Atom1Feed(SyndicationFeed):
    "Atom 1.0 feed writer."
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        """
        Writes the complete Atom document to outfile (a file-like object) in
        the given encoding: feed-level elements first, then every entry.
        """
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()
        feed = self.feed

        # xml:lang is only declared on the root when a language was given.
        root_attrs = {u"xmlns": self.ns}
        if feed['language'] is not None:
            root_attrs[u"xml:lang"] = feed['language']
        handler.startElement(u"feed", root_attrs)

        handler.addQuickElement(u"title", feed['title'])
        handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": feed['link']})
        if feed['feed_url'] is not None:
            handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": feed['feed_url']})
        handler.addQuickElement(u"id", feed['id'])
        updated = rfc3339_date(self.latest_post_date()).decode('ascii')
        handler.addQuickElement(u"updated", updated)

        # The author element is only written when a name is known.
        if feed['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", feed['author_name'])
            if feed['author_email'] is not None:
                handler.addQuickElement(u"email", feed['author_email'])
            if feed['author_link'] is not None:
                handler.addQuickElement(u"uri", feed['author_link'])
            handler.endElement(u"author")

        if feed['subtitle'] is not None:
            handler.addQuickElement(u"subtitle", feed['subtitle'])
        for term in feed['categories']:
            handler.addQuickElement(u"category", "", {u"term": term})
        if feed['feed_copyright'] is not None:
            handler.addQuickElement(u"rights", feed['feed_copyright'])

        self.write_items(handler)
        handler.endElement(u"feed")

    def write_items(self, handler):
        "Writes one <entry> element per feed item to handler."
        for entry in self.items:
            handler.startElement(u"entry", {})
            handler.addQuickElement(u"title", entry['title'])
            handler.addQuickElement(u"link", u"", {u"href": entry['link'], u"rel": u"alternate"})
            pubdate = entry['pubdate']
            if pubdate is not None:
                handler.addQuickElement(u"updated", rfc3339_date(pubdate).decode('ascii'))

            # Author information.
            if entry['author_name'] is not None:
                handler.startElement(u"author", {})
                handler.addQuickElement(u"name", entry['author_name'])
                if entry['author_email'] is not None:
                    handler.addQuickElement(u"email", entry['author_email'])
                if entry['author_link'] is not None:
                    handler.addQuickElement(u"uri", entry['author_link'])
                handler.endElement(u"author")

            # Unique ID: use the explicit one, else derive a tag URI from the
            # item's link and pubdate.
            entry_id = entry['unique_id']
            if entry_id is None:
                entry_id = get_tag_uri(entry['link'], pubdate)
            handler.addQuickElement(u"id", entry_id)

            # Summary.
            if entry['description'] is not None:
                handler.addQuickElement(u"summary", entry['description'], {u"type": u"html"})

            # Enclosure.
            enclosure = entry['enclosure']
            if enclosure is not None:
                handler.addQuickElement(u"link", '',
                    {u"rel": u"enclosure",
                     u"href": enclosure.url,
                     u"length": enclosure.length,
                     u"type": enclosure.mime_type})

            # Categories.
            for term in entry['categories']:
                handler.addQuickElement(u"category", u"", {u"term": term})

            # Rights.
            if entry['item_copyright'] is not None:
                handler.addQuickElement(u"rights", entry['item_copyright'])

            handler.endElement(u"entry")
|
292 |
|
# Alias for the feed class used as the system default. Calling code can refer
# to "feedgenerator.DefaultFeed" instead of naming "feedgenerator.Rss201rev2Feed"
# directly, so the default can be changed in this one place.
DefaultFeed = Rss201rev2Feed