Replace the use of feedparser for sanitizing HTML with the new HtmlSanitizer module.
HtmlSanitizer protects against XSS attacks (Fixes issue 606).
--- a/app/soc/logic/cleaning.py Mon May 18 19:34:19 2009 +0200
+++ b/app/soc/logic/cleaning.py Mon May 18 19:37:38 2009 +0200
@@ -24,7 +24,7 @@
]
-import feedparser
+from htmlsanitizer import HtmlSanitizer
from google.appengine.api import users
@@ -379,16 +379,21 @@
def wrapped(self):
"""Decorator wrapper method.
"""
+ from HTMLParser import HTMLParseError
content = self.cleaned_data.get(field_name)
if user_logic.isDeveloper():
return content
-
- sanitizer = feedparser._HTMLSanitizer('utf-8')
- sanitizer.feed(content)
- content = sanitizer.output()
- content = content.decode('utf-8')
+
+ try:
+ cleaner = HtmlSanitizer.Cleaner()
+ cleaner.string = content
+ cleaner.clean()
+ except HTMLParseError, msg:
+ raise forms.ValidationError(msg)
+
+ content = cleaner.string
content = content.strip().replace('\r\n', '\n')
return content