# HG changeset patch # User Pawel Solyga # Date 1242668258 -7200 # Node ID 6c7d0fba105c7ce83799996b7dd64d61d44dd7a2 # Parent 16c1b1412b0d3b3a31144727455f19e9df449e13 Replace usage of feedparser for sanitizing html with new HtmlSanitizer module. HtmlSanitizer prevents from XSS attacks (Fixes issue 606). diff -r 16c1b1412b0d -r 6c7d0fba105c app/soc/logic/cleaning.py --- a/app/soc/logic/cleaning.py Mon May 18 19:34:19 2009 +0200 +++ b/app/soc/logic/cleaning.py Mon May 18 19:37:38 2009 +0200 @@ -24,7 +24,7 @@ ] -import feedparser +from htmlsanitizer import HtmlSanitizer from google.appengine.api import users @@ -379,16 +379,21 @@ def wrapped(self): """Decorator wrapper method. """ + from HTMLParser import HTMLParseError content = self.cleaned_data.get(field_name) if user_logic.isDeveloper(): return content - - sanitizer = feedparser._HTMLSanitizer('utf-8') - sanitizer.feed(content) - content = sanitizer.output() - content = content.decode('utf-8') + + try: + cleaner = HtmlSanitizer.Cleaner() + cleaner.string = content + cleaner.clean() + except HTMLParseError, msg: + raise forms.ValidationError(msg) + + content = cleaner.string content = content.strip().replace('\r\n', '\n') return content