Replace usage of feedparser for sanitizing html with new HtmlSanitizer module.
author Pawel Solyga <Pawel.Solyga@gmail.com>
Mon, 18 May 2009 19:37:38 +0200
changeset 2327 6c7d0fba105c
parent 2326 16c1b1412b0d
child 2328 e077dc264dff
Replace usage of feedparser for sanitizing html with new HtmlSanitizer module. HtmlSanitizer protects against XSS attacks (Fixes issue 606).
app/soc/logic/cleaning.py
--- a/app/soc/logic/cleaning.py	Mon May 18 19:34:19 2009 +0200
+++ b/app/soc/logic/cleaning.py	Mon May 18 19:37:38 2009 +0200
@@ -24,7 +24,7 @@
     ]
 
 
-import feedparser
+from htmlsanitizer import HtmlSanitizer
 
 from google.appengine.api import users
 
@@ -379,16 +379,21 @@
   def wrapped(self):
     """Decorator wrapper method.
     """
+    from HTMLParser import HTMLParseError
 
     content = self.cleaned_data.get(field_name)
 
     if user_logic.isDeveloper():
       return content
-
-    sanitizer = feedparser._HTMLSanitizer('utf-8')
-    sanitizer.feed(content)
-    content = sanitizer.output()
-    content = content.decode('utf-8')
+    
+    try:
+      cleaner = HtmlSanitizer.Cleaner()
+      cleaner.string = content
+      cleaner.clean()
+    except HTMLParseError, msg:
+      raise forms.ValidationError(msg)
+    
+    content = cleaner.string
     content = content.strip().replace('\r\n', '\n')
 
     return content