def clean_document_content(self):
    """Clean method that sanitizes the HTML held in the 'content' field.

    Reads 'content' from self.cleaned_data, runs it through feedparser's
    HTML sanitizer, strips leading/trailing whitespace and converts CRLF
    line endings to LF.

    Returns:
      the sanitized content, or None when no 'content' value is present.
    """
    content = self.cleaned_data.get('content')

    # .get() yields None when there is no cleaned value for the field; in
    # that case there is nothing to sanitize, so do not hand None to the
    # sanitizer.
    if content is None:
        return None

    # Imported at call time so the function carries its own dependency.
    import feedparser

    # NOTE(review): _HTMLSanitizer is a private feedparser class (leading
    # underscore); its signature may differ between feedparser releases.
    sanitizer = feedparser._HTMLSanitizer('utf-8')
    sanitizer.feed(content)
    sanitized = sanitizer.output()

    return sanitized.strip().replace('\r\n', '\n')