app/soc/logic/cleaning.py
changeset 2327 6c7d0fba105c
parent 2115 fafd021def7e
child 2329 4e487ffd4102
equal deleted inserted replaced
2326:16c1b1412b0d 2327:6c7d0fba105c
    22     '"Sverre Rabbelier" <sverre@rabbelier.nl>',
    22     '"Sverre Rabbelier" <sverre@rabbelier.nl>',
    23     '"Lennard de Rijk" <ljvderijk@gmail.com>',
    23     '"Lennard de Rijk" <ljvderijk@gmail.com>',
    24     ]
    24     ]
    25 
    25 
    26 
    26 
    27 import feedparser
    27 from htmlsanitizer import HtmlSanitizer
    28 
    28 
    29 from google.appengine.api import users
    29 from google.appengine.api import users
    30 
    30 
    31 from django import forms
    31 from django import forms
    32 from django.forms.util import ErrorList
    32 from django.forms.util import ErrorList
   377 
   377 
   378   @check_field_is_empty(field_name)
   378   @check_field_is_empty(field_name)
   379   def wrapped(self):
   379   def wrapped(self):
   380     """Decorator wrapper method.
   380     """Decorator wrapper method.
   381     """
   381     """
       
   382     from HTMLParser import HTMLParseError
   382 
   383 
   383     content = self.cleaned_data.get(field_name)
   384     content = self.cleaned_data.get(field_name)
   384 
   385 
   385     if user_logic.isDeveloper():
   386     if user_logic.isDeveloper():
   386       return content
   387       return content
   387 
   388     
   388     sanitizer = feedparser._HTMLSanitizer('utf-8')
   389     try:
   389     sanitizer.feed(content)
   390       cleaner = HtmlSanitizer.Cleaner()
   390     content = sanitizer.output()
   391       cleaner.string = content
   391     content = content.decode('utf-8')
   392       cleaner.clean()
       
   393     except HTMLParseError, msg:
       
   394       raise forms.ValidationError(msg)
       
   395     
       
   396     content = cleaner.string
   392     content = content.strip().replace('\r\n', '\n')
   397     content = content.strip().replace('\r\n', '\n')
   393 
   398 
   394     return content
   399     return content
   395 
   400 
   396   return wrapped
   401   return wrapped