app/django/utils/html.py
author Todd Larsen <tlarsen@google.com>
Mon, 29 Sep 2008 15:46:42 +0000
changeset 208 e076aee6e90f
parent 54 03e267d67478
child 323 ff1a9aa48cfd
permissions -rw-r--r--
Take advantage of the Model inheritance provided by polymodel.PolyModel to have Club, School, Sponsor, and Organization actually inherit from the Group Model class, rather than being composed via ReferenceProperties. Patch by: Todd Larsen Review by: Pawel Solyga, Sverre Rabbelier, Augie Fackler Review URL: http://codereviews.googleopensourceprograms.com/606
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     1
"""HTML utilities suitable for global use."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     2
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     3
import re
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     4
import string
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     5
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     6
from django.utils.safestring import SafeData, mark_safe
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     7
from django.utils.encoding import force_unicode
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     8
from django.utils.functional import allow_lazy
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     9
from django.utils.http import urlquote
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    10
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    11
# Configuration for urlize() function.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    12
LEADING_PUNCTUATION  = ['(', '<', '&lt;']
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    13
TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '&gt;']
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    14
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    15
# List of possible strings used for bullets in bulleted lists.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    16
DOTS = ['&middot;', '*', '\xe2\x80\xa2', '&#149;', '&bull;', '&#8226;']
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    17
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    18
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    19
word_split_re = re.compile(r'(\s+)')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    20
punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    21
    ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    22
    '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    23
simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    24
link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    25
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    26
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    27
trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    28
del x # Temporary variable
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    29
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    30
def escape(html):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    31
    """Returns the given HTML with ampersands, quotes and carets encoded."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    32
    return mark_safe(force_unicode(html).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;'))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    33
escape = allow_lazy(escape, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    34
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    35
def conditional_escape(html):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    36
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    37
    Similar to escape(), except that it doesn't operate on pre-escaped strings.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    38
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    39
    if isinstance(html, SafeData):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    40
        return html
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    41
    else:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    42
        return escape(html)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    43
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    44
def linebreaks(value, autoescape=False):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    45
    """Converts newlines into <p> and <br />s."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    46
    value = re.sub(r'\r\n|\r|\n', '\n', force_unicode(value)) # normalize newlines
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    47
    paras = re.split('\n{2,}', value)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    48
    if autoescape:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    49
        paras = [u'<p>%s</p>' % escape(p.strip()).replace('\n', '<br />') for p in paras]
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    50
    else:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    51
        paras = [u'<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    52
    return u'\n\n'.join(paras)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    53
linebreaks = allow_lazy(linebreaks, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    55
def strip_tags(value):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    56
    """Returns the given HTML with all tags stripped."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    57
    return re.sub(r'<[^>]*?>', '', force_unicode(value))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    58
strip_tags = allow_lazy(strip_tags)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    59
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    60
def strip_spaces_between_tags(value):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    61
    """Returns the given HTML with spaces between tags removed."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    62
    return re.sub(r'>\s+<', '><', force_unicode(value))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    63
strip_spaces_between_tags = allow_lazy(strip_spaces_between_tags, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    64
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    65
def strip_entities(value):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    66
    """Returns the given HTML with all entities (&something;) stripped."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    67
    return re.sub(r'&(?:\w+|#\d+);', '', force_unicode(value))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    68
strip_entities = allow_lazy(strip_entities, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    69
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    70
def fix_ampersands(value):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    71
    """Returns the given HTML with all unencoded ampersands encoded correctly."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    72
    return unencoded_ampersands_re.sub('&amp;', force_unicode(value))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    73
fix_ampersands = allow_lazy(fix_ampersands, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    74
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    75
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    76
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    77
    Converts any URLs in text into clickable links.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    78
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    79
    Works on http://, https://, and www. links.  Links can have trailing
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    80
    punctuation (periods, commas, close-parens) and leading punctuation
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    81
    (opening parens) and it'll still do the right thing.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    82
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    83
    If trim_url_limit is not None, the URLs in link text longer than this limit
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    84
    will truncated to trim_url_limit-3 characters and appended with an elipsis.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    85
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    86
    If nofollow is True, the URLs in link text will get a rel="nofollow"
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    87
    attribute.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    88
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    89
    if autoescape:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    90
        trim_url = lambda x, limit=trim_url_limit: conditional_escape(limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    91
    else:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    92
        trim_url = lambda x, limit=trim_url_limit: limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    93
    safe_input = isinstance(text, SafeData)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    94
    words = word_split_re.split(force_unicode(text))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    95
    nofollow_attr = nofollow and ' rel="nofollow"' or ''
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    96
    for i, word in enumerate(words):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    97
        match = punctuation_re.match(word)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    98
        if match:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    99
            lead, middle, trail = match.groups()
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   100
            if safe_input:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   101
                middle = mark_safe(middle)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   102
            if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and \
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   103
                    len(middle) > 0 and middle[0] in string.ascii_letters + string.digits and \
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   104
                    (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   105
                middle = 'http://%s' % middle
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   106
            if middle.startswith('http://') or middle.startswith('https://'):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   107
                url = urlquote(middle, safe='/&=:;#?+*')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   108
                if autoescape and not safe_input:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   109
                    url = escape(url)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   110
                trimmed_url = trim_url(middle)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   111
                middle = '<a href="%s"%s>%s</a>' % (url, nofollow_attr,
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   112
                        trimmed_url)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   113
            elif '@' in middle and not middle.startswith('www.') and \
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   114
                      not ':' in middle and simple_email_re.match(middle):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   115
                if autoescape:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   116
                    middle = conditional_escape(middle)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   117
                middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   118
            if lead + middle + trail != word:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   119
                if autoescape and not safe_input:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   120
                    lead, trail = escape(lead), escape(trail)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   121
                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   122
            elif autoescape and not safe_input:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   123
                words[i] = escape(word)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   124
        elif safe_input:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   125
            words[i] = mark_safe(word)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   126
        elif autoescape:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   127
            words[i] = escape(word)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   128
    return u''.join(words)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   129
urlize = allow_lazy(urlize, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   130
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   131
def clean_html(text):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   132
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   133
    Clean the given HTML.  Specifically, do the following:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   134
        * Convert <b> and <i> to <strong> and <em>.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   135
        * Encode all ampersands correctly.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   136
        * Remove all "target" attributes from <a> tags.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   137
        * Remove extraneous HTML, such as presentational tags that open and
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   138
          immediately close and <br clear="all">.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   139
        * Convert hard-coded bullets into HTML unordered lists.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   140
        * Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   141
          bottom of the text.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   142
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   143
    from django.utils.text import normalize_newlines
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   144
    text = normalize_newlines(force_unicode(text))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   145
    text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   146
    text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   147
    text = fix_ampersands(text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   148
    # Remove all target="" attributes from <a> tags.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   149
    text = link_target_attribute_re.sub('\\1', text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   150
    # Trim stupid HTML such as <br clear="all">.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   151
    text = html_gunk_re.sub('', text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   152
    # Convert hard-coded bullets into HTML unordered lists.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   153
    def replace_p_tags(match):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   154
        s = match.group().replace('</p>', '</li>')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   155
        for d in DOTS:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   156
            s = s.replace('<p>%s' % d, '<li>')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   157
        return u'<ul>\n%s\n</ul>' % s
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   158
    text = hard_coded_bullets_re.sub(replace_p_tags, text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   159
    # Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the bottom
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   160
    # of the text.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   161
    text = trailing_empty_content_re.sub('', text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   162
    return text
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   163
clean_html = allow_lazy(clean_html, unicode)