app/django/utils/html.py
author Madhusudan.C.S <madhusudancs@gmail.com>
Mon, 24 Aug 2009 04:31:23 +0530
changeset 2787 8408741aee63
parent 323 ff1a9aa48cfd
permissions -rw-r--r--
Reverting last 4 patches containing GHOP related views. As Lennard suggested all the model patches should come first followed by the logic and views patches, to make sure nothing committed breaks the existing code after thorough review.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     1
"""HTML utilities suitable for global use."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     2
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     3
import re
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     4
import string
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     5
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     6
from django.utils.safestring import SafeData, mark_safe
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     7
from django.utils.encoding import force_unicode
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     8
from django.utils.functional import allow_lazy
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     9
from django.utils.http import urlquote
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    10
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    11
# Configuration for urlize() function.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    12
LEADING_PUNCTUATION  = ['(', '<', '&lt;']
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    13
TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '&gt;']
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    14
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    15
# List of possible strings used for bullets in bulleted lists.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    16
DOTS = ['&middot;', '*', '\xe2\x80\xa2', '&#149;', '&bull;', '&#8226;']
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    17
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    18
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    19
word_split_re = re.compile(r'(\s+)')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    20
punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    21
    ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    22
    '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    23
simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    24
link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    25
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    26
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    27
trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    28
del x # Temporary variable
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    29
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    30
def escape(html):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    31
    """Returns the given HTML with ampersands, quotes and carets encoded."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    32
    return mark_safe(force_unicode(html).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;'))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    33
escape = allow_lazy(escape, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    34
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    35
def conditional_escape(html):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    36
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    37
    Similar to escape(), except that it doesn't operate on pre-escaped strings.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    38
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    39
    if isinstance(html, SafeData):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    40
        return html
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    41
    else:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    42
        return escape(html)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    43
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    44
def linebreaks(value, autoescape=False):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    45
    """Converts newlines into <p> and <br />s."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    46
    value = re.sub(r'\r\n|\r|\n', '\n', force_unicode(value)) # normalize newlines
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    47
    paras = re.split('\n{2,}', value)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    48
    if autoescape:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    49
        paras = [u'<p>%s</p>' % escape(p.strip()).replace('\n', '<br />') for p in paras]
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    50
    else:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    51
        paras = [u'<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    52
    return u'\n\n'.join(paras)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    53
linebreaks = allow_lazy(linebreaks, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    55
def strip_tags(value):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    56
    """Returns the given HTML with all tags stripped."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    57
    return re.sub(r'<[^>]*?>', '', force_unicode(value))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    58
strip_tags = allow_lazy(strip_tags)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    59
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    60
def strip_spaces_between_tags(value):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    61
    """Returns the given HTML with spaces between tags removed."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    62
    return re.sub(r'>\s+<', '><', force_unicode(value))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    63
strip_spaces_between_tags = allow_lazy(strip_spaces_between_tags, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    64
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    65
def strip_entities(value):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    66
    """Returns the given HTML with all entities (&something;) stripped."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    67
    return re.sub(r'&(?:\w+|#\d+);', '', force_unicode(value))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    68
strip_entities = allow_lazy(strip_entities, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    69
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    70
def fix_ampersands(value):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    71
    """Returns the given HTML with all unencoded ampersands encoded correctly."""
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    72
    return unencoded_ampersands_re.sub('&amp;', force_unicode(value))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    73
fix_ampersands = allow_lazy(fix_ampersands, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    74
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    75
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    76
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    77
    Converts any URLs in text into clickable links.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    78
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
    79
    Works on http://, https://, www. links and links ending in .org, .net or
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
    80
    .com. Links can have trailing punctuation (periods, commas, close-parens)
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
    81
    and leading punctuation (opening parens) and it'll still do the right
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
    82
    thing.
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    83
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    84
    If trim_url_limit is not None, the URLs in link text longer than this limit
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    85
    will truncated to trim_url_limit-3 characters and appended with an elipsis.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    86
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    87
    If nofollow is True, the URLs in link text will get a rel="nofollow"
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    88
    attribute.
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
    89
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
    90
    If autoescape is True, the link text and URLs will get autoescaped.
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    91
    """
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
    92
    trim_url = lambda x, limit=trim_url_limit: limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    93
    safe_input = isinstance(text, SafeData)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    94
    words = word_split_re.split(force_unicode(text))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    95
    nofollow_attr = nofollow and ' rel="nofollow"' or ''
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    96
    for i, word in enumerate(words):
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
    97
        match = None
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
    98
        if '.' in word or '@' in word or ':' in word:
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
    99
            match = punctuation_re.match(word)
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   100
        if match:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   101
            lead, middle, trail = match.groups()
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   102
            # Make URL we want to point to.
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   103
            url = None
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   104
            if middle.startswith('http://') or middle.startswith('https://'):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   105
                url = urlquote(middle, safe='/&=:;#?+*')
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   106
            elif middle.startswith('www.') or ('@' not in middle and \
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   107
                    middle and middle[0] in string.ascii_letters + string.digits and \
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   108
                    (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   109
                url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   110
            elif '@' in middle and not ':' in middle and simple_email_re.match(middle):
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   111
                url = 'mailto:%s' % middle
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   112
                nofollow_attr = ''
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   113
            # Make link.
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   114
            if url:
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   115
                trimmed = trim_url(middle)
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   116
                if autoescape and not safe_input:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   117
                    lead, trail = escape(lead), escape(trail)
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   118
                    url, trimmed = escape(url), escape(trimmed)
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   119
                middle = '<a href="%s"%s>%s</a>' % (url, nofollow_attr, trimmed)
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   120
                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   121
            else:
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   122
                if safe_input:
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   123
                    words[i] = mark_safe(word)
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   124
                elif autoescape:
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   125
                    words[i] = escape(word)
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   126
        elif safe_input:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   127
            words[i] = mark_safe(word)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   128
        elif autoescape:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   129
            words[i] = escape(word)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   130
    return u''.join(words)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   131
urlize = allow_lazy(urlize, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   132
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   133
def clean_html(text):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   134
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   135
    Clean the given HTML.  Specifically, do the following:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   136
        * Convert <b> and <i> to <strong> and <em>.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   137
        * Encode all ampersands correctly.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   138
        * Remove all "target" attributes from <a> tags.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   139
        * Remove extraneous HTML, such as presentational tags that open and
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   140
          immediately close and <br clear="all">.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   141
        * Convert hard-coded bullets into HTML unordered lists.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   142
        * Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   143
          bottom of the text.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   144
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   145
    from django.utils.text import normalize_newlines
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   146
    text = normalize_newlines(force_unicode(text))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   147
    text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   148
    text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   149
    text = fix_ampersands(text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   150
    # Remove all target="" attributes from <a> tags.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   151
    text = link_target_attribute_re.sub('\\1', text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   152
    # Trim stupid HTML such as <br clear="all">.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   153
    text = html_gunk_re.sub('', text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   154
    # Convert hard-coded bullets into HTML unordered lists.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   155
    def replace_p_tags(match):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   156
        s = match.group().replace('</p>', '</li>')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   157
        for d in DOTS:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   158
            s = s.replace('<p>%s' % d, '<li>')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   159
        return u'<ul>\n%s\n</ul>' % s
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   160
    text = hard_coded_bullets_re.sub(replace_p_tags, text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   161
    # Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the bottom
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   162
    # of the text.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   163
    text = trailing_empty_content_re.sub('', text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   164
    return text
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   165
clean_html = allow_lazy(clean_html, unicode)