app/django/utils/text.py
author Madhusudan.C.S <madhusudancs@gmail.com>
Mon, 24 Aug 2009 04:31:23 +0530
changeset 2787 8408741aee63
parent 323 ff1a9aa48cfd
permissions -rw-r--r--
Reverting last 4 patches containing GHOP related views. As Lennard suggested all the model patches should come first followed by the logic and views patches, to make sure nothing committed breaks the existing code after thorough review.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     1
import re
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     2
from django.conf import settings
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     3
from django.utils.encoding import force_unicode
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     4
from django.utils.functional import allow_lazy
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     5
from django.utils.translation import ugettext_lazy
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
     6
from htmlentitydefs import name2codepoint
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     7
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     8
# Capitalizes the first letter of a string.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     9
capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:]
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    10
capfirst = allow_lazy(capfirst, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    11
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    12
def wrap(text, width):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    13
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    14
    A word-wrap function that preserves existing line breaks and most spaces in
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    15
    the text. Expects that existing line breaks are posix newlines.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    16
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    17
    text = force_unicode(text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    18
    def _generator():
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    19
        it = iter(text.split(' '))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    20
        word = it.next()
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    21
        yield word
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    22
        pos = len(word) - word.rfind('\n') - 1
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    23
        for word in it:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    24
            if "\n" in word:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    25
                lines = word.split('\n')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    26
            else:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    27
                lines = (word,)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    28
            pos += len(lines[0]) + 1
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    29
            if pos > width:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    30
                yield '\n'
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    31
                pos = len(lines[-1])
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    32
            else:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    33
                yield ' '
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    34
                if len(lines) > 1:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    35
                    pos = len(lines[-1])
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    36
            yield word
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    37
    return u''.join(_generator())
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    38
wrap = allow_lazy(wrap, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    39
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    40
def truncate_words(s, num):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    41
    "Truncates a string after a certain number of words."
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    42
    s = force_unicode(s)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    43
    length = int(num)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    44
    words = s.split()
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    45
    if len(words) > length:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    46
        words = words[:length]
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    47
        if not words[-1].endswith('...'):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    48
            words.append('...')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    49
    return u' '.join(words)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    50
truncate_words = allow_lazy(truncate_words, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    51
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    52
def truncate_html_words(s, num):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    53
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    54
    Truncates html to a certain number of words (not counting tags and
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    55
    comments). Closes opened tags if they were correctly closed in the given
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    56
    html.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    57
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    58
    s = force_unicode(s)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    59
    length = int(num)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    60
    if length <= 0:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    61
        return u''
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    62
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    63
    # Set up regular expressions
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    64
    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    65
    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    66
    # Count non-HTML words and keep note of open tags
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    67
    pos = 0
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    68
    ellipsis_pos = 0
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    69
    words = 0
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    70
    open_tags = []
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    71
    while words <= length:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    72
        m = re_words.search(s, pos)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    73
        if not m:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    74
            # Checked through whole string
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    75
            break
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    76
        pos = m.end(0)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    77
        if m.group(1):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    78
            # It's an actual non-HTML word
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    79
            words += 1
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    80
            if words == length:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    81
                ellipsis_pos = pos
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    82
            continue
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    83
        # Check for tag
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    84
        tag = re_tag.match(m.group(0))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    85
        if not tag or ellipsis_pos:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    86
            # Don't worry about non tags or tags after our truncate point
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    87
            continue
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    88
        closing_tag, tagname, self_closing = tag.groups()
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    89
        tagname = tagname.lower()  # Element names are always case-insensitive
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    90
        if self_closing or tagname in html4_singlets:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    91
            pass
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    92
        elif closing_tag:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    93
            # Check for match in open tags list
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    94
            try:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    95
                i = open_tags.index(tagname)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    96
            except ValueError:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    97
                pass
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    98
            else:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    99
                # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   100
                open_tags = open_tags[i+1:]
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   101
        else:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   102
            # Add it to the start of the open tags list
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   103
            open_tags.insert(0, tagname)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   104
    if words <= length:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   105
        # Don't try to close tags if we don't need to truncate
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   106
        return s
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   107
    out = s[:ellipsis_pos] + ' ...'
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   108
    # Close any tags still open
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   109
    for tag in open_tags:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   110
        out += '</%s>' % tag
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   111
    # Return string
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   112
    return out
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   113
truncate_html_words = allow_lazy(truncate_html_words, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   114
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   115
def get_valid_filename(s):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   116
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   117
    Returns the given string converted to a string that can be used for a clean
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   118
    filename. Specifically, leading and trailing spaces are removed; other
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   119
    spaces are converted to underscores; and all non-filename-safe characters
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   120
    are removed.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   121
    >>> get_valid_filename("john's portrait in 2004.jpg")
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   122
    u'johns_portrait_in_2004.jpg'
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   123
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   124
    s = force_unicode(s).strip().replace(' ', '_')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   125
    return re.sub(r'[^-A-Za-z0-9_.]', '', s)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   126
get_valid_filename = allow_lazy(get_valid_filename, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   127
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   128
def get_text_list(list_, last_word=ugettext_lazy(u'or')):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   129
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   130
    >>> get_text_list(['a', 'b', 'c', 'd'])
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   131
    u'a, b, c or d'
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   132
    >>> get_text_list(['a', 'b', 'c'], 'and')
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   133
    u'a, b and c'
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   134
    >>> get_text_list(['a', 'b'], 'and')
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   135
    u'a and b'
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   136
    >>> get_text_list(['a'])
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   137
    u'a'
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   138
    >>> get_text_list([])
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   139
    u''
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   140
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   141
    if len(list_) == 0: return u''
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   142
    if len(list_) == 1: return force_unicode(list_[0])
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   143
    return u'%s %s %s' % (', '.join([force_unicode(i) for i in list_][:-1]), force_unicode(last_word), force_unicode(list_[-1]))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   144
get_text_list = allow_lazy(get_text_list, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   145
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   146
def normalize_newlines(text):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   147
    return force_unicode(re.sub(r'\r\n|\r|\n', '\n', text))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   148
normalize_newlines = allow_lazy(normalize_newlines, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   149
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   150
def recapitalize(text):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   151
    "Recapitalizes text, placing caps after end-of-sentence punctuation."
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   152
    text = force_unicode(text).lower()
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   153
    capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   154
    text = capsRE.sub(lambda x: x.group(1).upper(), text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   155
    return text
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   156
recapitalize = allow_lazy(recapitalize)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   157
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   158
def phone2numeric(phone):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   159
    "Converts a phone number with letters into its numeric equivalent."
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   160
    letters = re.compile(r'[A-PR-Y]', re.I)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   161
    char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3',
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   162
         'd': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5',
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   163
         'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7',
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   164
         's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8',
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   165
         'y': '9', 'x': '9'}.get(m.group(0).lower())
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   166
    return letters.sub(char2number, phone)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   167
phone2numeric = allow_lazy(phone2numeric)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   168
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   169
# From http://www.xhaus.com/alan/python/httpcomp.html#gzip
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   170
# Used with permission.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   171
def compress_string(s):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   172
    import cStringIO, gzip
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   173
    zbuf = cStringIO.StringIO()
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   174
    zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   175
    zfile.write(s)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   176
    zfile.close()
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   177
    return zbuf.getvalue()
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   178
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   179
ustring_re = re.compile(u"([\u0080-\uffff])")
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   180
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   181
def javascript_quote(s, quote_double_quotes=False):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   182
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   183
    def fix(match):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   184
        return r"\u%04x" % ord(match.group(1))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   185
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   186
    if type(s) == str:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   187
        s = s.decode('utf-8')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   188
    elif type(s) != unicode:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   189
        raise TypeError, s
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   190
    s = s.replace('\\', '\\\\')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   191
    s = s.replace('\r', '\\r')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   192
    s = s.replace('\n', '\\n')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   193
    s = s.replace('\t', '\\t')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   194
    s = s.replace("'", "\\'")
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   195
    if quote_double_quotes:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   196
        s = s.replace('"', '&quot;')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   197
    return str(ustring_re.sub(fix, s))
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   198
javascript_quote = allow_lazy(javascript_quote, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   199
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   200
smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   201
def smart_split(text):
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   202
    r"""
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   203
    Generator that splits a string by spaces, leaving quoted phrases together.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   204
    Supports both single and double quotes, and supports escaping quotes with
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   205
    backslashes. In the output, strings will keep their initial and trailing
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   206
    quote marks.
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   207
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   208
    >>> list(smart_split(r'This is "a person\'s" test.'))
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   209
    [u'This', u'is', u'"a person\\\'s"', u'test.']
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   210
    >>> list(smart_split(r"Another 'person\'s' test.")) 
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   211
    [u'Another', u"'person's'", u'test.']
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   212
    >>> list(smart_split(r'A "\"funky\" style" test.')) 
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   213
    [u'A', u'""funky" style"', u'test.']
54
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   214
    """
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   215
    text = force_unicode(text)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   216
    for bit in smart_split_re.finditer(text):
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   217
        bit = bit.group(0)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   218
        if bit[0] == '"' and bit[-1] == '"':
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   219
            yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"'
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   220
        elif bit[0] == "'" and bit[-1] == "'":
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   221
            yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   222
        else:
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   223
            yield bit
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   224
smart_split = allow_lazy(smart_split, unicode)
03e267d67478 Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   225
323
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   226
def _replace_entity(match):
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   227
     text = match.group(1)
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   228
     if text[0] == u'#':
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   229
         text = text[1:]
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   230
         try:
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   231
             if text[0] in u'xX':
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   232
                 c = int(text[1:], 16)
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   233
             else:
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   234
                 c = int(text)
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   235
             return unichr(c)
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   236
         except ValueError:
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   237
             return match.group(0)
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   238
     else:
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   239
         try:
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   240
             return unichr(name2codepoint[text])
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   241
         except (ValueError, KeyError):
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   242
             return match.group(0)
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   243
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   244
_entity_re = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   245
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   246
def unescape_entities(text):
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   247
     return _entity_re.sub(_replace_entity, text)
ff1a9aa48cfd Load ../vendor/django into trunk/app/django.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents: 54
diff changeset
   248
unescape_entities = allow_lazy(unescape_entities, unicode)