thirdparty/google_appengine/lib/django/django/utils/text.py
author Lennard de Rijk <ljvderijk@gmail.com>
Sun, 21 Jun 2009 18:22:02 +0200
changeset 2416 96ff51144dca
parent 109 620f9b141567
permissions -rwxr-xr-x
Set new Melange version number to 0-5-20090621 in app.yaml.template
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
109
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     1
import re
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     2
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     3
from django.conf import settings
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     4
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     5
# Capitalizes the first letter of a string.
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     6
capfirst = lambda x: x and x[0].upper() + x[1:]
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     7
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     8
def wrap(text, width):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
     9
    """
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    10
    A word-wrap function that preserves existing line breaks and most spaces in
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    11
    the text. Expects that existing line breaks are posix newlines.
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    12
    """
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    13
    def _generator():
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    14
        it = iter(text.split(' '))
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    15
        word = it.next()
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    16
        yield word
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    17
        pos = len(word) - word.rfind('\n') - 1
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    18
        for word in it:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    19
            if "\n" in word:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    20
                lines = word.split('\n')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    21
            else:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    22
                lines = (word,)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    23
            pos += len(lines[0]) + 1
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    24
            if pos > width:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    25
                yield '\n'
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    26
                pos = len(lines[-1])
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    27
            else:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    28
                yield ' '
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    29
                if len(lines) > 1:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    30
                    pos = len(lines[-1])
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    31
            yield word
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    32
    return "".join(_generator())
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    33
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    34
def truncate_words(s, num):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    35
    "Truncates a string after a certain number of words."
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    36
    length = int(num)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    37
    words = s.split()
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    38
    if len(words) > length:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    39
        words = words[:length]
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    40
        if not words[-1].endswith('...'):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    41
            words.append('...')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    42
    return ' '.join(words)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    43
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    44
def truncate_html_words(s, num):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    45
    """
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    46
    Truncates html to a certain number of words (not counting tags and comments).
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    47
    Closes opened tags if they were correctly closed in the given html.
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    48
    """
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    49
    length = int(num)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    50
    if length <= 0:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    51
        return ''
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    52
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    53
    # Set up regular expressions
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    54
    re_words = re.compile(r'&.*?;|<.*?>|([A-Za-z0-9][\w-]*)')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    55
    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    56
    # Count non-HTML words and keep note of open tags
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    57
    pos = 0
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    58
    ellipsis_pos = 0
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    59
    words = 0
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    60
    open_tags = []
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    61
    while words <= length:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    62
        m = re_words.search(s, pos)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    63
        if not m:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    64
            # Checked through whole string
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    65
            break
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    66
        pos = m.end(0)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    67
        if m.group(1):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    68
            # It's an actual non-HTML word
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    69
            words += 1
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    70
            if words == length:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    71
                ellipsis_pos = pos
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    72
            continue
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    73
        # Check for tag
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    74
        tag = re_tag.match(m.group(0))
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    75
        if not tag or ellipsis_pos:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    76
            # Don't worry about non tags or tags after our truncate point
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    77
            continue
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    78
        closing_tag, tagname, self_closing = tag.groups()
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    79
        tagname = tagname.lower()  # Element names are always case-insensitive
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    80
        if self_closing or tagname in html4_singlets:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    81
            pass
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    82
        elif closing_tag:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    83
            # Check for match in open tags list
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    84
            try:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    85
                i = open_tags.index(tagname)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    86
            except ValueError:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    87
                pass
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    88
            else:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    89
                # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    90
                open_tags = open_tags[i+1:]
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    91
        else:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    92
            # Add it to the start of the open tags list
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    93
            open_tags.insert(0, tagname)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    94
    if words <= length:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    95
        # Don't try to close tags if we don't need to truncate
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    96
        return s
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    97
    out = s[:ellipsis_pos] + ' ...'
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    98
    # Close any tags still open
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
    99
    for tag in open_tags:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   100
        out += '</%s>' % tag
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   101
    # Return string
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   102
    return out
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   103
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   104
def get_valid_filename(s):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   105
    """
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   106
    Returns the given string converted to a string that can be used for a clean
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   107
    filename. Specifically, leading and trailing spaces are removed; other
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   108
    spaces are converted to underscores; and all non-filename-safe characters
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   109
    are removed.
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   110
    >>> get_valid_filename("john's portrait in 2004.jpg")
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   111
    'johns_portrait_in_2004.jpg'
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   112
    """
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   113
    s = s.strip().replace(' ', '_')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   114
    return re.sub(r'[^-A-Za-z0-9_.]', '', s)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   115
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   116
def get_text_list(list_, last_word='or'):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   117
    """
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   118
    >>> get_text_list(['a', 'b', 'c', 'd'])
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   119
    'a, b, c or d'
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   120
    >>> get_text_list(['a', 'b', 'c'], 'and')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   121
    'a, b and c'
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   122
    >>> get_text_list(['a', 'b'], 'and')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   123
    'a and b'
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   124
    >>> get_text_list(['a'])
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   125
    'a'
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   126
    >>> get_text_list([])
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   127
    ''
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   128
    """
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   129
    if len(list_) == 0: return ''
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   130
    if len(list_) == 1: return list_[0]
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   131
    return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1])
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   132
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   133
def normalize_newlines(text):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   134
    return re.sub(r'\r\n|\r|\n', '\n', text)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   135
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   136
def recapitalize(text):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   137
    "Recapitalizes text, placing caps after end-of-sentence punctuation."
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   138
#     capwords = ()
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   139
    text = text.lower()
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   140
    capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   141
    text = capsRE.sub(lambda x: x.group(1).upper(), text)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   142
#     for capword in capwords:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   143
#         capwordRE = re.compile(r'\b%s\b' % capword, re.I)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   144
#         text = capwordRE.sub(capword, text)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   145
    return text
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   146
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   147
def phone2numeric(phone):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   148
    "Converts a phone number with letters into its numeric equivalent."
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   149
    letters = re.compile(r'[A-PR-Y]', re.I)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   150
    char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3',
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   151
         'd': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5',
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   152
         'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7',
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   153
         's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8',
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   154
         'y': '9', 'x': '9'}.get(m.group(0).lower())
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   155
    return letters.sub(char2number, phone)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   156
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   157
# From http://www.xhaus.com/alan/python/httpcomp.html#gzip
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   158
# Used with permission.
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   159
def compress_string(s):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   160
    import cStringIO, gzip
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   161
    zbuf = cStringIO.StringIO()
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   162
    zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   163
    zfile.write(s)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   164
    zfile.close()
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   165
    return zbuf.getvalue()
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   166
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   167
ustring_re = re.compile(u"([\u0080-\uffff])")
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   168
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   169
def javascript_quote(s, quote_double_quotes=False):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   170
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   171
    def fix(match):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   172
        return r"\u%04x" % ord(match.group(1))
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   173
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   174
    if type(s) == str:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   175
        s = s.decode(settings.DEFAULT_CHARSET)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   176
    elif type(s) != unicode:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   177
        raise TypeError, s
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   178
    s = s.replace('\\', '\\\\')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   179
    s = s.replace('\r', '\\r')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   180
    s = s.replace('\n', '\\n')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   181
    s = s.replace('\t', '\\t')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   182
    s = s.replace("'", "\\'")
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   183
    if quote_double_quotes:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   184
        s = s.replace('"', '&quot;')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   185
    return str(ustring_re.sub(fix, s))
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   186
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   187
smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   188
def smart_split(text):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   189
    """
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   190
    Generator that splits a string by spaces, leaving quoted phrases together.
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   191
    Supports both single and double quotes, and supports escaping quotes with
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   192
    backslashes. In the output, strings will keep their initial and trailing
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   193
    quote marks.
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   194
    >>> list(smart_split('This is "a person\'s" test.'))
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   195
    ['This', 'is', '"a person\'s"', 'test.']
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   196
    """
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   197
    for bit in smart_split_re.finditer(text):
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   198
        bit = bit.group(0)
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   199
        if bit[0] == '"':
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   200
            yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"'
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   201
        elif bit[0] == "'":
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   202
            yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   203
        else:
620f9b141567 Load ../../google_appengine into trunk/thirdparty/google_appengine.
Todd Larsen <tlarsen@google.com>
parents:
diff changeset
   204
            yield bit