--- a/app/django/utils/text.py Tue Oct 14 12:36:55 2008 +0000
+++ b/app/django/utils/text.py Tue Oct 14 16:00:59 2008 +0000
@@ -3,6 +3,7 @@
from django.utils.encoding import force_unicode
from django.utils.functional import allow_lazy
from django.utils.translation import ugettext_lazy
+from htmlentitydefs import name2codepoint
# Capitalizes the first letter of a string.
capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:]
@@ -118,7 +119,7 @@
spaces are converted to underscores; and all non-filename-safe characters
are removed.
>>> get_valid_filename("john's portrait in 2004.jpg")
- 'johns_portrait_in_2004.jpg'
+ u'johns_portrait_in_2004.jpg'
"""
s = force_unicode(s).strip().replace(' ', '_')
return re.sub(r'[^-A-Za-z0-9_.]', '', s)
@@ -127,15 +128,15 @@
def get_text_list(list_, last_word=ugettext_lazy(u'or')):
"""
>>> get_text_list(['a', 'b', 'c', 'd'])
- 'a, b, c or d'
+ u'a, b, c or d'
>>> get_text_list(['a', 'b', 'c'], 'and')
- 'a, b and c'
+ u'a, b and c'
>>> get_text_list(['a', 'b'], 'and')
- 'a and b'
+ u'a and b'
>>> get_text_list(['a'])
- 'a'
+ u'a'
>>> get_text_list([])
- ''
+ u''
"""
if len(list_) == 0: return u''
if len(list_) == 1: return force_unicode(list_[0])
@@ -198,14 +199,18 @@
smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
def smart_split(text):
- """
+ r"""
Generator that splits a string by spaces, leaving quoted phrases together.
Supports both single and double quotes, and supports escaping quotes with
backslashes. In the output, strings will keep their initial and trailing
quote marks.
- >>> list(smart_split('This is "a person\'s" test.'))
- ['This', 'is', '"a person\'s"', 'test.']
+ >>> list(smart_split(r'This is "a person\'s" test.'))
+ [u'This', u'is', u'"a person\\\'s"', u'test.']
+ >>> list(smart_split(r"Another 'person\'s' test."))
+ [u'Another', u"'person's'", u'test.']
+ >>> list(smart_split(r'A "\"funky\" style" test.'))
+ [u'A', u'""funky" style"', u'test.']
"""
text = force_unicode(text)
for bit in smart_split_re.finditer(text):
@@ -218,3 +223,26 @@
yield bit
smart_split = allow_lazy(smart_split, unicode)
+def _replace_entity(match):
+ text = match.group(1)
+ if text[0] == u'#':
+ text = text[1:]
+ try:
+ if text[0] in u'xX':
+ c = int(text[1:], 16)
+ else:
+ c = int(text)
+ return unichr(c)
+ except ValueError:
+ return match.group(0)
+ else:
+ try:
+ return unichr(name2codepoint[text])
+ except (ValueError, KeyError):
+ return match.group(0)
+
+_entity_re = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")
+
+def unescape_entities(text):
+ return _entity_re.sub(_replace_entity, text)
+unescape_entities = allow_lazy(unescape_entities, unicode)