app/django/utils/text.py
changeset 323 ff1a9aa48cfd
parent 54 03e267d67478
--- a/app/django/utils/text.py	Tue Oct 14 12:36:55 2008 +0000
+++ b/app/django/utils/text.py	Tue Oct 14 16:00:59 2008 +0000
@@ -3,6 +3,7 @@
 from django.utils.encoding import force_unicode
 from django.utils.functional import allow_lazy
 from django.utils.translation import ugettext_lazy
+from htmlentitydefs import name2codepoint
 
 # Capitalizes the first letter of a string.
 capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:]
@@ -118,7 +119,7 @@
     spaces are converted to underscores; and all non-filename-safe characters
     are removed.
     >>> get_valid_filename("john's portrait in 2004.jpg")
-    'johns_portrait_in_2004.jpg'
+    u'johns_portrait_in_2004.jpg'
     """
     s = force_unicode(s).strip().replace(' ', '_')
     return re.sub(r'[^-A-Za-z0-9_.]', '', s)
@@ -127,15 +128,15 @@
 def get_text_list(list_, last_word=ugettext_lazy(u'or')):
     """
     >>> get_text_list(['a', 'b', 'c', 'd'])
-    'a, b, c or d'
+    u'a, b, c or d'
     >>> get_text_list(['a', 'b', 'c'], 'and')
-    'a, b and c'
+    u'a, b and c'
     >>> get_text_list(['a', 'b'], 'and')
-    'a and b'
+    u'a and b'
     >>> get_text_list(['a'])
-    'a'
+    u'a'
     >>> get_text_list([])
-    ''
+    u''
     """
     if len(list_) == 0: return u''
     if len(list_) == 1: return force_unicode(list_[0])
@@ -198,14 +199,18 @@
 
 smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
 def smart_split(text):
-    """
+    r"""
     Generator that splits a string by spaces, leaving quoted phrases together.
     Supports both single and double quotes, and supports escaping quotes with
     backslashes. In the output, strings will keep their initial and trailing
     quote marks.
 
-    >>> list(smart_split('This is "a person\'s" test.'))
-    ['This', 'is', '"a person\'s"', 'test.']
+    >>> list(smart_split(r'This is "a person\'s" test.'))
+    [u'This', u'is', u'"a person\\\'s"', u'test.']
+    >>> list(smart_split(r"Another 'person\'s' test.")) 
+    [u'Another', u"'person's'", u'test.']
+    >>> list(smart_split(r'A "\"funky\" style" test.')) 
+    [u'A', u'""funky" style"', u'test.']
     """
     text = force_unicode(text)
     for bit in smart_split_re.finditer(text):
@@ -218,3 +223,26 @@
             yield bit
 smart_split = allow_lazy(smart_split, unicode)
 
+def _replace_entity(match):
+     text = match.group(1)
+     if text[0] == u'#':
+         text = text[1:]
+         try:
+             if text[0] in u'xX':
+                 c = int(text[1:], 16)
+             else:
+                 c = int(text)
+             return unichr(c)
+         except ValueError:
+             return match.group(0)
+     else:
+         try:
+             return unichr(name2codepoint[text])
+         except (ValueError, KeyError):
+             return match.group(0)
+
+_entity_re = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")
+
+def unescape_entities(text):
+     return _entity_re.sub(_replace_entity, text)
+unescape_entities = allow_lazy(unescape_entities, unicode)