eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/mercurial/encoding.py
changeset 69 c6bca38c1cbf
equal deleted inserted replaced
68:5ff1fc726848 69:c6bca38c1cbf
       
     1 # encoding.py - character transcoding support for Mercurial
       
     2 #
       
     3 #  Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 import error
       
     9 import unicodedata, locale, os
       
    10 
       
    11 def _getpreferredencoding():
       
    12     '''
       
    13     On darwin, getpreferredencoding ignores the locale environment and
       
    14     always returns mac-roman. http://bugs.python.org/issue6202 fixes this
       
    15     for Python 2.7 and up. This is the same corrected code for earlier
       
    16     Python versions.
       
    17 
       
    18     However, we can't use a version check for this method, as some distributions
       
    19     patch Python to fix this. Instead, we use it as a 'fixer' for the mac-roman
       
    20     encoding, as it is unlikely that this encoding is the actually expected.
       
    21     '''
       
    22     try:
       
    23         locale.CODESET
       
    24     except AttributeError:
       
    25         # Fall back to parsing environment variables :-(
       
    26         return locale.getdefaultlocale()[1]
       
    27 
       
    28     oldloc = locale.setlocale(locale.LC_CTYPE)
       
    29     locale.setlocale(locale.LC_CTYPE, "")
       
    30     result = locale.nl_langinfo(locale.CODESET)
       
    31     locale.setlocale(locale.LC_CTYPE, oldloc)
       
    32 
       
    33     return result
       
    34 
       
    35 _encodingfixers = {
       
    36     '646': lambda: 'ascii',
       
    37     'ANSI_X3.4-1968': lambda: 'ascii',
       
    38     'mac-roman': _getpreferredencoding
       
    39 }
       
    40 
       
    41 try:
       
    42     encoding = os.environ.get("HGENCODING")
       
    43     if not encoding:
       
    44         encoding = locale.getpreferredencoding() or 'ascii'
       
    45         encoding = _encodingfixers.get(encoding, lambda: encoding)()
       
    46 except locale.Error:
       
    47     encoding = 'ascii'
       
    48 encodingmode = os.environ.get("HGENCODINGMODE", "strict")
       
    49 fallbackencoding = 'ISO-8859-1'
       
    50 
       
    51 def tolocal(s):
       
    52     """
       
    53     Convert a string from internal UTF-8 to local encoding
       
    54 
       
    55     All internal strings should be UTF-8 but some repos before the
       
    56     implementation of locale support may contain latin1 or possibly
       
    57     other character sets. We attempt to decode everything strictly
       
    58     using UTF-8, then Latin-1, and failing that, we use UTF-8 and
       
    59     replace unknown characters.
       
    60     """
       
    61     for e in ('UTF-8', fallbackencoding):
       
    62         try:
       
    63             u = s.decode(e) # attempt strict decoding
       
    64             return u.encode(encoding, "replace")
       
    65         except LookupError, k:
       
    66             raise error.Abort("%s, please check your locale settings" % k)
       
    67         except UnicodeDecodeError:
       
    68             pass
       
    69     u = s.decode("utf-8", "replace") # last ditch
       
    70     return u.encode(encoding, "replace")
       
    71 
       
    72 def fromlocal(s):
       
    73     """
       
    74     Convert a string from the local character encoding to UTF-8
       
    75 
       
    76     We attempt to decode strings using the encoding mode set by
       
    77     HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
       
    78     characters will cause an error message. Other modes include
       
    79     'replace', which replaces unknown characters with a special
       
    80     Unicode character, and 'ignore', which drops the character.
       
    81     """
       
    82     try:
       
    83         return s.decode(encoding, encodingmode).encode("utf-8")
       
    84     except UnicodeDecodeError, inst:
       
    85         sub = s[max(0, inst.start - 10):inst.start + 10]
       
    86         raise error.Abort("decoding near '%s': %s!" % (sub, inst))
       
    87     except LookupError, k:
       
    88         raise error.Abort("%s, please check your locale settings" % k)
       
    89 
       
    90 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
       
    91 ambiguous = os.environ.get("HGENCODINGAMBIGUOUS", "narrow")
       
    92 
       
    93 def colwidth(s):
       
    94     "Find the column width of a UTF-8 string for display"
       
    95     d = s.decode(encoding, 'replace')
       
    96     if hasattr(unicodedata, 'east_asian_width'):
       
    97         wide = "WF"
       
    98         if ambiguous == "wide":
       
    99             wide = "WFA"
       
   100         w = unicodedata.east_asian_width
       
   101         return sum([w(c) in wide and 2 or 1 for c in d])
       
   102     return len(d)
       
   103