eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/hgext/win32mbcs.py
changeset 69 c6bca38c1cbf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/hgext/win32mbcs.py	Sat Jan 08 11:20:57 2011 +0530
@@ -0,0 +1,159 @@
+# win32mbcs.py -- MBCS filename support for Mercurial
+#
+# Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
+#
+# Version: 0.3
+# Author:  Shun-ichi Goto <shunichi.goto@gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+#
+
+'''allow the use of MBCS paths with problematic encodings
+
+Some MBCS encodings are not good for some path operations (i.e.
+splitting path, case conversion, etc.) with its encoded bytes. We call
+such a encoding (i.e. shift_jis and big5) as "problematic encoding".
+This extension can be used to fix the issue with those encodings by
+wrapping some functions to convert to Unicode string before path
+operation.
+
+This extension is useful for:
+
+- Japanese Windows users using shift_jis encoding.
+- Chinese Windows users using big5 encoding.
+- All users who use a repository with one of problematic encodings on
+  case-insensitive file system.
+
+This extension is not needed for:
+
+- Any user who use only ASCII chars in path.
+- Any user who do not use any of problematic encodings.
+
+Note that there are some limitations on using this extension:
+
+- You should use single encoding in one repository.
+- If the repository path ends with 0x5c, .hg/hgrc cannot be read.
+- win32mbcs is not compatible with fixutf8 extention.
+
+By default, win32mbcs uses encoding.encoding decided by Mercurial.
+You can specify the encoding by config option::
+
+ [win32mbcs]
+ encoding = sjis
+
+It is useful for the users who want to commit with UTF-8 log message.
+'''
+
+import os, sys
+from mercurial.i18n import _
+from mercurial import util, encoding
+
+_encoding = None                                # see extsetup
+
+def decode(arg):
+    if isinstance(arg, str):
+        uarg = arg.decode(_encoding)
+        if arg == uarg.encode(_encoding):
+            return uarg
+        raise UnicodeError("Not local encoding")
+    elif isinstance(arg, tuple):
+        return tuple(map(decode, arg))
+    elif isinstance(arg, list):
+        return map(decode, arg)
+    elif isinstance(arg, dict):
+        for k, v in arg.items():
+            arg[k] = decode(v)
+    return arg
+
+def encode(arg):
+    if isinstance(arg, unicode):
+        return arg.encode(_encoding)
+    elif isinstance(arg, tuple):
+        return tuple(map(encode, arg))
+    elif isinstance(arg, list):
+        return map(encode, arg)
+    elif isinstance(arg, dict):
+        for k, v in arg.items():
+            arg[k] = encode(v)
+    return arg
+
+def appendsep(s):
+    # ensure the path ends with os.sep, appending it if necessary.
+    try:
+        us = decode(s)
+    except UnicodeError:
+        us = s
+    if us and us[-1] not in ':/\\':
+        s += os.sep
+    return s
+
+def wrapper(func, args, kwds):
+    # check argument is unicode, then call original
+    for arg in args:
+        if isinstance(arg, unicode):
+            return func(*args, **kwds)
+
+    try:
+        # convert arguments to unicode, call func, then convert back
+        return encode(func(*decode(args), **decode(kwds)))
+    except UnicodeError:
+        raise util.Abort(_("[win32mbcs] filename conversion failed with"
+                         " %s encoding\n") % (_encoding))
+
+def wrapperforlistdir(func, args, kwds):
+    # Ensure 'path' argument ends with os.sep to avoids
+    # misinterpreting last 0x5c of MBCS 2nd byte as path separator.
+    if args:
+        args = list(args)
+        args[0] = appendsep(args[0])
+    if 'path' in kwds:
+        kwds['path'] = appendsep(kwds['path'])
+    return func(*args, **kwds)
+
+def wrapname(name, wrapper):
+    module, name = name.rsplit('.', 1)
+    module = sys.modules[module]
+    func = getattr(module, name)
+    def f(*args, **kwds):
+        return wrapper(func, args, kwds)
+    try:
+        f.__name__ = func.__name__                # fail with python23
+    except Exception:
+        pass
+    setattr(module, name, f)
+
+# List of functions to be wrapped.
+# NOTE: os.path.dirname() and os.path.basename() are safe because
+#       they use result of os.path.split()
+funcs = '''os.path.join os.path.split os.path.splitext
+ os.path.splitunc os.path.normpath os.path.normcase os.makedirs
+ mercurial.util.endswithsep mercurial.util.splitpath mercurial.util.checkcase
+ mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath'''
+
+# codec and alias names of sjis and big5 to be faked.
+problematic_encodings = '''big5 big5-tw csbig5 big5hkscs big5-hkscs
+ hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
+ sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
+ shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 950 cp950 ms950 '''
+
+def extsetup(ui):
+    # TODO: decide use of config section for this extension
+    if not os.path.supports_unicode_filenames:
+        ui.warn(_("[win32mbcs] cannot activate on this platform.\n"))
+        return
+    # determine encoding for filename
+    global _encoding
+    _encoding = ui.config('win32mbcs', 'encoding', encoding.encoding)
+    # fake is only for relevant environment.
+    if _encoding.lower() in problematic_encodings.split():
+        for f in funcs.split():
+            wrapname(f, wrapper)
+        wrapname("mercurial.osutil.listdir", wrapperforlistdir)
+        # Check sys.args manually instead of using ui.debug() because
+        # command line options is not yet applied when
+        # extensions.loadall() is called.
+        if '--debug' in sys.argv:
+            ui.write("[win32mbcs] activated with encoding: %s\n"
+                     % _encoding)
+