eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/mercurial/store.py
changeset 69 c6bca38c1cbf
equal deleted inserted replaced
68:5ff1fc726848 69:c6bca38c1cbf
       
     1 # store.py - repository store handling for Mercurial
       
     2 #
       
     3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 from i18n import _
       
     9 import osutil, util
       
    10 import os, stat
       
    11 
       
    12 _sha = util.sha1
       
    13 
       
    14 # This avoids a collision between a file named foo and a dir named
       
    15 # foo.i or foo.d
       
    16 def encodedir(path):
       
    17     if not path.startswith('data/'):
       
    18         return path
       
    19     return (path
       
    20             .replace(".hg/", ".hg.hg/")
       
    21             .replace(".i/", ".i.hg/")
       
    22             .replace(".d/", ".d.hg/"))
       
    23 
       
    24 def decodedir(path):
       
    25     if not path.startswith('data/') or ".hg/" not in path:
       
    26         return path
       
    27     return (path
       
    28             .replace(".d.hg/", ".d/")
       
    29             .replace(".i.hg/", ".i/")
       
    30             .replace(".hg.hg/", ".hg/"))
       
    31 
       
    32 def _buildencodefun():
       
    33     e = '_'
       
    34     win_reserved = [ord(x) for x in '\\:*?"<>|']
       
    35     cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
       
    36     for x in (range(32) + range(126, 256) + win_reserved):
       
    37         cmap[chr(x)] = "~%02x" % x
       
    38     for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
       
    39         cmap[chr(x)] = e + chr(x).lower()
       
    40     dmap = {}
       
    41     for k, v in cmap.iteritems():
       
    42         dmap[v] = k
       
    43     def decode(s):
       
    44         i = 0
       
    45         while i < len(s):
       
    46             for l in xrange(1, 4):
       
    47                 try:
       
    48                     yield dmap[s[i:i + l]]
       
    49                     i += l
       
    50                     break
       
    51                 except KeyError:
       
    52                     pass
       
    53             else:
       
    54                 raise KeyError
       
    55     return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
       
    56             lambda s: decodedir("".join(list(decode(s)))))
       
    57 
       
    58 encodefilename, decodefilename = _buildencodefun()
       
    59 
       
    60 def _build_lower_encodefun():
       
    61     win_reserved = [ord(x) for x in '\\:*?"<>|']
       
    62     cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
       
    63     for x in (range(32) + range(126, 256) + win_reserved):
       
    64         cmap[chr(x)] = "~%02x" % x
       
    65     for x in range(ord("A"), ord("Z")+1):
       
    66         cmap[chr(x)] = chr(x).lower()
       
    67     return lambda s: "".join([cmap[c] for c in s])
       
    68 
       
    69 lowerencode = _build_lower_encodefun()
       
    70 
       
    71 _windows_reserved_filenames = '''con prn aux nul
       
    72     com1 com2 com3 com4 com5 com6 com7 com8 com9
       
    73     lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
       
    74 def _auxencode(path, dotencode):
       
    75     res = []
       
    76     for n in path.split('/'):
       
    77         if n:
       
    78             base = n.split('.')[0]
       
    79             if base and (base in _windows_reserved_filenames):
       
    80                 # encode third letter ('aux' -> 'au~78')
       
    81                 ec = "~%02x" % ord(n[2])
       
    82                 n = n[0:2] + ec + n[3:]
       
    83             if n[-1] in '. ':
       
    84                 # encode last period or space ('foo...' -> 'foo..~2e')
       
    85                 n = n[:-1] + "~%02x" % ord(n[-1])
       
    86             if dotencode and n[0] in '. ':
       
    87                 n = "~%02x" % ord(n[0]) + n[1:]
       
    88         res.append(n)
       
    89     return '/'.join(res)
       
    90 
       
    91 MAX_PATH_LEN_IN_HGSTORE = 120
       
    92 DIR_PREFIX_LEN = 8
       
    93 _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
       
    94 def _hybridencode(path, auxencode):
       
    95     '''encodes path with a length limit
       
    96 
       
    97     Encodes all paths that begin with 'data/', according to the following.
       
    98 
       
    99     Default encoding (reversible):
       
   100 
       
   101     Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
       
   102     characters are encoded as '~xx', where xx is the two digit hex code
       
   103     of the character (see encodefilename).
       
   104     Relevant path components consisting of Windows reserved filenames are
       
   105     masked by encoding the third character ('aux' -> 'au~78', see auxencode).
       
   106 
       
   107     Hashed encoding (not reversible):
       
   108 
       
   109     If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
       
   110     non-reversible hybrid hashing of the path is done instead.
       
   111     This encoding uses up to DIR_PREFIX_LEN characters of all directory
       
   112     levels of the lowerencoded path, but not more levels than can fit into
       
   113     _MAX_SHORTENED_DIRS_LEN.
       
   114     Then follows the filler followed by the sha digest of the full path.
       
   115     The filler is the beginning of the basename of the lowerencoded path
       
   116     (the basename is everything after the last path separator). The filler
       
   117     is as long as possible, filling in characters from the basename until
       
   118     the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
       
   119     of the basename have been taken).
       
   120     The extension (e.g. '.i' or '.d') is preserved.
       
   121 
       
   122     The string 'data/' at the beginning is replaced with 'dh/', if the hashed
       
   123     encoding was used.
       
   124     '''
       
   125     if not path.startswith('data/'):
       
   126         return path
       
   127     # escape directories ending with .i and .d
       
   128     path = encodedir(path)
       
   129     ndpath = path[len('data/'):]
       
   130     res = 'data/' + auxencode(encodefilename(ndpath))
       
   131     if len(res) > MAX_PATH_LEN_IN_HGSTORE:
       
   132         digest = _sha(path).hexdigest()
       
   133         aep = auxencode(lowerencode(ndpath))
       
   134         _root, ext = os.path.splitext(aep)
       
   135         parts = aep.split('/')
       
   136         basename = parts[-1]
       
   137         sdirs = []
       
   138         for p in parts[:-1]:
       
   139             d = p[:DIR_PREFIX_LEN]
       
   140             if d[-1] in '. ':
       
   141                 # Windows can't access dirs ending in period or space
       
   142                 d = d[:-1] + '_'
       
   143             t = '/'.join(sdirs) + '/' + d
       
   144             if len(t) > _MAX_SHORTENED_DIRS_LEN:
       
   145                 break
       
   146             sdirs.append(d)
       
   147         dirs = '/'.join(sdirs)
       
   148         if len(dirs) > 0:
       
   149             dirs += '/'
       
   150         res = 'dh/' + dirs + digest + ext
       
   151         space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
       
   152         if space_left > 0:
       
   153             filler = basename[:space_left]
       
   154             res = 'dh/' + dirs + filler + digest + ext
       
   155     return res
       
   156 
       
   157 def _calcmode(path):
       
   158     try:
       
   159         # files in .hg/ will be created using this mode
       
   160         mode = os.stat(path).st_mode
       
   161             # avoid some useless chmods
       
   162         if (0777 & ~util.umask) == (0777 & mode):
       
   163             mode = None
       
   164     except OSError:
       
   165         mode = None
       
   166     return mode
       
   167 
       
   168 _data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
       
   169 
       
   170 class basicstore(object):
       
   171     '''base class for local repository stores'''
       
   172     def __init__(self, path, opener, pathjoiner):
       
   173         self.pathjoiner = pathjoiner
       
   174         self.path = path
       
   175         self.createmode = _calcmode(path)
       
   176         op = opener(self.path)
       
   177         op.createmode = self.createmode
       
   178         self.opener = lambda f, *args, **kw: op(encodedir(f), *args, **kw)
       
   179 
       
   180     def join(self, f):
       
   181         return self.pathjoiner(self.path, encodedir(f))
       
   182 
       
   183     def _walk(self, relpath, recurse):
       
   184         '''yields (unencoded, encoded, size)'''
       
   185         path = self.pathjoiner(self.path, relpath)
       
   186         striplen = len(self.path) + len(os.sep)
       
   187         l = []
       
   188         if os.path.isdir(path):
       
   189             visit = [path]
       
   190             while visit:
       
   191                 p = visit.pop()
       
   192                 for f, kind, st in osutil.listdir(p, stat=True):
       
   193                     fp = self.pathjoiner(p, f)
       
   194                     if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
       
   195                         n = util.pconvert(fp[striplen:])
       
   196                         l.append((decodedir(n), n, st.st_size))
       
   197                     elif kind == stat.S_IFDIR and recurse:
       
   198                         visit.append(fp)
       
   199         return sorted(l)
       
   200 
       
   201     def datafiles(self):
       
   202         return self._walk('data', True)
       
   203 
       
   204     def walk(self):
       
   205         '''yields (unencoded, encoded, size)'''
       
   206         # yield data files first
       
   207         for x in self.datafiles():
       
   208             yield x
       
   209         # yield manifest before changelog
       
   210         for x in reversed(self._walk('', False)):
       
   211             yield x
       
   212 
       
   213     def copylist(self):
       
   214         return ['requires'] + _data.split()
       
   215 
       
   216 class encodedstore(basicstore):
       
   217     def __init__(self, path, opener, pathjoiner):
       
   218         self.pathjoiner = pathjoiner
       
   219         self.path = self.pathjoiner(path, 'store')
       
   220         self.createmode = _calcmode(self.path)
       
   221         op = opener(self.path)
       
   222         op.createmode = self.createmode
       
   223         self.opener = lambda f, *args, **kw: op(encodefilename(f), *args, **kw)
       
   224 
       
   225     def datafiles(self):
       
   226         for a, b, size in self._walk('data', True):
       
   227             try:
       
   228                 a = decodefilename(a)
       
   229             except KeyError:
       
   230                 a = None
       
   231             yield a, b, size
       
   232 
       
   233     def join(self, f):
       
   234         return self.pathjoiner(self.path, encodefilename(f))
       
   235 
       
   236     def copylist(self):
       
   237         return (['requires', '00changelog.i'] +
       
   238                 [self.pathjoiner('store', f) for f in _data.split()])
       
   239 
       
   240 class fncache(object):
       
   241     # the filename used to be partially encoded
       
   242     # hence the encodedir/decodedir dance
       
   243     def __init__(self, opener):
       
   244         self.opener = opener
       
   245         self.entries = None
       
   246 
       
   247     def _load(self):
       
   248         '''fill the entries from the fncache file'''
       
   249         self.entries = set()
       
   250         try:
       
   251             fp = self.opener('fncache', mode='rb')
       
   252         except IOError:
       
   253             # skip nonexistent file
       
   254             return
       
   255         for n, line in enumerate(fp):
       
   256             if (len(line) < 2) or (line[-1] != '\n'):
       
   257                 t = _('invalid entry in fncache, line %s') % (n + 1)
       
   258                 raise util.Abort(t)
       
   259             self.entries.add(decodedir(line[:-1]))
       
   260         fp.close()
       
   261 
       
   262     def rewrite(self, files):
       
   263         fp = self.opener('fncache', mode='wb')
       
   264         for p in files:
       
   265             fp.write(encodedir(p) + '\n')
       
   266         fp.close()
       
   267         self.entries = set(files)
       
   268 
       
   269     def add(self, fn):
       
   270         if self.entries is None:
       
   271             self._load()
       
   272         if fn not in self.entries:
       
   273             self.opener('fncache', 'ab').write(encodedir(fn) + '\n')
       
   274             self.entries.add(fn)
       
   275 
       
   276     def __contains__(self, fn):
       
   277         if self.entries is None:
       
   278             self._load()
       
   279         return fn in self.entries
       
   280 
       
   281     def __iter__(self):
       
   282         if self.entries is None:
       
   283             self._load()
       
   284         return iter(self.entries)
       
   285 
       
   286 class fncachestore(basicstore):
       
   287     def __init__(self, path, opener, pathjoiner, encode):
       
   288         self.encode = encode
       
   289         self.pathjoiner = pathjoiner
       
   290         self.path = self.pathjoiner(path, 'store')
       
   291         self.createmode = _calcmode(self.path)
       
   292         op = opener(self.path)
       
   293         op.createmode = self.createmode
       
   294         fnc = fncache(op)
       
   295         self.fncache = fnc
       
   296 
       
   297         def fncacheopener(path, mode='r', *args, **kw):
       
   298             if mode not in ('r', 'rb') and path.startswith('data/'):
       
   299                 fnc.add(path)
       
   300             return op(self.encode(path), mode, *args, **kw)
       
   301         self.opener = fncacheopener
       
   302 
       
   303     def join(self, f):
       
   304         return self.pathjoiner(self.path, self.encode(f))
       
   305 
       
   306     def datafiles(self):
       
   307         rewrite = False
       
   308         existing = []
       
   309         pjoin = self.pathjoiner
       
   310         spath = self.path
       
   311         for f in self.fncache:
       
   312             ef = self.encode(f)
       
   313             try:
       
   314                 st = os.stat(pjoin(spath, ef))
       
   315                 yield f, ef, st.st_size
       
   316                 existing.append(f)
       
   317             except OSError:
       
   318                 # nonexistent entry
       
   319                 rewrite = True
       
   320         if rewrite:
       
   321             # rewrite fncache to remove nonexistent entries
       
   322             # (may be caused by rollback / strip)
       
   323             self.fncache.rewrite(existing)
       
   324 
       
   325     def copylist(self):
       
   326         d = ('data dh fncache'
       
   327              ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
       
   328         return (['requires', '00changelog.i'] +
       
   329                 [self.pathjoiner('store', f) for f in d.split()])
       
   330 
       
   331 def store(requirements, path, opener, pathjoiner=None):
       
   332     pathjoiner = pathjoiner or os.path.join
       
   333     if 'store' in requirements:
       
   334         if 'fncache' in requirements:
       
   335             auxencode = lambda f: _auxencode(f, 'dotencode' in requirements)
       
   336             encode = lambda f: _hybridencode(f, auxencode)
       
   337             return fncachestore(path, opener, pathjoiner, encode)
       
   338         return encodedstore(path, opener, pathjoiner)
       
   339     return basicstore(path, opener, pathjoiner)