eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/mercurial/match.py
changeset 69 c6bca38c1cbf
equal deleted inserted replaced
68:5ff1fc726848 69:c6bca38c1cbf
       
     1 # match.py - filename matching
       
     2 #
       
     3 #  Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 import re
       
     9 import util
       
    10 from i18n import _
       
    11 
       
    12 class match(object):
       
    13     def __init__(self, root, cwd, patterns, include=[], exclude=[],
       
    14                  default='glob', exact=False, auditor=None):
       
    15         """build an object to match a set of file patterns
       
    16 
       
    17         arguments:
       
    18         root - the canonical root of the tree you're matching against
       
    19         cwd - the current working directory, if relevant
       
    20         patterns - patterns to find
       
    21         include - patterns to include
       
    22         exclude - patterns to exclude
       
    23         default - if a pattern in names has no explicit type, assume this one
       
    24         exact - patterns are actually literals
       
    25 
       
    26         a pattern is one of:
       
    27         'glob:<glob>' - a glob relative to cwd
       
    28         're:<regexp>' - a regular expression
       
    29         'path:<path>' - a path relative to canonroot
       
    30         'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
       
    31         'relpath:<path>' - a path relative to cwd
       
    32         'relre:<regexp>' - a regexp that needn't match the start of a name
       
    33         '<something>' - a pattern of the specified default type
       
    34         """
       
    35 
       
    36         self._root = root
       
    37         self._cwd = cwd
       
    38         self._files = []
       
    39         self._anypats = bool(include or exclude)
       
    40 
       
    41         if include:
       
    42             im = _buildmatch(_normalize(include, 'glob', root, cwd, auditor),
       
    43                              '(?:/|$)')
       
    44         if exclude:
       
    45             em = _buildmatch(_normalize(exclude, 'glob', root, cwd, auditor),
       
    46                              '(?:/|$)')
       
    47         if exact:
       
    48             self._files = patterns
       
    49             pm = self.exact
       
    50         elif patterns:
       
    51             pats = _normalize(patterns, default, root, cwd, auditor)
       
    52             self._files = _roots(pats)
       
    53             self._anypats = self._anypats or _anypats(pats)
       
    54             pm = _buildmatch(pats, '$')
       
    55 
       
    56         if patterns or exact:
       
    57             if include:
       
    58                 if exclude:
       
    59                     m = lambda f: im(f) and not em(f) and pm(f)
       
    60                 else:
       
    61                     m = lambda f: im(f) and pm(f)
       
    62             else:
       
    63                 if exclude:
       
    64                     m = lambda f: not em(f) and pm(f)
       
    65                 else:
       
    66                     m = pm
       
    67         else:
       
    68             if include:
       
    69                 if exclude:
       
    70                     m = lambda f: im(f) and not em(f)
       
    71                 else:
       
    72                     m = im
       
    73             else:
       
    74                 if exclude:
       
    75                     m = lambda f: not em(f)
       
    76                 else:
       
    77                     m = lambda f: True
       
    78 
       
    79         self.matchfn = m
       
    80         self._fmap = set(self._files)
       
    81 
       
    82     def __call__(self, fn):
       
    83         return self.matchfn(fn)
       
    84     def __iter__(self):
       
    85         for f in self._files:
       
    86             yield f
       
    87     def bad(self, f, msg):
       
    88         '''callback for each explicit file that can't be
       
    89         found/accessed, with an error message
       
    90         '''
       
    91         pass
       
    92     def dir(self, f):
       
    93         pass
       
    94     def missing(self, f):
       
    95         pass
       
    96     def exact(self, f):
       
    97         return f in self._fmap
       
    98     def rel(self, f):
       
    99         return util.pathto(self._root, self._cwd, f)
       
   100     def files(self):
       
   101         return self._files
       
   102     def anypats(self):
       
   103         return self._anypats
       
   104 
       
   105 class exact(match):
       
   106     def __init__(self, root, cwd, files):
       
   107         match.__init__(self, root, cwd, files, exact = True)
       
   108 
       
   109 class always(match):
       
   110     def __init__(self, root, cwd):
       
   111         match.__init__(self, root, cwd, [])
       
   112 
       
   113 class narrowmatcher(match):
       
   114     """Adapt a matcher to work on a subdirectory only.
       
   115 
       
   116     The paths are remapped to remove/insert the path as needed:
       
   117 
       
   118     >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
       
   119     >>> m2 = narrowmatcher('sub', m1)
       
   120     >>> bool(m2('a.txt'))
       
   121     False
       
   122     >>> bool(m2('b.txt'))
       
   123     True
       
   124     >>> bool(m2.matchfn('a.txt'))
       
   125     False
       
   126     >>> bool(m2.matchfn('b.txt'))
       
   127     True
       
   128     >>> m2.files()
       
   129     ['b.txt']
       
   130     >>> m2.exact('b.txt')
       
   131     True
       
   132     >>> m2.rel('b.txt')
       
   133     'b.txt'
       
   134     >>> def bad(f, msg):
       
   135     ...     print "%s: %s" % (f, msg)
       
   136     >>> m1.bad = bad
       
   137     >>> m2.bad('x.txt', 'No such file')
       
   138     sub/x.txt: No such file
       
   139     """
       
   140 
       
   141     def __init__(self, path, matcher):
       
   142         self._root = matcher._root
       
   143         self._cwd = matcher._cwd
       
   144         self._path = path
       
   145         self._matcher = matcher
       
   146 
       
   147         self._files = [f[len(path) + 1:] for f in matcher._files
       
   148                        if f.startswith(path + "/")]
       
   149         self._anypats = matcher._anypats
       
   150         self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
       
   151         self._fmap = set(self._files)
       
   152 
       
   153     def bad(self, f, msg):
       
   154         self._matcher.bad(self._path + "/" + f, msg)
       
   155 
       
   156 def patkind(pat):
       
   157     return _patsplit(pat, None)[0]
       
   158 
       
   159 def _patsplit(pat, default):
       
   160     """Split a string into an optional pattern kind prefix and the
       
   161     actual pattern."""
       
   162     if ':' in pat:
       
   163         kind, val = pat.split(':', 1)
       
   164         if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
       
   165             return kind, val
       
   166     return default, pat
       
   167 
       
   168 def _globre(pat):
       
   169     "convert a glob pattern into a regexp"
       
   170     i, n = 0, len(pat)
       
   171     res = ''
       
   172     group = 0
       
   173     escape = re.escape
       
   174     def peek():
       
   175         return i < n and pat[i]
       
   176     while i < n:
       
   177         c = pat[i]
       
   178         i += 1
       
   179         if c not in '*?[{},\\':
       
   180             res += escape(c)
       
   181         elif c == '*':
       
   182             if peek() == '*':
       
   183                 i += 1
       
   184                 res += '.*'
       
   185             else:
       
   186                 res += '[^/]*'
       
   187         elif c == '?':
       
   188             res += '.'
       
   189         elif c == '[':
       
   190             j = i
       
   191             if j < n and pat[j] in '!]':
       
   192                 j += 1
       
   193             while j < n and pat[j] != ']':
       
   194                 j += 1
       
   195             if j >= n:
       
   196                 res += '\\['
       
   197             else:
       
   198                 stuff = pat[i:j].replace('\\','\\\\')
       
   199                 i = j + 1
       
   200                 if stuff[0] == '!':
       
   201                     stuff = '^' + stuff[1:]
       
   202                 elif stuff[0] == '^':
       
   203                     stuff = '\\' + stuff
       
   204                 res = '%s[%s]' % (res, stuff)
       
   205         elif c == '{':
       
   206             group += 1
       
   207             res += '(?:'
       
   208         elif c == '}' and group:
       
   209             res += ')'
       
   210             group -= 1
       
   211         elif c == ',' and group:
       
   212             res += '|'
       
   213         elif c == '\\':
       
   214             p = peek()
       
   215             if p:
       
   216                 i += 1
       
   217                 res += escape(p)
       
   218             else:
       
   219                 res += escape(c)
       
   220         else:
       
   221             res += escape(c)
       
   222     return res
       
   223 
       
   224 def _regex(kind, name, tail):
       
   225     '''convert a pattern into a regular expression'''
       
   226     if not name:
       
   227         return ''
       
   228     if kind == 're':
       
   229         return name
       
   230     elif kind == 'path':
       
   231         return '^' + re.escape(name) + '(?:/|$)'
       
   232     elif kind == 'relglob':
       
   233         return '(?:|.*/)' + _globre(name) + tail
       
   234     elif kind == 'relpath':
       
   235         return re.escape(name) + '(?:/|$)'
       
   236     elif kind == 'relre':
       
   237         if name.startswith('^'):
       
   238             return name
       
   239         return '.*' + name
       
   240     return _globre(name) + tail
       
   241 
       
   242 def _buildmatch(pats, tail):
       
   243     """build a matching function from a set of patterns"""
       
   244     try:
       
   245         pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
       
   246         if len(pat) > 20000:
       
   247             raise OverflowError()
       
   248         return re.compile(pat).match
       
   249     except OverflowError:
       
   250         # We're using a Python with a tiny regex engine and we
       
   251         # made it explode, so we'll divide the pattern list in two
       
   252         # until it works
       
   253         l = len(pats)
       
   254         if l < 2:
       
   255             raise
       
   256         a, b = _buildmatch(pats[:l//2], tail), _buildmatch(pats[l//2:], tail)
       
   257         return lambda s: a(s) or b(s)
       
   258     except re.error:
       
   259         for k, p in pats:
       
   260             try:
       
   261                 re.compile('(?:%s)' % _regex(k, p, tail))
       
   262             except re.error:
       
   263                 raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
       
   264         raise util.Abort(_("invalid pattern"))
       
   265 
       
   266 def _normalize(names, default, root, cwd, auditor):
       
   267     pats = []
       
   268     for kind, name in [_patsplit(p, default) for p in names]:
       
   269         if kind in ('glob', 'relpath'):
       
   270             name = util.canonpath(root, cwd, name, auditor)
       
   271         elif kind in ('relglob', 'path'):
       
   272             name = util.normpath(name)
       
   273 
       
   274         pats.append((kind, name))
       
   275     return pats
       
   276 
       
   277 def _roots(patterns):
       
   278     r = []
       
   279     for kind, name in patterns:
       
   280         if kind == 'glob': # find the non-glob prefix
       
   281             root = []
       
   282             for p in name.split('/'):
       
   283                 if '[' in p or '{' in p or '*' in p or '?' in p:
       
   284                     break
       
   285                 root.append(p)
       
   286             r.append('/'.join(root) or '.')
       
   287         elif kind in ('relpath', 'path'):
       
   288             r.append(name or '.')
       
   289         elif kind == 'relglob':
       
   290             r.append('.')
       
   291     return r
       
   292 
       
   293 def _anypats(patterns):
       
   294     for kind, name in patterns:
       
   295         if kind in ('glob', 're', 'relglob', 'relre'):
       
   296             return True