eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/hgext/convert/cvsps.py
changeset 69 c6bca38c1cbf
equal deleted inserted replaced
68:5ff1fc726848 69:c6bca38c1cbf
       
     1 # Mercurial built-in replacement for cvsps.
       
     2 #
       
     3 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 import os
       
     9 import re
       
    10 import cPickle as pickle
       
    11 from mercurial import util
       
    12 from mercurial.i18n import _
       
    13 from mercurial import hook
       
    14 
       
    15 class logentry(object):
       
    16     '''Class logentry has the following attributes:
       
    17         .author    - author name as CVS knows it
       
    18         .branch    - name of branch this revision is on
       
    19         .branches  - revision tuple of branches starting at this revision
       
    20         .comment   - commit message
       
    21         .date      - the commit date as a (time, tz) tuple
       
    22         .dead      - true if file revision is dead
       
    23         .file      - Name of file
       
    24         .lines     - a tuple (+lines, -lines) or None
       
    25         .parent    - Previous revision of this entry
       
    26         .rcs       - name of file as returned from CVS
       
    27         .revision  - revision number as tuple
       
    28         .tags      - list of tags on the file
       
    29         .synthetic - is this a synthetic "file ... added on ..." revision?
       
    30         .mergepoint- the branch that has been merged from
       
    31                      (if present in rlog output)
       
    32         .branchpoints- the branches that start at the current entry
       
    33     '''
       
    34     def __init__(self, **entries):
       
    35         self.synthetic = False
       
    36         self.__dict__.update(entries)
       
    37 
       
    38     def __repr__(self):
       
    39         return "<%s at 0x%x: %s %s>" % (self.__class__.__name__,
       
    40                                         id(self),
       
    41                                         self.file,
       
    42                                         ".".join(map(str, self.revision)))
       
    43 
       
    44 class logerror(Exception):
       
    45     pass
       
    46 
       
    47 def getrepopath(cvspath):
       
    48     """Return the repository path from a CVS path.
       
    49 
       
    50     >>> getrepopath('/foo/bar')
       
    51     '/foo/bar'
       
    52     >>> getrepopath('c:/foo/bar')
       
    53     'c:/foo/bar'
       
    54     >>> getrepopath(':pserver:10/foo/bar')
       
    55     '/foo/bar'
       
    56     >>> getrepopath(':pserver:10c:/foo/bar')
       
    57     '/foo/bar'
       
    58     >>> getrepopath(':pserver:/foo/bar')
       
    59     '/foo/bar'
       
    60     >>> getrepopath(':pserver:c:/foo/bar')
       
    61     'c:/foo/bar'
       
    62     >>> getrepopath(':pserver:truc@foo.bar:/foo/bar')
       
    63     '/foo/bar'
       
    64     >>> getrepopath(':pserver:truc@foo.bar:c:/foo/bar')
       
    65     'c:/foo/bar'
       
    66     """
       
    67     # According to CVS manual, CVS paths are expressed like:
       
    68     # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
       
    69     #
       
    70     # Unfortunately, Windows absolute paths start with a drive letter
       
    71     # like 'c:' making it harder to parse. Here we assume that drive
       
    72     # letters are only one character long and any CVS component before
       
    73     # the repository path is at least 2 characters long, and use this
       
    74     # to disambiguate.
       
    75     parts = cvspath.split(':')
       
    76     if len(parts) == 1:
       
    77         return parts[0]
       
    78     # Here there is an ambiguous case if we have a port number
       
    79     # immediately followed by a Windows driver letter. We assume this
       
    80     # never happens and decide it must be CVS path component,
       
    81     # therefore ignoring it.
       
    82     if len(parts[-2]) > 1:
       
    83         return parts[-1].lstrip('0123456789')
       
    84     return parts[-2] + ':' + parts[-1]
       
    85 
       
    86 def createlog(ui, directory=None, root="", rlog=True, cache=None):
       
    87     '''Collect the CVS rlog'''
       
    88 
       
    89     # Because we store many duplicate commit log messages, reusing strings
       
    90     # saves a lot of memory and pickle storage space.
       
    91     _scache = {}
       
    92     def scache(s):
       
    93         "return a shared version of a string"
       
    94         return _scache.setdefault(s, s)
       
    95 
       
    96     ui.status(_('collecting CVS rlog\n'))
       
    97 
       
    98     log = []      # list of logentry objects containing the CVS state
       
    99 
       
   100     # patterns to match in CVS (r)log output, by state of use
       
   101     re_00 = re.compile('RCS file: (.+)$')
       
   102     re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
       
   103     re_02 = re.compile('cvs (r?log|server): (.+)\n$')
       
   104     re_03 = re.compile("(Cannot access.+CVSROOT)|"
       
   105                        "(can't create temporary directory.+)$")
       
   106     re_10 = re.compile('Working file: (.+)$')
       
   107     re_20 = re.compile('symbolic names:')
       
   108     re_30 = re.compile('\t(.+): ([\\d.]+)$')
       
   109     re_31 = re.compile('----------------------------$')
       
   110     re_32 = re.compile('======================================='
       
   111                        '======================================$')
       
   112     re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
       
   113     re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
       
   114                        r'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
       
   115                        r'(.*mergepoint:\s+([^;]+);)?')
       
   116     re_70 = re.compile('branches: (.+);$')
       
   117 
       
   118     file_added_re = re.compile(r'file [^/]+ was (initially )?added on branch')
       
   119 
       
   120     prefix = ''   # leading path to strip of what we get from CVS
       
   121 
       
   122     if directory is None:
       
   123         # Current working directory
       
   124 
       
   125         # Get the real directory in the repository
       
   126         try:
       
   127             prefix = open(os.path.join('CVS','Repository')).read().strip()
       
   128             directory = prefix
       
   129             if prefix == ".":
       
   130                 prefix = ""
       
   131         except IOError:
       
   132             raise logerror(_('not a CVS sandbox'))
       
   133 
       
   134         if prefix and not prefix.endswith(os.sep):
       
   135             prefix += os.sep
       
   136 
       
   137         # Use the Root file in the sandbox, if it exists
       
   138         try:
       
   139             root = open(os.path.join('CVS','Root')).read().strip()
       
   140         except IOError:
       
   141             pass
       
   142 
       
   143     if not root:
       
   144         root = os.environ.get('CVSROOT', '')
       
   145 
       
   146     # read log cache if one exists
       
   147     oldlog = []
       
   148     date = None
       
   149 
       
   150     if cache:
       
   151         cachedir = os.path.expanduser('~/.hg.cvsps')
       
   152         if not os.path.exists(cachedir):
       
   153             os.mkdir(cachedir)
       
   154 
       
   155         # The cvsps cache pickle needs a uniquified name, based on the
       
   156         # repository location. The address may have all sort of nasties
       
   157         # in it, slashes, colons and such. So here we take just the
       
   158         # alphanumerics, concatenated in a way that does not mix up the
       
   159         # various components, so that
       
   160         #    :pserver:user@server:/path
       
   161         # and
       
   162         #    /pserver/user/server/path
       
   163         # are mapped to different cache file names.
       
   164         cachefile = root.split(":") + [directory, "cache"]
       
   165         cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
       
   166         cachefile = os.path.join(cachedir,
       
   167                                  '.'.join([s for s in cachefile if s]))
       
   168 
       
   169     if cache == 'update':
       
   170         try:
       
   171             ui.note(_('reading cvs log cache %s\n') % cachefile)
       
   172             oldlog = pickle.load(open(cachefile))
       
   173             ui.note(_('cache has %d log entries\n') % len(oldlog))
       
   174         except Exception, e:
       
   175             ui.note(_('error reading cache: %r\n') % e)
       
   176 
       
   177         if oldlog:
       
   178             date = oldlog[-1].date    # last commit date as a (time,tz) tuple
       
   179             date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')
       
   180 
       
   181     # build the CVS commandline
       
   182     cmd = ['cvs', '-q']
       
   183     if root:
       
   184         cmd.append('-d%s' % root)
       
   185         p = util.normpath(getrepopath(root))
       
   186         if not p.endswith('/'):
       
   187             p += '/'
       
   188         if prefix:
       
   189             # looks like normpath replaces "" by "."
       
   190             prefix = p + util.normpath(prefix)
       
   191         else:
       
   192             prefix = p
       
   193     cmd.append(['log', 'rlog'][rlog])
       
   194     if date:
       
   195         # no space between option and date string
       
   196         cmd.append('-d>%s' % date)
       
   197     cmd.append(directory)
       
   198 
       
   199     # state machine begins here
       
   200     tags = {}     # dictionary of revisions on current file with their tags
       
   201     branchmap = {} # mapping between branch names and revision numbers
       
   202     state = 0
       
   203     store = False # set when a new record can be appended
       
   204 
       
   205     cmd = [util.shellquote(arg) for arg in cmd]
       
   206     ui.note(_("running %s\n") % (' '.join(cmd)))
       
   207     ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))
       
   208 
       
   209     pfp = util.popen(' '.join(cmd))
       
   210     peek = pfp.readline()
       
   211     while True:
       
   212         line = peek
       
   213         if line == '':
       
   214             break
       
   215         peek = pfp.readline()
       
   216         if line.endswith('\n'):
       
   217             line = line[:-1]
       
   218         #ui.debug('state=%d line=%r\n' % (state, line))
       
   219 
       
   220         if state == 0:
       
   221             # initial state, consume input until we see 'RCS file'
       
   222             match = re_00.match(line)
       
   223             if match:
       
   224                 rcs = match.group(1)
       
   225                 tags = {}
       
   226                 if rlog:
       
   227                     filename = util.normpath(rcs[:-2])
       
   228                     if filename.startswith(prefix):
       
   229                         filename = filename[len(prefix):]
       
   230                     if filename.startswith('/'):
       
   231                         filename = filename[1:]
       
   232                     if filename.startswith('Attic/'):
       
   233                         filename = filename[6:]
       
   234                     else:
       
   235                         filename = filename.replace('/Attic/', '/')
       
   236                     state = 2
       
   237                     continue
       
   238                 state = 1
       
   239                 continue
       
   240             match = re_01.match(line)
       
   241             if match:
       
   242                 raise logerror(match.group(1))
       
   243             match = re_02.match(line)
       
   244             if match:
       
   245                 raise logerror(match.group(2))
       
   246             if re_03.match(line):
       
   247                 raise logerror(line)
       
   248 
       
   249         elif state == 1:
       
   250             # expect 'Working file' (only when using log instead of rlog)
       
   251             match = re_10.match(line)
       
   252             assert match, _('RCS file must be followed by working file')
       
   253             filename = util.normpath(match.group(1))
       
   254             state = 2
       
   255 
       
   256         elif state == 2:
       
   257             # expect 'symbolic names'
       
   258             if re_20.match(line):
       
   259                 branchmap = {}
       
   260                 state = 3
       
   261 
       
   262         elif state == 3:
       
   263             # read the symbolic names and store as tags
       
   264             match = re_30.match(line)
       
   265             if match:
       
   266                 rev = [int(x) for x in match.group(2).split('.')]
       
   267 
       
   268                 # Convert magic branch number to an odd-numbered one
       
   269                 revn = len(rev)
       
   270                 if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
       
   271                     rev = rev[:-2] + rev[-1:]
       
   272                 rev = tuple(rev)
       
   273 
       
   274                 if rev not in tags:
       
   275                     tags[rev] = []
       
   276                 tags[rev].append(match.group(1))
       
   277                 branchmap[match.group(1)] = match.group(2)
       
   278 
       
   279             elif re_31.match(line):
       
   280                 state = 5
       
   281             elif re_32.match(line):
       
   282                 state = 0
       
   283 
       
   284         elif state == 4:
       
   285             # expecting '------' separator before first revision
       
   286             if re_31.match(line):
       
   287                 state = 5
       
   288             else:
       
   289                 assert not re_32.match(line), _('must have at least '
       
   290                                                 'some revisions')
       
   291 
       
   292         elif state == 5:
       
   293             # expecting revision number and possibly (ignored) lock indication
       
   294             # we create the logentry here from values stored in states 0 to 4,
       
   295             # as this state is re-entered for subsequent revisions of a file.
       
   296             match = re_50.match(line)
       
   297             assert match, _('expected revision number')
       
   298             e = logentry(rcs=scache(rcs), file=scache(filename),
       
   299                     revision=tuple([int(x) for x in match.group(1).split('.')]),
       
   300                     branches=[], parent=None)
       
   301             state = 6
       
   302 
       
   303         elif state == 6:
       
   304             # expecting date, author, state, lines changed
       
   305             match = re_60.match(line)
       
   306             assert match, _('revision must be followed by date line')
       
   307             d = match.group(1)
       
   308             if d[2] == '/':
       
   309                 # Y2K
       
   310                 d = '19' + d
       
   311 
       
   312             if len(d.split()) != 3:
       
   313                 # cvs log dates always in GMT
       
   314                 d = d + ' UTC'
       
   315             e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S',
       
   316                                         '%Y/%m/%d %H:%M:%S',
       
   317                                         '%Y-%m-%d %H:%M:%S'])
       
   318             e.author = scache(match.group(2))
       
   319             e.dead = match.group(3).lower() == 'dead'
       
   320 
       
   321             if match.group(5):
       
   322                 if match.group(6):
       
   323                     e.lines = (int(match.group(5)), int(match.group(6)))
       
   324                 else:
       
   325                     e.lines = (int(match.group(5)), 0)
       
   326             elif match.group(6):
       
   327                 e.lines = (0, int(match.group(6)))
       
   328             else:
       
   329                 e.lines = None
       
   330 
       
   331             if match.group(7): # cvsnt mergepoint
       
   332                 myrev = match.group(8).split('.')
       
   333                 if len(myrev) == 2: # head
       
   334                     e.mergepoint = 'HEAD'
       
   335                 else:
       
   336                     myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
       
   337                     branches = [b for b in branchmap if branchmap[b] == myrev]
       
   338                     assert len(branches) == 1, 'unknown branch: %s' % e.mergepoint
       
   339                     e.mergepoint = branches[0]
       
   340             else:
       
   341                 e.mergepoint = None
       
   342             e.comment = []
       
   343             state = 7
       
   344 
       
   345         elif state == 7:
       
   346             # read the revision numbers of branches that start at this revision
       
   347             # or store the commit log message otherwise
       
   348             m = re_70.match(line)
       
   349             if m:
       
   350                 e.branches = [tuple([int(y) for y in x.strip().split('.')])
       
   351                                 for x in m.group(1).split(';')]
       
   352                 state = 8
       
   353             elif re_31.match(line) and re_50.match(peek):
       
   354                 state = 5
       
   355                 store = True
       
   356             elif re_32.match(line):
       
   357                 state = 0
       
   358                 store = True
       
   359             else:
       
   360                 e.comment.append(line)
       
   361 
       
   362         elif state == 8:
       
   363             # store commit log message
       
   364             if re_31.match(line):
       
   365                 state = 5
       
   366                 store = True
       
   367             elif re_32.match(line):
       
   368                 state = 0
       
   369                 store = True
       
   370             else:
       
   371                 e.comment.append(line)
       
   372 
       
   373         # When a file is added on a branch B1, CVS creates a synthetic
       
   374         # dead trunk revision 1.1 so that the branch has a root.
       
   375         # Likewise, if you merge such a file to a later branch B2 (one
       
   376         # that already existed when the file was added on B1), CVS
       
   377         # creates a synthetic dead revision 1.1.x.1 on B2.  Don't drop
       
   378         # these revisions now, but mark them synthetic so
       
   379         # createchangeset() can take care of them.
       
   380         if (store and
       
   381               e.dead and
       
   382               e.revision[-1] == 1 and      # 1.1 or 1.1.x.1
       
   383               len(e.comment) == 1 and
       
   384               file_added_re.match(e.comment[0])):
       
   385             ui.debug('found synthetic revision in %s: %r\n'
       
   386                      % (e.rcs, e.comment[0]))
       
   387             e.synthetic = True
       
   388 
       
   389         if store:
       
   390             # clean up the results and save in the log.
       
   391             store = False
       
   392             e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
       
   393             e.comment = scache('\n'.join(e.comment))
       
   394 
       
   395             revn = len(e.revision)
       
   396             if revn > 3 and (revn % 2) == 0:
       
   397                 e.branch = tags.get(e.revision[:-1], [None])[0]
       
   398             else:
       
   399                 e.branch = None
       
   400 
       
   401             # find the branches starting from this revision
       
   402             branchpoints = set()
       
   403             for branch, revision in branchmap.iteritems():
       
   404                 revparts = tuple([int(i) for i in revision.split('.')])
       
   405                 if len(revparts) < 2: # bad tags
       
   406                     continue
       
   407                 if revparts[-2] == 0 and revparts[-1] % 2 == 0:
       
   408                     # normal branch
       
   409                     if revparts[:-2] == e.revision:
       
   410                         branchpoints.add(branch)
       
   411                 elif revparts == (1, 1, 1): # vendor branch
       
   412                     if revparts in e.branches:
       
   413                         branchpoints.add(branch)
       
   414             e.branchpoints = branchpoints
       
   415 
       
   416             log.append(e)
       
   417 
       
   418             if len(log) % 100 == 0:
       
   419                 ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')
       
   420 
       
   421     log.sort(key=lambda x: (x.rcs, x.revision))
       
   422 
       
   423     # find parent revisions of individual files
       
   424     versions = {}
       
   425     for e in log:
       
   426         branch = e.revision[:-1]
       
   427         p = versions.get((e.rcs, branch), None)
       
   428         if p is None:
       
   429             p = e.revision[:-2]
       
   430         e.parent = p
       
   431         versions[(e.rcs, branch)] = e.revision
       
   432 
       
   433     # update the log cache
       
   434     if cache:
       
   435         if log:
       
   436             # join up the old and new logs
       
   437             log.sort(key=lambda x: x.date)
       
   438 
       
   439             if oldlog and oldlog[-1].date >= log[0].date:
       
   440                 raise logerror(_('log cache overlaps with new log entries,'
       
   441                                  ' re-run without cache.'))
       
   442 
       
   443             log = oldlog + log
       
   444 
       
   445             # write the new cachefile
       
   446             ui.note(_('writing cvs log cache %s\n') % cachefile)
       
   447             pickle.dump(log, open(cachefile, 'w'))
       
   448         else:
       
   449             log = oldlog
       
   450 
       
   451     ui.status(_('%d log entries\n') % len(log))
       
   452 
       
   453     hook.hook(ui, None, "cvslog", True, log=log)
       
   454 
       
   455     return log
       
   456 
       
   457 
       
   458 class changeset(object):
       
   459     '''Class changeset has the following attributes:
       
   460         .id        - integer identifying this changeset (list index)
       
   461         .author    - author name as CVS knows it
       
   462         .branch    - name of branch this changeset is on, or None
       
   463         .comment   - commit message
       
   464         .date      - the commit date as a (time,tz) tuple
       
   465         .entries   - list of logentry objects in this changeset
       
   466         .parents   - list of one or two parent changesets
       
   467         .tags      - list of tags on this changeset
       
   468         .synthetic - from synthetic revision "file ... added on branch ..."
       
   469         .mergepoint- the branch that has been merged from
       
   470                      (if present in rlog output)
       
   471         .branchpoints- the branches that start at the current entry
       
   472     '''
       
   473     def __init__(self, **entries):
       
   474         self.synthetic = False
       
   475         self.__dict__.update(entries)
       
   476 
       
   477     def __repr__(self):
       
   478         return "<%s at 0x%x: %s>" % (self.__class__.__name__,
       
   479                                      id(self),
       
   480                                      getattr(self, 'id', "(no id)"))
       
   481 
       
   482 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
       
   483     '''Convert log into changesets.'''
       
   484 
       
   485     ui.status(_('creating changesets\n'))
       
   486 
       
   487     # Merge changesets
       
   488 
       
   489     log.sort(key=lambda x: (x.comment, x.author, x.branch, x.date))
       
   490 
       
   491     changesets = []
       
   492     files = set()
       
   493     c = None
       
   494     for i, e in enumerate(log):
       
   495 
       
   496         # Check if log entry belongs to the current changeset or not.
       
   497 
       
   498         # Since CVS is file centric, two different file revisions with
       
   499         # different branchpoints should be treated as belonging to two
       
   500         # different changesets (and the ordering is important and not
       
   501         # honoured by cvsps at this point).
       
   502         #
       
   503         # Consider the following case:
       
   504         # foo 1.1 branchpoints: [MYBRANCH]
       
   505         # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
       
   506         #
       
   507         # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
       
   508         # later version of foo may be in MYBRANCH2, so foo should be the
       
   509         # first changeset and bar the next and MYBRANCH and MYBRANCH2
       
   510         # should both start off of the bar changeset. No provisions are
       
   511         # made to ensure that this is, in fact, what happens.
       
   512         if not (c and
       
   513                   e.comment == c.comment and
       
   514                   e.author == c.author and
       
   515                   e.branch == c.branch and
       
   516                   (not hasattr(e, 'branchpoints') or
       
   517                     not hasattr (c, 'branchpoints') or
       
   518                     e.branchpoints == c.branchpoints) and
       
   519                   ((c.date[0] + c.date[1]) <=
       
   520                    (e.date[0] + e.date[1]) <=
       
   521                    (c.date[0] + c.date[1]) + fuzz) and
       
   522                   e.file not in files):
       
   523             c = changeset(comment=e.comment, author=e.author,
       
   524                           branch=e.branch, date=e.date, entries=[],
       
   525                           mergepoint=getattr(e, 'mergepoint', None),
       
   526                           branchpoints=getattr(e, 'branchpoints', set()))
       
   527             changesets.append(c)
       
   528             files = set()
       
   529             if len(changesets) % 100 == 0:
       
   530                 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
       
   531                 ui.status(util.ellipsis(t, 80) + '\n')
       
   532 
       
   533         c.entries.append(e)
       
   534         files.add(e.file)
       
   535         c.date = e.date       # changeset date is date of latest commit in it
       
   536 
       
   537     # Mark synthetic changesets
       
   538 
       
   539     for c in changesets:
       
   540         # Synthetic revisions always get their own changeset, because
       
   541         # the log message includes the filename.  E.g. if you add file3
       
   542         # and file4 on a branch, you get four log entries and three
       
   543         # changesets:
       
   544         #   "File file3 was added on branch ..." (synthetic, 1 entry)
       
   545         #   "File file4 was added on branch ..." (synthetic, 1 entry)
       
   546         #   "Add file3 and file4 to fix ..."     (real, 2 entries)
       
   547         # Hence the check for 1 entry here.
       
   548         c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic
       
   549 
       
   550     # Sort files in each changeset
       
   551 
       
   552     for c in changesets:
       
   553         def pathcompare(l, r):
       
   554             'Mimic cvsps sorting order'
       
   555             l = l.split('/')
       
   556             r = r.split('/')
       
   557             nl = len(l)
       
   558             nr = len(r)
       
   559             n = min(nl, nr)
       
   560             for i in range(n):
       
   561                 if i + 1 == nl and nl < nr:
       
   562                     return -1
       
   563                 elif i + 1 == nr and nl > nr:
       
   564                     return +1
       
   565                 elif l[i] < r[i]:
       
   566                     return -1
       
   567                 elif l[i] > r[i]:
       
   568                     return +1
       
   569             return 0
       
   570         def entitycompare(l, r):
       
   571             return pathcompare(l.file, r.file)
       
   572 
       
   573         c.entries.sort(entitycompare)
       
   574 
       
   575     # Sort changesets by date
       
   576 
       
   577     def cscmp(l, r):
       
   578         d = sum(l.date) - sum(r.date)
       
   579         if d:
       
   580             return d
       
   581 
       
   582         # detect vendor branches and initial commits on a branch
       
   583         le = {}
       
   584         for e in l.entries:
       
   585             le[e.rcs] = e.revision
       
   586         re = {}
       
   587         for e in r.entries:
       
   588             re[e.rcs] = e.revision
       
   589 
       
   590         d = 0
       
   591         for e in l.entries:
       
   592             if re.get(e.rcs, None) == e.parent:
       
   593                 assert not d
       
   594                 d = 1
       
   595                 break
       
   596 
       
   597         for e in r.entries:
       
   598             if le.get(e.rcs, None) == e.parent:
       
   599                 assert not d
       
   600                 d = -1
       
   601                 break
       
   602 
       
   603         return d
       
   604 
       
   605     changesets.sort(cscmp)
       
   606 
       
   607     # Collect tags
       
   608 
       
   609     globaltags = {}
       
   610     for c in changesets:
       
   611         for e in c.entries:
       
   612             for tag in e.tags:
       
   613                 # remember which is the latest changeset to have this tag
       
   614                 globaltags[tag] = c
       
   615 
       
   616     for c in changesets:
       
   617         tags = set()
       
   618         for e in c.entries:
       
   619             tags.update(e.tags)
       
   620         # remember tags only if this is the latest changeset to have it
       
   621         c.tags = sorted(tag for tag in tags if globaltags[tag] is c)
       
   622 
       
   623     # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
       
   624     # by inserting dummy changesets with two parents, and handle
       
   625     # {{mergefrombranch BRANCHNAME}} by setting two parents.
       
   626 
       
   627     if mergeto is None:
       
   628         mergeto = r'{{mergetobranch ([-\w]+)}}'
       
   629     if mergeto:
       
   630         mergeto = re.compile(mergeto)
       
   631 
       
   632     if mergefrom is None:
       
   633         mergefrom = r'{{mergefrombranch ([-\w]+)}}'
       
   634     if mergefrom:
       
   635         mergefrom = re.compile(mergefrom)
       
   636 
       
   637     versions = {}    # changeset index where we saw any particular file version
       
   638     branches = {}    # changeset index where we saw a branch
       
   639     n = len(changesets)
       
   640     i = 0
       
   641     while i < n:
       
   642         c = changesets[i]
       
   643 
       
   644         for f in c.entries:
       
   645             versions[(f.rcs, f.revision)] = i
       
   646 
       
   647         p = None
       
   648         if c.branch in branches:
       
   649             p = branches[c.branch]
       
   650         else:
       
   651             # first changeset on a new branch
       
   652             # the parent is a changeset with the branch in its
       
   653             # branchpoints such that it is the latest possible
       
   654             # commit without any intervening, unrelated commits.
       
   655 
       
   656             for candidate in xrange(i):
       
   657                 if c.branch not in changesets[candidate].branchpoints:
       
   658                     if p is not None:
       
   659                         break
       
   660                     continue
       
   661                 p = candidate
       
   662 
       
   663         c.parents = []
       
   664         if p is not None:
       
   665             p = changesets[p]
       
   666 
       
   667             # Ensure no changeset has a synthetic changeset as a parent.
       
   668             while p.synthetic:
       
   669                 assert len(p.parents) <= 1, \
       
   670                        _('synthetic changeset cannot have multiple parents')
       
   671                 if p.parents:
       
   672                     p = p.parents[0]
       
   673                 else:
       
   674                     p = None
       
   675                     break
       
   676 
       
   677             if p is not None:
       
   678                 c.parents.append(p)
       
   679 
       
   680         if c.mergepoint:
       
   681             if c.mergepoint == 'HEAD':
       
   682                 c.mergepoint = None
       
   683             c.parents.append(changesets[branches[c.mergepoint]])
       
   684 
       
   685         if mergefrom:
       
   686             m = mergefrom.search(c.comment)
       
   687             if m:
       
   688                 m = m.group(1)
       
   689                 if m == 'HEAD':
       
   690                     m = None
       
   691                 try:
       
   692                     candidate = changesets[branches[m]]
       
   693                 except KeyError:
       
   694                     ui.warn(_("warning: CVS commit message references "
       
   695                               "non-existent branch %r:\n%s\n")
       
   696                             % (m, c.comment))
       
   697                 if m in branches and c.branch != m and not candidate.synthetic:
       
   698                     c.parents.append(candidate)
       
   699 
       
   700         if mergeto:
       
   701             m = mergeto.search(c.comment)
       
   702             if m:
       
   703                 try:
       
   704                     m = m.group(1)
       
   705                     if m == 'HEAD':
       
   706                         m = None
       
   707                 except:
       
   708                     m = None   # if no group found then merge to HEAD
       
   709                 if m in branches and c.branch != m:
       
   710                     # insert empty changeset for merge
       
   711                     cc = changeset(
       
   712                         author=c.author, branch=m, date=c.date,
       
   713                         comment='convert-repo: CVS merge from branch %s'
       
   714                         % c.branch,
       
   715                         entries=[], tags=[],
       
   716                         parents=[changesets[branches[m]], c])
       
   717                     changesets.insert(i + 1, cc)
       
   718                     branches[m] = i + 1
       
   719 
       
   720                     # adjust our loop counters now we have inserted a new entry
       
   721                     n += 1
       
   722                     i += 2
       
   723                     continue
       
   724 
       
   725         branches[c.branch] = i
       
   726         i += 1
       
   727 
       
   728     # Drop synthetic changesets (safe now that we have ensured no other
       
   729     # changesets can have them as parents).
       
   730     i = 0
       
   731     while i < len(changesets):
       
   732         if changesets[i].synthetic:
       
   733             del changesets[i]
       
   734         else:
       
   735             i += 1
       
   736 
       
   737     # Number changesets
       
   738 
       
   739     for i, c in enumerate(changesets):
       
   740         c.id = i + 1
       
   741 
       
   742     ui.status(_('%d changeset entries\n') % len(changesets))
       
   743 
       
   744     hook.hook(ui, None, "cvschangesets", True, changesets=changesets)
       
   745 
       
   746     return changesets
       
   747 
       
   748 
       
   749 def debugcvsps(ui, *args, **opts):
       
   750     '''Read CVS rlog for current directory or named path in
       
   751     repository, and convert the log to changesets based on matching
       
   752     commit log entries and dates.
       
   753     '''
       
   754     if opts["new_cache"]:
       
   755         cache = "write"
       
   756     elif opts["update_cache"]:
       
   757         cache = "update"
       
   758     else:
       
   759         cache = None
       
   760 
       
   761     revisions = opts["revisions"]
       
   762 
       
   763     try:
       
   764         if args:
       
   765             log = []
       
   766             for d in args:
       
   767                 log += createlog(ui, d, root=opts["root"], cache=cache)
       
   768         else:
       
   769             log = createlog(ui, root=opts["root"], cache=cache)
       
   770     except logerror, e:
       
   771         ui.write("%r\n"%e)
       
   772         return
       
   773 
       
   774     changesets = createchangeset(ui, log, opts["fuzz"])
       
   775     del log
       
   776 
       
   777     # Print changesets (optionally filtered)
       
   778 
       
   779     off = len(revisions)
       
   780     branches = {}    # latest version number in each branch
       
   781     ancestors = {}   # parent branch
       
   782     for cs in changesets:
       
   783 
       
   784         if opts["ancestors"]:
       
   785             if cs.branch not in branches and cs.parents and cs.parents[0].id:
       
   786                 ancestors[cs.branch] = (changesets[cs.parents[0].id - 1].branch,
       
   787                                         cs.parents[0].id)
       
   788             branches[cs.branch] = cs.id
       
   789 
       
   790         # limit by branches
       
   791         if opts["branches"] and (cs.branch or 'HEAD') not in opts["branches"]:
       
   792             continue
       
   793 
       
   794         if not off:
       
   795             # Note: trailing spaces on several lines here are needed to have
       
   796             #       bug-for-bug compatibility with cvsps.
       
   797             ui.write('---------------------\n')
       
   798             ui.write('PatchSet %d \n' % cs.id)
       
   799             ui.write('Date: %s\n' % util.datestr(cs.date,
       
   800                                                  '%Y/%m/%d %H:%M:%S %1%2'))
       
   801             ui.write('Author: %s\n' % cs.author)
       
   802             ui.write('Branch: %s\n' % (cs.branch or 'HEAD'))
       
   803             ui.write('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1],
       
   804                                   ','.join(cs.tags) or '(none)'))
       
   805             branchpoints = getattr(cs, 'branchpoints', None)
       
   806             if branchpoints:
       
   807                 ui.write('Branchpoints: %s \n' % ', '.join(branchpoints))
       
   808             if opts["parents"] and cs.parents:
       
   809                 if len(cs.parents) > 1:
       
   810                     ui.write('Parents: %s\n' %
       
   811                              (','.join([str(p.id) for p in cs.parents])))
       
   812                 else:
       
   813                     ui.write('Parent: %d\n' % cs.parents[0].id)
       
   814 
       
   815             if opts["ancestors"]:
       
   816                 b = cs.branch
       
   817                 r = []
       
   818                 while b:
       
   819                     b, c = ancestors[b]
       
   820                     r.append('%s:%d:%d' % (b or "HEAD", c, branches[b]))
       
   821                 if r:
       
   822                     ui.write('Ancestors: %s\n' % (','.join(r)))
       
   823 
       
   824             ui.write('Log:\n')
       
   825             ui.write('%s\n\n' % cs.comment)
       
   826             ui.write('Members: \n')
       
   827             for f in cs.entries:
       
   828                 fn = f.file
       
   829                 if fn.startswith(opts["prefix"]):
       
   830                     fn = fn[len(opts["prefix"]):]
       
   831                 ui.write('\t%s:%s->%s%s \n' % (
       
   832                         fn, '.'.join([str(x) for x in f.parent]) or 'INITIAL',
       
   833                         '.'.join([str(x) for x in f.revision]),
       
   834                         ['', '(DEAD)'][f.dead]))
       
   835             ui.write('\n')
       
   836 
       
   837         # have we seen the start tag?
       
   838         if revisions and off:
       
   839             if revisions[0] == str(cs.id) or \
       
   840                 revisions[0] in cs.tags:
       
   841                 off = False
       
   842 
       
   843         # see if we reached the end tag
       
   844         if len(revisions) > 1 and not off:
       
   845             if revisions[1] == str(cs.id) or \
       
   846                 revisions[1] in cs.tags:
       
   847                 break