|
1 # mdiff.py - diff and patch routines for mercurial |
|
2 # |
|
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> |
|
4 # |
|
5 # This software may be used and distributed according to the terms of the |
|
6 # GNU General Public License version 2 or any later version. |
|
7 |
|
8 from i18n import _ |
|
9 import bdiff, mpatch, util |
|
10 import re, struct |
|
11 |
|
def splitnewlines(text):
    '''Split text into lines at newline characters only.

    Unlike str.splitlines, no other character (a carriage return, for
    instance) counts as a line boundary.  Every returned line keeps its
    terminating newline; a final line that has none is returned as is.
    Empty text gives an empty list.
    '''
    pieces = text.split('\n')
    # Whatever follows the last newline is either '' (the text was empty
    # or ended with a newline) or an unterminated final line.
    tail = pieces.pop()
    lines = [piece + '\n' for piece in pieces]
    if tail:
        lines.append(tail)
    return lines
|
21 |
|
class diffopts(object):
    '''Container for the settings that control diff generation.

    Recognized options (the value from ``defaults`` is used for any option
    that is not supplied or is supplied as None; unknown keyword arguments
    are ignored):

    context           the number of context lines
    text              treats all files as text
    showfunc          enables diff -p output
    git               enables the git extended patch format
    nodates           removes dates from diff headers
    ignorews          ignores all whitespace changes in the diff
    ignorewsamount    ignores changes in the amount of whitespace
    ignoreblanklines  ignores changes whose lines are all blank
    upgrade           generates git diffs to avoid data loss
    '''

    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        'upgrade': False,
        }

    # one slot per known option name
    __slots__ = defaults.keys()

    def __init__(self, **opts):
        '''Store every known option, falling back to its default.

        Raises util.Abort when int() rejects the context value with a
        ValueError.
        '''
        settings = dict(self.defaults)
        for name in self.__slots__:
            given = opts.get(name)
            if given is not None:
                settings[name] = given
            setattr(self, name, settings[name])

        # context may arrive as a string; normalize it to an int
        try:
            count = int(self.context)
        except ValueError:
            raise util.Abort(_('diff context lines count must be '
                               'an integer, not %r') % self.context)
        self.context = count

    def copy(self, **kwargs):
        '''Return a new diffopts with this instance's values, overridden
        by any keyword arguments given.'''
        merged = {}
        for name in self.defaults:
            merged[name] = getattr(self, name)
        merged.update(kwargs)
        return diffopts(**merged)
|
65 |
|
# Shared diffopts instance in which every option has its default value;
# unidiff() and _unidiff() use it as the default for their 'opts' argument.
defaultopts = diffopts()
|
67 |
|
def wsclean(opts, text, blank=True):
    '''Normalize text according to the whitespace options set in opts.

    opts.ignorews          delete every run of spaces, tabs and carriage
                           returns
    opts.ignorewsamount    (only when ignorews is off) collapse each such
                           run to one space and drop a space left directly
                           before a newline
    opts.ignoreblanklines  (only when blank is true) collapse runs of
                           newlines to a single newline and strip leading
                           and trailing newlines, so that empty lines
                           disappear while the boundaries between
                           non-empty lines are preserved

    Returns the normalized text; it is returned unchanged when none of the
    options apply.
    '''
    if opts.ignorews:
        text = re.sub('[ \t\r]+', '', text)
    elif opts.ignorewsamount:
        text = re.sub('[ \t\r]+', ' ', text)
        text = text.replace(' \n', '\n')
    if blank and opts.ignoreblanklines:
        # Keep one newline between non-empty lines.  Deleting every newline
        # would make 'a\nb' and 'ab' compare equal, so a change that joins
        # or splits lines would wrongly be treated as a blank-line change.
        text = re.sub('\n+', '\n', text).strip('\n')
    return text
|
77 |
|
def diffline(revs, a, b, opts):
    '''Build the "diff ..." line that introduces one file's diff.

    With opts.git set the line is "diff --git a/<a> b/<b>" and revs is not
    used.  Otherwise it is "diff", one "-r <rev>" per entry of revs (if
    any), then the name a; b is not used in that form.  The returned line
    ends with a newline.
    '''
    if opts.git:
        words = ['diff', '--git', 'a/%s' % a, 'b/%s' % b]
    else:
        words = ['diff']
        if revs:
            words.append(' '.join(["-r %s" % rev for rev in revs]))
        words.append(a)
    return ' '.join(words) + '\n'
|
90 |
|
def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
    '''Return a unified diff between the texts a and b as a single string.

    a, b      old and new contents; for a text diff, a side that is None
              is shown as /dev/null on its header line
    ad, bd    date strings for the '---' and '+++' header lines
    fn1, fn2  old and new file names
    r         optional revisions; when true, a line built by diffline()
              is placed in front of the output
    opts      diffopts instance controlling the output

    Unless opts.text is set, contents that util.binary() reports as binary
    produce only a "Binary file ... has changed" line.  An empty string is
    returned when both sides are empty, when two binary sides are equal,
    or when no differing hunk is found.
    '''
    def datetag(date, fn=None):
        # Tail of a '---'/'+++' header line.  With dates enabled the date
        # follows a tab.  Without dates a bare tab is still emitted after a
        # file name containing a space (presumably so that tools reading
        # the diff can tell where the name ends).  The check uses the name
        # that actually appears on the line being built; /dev/null lines
        # pass no name and never get the tab.
        if not opts.git and not opts.nodates:
            return '\t%s\n' % date
        if fn and ' ' in fn:
            return '\t\n'
        return '\n'

    if not a and not b:
        return ""
    epoch = util.datestr((0, 0))

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return ""
        l = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # old side empty or missing: everything in b is an addition
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s" % ("a/" + fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd, fn2))
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif not b:
        # new side empty or missing: everything in a is a removal
        a = splitnewlines(a)
        l1 = "--- %s%s" % ("a/" + fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd, fn2))
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        al = splitnewlines(a)
        bl = splitnewlines(b)
        l = list(_unidiff(a, b, al, bl, opts=opts))
        if not l:
            return ""

        l.insert(0, "--- a/%s%s" % (fn1, datetag(ad, fn1)))
        l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd, fn2)))

    # a line without a trailing newline can only come from the end of one
    # of the texts; flag it the way unified diffs conventionally do
    for ln, line in enumerate(l):
        if line[-1] != '\n':
            l[ln] = line + "\n\\ No newline at end of file\n"

    if r:
        l.insert(0, diffline(r, fn1, fn2, opts))

    return "".join(l)
|
143 |
|
# creates a headerless unified diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
#
# This is a generator.  For every hunk it yields the "@@ ... @@" line,
# then the hunk's lines prefixed with ' ' (context), '-' (removed) or
# '+' (added).  Nothing is yielded when no difference is found.
def _unidiff(t1, t2, l1, l2, opts=defaultopts):
    def contextend(l, len):
        # index just past the trailing context that follows line index l,
        # clipped to len (the parameter, which shadows the builtin here)
        ret = l + opts.context
        if ret > len:
            ret = len
        return ret

    def contextstart(l):
        # index of the first leading context line before line index l,
        # clipped to 0
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    def yieldhunk(hunk):
        # hunk is [astart, a2, bstart, b2, delta]: the a and b ranges
        # covered so far plus the already formatted lines (leading
        # context, removals and additions)
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        # the b side gets the same number of trailing context lines as a
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            # walk backwards from the start of the context
            # to find a line starting with an alphanumeric char.
            # (funcre is only bound when opts.showfunc is set, see below)
            for x in xrange(astart - 1, -1, -1):
                t = l1[x].rstrip()
                if funcre.match(t):
                    # at most 40 characters of that line go in the header
                    func = ' ' + t[:40]
                    break

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
                                           bstart + 1, blen, func)
        for x in delta:
            yield x
        # trailing context is taken from the a side
        for x in xrange(a2, aend):
            yield ' ' + l1[x]

    if opts.showfunc:
        funcre = re.compile('\w')

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    # When whitespace is being ignored the matching is done on cleaned
    # copies of the texts (blank=False, so newlines are left alone), while
    # the emitted lines still come from the untouched l1 and l2.
    if opts.ignorews or opts.ignorewsamount:
        t1 = wsclean(opts, t1, False)
        t2 = wsclean(opts, t2, False)

    diff = bdiff.blocks(t1, t2)
    hunk = None
    for i, s1 in enumerate(diff):
        # Each block is indexed below as (a start, a end, b start, b end).
        # The gap to examine lies between the end of the previous match and
        # the start of this one.
        #
        # The first match is special: there is no previous match, so the
        # gap is measured from line 0 of both files.  If the first match
        # itself starts at line 0 in both files, old and new below are both
        # empty and we continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        delta = []
        a1 = s[1]
        a2 = s1[0]
        b1 = s[3]
        b2 = s1[2]

        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if opts.ignoreblanklines:
            # skip a change whose two sides are equal after cleaning
            if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                # the lines between the previous change and this one become
                # the context emitted below
                astart = hunk[1]
                bstart = hunk[3]
            else:
                # too far apart: the previous hunk is complete, emit it
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # append in place so that the list stored in hunk[4] grows too
        delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
        delta[len(delta):] = ['-' + x for x in old]
        delta[len(delta):] = ['+' + x for x in new]

    # emit the last pending hunk, if any
    if hunk:
        for x in yieldhunk(hunk):
            yield x
|
251 |
|
def patchtext(bin):
    '''Return the concatenated payloads of all fragments in the delta bin.

    bin is read as a sequence of records, each made of a 12-byte header
    holding three big-endian signed 32-bit integers, followed by a payload
    whose length is the third integer.  Only the payloads are kept; the
    first two header fields are not used here.
    '''
    headersize = 12
    fragments = []
    offset = 0
    while offset < len(bin):
        datastart = offset + headersize
        _p1, _p2, datalen = struct.unpack(">lll", bin[offset:datastart])
        offset = datastart + datalen
        fragments.append(bin[datastart:offset])
    return "".join(fragments)
|
261 |
|
def patch(a, bin):
    '''Apply the binary delta bin to the text a and return the result.

    When a is empty, mpatch is not called: the first 12 bytes of bin are
    treated as a trivial delta header and the rest is returned as a buffer.
    '''
    if len(a) != 0:
        return mpatch.patches(a, [bin])
    # skip over trivial delta header
    return buffer(bin, 12)
|
267 |
|
# similar to difflib.SequenceMatcher.get_matching_blocks, but built on
# bdiff.blocks: every block is turned into a triple made of its first,
# third and (second - first) fields, i.e. (start in a, start in b, length)
# going by how blocks are indexed elsewhere in this module
def get_matching_blocks(a, b):
    triples = []
    for block in bdiff.blocks(a, b):
        astart = block[0]
        bstart = block[2]
        triples.append((astart, bstart, block[1] - astart))
    return triples
|
271 |
|
def trivialdiffheader(length):
    '''Return a 12-byte delta header: the values 0, 0 and length packed
    as three big-endian signed 32-bit integers.'''
    fields = (0, 0, length)
    return struct.pack(">lll", *fields)
|
274 |
|
# module-level aliases for functions provided by the mpatch and bdiff
# modules, so that they can be reached through this module as well
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff