135
|
1 # mdiff.py - diff and patch routines for mercurial
|
|
2 #
|
|
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
|
|
4 #
|
|
5 # This software may be used and distributed according to the terms of the
|
|
6 # GNU General Public License version 2, incorporated herein by reference.
|
|
7
|
|
8 from i18n import _
|
|
9 import bdiff, mpatch, util
|
|
10 import re, struct
|
|
11
|
|
def splitnewlines(text):
    '''Split text into lines on newline characters only.

    Unlike str.splitlines, no other line boundary is recognised.  Every
    returned line keeps its trailing newline; a final line that had none
    is returned unterminated, and an empty text gives an empty list.'''
    pieces = text.split('\n')
    # every piece except the last one was followed by a newline in text
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        # text did not end with a newline: keep the unterminated remainder
        result.append(tail)
    return result
|
|
21
|
|
class diffopts(object):
    '''Options controlling how a diff is generated.

    context           number of context lines
    text              treat all files as text
    showfunc          enable diff -p output
    git               enable the git extended patch format
    nodates           remove dates from diff headers
    ignorews          ignore all whitespace changes in the diff
    ignorewsamount    ignore changes in the amount of whitespace
    ignoreblanklines  ignore changes whose lines are all blank'''

    # value used for an option that is not supplied (or supplied as None)
    defaults = {
        'context': 3,
        'text': False,
        'showfunc': False,
        'git': False,
        'nodates': False,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        }

    # one slot per known option
    __slots__ = defaults.keys()

    def __init__(self, **opts):
        '''Set every option from opts, falling back to defaults.

        Keyword arguments that do not name a known option are ignored.
        context is converted with int(); util.Abort is raised when that
        conversion fails with ValueError.'''
        for name in self.__slots__:
            value = opts.get(name)
            if value is not None:
                setattr(self, name, value)
            else:
                setattr(self, name, self.defaults[name])

        try:
            self.context = int(self.context)
        except ValueError:
            raise util.Abort(_('diff context lines count must be '
                               'an integer, not %r') % self.context)
|
|
57
|
|
# diffopts instance with every option left at its default value; it is the
# default for the opts argument of unidiff() and bunidiff()
defaultopts = diffopts()
|
|
59
|
|
def wsclean(opts, text):
    '''Return text with the whitespace that opts says to ignore removed.

    ignorews deletes every run of spaces and tabs.  Otherwise
    ignorewsamount collapses each such run to one space and then drops
    whitespace in front of a newline.  ignoreblanklines is applied last
    and deletes every run of newlines.'''
    substitutions = []
    if opts.ignorews:
        substitutions.append(('[ \t]+', ''))
    elif opts.ignorewsamount:
        substitutions.append(('[ \t]+', ' '))
        substitutions.append(('[ \t]+\n', '\n'))
    if opts.ignoreblanklines:
        substitutions.append(('\n+', ''))

    # apply the selected rewrites in the order they were queued
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text
|
|
69
|
|
def diffline(revs, a, b, opts):
    '''Return the "diff ..." line that introduces the diff of one file.

    With opts.git the line is "diff --git a/<a> b/<b>" and revs is not
    used.  Otherwise it is "diff", a "-r <rev>" for each entry of revs
    (when revs is non-empty) and then a alone.  The result ends with a
    newline.'''
    if opts.git:
        fields = ['diff', '--git', 'a/%s' % a, 'b/%s' % b]
    else:
        fields = ['diff']
        if revs:
            fields.append(' '.join(["-r %s" % rev for rev in revs]))
        fields.append(a)
    return ' '.join(fields) + '\n'
|
|
82
|
|
def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
    '''Return the unified diff between texts a and b as one string.

    a, b      old and new contents; a side that is None is shown as
              /dev/null in its header line
    ad, bd    dates written after the old and new file names
    fn1, fn2  old and new file names
    r         optional revisions; when true, the line built by diffline()
              is put in front of the output
    opts      diffopts instance

    An empty string is returned when both texts are empty, when two
    identical texts are treated as binary, or when bunidiff() yields
    nothing.  Unless opts.text is set, texts that util.binary() flags are
    reported with a single "Binary file ... has changed" line.'''
    def datetag(date, fn=None):
        # Trailer of a "---"/"+++" line.  fn is the file name shown on that
        # line; the /dev/null lines pass none.
        if not opts.git and not opts.nodates:
            return '\t%s\n' % date
        # no date is printed: a name containing a space gets a trailing
        # tab.  The check must look at the name on this very line, not
        # always at fn1.
        if fn and ' ' in fn:
            return '\t\n'
        return '\n'

    if not a and not b:
        return ""
    epoch = util.datestr((0, 0))

    if not opts.text and (util.binary(a) or util.binary(b)):
        if a and b and len(a) == len(b) and a == b:
            return ""
        l = ['Binary file %s has changed\n' % fn1]
    elif not a:
        # old side empty or missing: everything in b is an addition
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch)
        else:
            l1 = "--- %s%s" % ("a/" + fn1, datetag(ad, fn1))
        l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd, fn2))
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif not b:
        # new side empty or missing: everything in a is a removal
        a = splitnewlines(a)
        l1 = "--- %s%s" % ("a/" + fn1, datetag(ad, fn1))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd, fn2))
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        al = splitnewlines(a)
        bl = splitnewlines(b)
        l = list(bunidiff(a, b, al, bl, "a/" + fn1, "b/" + fn2, opts=opts))
        if not l:
            return ""
        # bunidiff ends both header lines with "\t\n"; strip those two
        # characters and append the real trailer
        l[0] = "%s%s" % (l[0][:-2], datetag(ad, fn1))
        l[1] = "%s%s" % (l[1][:-2], datetag(bd, fn2))

    # mark any line that came from a text without a final newline
    for ln, line in enumerate(l):
        if line[-1] != '\n':
            l[ln] = line + "\n\\ No newline at end of file\n"

    if r:
        l.insert(0, diffline(r, fn1, fn2, opts))

    return "".join(l)
|
|
133
|
|
# somewhat self contained replacement for difflib.unified_diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
# header1 and header2 are the filenames for the diff output
def bunidiff(t1, t2, l1, l2, header1, header2, opts=defaultopts):
    '''Generate the lines of a unified diff between t1 and t2.

    The "---"/"+++" header lines are yielded once, in front of the first
    hunk.  Each hunk is an "@@" line followed by its ' ', '-' and '+'
    prefixed lines.  Nothing is yielded when no hunk is produced.
    opts supplies context, showfunc and the ignore* whitespace flags.'''
    def contextend(l, len):
        # last line index of trailing context: l + context, capped at len
        ret = l + opts.context
        if ret > len:
            ret = len
        return ret

    def contextstart(l):
        # first line index of leading context: l - context, floored at 0
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    def yieldhunk(hunk, header):
        # emit one finished hunk, preceded by the file header if one is given
        if header:
            for x in header:
                yield x
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        # the new side covers bstart..b2 plus the same number of trailing
        # context lines as the old side (aend - a2)
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            # walk backwards from the start of the context
            # to find a line starting with an alphanumeric char.
            for x in xrange(astart - 1, -1, -1):
                t = l1[x].rstrip()
                if funcre.match(t):
                    # only the first 40 characters of the line are shown
                    func = ' ' + t[:40]
                    break

        # line numbers in the "@@" line are 1-based
        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
                                           bstart + 1, blen, func)
        for x in delta:
            yield x
        # trailing context is taken from the old side
        for x in xrange(a2, aend):
            yield ' ' + l1[x]

    # both lines end in "\t\n"
    header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]

    if opts.showfunc:
        # only defined (and only used by yieldhunk) when showfunc is set
        funcre = re.compile('\w')

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    # NOTE(review): each block is indexed here as (a1, a2, b1, b2), i.e.
    # presumably start/end of the match in each file - confirm against bdiff
    diff = bdiff.blocks(t1, t2)
    # current unfinished hunk as the list [astart, a2, bstart, b2, delta];
    # it is a list so that joining can update it in place
    hunk = None
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i-1]
        else:
            s = [0, 0, 0, 0]
        delta = []
        # the gap between the previous match (s) and this one (s1)
        a1 = s[1]
        a2 = s1[0]
        b1 = s[3]
        b2 = s1[2]

        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if opts.ignorews or opts.ignorewsamount or opts.ignoreblanklines:
            # skip a gap whose two sides are equal once cleaned by wsclean
            if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                # continue right where the previous hunk's lines stopped
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk, header):
                    yield x
                # we only want to yield the header if the files differ, and
                # we only want to yield it once.
                header = None
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            # keep appending to the joined hunk's line list
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [ astart, a2, bstart, b2, delta ]

        # leading (or joining) context, then removed lines, then added lines
        delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
        delta[len(delta):] = [ '-' + x for x in old ]
        delta[len(delta):] = [ '+' + x for x in new ]

    # flush the last hunk, which the loop never yields itself
    if hunk:
        for x in yieldhunk(hunk, header):
            yield x
|
|
246
|
|
def patchtext(bin):
    '''Return the data of every fragment in bin joined into one string.

    bin is read as consecutive records: a 12 byte header holding three
    big-endian 32-bit signed integers, followed by as many bytes of data
    as the third integer says.  The first two integers are not used.'''
    headerlen = 12
    chunks = []
    offset = 0
    total = len(bin)
    while offset < total:
        datastart = offset + headerlen
        length = struct.unpack(">lll", bin[offset:datastart])[2]
        # the next record begins right after this record's data
        offset = datastart + length
        chunks.append(bin[datastart:offset])
    return "".join(chunks)
|
|
256
|
|
def patch(a, bin):
    '''Return mpatch.patches() called on a with bin as the only patch.'''
    single = [bin]
    return mpatch.patches(a, single)
|
|
259
|
|
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    '''Return a list of (d[0], d[2], d[1] - d[0]) triples, one for each
    block d returned by bdiff.blocks(a, b).'''
    matches = []
    for block in bdiff.blocks(a, b):
        first, second, third = block[0], block[1], block[2]
        matches.append((first, third, second - first))
    return matches
|
|
263
|
|
def trivialdiffheader(length):
    '''Return a 12 byte header packing (0, 0, length) as three big-endian
    32-bit signed integers, the layout patchtext() unpacks.'''
    p1 = p2 = 0
    return struct.pack(">lll", p1, p2, length)
|
|
266
|
|
# make the mpatch and bdiff implementations available under this module's
# own names
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
|