121
|
1 # bdiff.py - Python implementation of bdiff.c
|
|
2 #
|
|
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
|
|
4 #
|
|
5 # This software may be used and distributed according to the terms of the
|
|
6 # GNU General Public License version 2, incorporated herein by reference.
|
|
7
|
|
8 import struct, difflib
|
|
9
|
|
def splitnewlines(text):
    '''Split text into lines on '\\n' only, keeping each terminator.

    Unlike str.splitlines, no other character is treated as a line
    boundary. A final fragment without a trailing newline is returned
    as-is; empty text yields an empty list.
    '''
    fragments = text.split('\n')
    # The last fragment is whatever follows the final newline (possibly
    # nothing); every earlier fragment was terminated by a newline.
    tail = fragments.pop()
    result = [fragment + '\n' for fragment in fragments]
    if tail:
        result.append(tail)
    return result
|
|
19
|
|
20 def _normalizeblocks(a, b, blocks):
|
|
21 prev = None
|
|
22 for curr in blocks:
|
|
23 if prev is None:
|
|
24 prev = curr
|
|
25 continue
|
|
26 shift = 0
|
|
27
|
|
28 a1, b1, l1 = prev
|
|
29 a1end = a1 + l1
|
|
30 b1end = b1 + l1
|
|
31
|
|
32 a2, b2, l2 = curr
|
|
33 a2end = a2 + l2
|
|
34 b2end = b2 + l2
|
|
35 if a1end == a2:
|
|
36 while a1end+shift < a2end and a[a1end+shift] == b[b1end+shift]:
|
|
37 shift += 1
|
|
38 elif b1end == b2:
|
|
39 while b1end+shift < b2end and a[a1end+shift] == b[b1end+shift]:
|
|
40 shift += 1
|
|
41 yield a1, b1, l1+shift
|
|
42 prev = a2+shift, b2+shift, l2-shift
|
|
43 yield prev
|
|
44
|
|
def bdiff(a, b):
    '''Return a binary delta describing how to turn a into b.

    Both arguments are coerced with bytes() (on Python 2 this is the
    same as str()) and split into lines with their line endings kept.
    The result is a byte string made of zero or more hunks. Each hunk
    is a 12-byte header of three big-endian signed 32-bit integers
    (start, end, length) followed by length bytes of data taken from b;
    start and end are byte offsets into a delimiting the unmatched
    region that the data stands in for.

    Identical inputs produce an empty delta. Passing text (unicode)
    strings on Python 3 raises TypeError from bytes().
    '''
    a = bytes(a).splitlines(True)
    b = bytes(b).splitlines(True)

    if not a:
        # Nothing to match against: a single hunk inserts all of b,
        # or no hunk at all when b is empty too.
        data = b"".join(b)
        if not data:
            return data
        return struct.pack(">lll", 0, 0, len(data)) + data

    # offsets[i] is the byte offset in a at which line i starts;
    # offsets[len(a)] is the total length of a.
    offsets = [0]
    for line in a:
        offsets.append(offsets[-1] + len(line))

    matches = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    matches = _normalizeblocks(a, b, matches)

    hunks = []
    la = 0  # first line of a after the previous match
    lb = 0  # first line of b after the previous match
    for am, bm, size in matches:
        data = b"".join(b[lb:bm])
        # Emit a hunk only if lines of a were skipped or b has new data.
        if am > la or data:
            header = struct.pack(">lll", offsets[la], offsets[am], len(data))
            hunks.append(header + data)
        la = am + size
        lb = bm + size

    return b"".join(hunks)
|
|
69
|
|
def blocks(a, b):
    '''Return the matching line ranges of the texts a and b.

    Both texts are split on newlines only. Each entry of the returned
    list is a tuple (astart, aend, bstart, bend) giving a run of lines
    a[astart:aend] that matches b[bstart:bend].
    '''
    alines = splitnewlines(a)
    blines = splitnewlines(b)
    matcher = difflib.SequenceMatcher(None, alines, blines)
    normalized = _normalizeblocks(alines, blines,
                                  matcher.get_matching_blocks())
    ranges = []
    for astart, bstart, length in normalized:
        ranges.append((astart, astart + length, bstart, bstart + length))
    return ranges
|
|
76
|