135
|
1 # mpatch.py - Python implementation of mpatch.c
|
|
2 #
|
|
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
|
|
4 #
|
|
5 # This software may be used and distributed according to the terms of the
|
|
6 # GNU General Public License version 2, incorporated herein by reference.
|
|
7
|
|
8 import struct
|
|
9 try:
|
|
10 from cStringIO import StringIO
|
|
11 except ImportError:
|
|
12 from StringIO import StringIO
|
|
13
|
|
14 # This attempts to apply a series of patches in time proportional to
|
|
15 # the total size of the patches, rather than patches * len(text). This
|
|
16 # means rather than shuffling strings around, we shuffle around
|
|
17 # pointers to fragments with fragment lists.
|
|
18 #
|
|
19 # When the fragment lists get too long, we collapse them. To do this
|
|
20 # efficiently, we do all our operations inside a buffer created by
|
|
21 # mmap and simply use memmove. This avoids creating a bunch of large
|
|
22 # temporary string buffers.
|
|
23
|
|
24 def patches(a, bins):
|
|
25 if not bins: return a
|
|
26
|
|
27 plens = [len(x) for x in bins]
|
|
28 pl = sum(plens)
|
|
29 bl = len(a) + pl
|
|
30 tl = bl + bl + pl # enough for the patches and two working texts
|
|
31 b1, b2 = 0, bl
|
|
32
|
|
33 if not tl: return a
|
|
34
|
|
35 m = StringIO()
|
|
36 def move(dest, src, count):
|
|
37 """move count bytes from src to dest
|
|
38
|
|
39 The file pointer is left at the end of dest.
|
|
40 """
|
|
41 m.seek(src)
|
|
42 buf = m.read(count)
|
|
43 m.seek(dest)
|
|
44 m.write(buf)
|
|
45
|
|
46 # load our original text
|
|
47 m.write(a)
|
|
48 frags = [(len(a), b1)]
|
|
49
|
|
50 # copy all the patches into our segment so we can memmove from them
|
|
51 pos = b2 + bl
|
|
52 m.seek(pos)
|
|
53 for p in bins: m.write(p)
|
|
54
|
|
55 def pull(dst, src, l): # pull l bytes from src
|
|
56 while l:
|
|
57 f = src.pop(0)
|
|
58 if f[0] > l: # do we need to split?
|
|
59 src.insert(0, (f[0] - l, f[1] + l))
|
|
60 dst.append((l, f[1]))
|
|
61 return
|
|
62 dst.append(f)
|
|
63 l -= f[0]
|
|
64
|
|
65 def collect(buf, list):
|
|
66 start = buf
|
|
67 for l, p in list:
|
|
68 move(buf, p, l)
|
|
69 buf += l
|
|
70 return (buf - start, start)
|
|
71
|
|
72 for plen in plens:
|
|
73 # if our list gets too long, execute it
|
|
74 if len(frags) > 128:
|
|
75 b2, b1 = b1, b2
|
|
76 frags = [collect(b1, frags)]
|
|
77
|
|
78 new = []
|
|
79 end = pos + plen
|
|
80 last = 0
|
|
81 while pos < end:
|
|
82 m.seek(pos)
|
|
83 p1, p2, l = struct.unpack(">lll", m.read(12))
|
|
84 pull(new, frags, p1 - last) # what didn't change
|
|
85 pull([], frags, p2 - p1) # what got deleted
|
|
86 new.append((l, pos + 12)) # what got added
|
|
87 pos += l + 12
|
|
88 last = p2
|
|
89 frags = new + frags # what was left at the end
|
|
90
|
|
91 t = collect(b2, frags)
|
|
92
|
|
93 m.seek(t[1])
|
|
94 return m.read(t[0])
|
|
95
|
|
96 def patchedsize(orig, delta):
|
|
97 outlen, last, bin = 0, 0, 0
|
|
98 binend = len(delta)
|
|
99 data = 12
|
|
100
|
|
101 while data <= binend:
|
|
102 decode = delta[bin:bin + 12]
|
|
103 start, end, length = struct.unpack(">lll", decode)
|
|
104 if start > end:
|
|
105 break
|
|
106 bin = data + length
|
|
107 data = bin + 12
|
|
108 outlen += start - last
|
|
109 last = end
|
|
110 outlen += length
|
|
111
|
|
112 if bin != binend:
|
|
113 raise Exception("patch cannot be decoded")
|
|
114
|
|
115 outlen += orig - last
|
|
116 return outlen
|