135
|
1 # copies.py - copy detection for Mercurial
|
|
2 #
|
|
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
|
|
4 #
|
|
5 # This software may be used and distributed according to the terms of the
|
|
6 # GNU General Public License version 2, incorporated herein by reference.
|
|
7
|
|
8 from i18n import _
|
|
9 import util
|
|
10 import heapq
|
|
11
|
|
12 def _nonoverlap(d1, d2, d3):
|
|
13 "Return list of elements in d1 not in d2 or d3"
|
|
14 return sorted([d for d in d1 if d not in d3 and d not in d2])
|
|
15
|
|
16 def _dirname(f):
|
|
17 s = f.rfind("/")
|
|
18 if s == -1:
|
|
19 return ""
|
|
20 return f[:s]
|
|
21
|
|
22 def _dirs(files):
|
|
23 d = set()
|
|
24 for f in files:
|
|
25 f = _dirname(f)
|
|
26 while f not in d:
|
|
27 d.add(f)
|
|
28 f = _dirname(f)
|
|
29 return d
|
|
30
|
|
31 def _findoldnames(fctx, limit):
|
|
32 "find files that path was copied from, back to linkrev limit"
|
|
33 old = {}
|
|
34 seen = set()
|
|
35 orig = fctx.path()
|
|
36 visit = [(fctx, 0)]
|
|
37 while visit:
|
|
38 fc, depth = visit.pop()
|
|
39 s = str(fc)
|
|
40 if s in seen:
|
|
41 continue
|
|
42 seen.add(s)
|
|
43 if fc.path() != orig and fc.path() not in old:
|
|
44 old[fc.path()] = (depth, fc.path()) # remember depth
|
|
45 if fc.rev() < limit and fc.rev() is not None:
|
|
46 continue
|
|
47 visit += [(p, depth - 1) for p in fc.parents()]
|
|
48
|
|
49 # return old names sorted by depth
|
|
50 return [o[1] for o in sorted(old.values())]
|
|
51
|
|
52 def _findlimit(repo, a, b):
|
|
53 "find the earliest revision that's an ancestor of a or b but not both"
|
|
54 # basic idea:
|
|
55 # - mark a and b with different sides
|
|
56 # - if a parent's children are all on the same side, the parent is
|
|
57 # on that side, otherwise it is on no side
|
|
58 # - walk the graph in topological order with the help of a heap;
|
|
59 # - add unseen parents to side map
|
|
60 # - clear side of any parent that has children on different sides
|
|
61 # - track number of interesting revs that might still be on a side
|
|
62 # - track the lowest interesting rev seen
|
|
63 # - quit when interesting revs is zero
|
|
64
|
|
65 cl = repo.changelog
|
|
66 working = len(cl) # pseudo rev for the working directory
|
|
67 if a is None:
|
|
68 a = working
|
|
69 if b is None:
|
|
70 b = working
|
|
71
|
|
72 side = {a: -1, b: 1}
|
|
73 visit = [-a, -b]
|
|
74 heapq.heapify(visit)
|
|
75 interesting = len(visit)
|
|
76 limit = working
|
|
77
|
|
78 while interesting:
|
|
79 r = -heapq.heappop(visit)
|
|
80 if r == working:
|
|
81 parents = [cl.rev(p) for p in repo.dirstate.parents()]
|
|
82 else:
|
|
83 parents = cl.parentrevs(r)
|
|
84 for p in parents:
|
|
85 if p not in side:
|
|
86 # first time we see p; add it to visit
|
|
87 side[p] = side[r]
|
|
88 if side[p]:
|
|
89 interesting += 1
|
|
90 heapq.heappush(visit, -p)
|
|
91 elif side[p] and side[p] != side[r]:
|
|
92 # p was interesting but now we know better
|
|
93 side[p] = 0
|
|
94 interesting -= 1
|
|
95 if side[r]:
|
|
96 limit = r # lowest rev visited
|
|
97 interesting -= 1
|
|
98 return limit
|
|
99
|
|
100 def copies(repo, c1, c2, ca, checkdirs=False):
|
|
101 """
|
|
102 Find moves and copies between context c1 and c2
|
|
103 """
|
|
104 # avoid silly behavior for update from empty dir
|
|
105 if not c1 or not c2 or c1 == c2:
|
|
106 return {}, {}
|
|
107
|
|
108 # avoid silly behavior for parent -> working dir
|
|
109 if c2.node() is None and c1.node() == repo.dirstate.parents()[0]:
|
|
110 return repo.dirstate.copies(), {}
|
|
111
|
|
112 limit = _findlimit(repo, c1.rev(), c2.rev())
|
|
113 m1 = c1.manifest()
|
|
114 m2 = c2.manifest()
|
|
115 ma = ca.manifest()
|
|
116
|
|
117 def makectx(f, n):
|
|
118 if len(n) != 20: # in a working context?
|
|
119 if c1.rev() is None:
|
|
120 return c1.filectx(f)
|
|
121 return c2.filectx(f)
|
|
122 return repo.filectx(f, fileid=n)
|
|
123
|
|
124 ctx = util.lrucachefunc(makectx)
|
|
125 copy = {}
|
|
126 fullcopy = {}
|
|
127 diverge = {}
|
|
128
|
|
129 def checkcopies(f, m1, m2):
|
|
130 '''check possible copies of f from m1 to m2'''
|
|
131 c1 = ctx(f, m1[f])
|
|
132 for of in _findoldnames(c1, limit):
|
|
133 fullcopy[f] = of # remember for dir rename detection
|
|
134 if of in m2: # original file not in other manifest?
|
|
135 # if the original file is unchanged on the other branch,
|
|
136 # no merge needed
|
|
137 if m2[of] != ma.get(of):
|
|
138 c2 = ctx(of, m2[of])
|
|
139 ca = c1.ancestor(c2)
|
|
140 # related and named changed on only one side?
|
|
141 if ca and (ca.path() == f or ca.path() == c2.path()):
|
|
142 if c1 != ca or c2 != ca: # merge needed?
|
|
143 copy[f] = of
|
|
144 elif of in ma:
|
|
145 diverge.setdefault(of, []).append(f)
|
|
146
|
|
147 repo.ui.debug(_(" searching for copies back to rev %d\n") % limit)
|
|
148
|
|
149 u1 = _nonoverlap(m1, m2, ma)
|
|
150 u2 = _nonoverlap(m2, m1, ma)
|
|
151
|
|
152 if u1:
|
|
153 repo.ui.debug(_(" unmatched files in local:\n %s\n")
|
|
154 % "\n ".join(u1))
|
|
155 if u2:
|
|
156 repo.ui.debug(_(" unmatched files in other:\n %s\n")
|
|
157 % "\n ".join(u2))
|
|
158
|
|
159 for f in u1:
|
|
160 checkcopies(f, m1, m2)
|
|
161 for f in u2:
|
|
162 checkcopies(f, m2, m1)
|
|
163
|
|
164 diverge2 = set()
|
|
165 for of, fl in diverge.items():
|
|
166 if len(fl) == 1:
|
|
167 del diverge[of] # not actually divergent
|
|
168 else:
|
|
169 diverge2.update(fl) # reverse map for below
|
|
170
|
|
171 if fullcopy:
|
|
172 repo.ui.debug(_(" all copies found (* = to merge, ! = divergent):\n"))
|
|
173 for f in fullcopy:
|
|
174 note = ""
|
|
175 if f in copy: note += "*"
|
|
176 if f in diverge2: note += "!"
|
|
177 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
|
|
178 del diverge2
|
|
179
|
|
180 if not fullcopy or not checkdirs:
|
|
181 return copy, diverge
|
|
182
|
|
183 repo.ui.debug(_(" checking for directory renames\n"))
|
|
184
|
|
185 # generate a directory move map
|
|
186 d1, d2 = _dirs(m1), _dirs(m2)
|
|
187 invalid = set()
|
|
188 dirmove = {}
|
|
189
|
|
190 # examine each file copy for a potential directory move, which is
|
|
191 # when all the files in a directory are moved to a new directory
|
|
192 for dst, src in fullcopy.iteritems():
|
|
193 dsrc, ddst = _dirname(src), _dirname(dst)
|
|
194 if dsrc in invalid:
|
|
195 # already seen to be uninteresting
|
|
196 continue
|
|
197 elif dsrc in d1 and ddst in d1:
|
|
198 # directory wasn't entirely moved locally
|
|
199 invalid.add(dsrc)
|
|
200 elif dsrc in d2 and ddst in d2:
|
|
201 # directory wasn't entirely moved remotely
|
|
202 invalid.add(dsrc)
|
|
203 elif dsrc in dirmove and dirmove[dsrc] != ddst:
|
|
204 # files from the same directory moved to two different places
|
|
205 invalid.add(dsrc)
|
|
206 else:
|
|
207 # looks good so far
|
|
208 dirmove[dsrc + "/"] = ddst + "/"
|
|
209
|
|
210 for i in invalid:
|
|
211 if i in dirmove:
|
|
212 del dirmove[i]
|
|
213 del d1, d2, invalid
|
|
214
|
|
215 if not dirmove:
|
|
216 return copy, diverge
|
|
217
|
|
218 for d in dirmove:
|
|
219 repo.ui.debug(_(" dir %s -> %s\n") % (d, dirmove[d]))
|
|
220
|
|
221 # check unaccounted nonoverlapping files against directory moves
|
|
222 for f in u1 + u2:
|
|
223 if f not in fullcopy:
|
|
224 for d in dirmove:
|
|
225 if f.startswith(d):
|
|
226 # new file added in a directory that was moved, move it
|
|
227 df = dirmove[d] + f[len(d):]
|
|
228 if df not in copy:
|
|
229 copy[f] = df
|
|
230 repo.ui.debug(_(" file %s -> %s\n") % (f, copy[f]))
|
|
231 break
|
|
232
|
|
233 return copy, diverge
|