135
|
1 # verify.py - repository integrity checking for Mercurial
|
|
2 #
|
|
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
|
|
4 #
|
|
5 # This software may be used and distributed according to the terms of the
|
|
6 # GNU General Public License version 2, incorporated herein by reference.
|
|
7
|
|
8 from node import nullid, short
|
|
9 from i18n import _
|
|
10 import revlog, util, error
|
|
11
|
|
def verify(repo):
    """Run the integrity check on repo while holding its lock.

    The lock obtained from repo.lock() is released whether or not the
    check raises.  Returns whatever _verify(repo) returns.
    """
    repolock = repo.lock()
    try:
        result = _verify(repo)
    finally:
        repolock.release()
    return result
|
|
18
|
|
19 def _verify(repo):
|
|
20 mflinkrevs = {}
|
|
21 filelinkrevs = {}
|
|
22 filenodes = {}
|
|
23 revisions = 0
|
|
24 badrevs = set()
|
|
25 errors = [0]
|
|
26 warnings = [0]
|
|
27 ui = repo.ui
|
|
28 cl = repo.changelog
|
|
29 mf = repo.manifest
|
|
30
|
|
31 if not repo.cancopy():
|
|
32 raise util.Abort(_("cannot verify bundle or remote repos"))
|
|
33
|
|
34 def err(linkrev, msg, filename=None):
|
|
35 if linkrev != None:
|
|
36 badrevs.add(linkrev)
|
|
37 else:
|
|
38 linkrev = '?'
|
|
39 msg = "%s: %s" % (linkrev, msg)
|
|
40 if filename:
|
|
41 msg = "%s@%s" % (filename, msg)
|
|
42 ui.warn(" " + msg + "\n")
|
|
43 errors[0] += 1
|
|
44
|
|
45 def exc(linkrev, msg, inst, filename=None):
|
|
46 if isinstance(inst, KeyboardInterrupt):
|
|
47 ui.warn(_("interrupted"))
|
|
48 raise
|
|
49 err(linkrev, "%s: %s" % (msg, inst), filename)
|
|
50
|
|
51 def warn(msg):
|
|
52 ui.warn(msg + "\n")
|
|
53 warnings[0] += 1
|
|
54
|
|
55 def checklog(obj, name, linkrev):
|
|
56 if not len(obj) and (havecl or havemf):
|
|
57 err(linkrev, _("empty or missing %s") % name)
|
|
58 return
|
|
59
|
|
60 d = obj.checksize()
|
|
61 if d[0]:
|
|
62 err(None, _("data length off by %d bytes") % d[0], name)
|
|
63 if d[1]:
|
|
64 err(None, _("index contains %d extra bytes") % d[1], name)
|
|
65
|
|
66 if obj.version != revlog.REVLOGV0:
|
|
67 if not revlogv1:
|
|
68 warn(_("warning: `%s' uses revlog format 1") % name)
|
|
69 elif revlogv1:
|
|
70 warn(_("warning: `%s' uses revlog format 0") % name)
|
|
71
|
|
72 def checkentry(obj, i, node, seen, linkrevs, f):
|
|
73 lr = obj.linkrev(obj.rev(node))
|
|
74 if lr < 0 or (havecl and lr not in linkrevs):
|
|
75 if lr < 0 or lr >= len(cl):
|
|
76 msg = _("rev %d points to nonexistent changeset %d")
|
|
77 else:
|
|
78 msg = _("rev %d points to unexpected changeset %d")
|
|
79 err(None, msg % (i, lr), f)
|
|
80 if linkrevs:
|
|
81 warn(_(" (expected %s)") % " ".join(map(str,linkrevs)))
|
|
82 lr = None # can't be trusted
|
|
83
|
|
84 try:
|
|
85 p1, p2 = obj.parents(node)
|
|
86 if p1 not in seen and p1 != nullid:
|
|
87 err(lr, _("unknown parent 1 %s of %s") %
|
|
88 (short(p1), short(n)), f)
|
|
89 if p2 not in seen and p2 != nullid:
|
|
90 err(lr, _("unknown parent 2 %s of %s") %
|
|
91 (short(p2), short(p1)), f)
|
|
92 except Exception, inst:
|
|
93 exc(lr, _("checking parents of %s") % short(node), inst, f)
|
|
94
|
|
95 if node in seen:
|
|
96 err(lr, _("duplicate revision %d (%d)") % (i, seen[n]), f)
|
|
97 seen[n] = i
|
|
98 return lr
|
|
99
|
|
100 revlogv1 = cl.version != revlog.REVLOGV0
|
|
101 if ui.verbose or not revlogv1:
|
|
102 ui.status(_("repository uses revlog format %d\n") %
|
|
103 (revlogv1 and 1 or 0))
|
|
104
|
|
105 havecl = len(cl) > 0
|
|
106 havemf = len(mf) > 0
|
|
107
|
|
108 ui.status(_("checking changesets\n"))
|
|
109 seen = {}
|
|
110 checklog(cl, "changelog", 0)
|
|
111 for i in repo:
|
|
112 n = cl.node(i)
|
|
113 checkentry(cl, i, n, seen, [i], "changelog")
|
|
114
|
|
115 try:
|
|
116 changes = cl.read(n)
|
|
117 mflinkrevs.setdefault(changes[0], []).append(i)
|
|
118 for f in changes[3]:
|
|
119 filelinkrevs.setdefault(f, []).append(i)
|
|
120 except Exception, inst:
|
|
121 exc(i, _("unpacking changeset %s") % short(n), inst)
|
|
122
|
|
123 ui.status(_("checking manifests\n"))
|
|
124 seen = {}
|
|
125 checklog(mf, "manifest", 0)
|
|
126 for i in mf:
|
|
127 n = mf.node(i)
|
|
128 lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
|
|
129 if n in mflinkrevs:
|
|
130 del mflinkrevs[n]
|
|
131 else:
|
|
132 err(lr, _("%s not in changesets") % short(n), "manifest")
|
|
133
|
|
134 try:
|
|
135 for f, fn in mf.readdelta(n).iteritems():
|
|
136 if not f:
|
|
137 err(lr, _("file without name in manifest"))
|
|
138 elif f != "/dev/null":
|
|
139 fns = filenodes.setdefault(f, {})
|
|
140 if fn not in fns:
|
|
141 fns[fn] = i
|
|
142 except Exception, inst:
|
|
143 exc(lr, _("reading manifest delta %s") % short(n), inst)
|
|
144
|
|
145 ui.status(_("crosschecking files in changesets and manifests\n"))
|
|
146
|
|
147 if havemf:
|
|
148 for c,m in sorted([(c, m) for m in mflinkrevs for c in mflinkrevs[m]]):
|
|
149 err(c, _("changeset refers to unknown manifest %s") % short(m))
|
|
150 del mflinkrevs
|
|
151
|
|
152 for f in sorted(filelinkrevs):
|
|
153 if f not in filenodes:
|
|
154 lr = filelinkrevs[f][0]
|
|
155 err(lr, _("in changeset but not in manifest"), f)
|
|
156
|
|
157 if havecl:
|
|
158 for f in sorted(filenodes):
|
|
159 if f not in filelinkrevs:
|
|
160 try:
|
|
161 fl = repo.file(f)
|
|
162 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
|
|
163 except:
|
|
164 lr = None
|
|
165 err(lr, _("in manifest but not in changeset"), f)
|
|
166
|
|
167 ui.status(_("checking files\n"))
|
|
168
|
|
169 storefiles = set()
|
|
170 for f, f2, size in repo.store.datafiles():
|
|
171 if not f:
|
|
172 err(None, _("cannot decode filename '%s'") % f2)
|
|
173 elif size > 0:
|
|
174 storefiles.add(f)
|
|
175
|
|
176 files = sorted(set(filenodes) | set(filelinkrevs))
|
|
177 for f in files:
|
|
178 try:
|
|
179 linkrevs = filelinkrevs[f]
|
|
180 except KeyError:
|
|
181 # in manifest but not in changelog
|
|
182 linkrevs = []
|
|
183
|
|
184 if linkrevs:
|
|
185 lr = linkrevs[0]
|
|
186 else:
|
|
187 lr = None
|
|
188
|
|
189 try:
|
|
190 fl = repo.file(f)
|
|
191 except error.RevlogError, e:
|
|
192 err(lr, _("broken revlog! (%s)") % e, f)
|
|
193 continue
|
|
194
|
|
195 for ff in fl.files():
|
|
196 try:
|
|
197 storefiles.remove(ff)
|
|
198 except KeyError:
|
|
199 err(lr, _("missing revlog!"), ff)
|
|
200
|
|
201 checklog(fl, f, lr)
|
|
202 seen = {}
|
|
203 for i in fl:
|
|
204 revisions += 1
|
|
205 n = fl.node(i)
|
|
206 lr = checkentry(fl, i, n, seen, linkrevs, f)
|
|
207 if f in filenodes:
|
|
208 if havemf and n not in filenodes[f]:
|
|
209 err(lr, _("%s not in manifests") % (short(n)), f)
|
|
210 else:
|
|
211 del filenodes[f][n]
|
|
212
|
|
213 # verify contents
|
|
214 try:
|
|
215 t = fl.read(n)
|
|
216 rp = fl.renamed(n)
|
|
217 if len(t) != fl.size(i):
|
|
218 if len(fl.revision(n)) != fl.size(i):
|
|
219 err(lr, _("unpacked size is %s, %s expected") %
|
|
220 (len(t), fl.size(i)), f)
|
|
221 except Exception, inst:
|
|
222 exc(lr, _("unpacking %s") % short(n), inst, f)
|
|
223
|
|
224 # check renames
|
|
225 try:
|
|
226 if rp:
|
|
227 fl2 = repo.file(rp[0])
|
|
228 if not len(fl2):
|
|
229 err(lr, _("empty or missing copy source revlog %s:%s")
|
|
230 % (rp[0], short(rp[1])), f)
|
|
231 elif rp[1] == nullid:
|
|
232 ui.note(_("warning: %s@%s: copy source"
|
|
233 " revision is nullid %s:%s\n")
|
|
234 % (f, lr, rp[0], short(rp[1])))
|
|
235 else:
|
|
236 fl2.rev(rp[1])
|
|
237 except Exception, inst:
|
|
238 exc(lr, _("checking rename of %s") % short(n), inst, f)
|
|
239
|
|
240 # cross-check
|
|
241 if f in filenodes:
|
|
242 fns = [(mf.linkrev(l), n) for n,l in filenodes[f].iteritems()]
|
|
243 for lr, node in sorted(fns):
|
|
244 err(lr, _("%s in manifests not found") % short(node), f)
|
|
245
|
|
246 for f in storefiles:
|
|
247 warn(_("warning: orphan revlog '%s'") % f)
|
|
248
|
|
249 ui.status(_("%d files, %d changesets, %d total revisions\n") %
|
|
250 (len(files), len(cl), revisions))
|
|
251 if warnings[0]:
|
|
252 ui.warn(_("%d warnings encountered!\n") % warnings[0])
|
|
253 if errors[0]:
|
|
254 ui.warn(_("%d integrity errors encountered!\n") % errors[0])
|
|
255 if badrevs:
|
|
256 ui.warn(_("(first damaged changeset appears to be %d)\n")
|
|
257 % min(badrevs))
|
|
258 return 1
|