comparison upmana/mercurial/revlog.py @ 135:dcf4fbe09b70 beta

Traipse Beta 'OpenRPG' {091010-00}

Traipse is a distribution of OpenRPG designed to be easy to set up and go. Traipse also makes it easy for developers to work on the code without fear of sacrifice. 'Ornery-Orc' continues the trend of 'Grumpy' and adds fixes to the code; its main goal is to offer more advanced features and enhance the productivity of the user.

Update Summary (Beta):
  Added Bookmarks
  Fix to Remote Admin Commands
  Minor fix to text-based Server
  Fix to Pretty Print, from Core
  Fix to Splitter Nodes not being created
  Fix to massive amounts of images loading, from Core
  Added 'boot' command to remote admin
  Added confirmation window for sent nodes
  Minor changes to allow for portability to an openSUSE Linux OS
  Miniatures Layer pop-up box allows users to turn off Mini labels, from FlexiRPG
  Zoom Mouse plugin added
  Images added to Plugin UI
  Switching to Element Tree
  Map efficiency, from FlexiRPG
  Added Status Bar to Update Manager
  default_manifest.xml renamed to default_upmana.xml
  Cleaner code for saved repositories
  New TrueDebug Class in orpg_log (see documentation for usage)
  Mercurial's hgweb folder is ported to upmana

** Pretty important update that can help remove thousands of dead children from your gametree.
** Children, <forms />, <group_atts />, <horizontal />, <cols />, <rows />, <height />, etc. are all tags now. Check your gametree and look for dead children!!
** New Gametree Recursion method, mapping, and context sensitivity. !!Alpha - Watch out for infinite loops!!
author sirebral
date Tue, 10 Nov 2009 14:11:28 -0600
comparison
101:394ebb3b6a0f -> 135:dcf4fbe09b70
1 # revlog.py - storage back-end for mercurial
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
7
8 """Storage back-end for Mercurial.
9
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
12 """
13
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 from i18n import _
17 import changegroup, ancestor, mdiff, parsers, error, util
18 import struct, zlib, errno
19
20 _pack = struct.pack
21 _unpack = struct.unpack
22 _compress = zlib.compress
23 _decompress = zlib.decompress
24 _sha = util.sha1
25
26 # revlog flags
27 REVLOGV0 = 0
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33
34 _prereadsize = 1048576
35
36 RevlogError = error.RevlogError
37 LookupError = error.LookupError
38
39 def getoffset(q):
40 return int(q >> 16)
41
42 def gettype(q):
43 return int(q & 0xFFFF)
44
45 def offset_type(offset, type):
46 return long(long(offset) << 16 | type)
47
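# For illustration: the packed value keeps the data-file offset in the high
# bits and the entry type in the low 16 bits, so getoffset() and gettype()
# simply undo offset_type():
#
#     q = offset_type(4096, 1)       # (4096 << 16) | 1
#     assert getoffset(q) == 4096
#     assert gettype(q) == 1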
48 nullhash = _sha(nullid)
49
50 def hash(text, p1, p2):
51 """generate a hash from the given text and its parent hashes
52
53 This hash combines both the current file contents and its history
54 in a manner that makes it easy to distinguish nodes with the same
55 content in the revision graph.
56 """
57 # As of now, if one of the parent nodes is null, p2 is null
58 if p2 == nullid:
59 # deep copy of a hash is faster than creating one
60 s = nullhash.copy()
61 s.update(p1)
62 else:
63 # none of the parent nodes are nullid
64 l = [p1, p2]
65 l.sort()
66 s = _sha(l[0])
67 s.update(l[1])
68 s.update(text)
69 return s.digest()
70
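# For illustration: when p2 is nullid the branch above reduces to
# sha1(nullid + p1 + text); since nullid (20 zero bytes) always sorts first,
# this matches what the two-parent branch would produce:
#
#     node = hash(text, p1, nullid)
#     # equivalent to _sha(nullid + p1 + text).digest()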
71 def compress(text):
72 """ generate a possibly-compressed representation of text """
73 if not text:
74 return ("", text)
75 l = len(text)
76 bin = None
77 if l < 44:
78 pass
79 elif l > 1000000:
80 # zlib makes an internal copy, thus doubling memory usage for
81 # large files, so let's do this in pieces
82 z = zlib.compressobj()
83 p = []
84 pos = 0
85 while pos < l:
86 pos2 = pos + 2**20
87 p.append(z.compress(text[pos:pos2]))
88 pos = pos2
89 p.append(z.flush())
90 if sum(map(len, p)) < l:
91 bin = "".join(p)
92 else:
93 bin = _compress(text)
94 if bin is None or len(bin) > l:
95 if text[0] == '\0':
96 return ("", text)
97 return ('u', text)
98 return ("", bin)
99
100 def decompress(bin):
101 """ decompress the given input """
102 if not bin:
103 return bin
104 t = bin[0]
105 if t == '\0':
106 return bin
107 if t == 'x':
108 return _decompress(bin)
109 if t == 'u':
110 return bin[1:]
111 raise RevlogError(_("unknown compression type %r") % t)
112
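# For illustration: compress() returns a (header, data) pair whose
# concatenation decompress() reverses whichever path was taken above --
# zlib output is recognised by its leading 'x' byte, text stored verbatim
# carries a 'u' marker (or no marker at all when it already starts with a
# NUL byte), and empty input passes straight through:
#
#     h, d = compress(sometext)      # sometext: any str
#     assert decompress(h + d) == sometext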
113 class lazyparser(object):
114 """
115 this class avoids the need to parse the entirety of large indices
116 """
117
118 # lazyparser is not safe to use on Windows if the win32 extensions are
119 # not available: it keeps the file handle open, which makes it impossible
120 # to break hardlinks on locally cloned repos.
121
122 def __init__(self, dataf):
123 try:
124 size = util.fstat(dataf).st_size
125 except AttributeError:
126 size = 0
127 self.dataf = dataf
128 self.s = struct.calcsize(indexformatng)
129 self.datasize = size
130 self.l = size/self.s
131 self.index = [None] * self.l
132 self.map = {nullid: nullrev}
133 self.allmap = 0
134 self.all = 0
135 self.mapfind_count = 0
136
137 def loadmap(self):
138 """
139 during a commit, we need to make sure the rev being added is
140 not a duplicate. This requires loading the entire index,
141 which is fairly slow. loadmap can load up just the node map,
142 which takes much less time.
143 """
144 if self.allmap:
145 return
146 end = self.datasize
147 self.allmap = 1
148 cur = 0
149 count = 0
150 blocksize = self.s * 256
151 self.dataf.seek(0)
152 while cur < end:
153 data = self.dataf.read(blocksize)
154 off = 0
155 for x in xrange(256):
156 n = data[off + ngshaoffset:off + ngshaoffset + 20]
157 self.map[n] = count
158 count += 1
159 if count >= self.l:
160 break
161 off += self.s
162 cur += blocksize
163
164 def loadblock(self, blockstart, blocksize, data=None):
165 if self.all:
166 return
167 if data is None:
168 self.dataf.seek(blockstart)
169 if blockstart + blocksize > self.datasize:
170 # the revlog may have grown since we've started running,
171 # but we don't have space in self.index for more entries.
172 # limit blocksize so that we don't get too much data.
173 blocksize = max(self.datasize - blockstart, 0)
174 data = self.dataf.read(blocksize)
175 lend = len(data) / self.s
176 i = blockstart / self.s
177 off = 0
178 # lazyindex supports __delitem__
179 if lend > len(self.index) - i:
180 lend = len(self.index) - i
181 for x in xrange(lend):
182 if self.index[i + x] is None:
183 b = data[off : off + self.s]
184 self.index[i + x] = b
185 n = b[ngshaoffset:ngshaoffset + 20]
186 self.map[n] = i + x
187 off += self.s
188
189 def findnode(self, node):
190 """search backwards through the index file for a specific node"""
191 if self.allmap:
192 return None
193
194 # hg log will cause many many searches for the manifest
195 # nodes. After we get called a few times, just load the whole
196 # thing.
197 if self.mapfind_count > 8:
198 self.loadmap()
199 if node in self.map:
200 return node
201 return None
202 self.mapfind_count += 1
203 last = self.l - 1
204 while self.index[last] != None:
205 if last == 0:
206 self.all = 1
207 self.allmap = 1
208 return None
209 last -= 1
210 end = (last + 1) * self.s
211 blocksize = self.s * 256
212 while end >= 0:
213 start = max(end - blocksize, 0)
214 self.dataf.seek(start)
215 data = self.dataf.read(end - start)
216 findend = end - start
217 while True:
218 # we're searching backwards, so we have to make sure
219 # we don't find a changeset where this node is a parent
220 off = data.find(node, 0, findend)
221 findend = off
222 if off >= 0:
223 i = off / self.s
224 off = i * self.s
225 n = data[off + ngshaoffset:off + ngshaoffset + 20]
226 if n == node:
227 self.map[n] = i + start / self.s
228 return node
229 else:
230 break
231 end -= blocksize
232 return None
233
234 def loadindex(self, i=None, end=None):
235 if self.all:
236 return
237 all = False
238 if i is None:
239 blockstart = 0
240 blocksize = (65536 / self.s) * self.s
241 end = self.datasize
242 all = True
243 else:
244 if end:
245 blockstart = i * self.s
246 end = end * self.s
247 blocksize = end - blockstart
248 else:
249 blockstart = (i & ~1023) * self.s
250 blocksize = self.s * 1024
251 end = blockstart + blocksize
252 while blockstart < end:
253 self.loadblock(blockstart, blocksize)
254 blockstart += blocksize
255 if all:
256 self.all = True
257
258 class lazyindex(object):
259 """a lazy version of the index array"""
260 def __init__(self, parser):
261 self.p = parser
262 def __len__(self):
263 return len(self.p.index)
264 def load(self, pos):
265 if pos < 0:
266 pos += len(self.p.index)
267 self.p.loadindex(pos)
268 return self.p.index[pos]
269 def __getitem__(self, pos):
270 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
271 def __setitem__(self, pos, item):
272 self.p.index[pos] = _pack(indexformatng, *item)
273 def __delitem__(self, pos):
274 del self.p.index[pos]
275 def insert(self, pos, e):
276 self.p.index.insert(pos, _pack(indexformatng, *e))
277 def append(self, e):
278 self.p.index.append(_pack(indexformatng, *e))
279
280 class lazymap(object):
281 """a lazy version of the node map"""
282 def __init__(self, parser):
283 self.p = parser
284 def load(self, key):
285 n = self.p.findnode(key)
286 if n is None:
287 raise KeyError(key)
288 def __contains__(self, key):
289 if key in self.p.map:
290 return True
291 self.p.loadmap()
292 return key in self.p.map
293 def __iter__(self):
294 yield nullid
295 for i in xrange(self.p.l):
296 ret = self.p.index[i]
297 if not ret:
298 self.p.loadindex(i)
299 ret = self.p.index[i]
300 if isinstance(ret, str):
301 ret = _unpack(indexformatng, ret)
302 yield ret[7]
303 def __getitem__(self, key):
304 try:
305 return self.p.map[key]
306 except KeyError:
307 try:
308 self.load(key)
309 return self.p.map[key]
310 except KeyError:
311 raise KeyError("node " + hex(key))
312 def __setitem__(self, key, val):
313 self.p.map[key] = val
314 def __delitem__(self, key):
315 del self.p.map[key]
316
317 indexformatv0 = ">4l20s20s20s"
318 v0shaoffset = 56
319
320 class revlogoldio(object):
321 def __init__(self):
322 self.size = struct.calcsize(indexformatv0)
323
324 def parseindex(self, fp, data, inline):
325 s = self.size
326 index = []
327 nodemap = {nullid: nullrev}
328 n = off = 0
329 if len(data) == _prereadsize:
330 data += fp.read() # read the rest
331 l = len(data)
332 while off + s <= l:
333 cur = data[off:off + s]
334 off += s
335 e = _unpack(indexformatv0, cur)
336 # transform to revlogv1 format
337 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
338 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
339 index.append(e2)
340 nodemap[e[6]] = n
341 n += 1
342
343 return index, nodemap, None
344
345 def packentry(self, entry, node, version, rev):
346 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
347 node(entry[5]), node(entry[6]), entry[7])
348 return _pack(indexformatv0, *e2)
349
350 # index ng:
351 #  6 bytes: offset
352 #  2 bytes: flags
353 #  4 bytes: compressed length
354 #  4 bytes: uncompressed length
355 #  4 bytes: base rev
356 #  4 bytes: link rev
357 #  4 bytes: parent 1 rev
358 #  4 bytes: parent 2 rev
359 # 32 bytes: nodeid
360 indexformatng = ">Qiiiiii20s12x"
361 ngshaoffset = 32
362 versionformat = ">I"
363
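# For illustration: with the format string above, each revlogng index entry
# occupies a fixed 64 bytes -- 8 for the packed offset/flags, 4 for each of
# the six integer fields, a 20-byte sha1 node id and 12 bytes of padding --
# and the node id begins 32 bytes in, which is what ngshaoffset records:
#
#     assert struct.calcsize(indexformatng) == 64
#     assert ngshaoffset == 8 + 6 * 4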
364 class revlogio(object):
365 def __init__(self):
366 self.size = struct.calcsize(indexformatng)
367
368 def parseindex(self, fp, data, inline):
369 if len(data) == _prereadsize:
370 if util.openhardlinks() and not inline:
371 # big index, let's parse it on demand
372 parser = lazyparser(fp)
373 index = lazyindex(parser)
374 nodemap = lazymap(parser)
375 e = list(index[0])
376 type = gettype(e[0])
377 e[0] = offset_type(0, type)
378 index[0] = e
379 return index, nodemap, None
380 else:
381 data += fp.read()
382
383 # call the C implementation to parse the index data
384 index, nodemap, cache = parsers.parse_index(data, inline)
385 return index, nodemap, cache
386
387 def packentry(self, entry, node, version, rev):
388 p = _pack(indexformatng, *entry)
389 if rev == 0:
390 p = _pack(versionformat, version) + p[4:]
391 return p
392
393 class revlog(object):
394 """
395 the underlying revision storage object
396
397 A revlog consists of two parts, an index and the revision data.
398
399 The index is a file with a fixed record size containing
400 information on each revision, including its nodeid (hash), the
401 nodeids of its parents, the position and offset of its data within
402 the data file, and the revision it's based on. Finally, each entry
403 contains a linkrev entry that can serve as a pointer to external
404 data.
405
406 The revision data itself is a linear collection of data chunks.
407 Each chunk represents a revision and is usually represented as a
408 delta against the previous chunk. To bound lookup time, runs of
409 deltas are limited to about 2 times the length of the original
410 version data. This makes retrieval of a version proportional to
411 its size, or O(1) relative to the number of revisions.
412
413 Both pieces of the revlog are written to in an append-only
414 fashion, which means we never need to rewrite a file to insert or
415 remove data, and can use some simple techniques to avoid the need
416 for locking while reading.
417 """
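# A minimal usage sketch, using only methods defined below (the index file
# name is hypothetical and 'opener' is whatever open()-like callable the
# caller supplies):
#
#     r = revlog(opener, "data/somefile.i")
#     tipnode = r.tip()              # node id of the newest revision
#     text = r.revision(tipnode)     # reconstruct its full text
#     p1, p2 = r.parents(tipnode)    # parent node ids
#     for rev in r:                  # iterate revision numbers
#         node = r.node(rev)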
418 def __init__(self, opener, indexfile):
419 """
420 create a revlog object
421
422 opener is a function that abstracts the file opening operation
423 and can be used to implement COW semantics or the like.
424 """
425 self.indexfile = indexfile
426 self.datafile = indexfile[:-2] + ".d"
427 self.opener = opener
428 self._cache = None
429 self._chunkcache = (0, '')
430 self.nodemap = {nullid: nullrev}
431 self.index = []
432
433 v = REVLOG_DEFAULT_VERSION
434 if hasattr(opener, "defversion"):
435 v = opener.defversion
436 if v & REVLOGNG:
437 v |= REVLOGNGINLINEDATA
438
439 i = ''
440 try:
441 f = self.opener(self.indexfile)
442 i = f.read(_prereadsize)
443 if len(i) > 0:
444 v = struct.unpack(versionformat, i[:4])[0]
445 except IOError, inst:
446 if inst.errno != errno.ENOENT:
447 raise
448
449 self.version = v
450 self._inline = v & REVLOGNGINLINEDATA
451 flags = v & ~0xFFFF
452 fmt = v & 0xFFFF
453 if fmt == REVLOGV0 and flags:
454 raise RevlogError(_("index %s unknown flags %#04x for format v0")
455 % (self.indexfile, flags >> 16))
456 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
457 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
458 % (self.indexfile, flags >> 16))
459 elif fmt > REVLOGNG:
460 raise RevlogError(_("index %s unknown format %d")
461 % (self.indexfile, fmt))
462
463 self._io = revlogio()
464 if self.version == REVLOGV0:
465 self._io = revlogoldio()
466 if i:
467 try:
468 d = self._io.parseindex(f, i, self._inline)
469 except (ValueError, IndexError), e:
470 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
471 self.index, self.nodemap, self._chunkcache = d
472 if not self._chunkcache:
473 self._chunkclear()
474
475 # add the magic null revision at -1 (if it hasn't been done already)
476 if (self.index == [] or isinstance(self.index, lazyindex) or
477 self.index[-1][7] != nullid) :
478 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
479
480 def _loadindex(self, start, end):
481 """load a block of indexes all at once from the lazy parser"""
482 if isinstance(self.index, lazyindex):
483 self.index.p.loadindex(start, end)
484
485 def _loadindexmap(self):
486 """loads both the map and the index from the lazy parser"""
487 if isinstance(self.index, lazyindex):
488 p = self.index.p
489 p.loadindex()
490 self.nodemap = p.map
491
492 def _loadmap(self):
493 """loads the map from the lazy parser"""
494 if isinstance(self.nodemap, lazymap):
495 self.nodemap.p.loadmap()
496 self.nodemap = self.nodemap.p.map
497
498 def tip(self):
499 return self.node(len(self.index) - 2)
500 def __len__(self):
501 return len(self.index) - 1
502 def __iter__(self):
503 for i in xrange(len(self)):
504 yield i
505 def rev(self, node):
506 try:
507 return self.nodemap[node]
508 except KeyError:
509 raise LookupError(node, self.indexfile, _('no node'))
510 def node(self, rev):
511 return self.index[rev][7]
512 def linkrev(self, rev):
513 return self.index[rev][4]
514 def parents(self, node):
515 i = self.index
516 d = i[self.rev(node)]
517 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
518 def parentrevs(self, rev):
519 return self.index[rev][5:7]
520 def start(self, rev):
521 return int(self.index[rev][0] >> 16)
522 def end(self, rev):
523 return self.start(rev) + self.length(rev)
524 def length(self, rev):
525 return self.index[rev][1]
526 def base(self, rev):
527 return self.index[rev][3]
528
529 def size(self, rev):
530 """return the length of the uncompressed text for a given revision"""
531 l = self.index[rev][2]
532 if l >= 0:
533 return l
534
535 t = self.revision(self.node(rev))
536 return len(t)
537
538 # Alternate implementation. The advantage to this code is it
539 # will be faster for a single revision. However, the results
540 # are not cached, so finding the size of every revision will
541 # be slower.
542 #
543 # if self.cache and self.cache[1] == rev:
544 # return len(self.cache[2])
545 #
546 # base = self.base(rev)
547 # if self.cache and self.cache[1] >= base and self.cache[1] < rev:
548 # base = self.cache[1]
549 # text = self.cache[2]
550 # else:
551 # text = self.revision(self.node(base))
552 #
553 # l = len(text)
554 # for x in xrange(base + 1, rev + 1):
555 # l = mdiff.patchedsize(l, self._chunk(x))
556 # return l
557
558 def reachable(self, node, stop=None):
559 """return the set of all nodes ancestral to a given node, including
560 the node itself, stopping when stop is matched"""
561 reachable = set((node,))
562 visit = [node]
563 if stop:
564 stopn = self.rev(stop)
565 else:
566 stopn = 0
567 while visit:
568 n = visit.pop(0)
569 if n == stop:
570 continue
571 if n == nullid:
572 continue
573 for p in self.parents(n):
574 if self.rev(p) < stopn:
575 continue
576 if p not in reachable:
577 reachable.add(p)
578 visit.append(p)
579 return reachable
580
581 def ancestors(self, *revs):
582 'Generate the ancestors of revs using a breadth-first visit'
583 visit = list(revs)
584 seen = set([nullrev])
585 while visit:
586 for parent in self.parentrevs(visit.pop(0)):
587 if parent not in seen:
588 visit.append(parent)
589 seen.add(parent)
590 yield parent
591
592 def descendants(self, *revs):
593 'Generate the descendants of revs in topological order'
594 seen = set(revs)
595 for i in xrange(min(revs) + 1, len(self)):
596 for x in self.parentrevs(i):
597 if x != nullrev and x in seen:
598 seen.add(i)
599 yield i
600 break
601
602 def findmissing(self, common=None, heads=None):
603 '''
604 returns the topologically sorted list of nodes from the set:
605 missing = (ancestors(heads) \ ancestors(common))
606
607 where ancestors() is the set of ancestors from heads, heads included
608
609 if heads is None, the heads of the revlog are used
610 if common is None, nullid is assumed to be a common node
611 '''
612 if common is None:
613 common = [nullid]
614 if heads is None:
615 heads = self.heads()
616
617 common = [self.rev(n) for n in common]
618 heads = [self.rev(n) for n in heads]
619
620 # we want the ancestors, but inclusive
621 has = set(self.ancestors(*common))
622 has.add(nullrev)
623 has.update(common)
624
625 # take all ancestors from heads that aren't in has
626 missing = set()
627 visit = [r for r in heads if r not in has]
628 while visit:
629 r = visit.pop(0)
630 if r in missing:
631 continue
632 else:
633 missing.add(r)
634 for p in self.parentrevs(r):
635 if p not in has:
636 visit.append(p)
637 missing = list(missing)
638 missing.sort()
639 return [self.node(r) for r in missing]
640
641 def nodesbetween(self, roots=None, heads=None):
642 """Return a tuple containing three elements. Elements 1 and 2 contain
643 the final lists of bases and heads after all the unreachable ones have
644 been pruned. Element 0 contains a topologically sorted list of all
645
646 nodes that satisfy these constraints:
647 1. All nodes must be descended from a node in roots (the nodes on
648 roots are considered descended from themselves).
649 2. All nodes must also be ancestors of a node in heads (the nodes in
650 heads are considered to be their own ancestors).
651
652 If roots is unspecified, nullid is assumed as the only root.
653 If heads is unspecified, it is taken to be the output of the
654 heads method (i.e. a list of all nodes in the repository that
655 have no children)."""
656 nonodes = ([], [], [])
657 if roots is not None:
658 roots = list(roots)
659 if not roots:
660 return nonodes
661 lowestrev = min([self.rev(n) for n in roots])
662 else:
663 roots = [nullid] # Everybody's a descendent of nullid
664 lowestrev = nullrev
665 if (lowestrev == nullrev) and (heads is None):
666 # We want _all_ the nodes!
667 return ([self.node(r) for r in self], [nullid], list(self.heads()))
668 if heads is None:
669 # All nodes are ancestors, so the latest ancestor is the last
670 # node.
671 highestrev = len(self) - 1
672 # Set ancestors to None to signal that every node is an ancestor.
673 ancestors = None
674 # Set heads to an empty dictionary for later discovery of heads
675 heads = {}
676 else:
677 heads = list(heads)
678 if not heads:
679 return nonodes
680 ancestors = set()
681 # Turn heads into a dictionary so we can remove 'fake' heads.
682 # Also, later we will be using it to filter out the heads we can't
683 # find from roots.
684 heads = dict.fromkeys(heads, 0)
685 # Start at the top and keep marking parents until we're done.
686 nodestotag = set(heads)
687 # Remember where the top was so we can use it as a limit later.
688 highestrev = max([self.rev(n) for n in nodestotag])
689 while nodestotag:
690 # grab a node to tag
691 n = nodestotag.pop()
692 # Never tag nullid
693 if n == nullid:
694 continue
695 # A node's revision number represents its place in a
696 # topologically sorted list of nodes.
697 r = self.rev(n)
698 if r >= lowestrev:
699 if n not in ancestors:
700 # If we are possibly a descendent of one of the roots
701 # and we haven't already been marked as an ancestor
702 ancestors.add(n) # Mark as ancestor
703 # Add non-nullid parents to list of nodes to tag.
704 nodestotag.update([p for p in self.parents(n) if
705 p != nullid])
706 elif n in heads: # We've seen it before, is it a fake head?
707 # So it is, real heads should not be the ancestors of
708 # any other heads.
709 heads.pop(n)
710 if not ancestors:
711 return nonodes
712 # Now that we have our set of ancestors, we want to remove any
713 # roots that are not ancestors.
714
715 # If one of the roots was nullid, everything is included anyway.
716 if lowestrev > nullrev:
717 # But, since we weren't, let's recompute the lowest rev to not
718 # include roots that aren't ancestors.
719
720 # Filter out roots that aren't ancestors of heads
721 roots = [n for n in roots if n in ancestors]
722 # Recompute the lowest revision
723 if roots:
724 lowestrev = min([self.rev(n) for n in roots])
725 else:
726 # No more roots? Return empty list
727 return nonodes
728 else:
729 # We are descending from nullid, and don't need to care about
730 # any other roots.
731 lowestrev = nullrev
732 roots = [nullid]
733 # Transform our roots list into a set.
734 descendents = set(roots)
735 # Also, keep the original roots so we can filter out roots that aren't
736 # 'real' roots (i.e. are descended from other roots).
737 roots = descendents.copy()
738 # Our topologically sorted list of output nodes.
739 orderedout = []
740 # Don't start at nullid since we don't want nullid in our output list,
741 # and if nullid shows up in descendents, empty parents will look like
742 # they're descendents.
743 for r in xrange(max(lowestrev, 0), highestrev + 1):
744 n = self.node(r)
745 isdescendent = False
746 if lowestrev == nullrev: # Everybody is a descendent of nullid
747 isdescendent = True
748 elif n in descendents:
749 # n is already a descendent
750 isdescendent = True
751 # This check only needs to be done here because all the roots
752 # will start being marked as descendents before the loop.
753 if n in roots:
754 # If n was a root, check if it's a 'real' root.
755 p = tuple(self.parents(n))
756 # If any of its parents are descendents, it's not a root.
757 if (p[0] in descendents) or (p[1] in descendents):
758 roots.remove(n)
759 else:
760 p = tuple(self.parents(n))
761 # A node is a descendent if either of its parents are
762 # descendents. (We seeded the descendents list with the roots
763 # up there, remember?)
764 if (p[0] in descendents) or (p[1] in descendents):
765 descendents.add(n)
766 isdescendent = True
767 if isdescendent and ((ancestors is None) or (n in ancestors)):
768 # Only include nodes that are both descendents and ancestors.
769 orderedout.append(n)
770 if (ancestors is not None) and (n in heads):
771 # We're trying to figure out which heads are reachable
772 # from roots.
773 # Mark this head as having been reached
774 heads[n] = 1
775 elif ancestors is None:
776 # Otherwise, we're trying to discover the heads.
777 # Assume this is a head because if it isn't, the next step
778 # will eventually remove it.
779 heads[n] = 1
780 # But, obviously its parents aren't.
781 for p in self.parents(n):
782 heads.pop(p, None)
783 heads = [n for n in heads.iterkeys() if heads[n] != 0]
784 roots = list(roots)
785 assert orderedout
786 assert roots
787 assert heads
788 return (orderedout, roots, heads)
789
790 def heads(self, start=None, stop=None):
791 """return the list of all nodes that have no children
792
793 if start is specified, only heads that are descendants of
794 start will be returned
795 if stop is specified, it will consider all the revs from stop
796 as if they had no children
797 """
798 if start is None and stop is None:
799 count = len(self)
800 if not count:
801 return [nullid]
802 ishead = [1] * (count + 1)
803 index = self.index
804 for r in xrange(count):
805 e = index[r]
806 ishead[e[5]] = ishead[e[6]] = 0
807 return [self.node(r) for r in xrange(count) if ishead[r]]
808
809 if start is None:
810 start = nullid
811 if stop is None:
812 stop = []
813 stoprevs = set([self.rev(n) for n in stop])
814 startrev = self.rev(start)
815 reachable = set((startrev,))
816 heads = set((startrev,))
817
818 parentrevs = self.parentrevs
819 for r in xrange(startrev + 1, len(self)):
820 for p in parentrevs(r):
821 if p in reachable:
822 if r not in stoprevs:
823 reachable.add(r)
824 heads.add(r)
825 if p in heads and p not in stoprevs:
826 heads.remove(p)
827
828 return [self.node(r) for r in heads]
829
830 def children(self, node):
831 """find the children of a given node"""
832 c = []
833 p = self.rev(node)
834 for r in range(p + 1, len(self)):
835 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
836 if prevs:
837 for pr in prevs:
838 if pr == p:
839 c.append(self.node(r))
840 elif p == nullrev:
841 c.append(self.node(r))
842 return c
843
844 def _match(self, id):
845 if isinstance(id, (long, int)):
846 # rev
847 return self.node(id)
848 if len(id) == 20:
849 # possibly a binary node
850 # odds of a binary node being all hex in ASCII are 1 in 10**25
851 try:
852 node = id
853 self.rev(node) # quick search the index
854 return node
855 except LookupError:
856 pass # may be partial hex id
857 try:
858 # str(rev)
859 rev = int(id)
860 if str(rev) != id:
861 raise ValueError
862 if rev < 0:
863 rev = len(self) + rev
864 if rev < 0 or rev >= len(self):
865 raise ValueError
866 return self.node(rev)
867 except (ValueError, OverflowError):
868 pass
869 if len(id) == 40:
870 try:
871 # a full hex nodeid?
872 node = bin(id)
873 self.rev(node)
874 return node
875 except (TypeError, LookupError):
876 pass
877
878 def _partialmatch(self, id):
879 if len(id) < 40:
880 try:
881 # hex(node)[:...]
882 l = len(id) / 2 # grab an even number of digits
883 bin_id = bin(id[:l*2])
884 nl = [n for n in self.nodemap if n[:l] == bin_id]
885 nl = [n for n in nl if hex(n).startswith(id)]
886 if len(nl) > 0:
887 if len(nl) == 1:
888 return nl[0]
889 raise LookupError(id, self.indexfile,
890 _('ambiguous identifier'))
891 return None
892 except TypeError:
893 pass
894
895 def lookup(self, id):
896 """locate a node based on:
897 - revision number or str(revision number)
898 - nodeid or subset of hex nodeid
899 """
900 n = self._match(id)
901 if n is not None:
902 return n
903 n = self._partialmatch(id)
904 if n:
905 return n
906
907 raise LookupError(id, self.indexfile, _('no match found'))
908
909 def cmp(self, node, text):
910 """compare text with a given file revision"""
911 p1, p2 = self.parents(node)
912 return hash(text, p1, p2) != node
913
914 def _addchunk(self, offset, data):
915 o, d = self._chunkcache
916 # try to add to existing cache
917 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
918 self._chunkcache = o, d + data
919 else:
920 self._chunkcache = offset, data
921
922 def _loadchunk(self, offset, length):
923 if self._inline:
924 df = self.opener(self.indexfile)
925 else:
926 df = self.opener(self.datafile)
927
928 readahead = max(65536, length)
929 df.seek(offset)
930 d = df.read(readahead)
931 self._addchunk(offset, d)
932 if readahead > length:
933 return d[:length]
934 return d
935
936 def _getchunk(self, offset, length):
937 o, d = self._chunkcache
938 l = len(d)
939
940 # is it in the cache?
941 cachestart = offset - o
942 cacheend = cachestart + length
943 if cachestart >= 0 and cacheend <= l:
944 if cachestart == 0 and cacheend == l:
945 return d # avoid a copy
946 return d[cachestart:cacheend]
947
948 return self._loadchunk(offset, length)
949
950 def _chunkraw(self, startrev, endrev):
951 start = self.start(startrev)
952 length = self.end(endrev) - start
953 if self._inline:
954 start += (startrev + 1) * self._io.size
955 return self._getchunk(start, length)
956
957 def _chunk(self, rev):
958 return decompress(self._chunkraw(rev, rev))
959
960 def _chunkclear(self):
961 self._chunkcache = (0, '')
962
963 def revdiff(self, rev1, rev2):
964 """return or calculate a delta between two revisions"""
965 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
966 return self._chunk(rev2)
967
968 return mdiff.textdiff(self.revision(self.node(rev1)),
969 self.revision(self.node(rev2)))
970
971 def revision(self, node):
972 """return an uncompressed revision of a given node"""
973 if node == nullid:
974 return ""
975 if self._cache and self._cache[0] == node:
976 return str(self._cache[2])
977
978 # look up what we need to read
979 text = None
980 rev = self.rev(node)
981 base = self.base(rev)
982
983 # check rev flags
984 if self.index[rev][0] & 0xFFFF:
985 raise RevlogError(_('incompatible revision flag %x') %
986 (self.index[rev][0] & 0xFFFF))
987
988 # do we have useful data cached?
989 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
990 base = self._cache[1]
991 text = str(self._cache[2])
992
993 self._loadindex(base, rev + 1)
994 self._chunkraw(base, rev)
995 if text is None:
996 text = self._chunk(base)
997
998 bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
999 text = mdiff.patches(text, bins)
1000 p1, p2 = self.parents(node)
1001 if node != hash(text, p1, p2):
1002 raise RevlogError(_("integrity check failed on %s:%d")
1003 % (self.indexfile, rev))
1004
1005 self._cache = (node, rev, text)
1006 return text
1007
1008 def checkinlinesize(self, tr, fp=None):
1009 if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
1010 return
1011
1012 trinfo = tr.find(self.indexfile)
1013 if trinfo is None:
1014 raise RevlogError(_("%s not found in the transaction")
1015 % self.indexfile)
1016
1017 trindex = trinfo[2]
1018 dataoff = self.start(trindex)
1019
1020 tr.add(self.datafile, dataoff)
1021
1022 if fp:
1023 fp.flush()
1024 fp.close()
1025
1026 df = self.opener(self.datafile, 'w')
1027 try:
1028 for r in self:
1029 df.write(self._chunkraw(r, r))
1030 finally:
1031 df.close()
1032
1033 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1034 self.version &= ~(REVLOGNGINLINEDATA)
1035 self._inline = False
1036 for i in self:
1037 e = self._io.packentry(self.index[i], self.node, self.version, i)
1038 fp.write(e)
1039
1040 # if we don't call rename, the temp file will never replace the
1041 # real index
1042 fp.rename()
1043
1044 tr.replace(self.indexfile, trindex * self._io.size)
1045 self._chunkclear()
1046
1047 def addrevision(self, text, transaction, link, p1, p2, d=None):
1048 """add a revision to the log
1049
1050 text - the revision data to add
1051 transaction - the transaction object used for rollback
1052 link - the linkrev data to add
1053 p1, p2 - the parent nodeids of the revision
1054 d - an optional precomputed delta
1055 """
1056 dfh = None
1057 if not self._inline:
1058 dfh = self.opener(self.datafile, "a")
1059 ifh = self.opener(self.indexfile, "a+")
1060 try:
1061 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1062 finally:
1063 if dfh:
1064 dfh.close()
1065 ifh.close()
1066
1067 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1068 node = hash(text, p1, p2)
1069 if node in self.nodemap:
1070 return node
1071
1072 curr = len(self)
1073 prev = curr - 1
1074 base = self.base(prev)
1075 offset = self.end(prev)
1076
1077 if curr:
1078 if not d:
1079 ptext = self.revision(self.node(prev))
1080 d = mdiff.textdiff(ptext, text)
1081 data = compress(d)
1082 l = len(data[1]) + len(data[0])
1083 dist = l + offset - self.start(base)
1084
1085 # full versions are inserted when the needed deltas
1086 # become comparable to the uncompressed text
1087 if not curr or dist > len(text) * 2:
1088 data = compress(text)
1089 l = len(data[1]) + len(data[0])
1090 base = curr
1091
1092 e = (offset_type(offset, 0), l, len(text),
1093 base, link, self.rev(p1), self.rev(p2), node)
1094 self.index.insert(-1, e)
1095 self.nodemap[node] = curr
1096
1097 entry = self._io.packentry(e, self.node, self.version, curr)
1098 if not self._inline:
1099 transaction.add(self.datafile, offset)
1100 transaction.add(self.indexfile, curr * len(entry))
1101 if data[0]:
1102 dfh.write(data[0])
1103 dfh.write(data[1])
1104 dfh.flush()
1105 ifh.write(entry)
1106 else:
1107 offset += curr * self._io.size
1108 transaction.add(self.indexfile, offset, curr)
1109 ifh.write(entry)
1110 ifh.write(data[0])
1111 ifh.write(data[1])
1112 self.checkinlinesize(transaction, ifh)
1113
1114 self._cache = (node, curr, text)
1115 return node
1116
1117 def ancestor(self, a, b):
1118 """calculate the least common ancestor of nodes a and b"""
1119
1120 def parents(rev):
1121 return [p for p in self.parentrevs(rev) if p != nullrev]
1122
1123 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1124 if c is None:
1125 return nullid
1126
1127 return self.node(c)
1128
1129 def group(self, nodelist, lookup, infocollect=None):
1130 """calculate a delta group
1131
1132 Given a list of changeset revs, return a set of deltas and
1133 metadata corresponding to nodes. The first delta is
1134 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1135 have this parent as it has all history before these
1136 changesets. The parent used is parent[0].
1137 """
1138
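# For illustration, each chunk produced by the loop below is laid out as
#
#     chunkheader(len(meta) + len(d))        # length prefix
#     meta = nb + p[0] + p[1] + lookup(nb)   # four 20-byte ids, 80 bytes
#     d                                      # delta, or full text behind a
#                                            # trivial diff header when a == -1
#
# and the stream ends with changegroup.closechunk(); addgroup() below unpacks
# the same 80-byte header with "20s20s20s20s".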
1139 revs = [self.rev(n) for n in nodelist]
1140
1141 # if we don't have any revisions touched by these changesets, bail
1142 if not revs:
1143 yield changegroup.closechunk()
1144 return
1145
1146 # add the parent of the first rev
1147 p = self.parentrevs(revs[0])[0]
1148 revs.insert(0, p)
1149
1150 # build deltas
1151 for d in xrange(len(revs) - 1):
1152 a, b = revs[d], revs[d + 1]
1153 nb = self.node(b)
1154
1155 if infocollect is not None:
1156 infocollect(nb)
1157
1158 p = self.parents(nb)
1159 meta = nb + p[0] + p[1] + lookup(nb)
1160 if a == -1:
1161 d = self.revision(nb)
1162 meta += mdiff.trivialdiffheader(len(d))
1163 else:
1164 d = self.revdiff(a, b)
1165 yield changegroup.chunkheader(len(meta) + len(d))
1166 yield meta
1167 if len(d) > 2**20:
1168 pos = 0
1169 while pos < len(d):
1170 pos2 = pos + 2 ** 18
1171 yield d[pos:pos2]
1172 pos = pos2
1173 else:
1174 yield d
1175
1176 yield changegroup.closechunk()
1177
1178 def addgroup(self, revs, linkmapper, transaction):
1179 """
1180 add a delta group
1181
1182 Given a set of deltas, add them to the revision log. The
1183 first delta is against its parent, which should be in our
1184 log; the rest are against the previous delta.
1185 """
1186
1187 # track the base of the current delta log
1188 r = len(self)
1189 t = r - 1
1190 node = None
1191
1192 base = prev = nullrev
1193 start = end = textlen = 0
1194 if r:
1195 end = self.end(t)
1196
1197 ifh = self.opener(self.indexfile, "a+")
1198 isize = r * self._io.size
1199 if self._inline:
1200 transaction.add(self.indexfile, end + isize, r)
1201 dfh = None
1202 else:
1203 transaction.add(self.indexfile, isize, r)
1204 transaction.add(self.datafile, end)
1205 dfh = self.opener(self.datafile, "a")
1206
1207 try:
1208 # loop through our set of deltas
1209 chain = None
1210 for chunk in revs:
1211 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1212 link = linkmapper(cs)
1213 if node in self.nodemap:
1214 # this can happen if two branches make the same change
1215 chain = node
1216 continue
1217 delta = buffer(chunk, 80)
1218 del chunk
1219
1220 for p in (p1, p2):
1221 if not p in self.nodemap:
1222 raise LookupError(p, self.indexfile, _('unknown parent'))
1223
1224 if not chain:
1225 # retrieve the parent revision of the delta chain
1226 chain = p1
1227 if not chain in self.nodemap:
1228 raise LookupError(chain, self.indexfile, _('unknown base'))
1229
1230 # full versions are inserted when the needed deltas become
1231 # comparable to the uncompressed text or when the previous
1232 # version is not the one we have a delta against. We use
1233 # the size of the previous full rev as a proxy for the
1234 # current size.
1235
1236 if chain == prev:
1237 cdelta = compress(delta)
1238 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1239 textlen = mdiff.patchedsize(textlen, delta)
1240
1241 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1242 # flush our writes here so we can read them back in revision()
1243 if dfh:
1244 dfh.flush()
1245 ifh.flush()
1246 text = self.revision(chain)
1247 if len(text) == 0:
1248 # skip over trivial delta header
1249 text = buffer(delta, 12)
1250 else:
1251 text = mdiff.patches(text, [delta])
1252 del delta
1253 chk = self._addrevision(text, transaction, link, p1, p2, None,
1254 ifh, dfh)
1255 if not dfh and not self._inline:
1256 # addrevision switched from inline to conventional
1257 # reopen the index
1258 dfh = self.opener(self.datafile, "a")
1259 ifh = self.opener(self.indexfile, "a")
1260 if chk != node:
1261 raise RevlogError(_("consistency error adding group"))
1262 textlen = len(text)
1263 else:
1264 e = (offset_type(end, 0), cdeltalen, textlen, base,
1265 link, self.rev(p1), self.rev(p2), node)
1266 self.index.insert(-1, e)
1267 self.nodemap[node] = r
1268 entry = self._io.packentry(e, self.node, self.version, r)
1269 if self._inline:
1270 ifh.write(entry)
1271 ifh.write(cdelta[0])
1272 ifh.write(cdelta[1])
1273 self.checkinlinesize(transaction, ifh)
1274 if not self._inline:
1275 dfh = self.opener(self.datafile, "a")
1276 ifh = self.opener(self.indexfile, "a")
1277 else:
1278 dfh.write(cdelta[0])
1279 dfh.write(cdelta[1])
1280 ifh.write(entry)
1281
1282 t, r, chain, prev = r, r + 1, node, node
1283 base = self.base(t)
1284 start = self.start(base)
1285 end = self.end(t)
1286 finally:
1287 if dfh:
1288 dfh.close()
1289 ifh.close()
1290
1291 return node
1292
1293 def strip(self, minlink, transaction):
1294 """truncate the revlog on the first revision with a linkrev >= minlink
1295
1296 This function is called when we're stripping revision minlink and
1297 its descendants from the repository.
1298
1299 We have to remove all revisions with linkrev >= minlink, because
1300 the equivalent changelog revisions will be renumbered after the
1301 strip.
1302
1303 So we truncate the revlog on the first of these revisions, and
1304 trust that the caller has saved the revisions that shouldn't be
1305 removed and that it'll readd them after this truncation.
1306 """
1307 if len(self) == 0:
1308 return
1309
1310 if isinstance(self.index, lazyindex):
1311 self._loadindexmap()
1312
1313 for rev in self:
1314 if self.index[rev][4] >= minlink:
1315 break
1316 else:
1317 return
1318
1319 # first truncate the files on disk
1320 end = self.start(rev)
1321 if not self._inline:
1322 transaction.add(self.datafile, end)
1323 end = rev * self._io.size
1324 else:
1325 end += rev * self._io.size
1326
1327 transaction.add(self.indexfile, end)
1328
1329 # then reset internal state in memory to forget those revisions
1330 self._cache = None
1331 self._chunkclear()
1332 for x in xrange(rev, len(self)):
1333 del self.nodemap[self.node(x)]
1334
1335 del self.index[rev:-1]
1336
1337 def checksize(self):
1338 expected = 0
1339 if len(self):
1340 expected = max(0, self.end(len(self) - 1))
1341
1342 try:
1343 f = self.opener(self.datafile)
1344 f.seek(0, 2)
1345 actual = f.tell()
1346 dd = actual - expected
1347 except IOError, inst:
1348 if inst.errno != errno.ENOENT:
1349 raise
1350 dd = 0
1351
1352 try:
1353 f = self.opener(self.indexfile)
1354 f.seek(0, 2)
1355 actual = f.tell()
1356 s = self._io.size
1357 i = max(0, actual / s)
1358 di = actual - (i * s)
1359 if self._inline:
1360 databytes = 0
1361 for r in self:
1362 databytes += max(0, self.length(r))
1363 dd = 0
1364 di = actual - len(self) * s - databytes
1365 except IOError, inst:
1366 if inst.errno != errno.ENOENT:
1367 raise
1368 di = 0
1369
1370 return (dd, di)
1371
1372 def files(self):
1373 res = [ self.indexfile ]
1374 if not self._inline:
1375 res.append(self.datafile)
1376 return res