121
|
1 # store.py - repository store handling for Mercurial
|
|
2 #
|
|
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
|
|
4 #
|
|
5 # This software may be used and distributed according to the terms of the
|
|
6 # GNU General Public License version 2, incorporated herein by reference.
|
|
7
|
|
8 from i18n import _
|
|
9 import osutil, util
|
|
10 import os, stat
|
|
11
|
|
# SHA-1 constructor taken from util; hybridencode uses it to compute the
# digest that replaces the tail of over-long store paths.
_sha = util.sha1
|
|
13
|
|
# A tracked file 'foo' is stored as 'foo.i' (and possibly 'foo.d'), so a
# tracked directory literally named 'foo.i' or 'foo.d' would collide with
# it. Directory names with such endings therefore get '.hg' appended.
def encodedir(path):
    '''escape directory names ending in '.hg', '.i' or '.d'

    Only paths starting with 'data/' are touched; any other path is
    returned unchanged. Every occurrence of '.hg/', '.i/' and '.d/' is
    rewritten (in that order) to '.hg.hg/', '.i.hg/' and '.d.hg/'.
    '''
    if path.startswith('data/'):
        # order matters: '.hg/' must be escaped before the other two
        # replacements introduce new '.hg/' sequences
        for suffix in ('.hg/', '.i/', '.d/'):
            path = path.replace(suffix, suffix[:-1] + '.hg/')
    return path
|
|
23
|
|
def decodedir(path):
    '''strip the '.hg' escape from directory names in a 'data/' path

    Only paths starting with 'data/' are touched; any other path is
    returned unchanged. The sequences '.d.hg/', '.i.hg/' and '.hg.hg/'
    are rewritten (in that order) to '.d/', '.i/' and '.hg/'.
    '''
    if path.startswith('data/'):
        # '.hg.hg/' has to be handled last so that the '.hg/' it produces
        # is not mistaken for part of another escape
        unescape = (('.d.hg/', '.d/'),
                    ('.i.hg/', '.i/'),
                    ('.hg.hg/', '.hg/'))
        for escaped, plain in unescape:
            path = path.replace(escaped, plain)
    return path
|
|
31
|
|
def _buildencodefun():
    '''build the reversible filename encoder and its decoder

    Returns an (encode, decode) pair of functions.

    The encoder first applies encodedir and then maps every character:
    - control characters (0-31), characters 126-255 and the Windows
      reserved characters \\ : * ? " < > | become '~xx' (two hex digits)
    - uppercase ASCII letters and '_' become '_' followed by the
      lowercased character
    - every other character is kept as is

    The decoder reverses the character mapping and then applies
    decodedir. It raises KeyError if the input contains a sequence that
    the encoder cannot produce.
    '''
    escape = '_'
    reserved = [ord(c) for c in '\\:*?"<>|']
    tilde_coded = list(range(32)) + list(range(126, 256)) + reserved
    underscore_coded = list(range(ord('A'), ord('Z') + 1)) + [ord(escape)]

    # character -> encoded form; later assignments override earlier ones
    cmap = {}
    for code in range(127):
        cmap[chr(code)] = chr(code)
    for code in tilde_coded:
        cmap[chr(code)] = '~%02x' % code
    for code in underscore_coded:
        cmap[chr(code)] = escape + chr(code).lower()

    # encoded form -> character
    dmap = dict([(enc, raw) for raw, enc in cmap.items()])

    def tokens(s):
        # encoded forms are 1 to 3 characters long; try the shortest first
        pos = 0
        while pos < len(s):
            for width in (1, 2, 3):
                chunk = s[pos:pos + width]
                if chunk in dmap:
                    yield dmap[chunk]
                    pos += width
                    break
            else:
                raise KeyError

    def encode(s):
        return ''.join([cmap[c] for c in encodedir(s)])

    def decode(s):
        return decodedir(''.join(list(tokens(s))))

    return encode, decode

encodefilename, decodefilename = _buildencodefun()
|
|
59
|
|
def _build_lower_encodefun():
    '''build the non-reversible lowercasing filename encoder

    The returned function maps every character of its argument:
    - control characters (0-31), characters 126-255 and the Windows
      reserved characters \\ : * ? " < > | become '~xx' (two hex digits)
    - uppercase ASCII letters are lowercased
    - every other character is kept as is
    '''
    reserved = [ord(c) for c in '\\:*?"<>|']
    tilde_coded = list(range(32)) + list(range(126, 256)) + reserved

    # character -> encoded form; later assignments override earlier ones
    table = {}
    for code in range(127):
        table[chr(code)] = chr(code)
    for code in tilde_coded:
        table[chr(code)] = '~%02x' % code
    for code in range(ord('A'), ord('Z') + 1):
        table[chr(code)] = chr(code).lower()

    def encode(s):
        return ''.join([table[c] for c in s])

    return encode

lowerencode = _build_lower_encodefun()
|
|
70
|
|
# Device names that Windows does not allow as the part of a file name
# before its first '.'.
_windows_reserved_filenames = (
    'con prn aux nul '
    'com1 com2 com3 com4 com5 com6 com7 com8 com9 '
    'lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9').split()

def auxencode(path):
    '''mask path components that Windows cannot store

    Each non-empty '/'-separated component is rewritten as follows:
    - if the text before its first '.' is a Windows reserved name, the
      third character is replaced by its '~xx' hex code
      ('aux' -> 'au~78')
    - if it ends in a period or a space, that last character is replaced
      by its '~xx' hex code ('foo...' -> 'foo..~2e')

    Empty components are kept, so the number of separators is unchanged.
    '''
    encoded = []
    for comp in path.split('/'):
        if comp:
            stem = comp.split('.')[0]
            if stem in _windows_reserved_filenames:
                # every reserved name has at least three characters
                comp = comp[:2] + '~%02x' % ord(comp[2]) + comp[3:]
            last = comp[-1]
            if last in '. ':
                comp = comp[:-1] + '~%02x' % ord(last)
        encoded.append(comp)
    return '/'.join(encoded)
|
|
88
|
|
# longest encoded path that is stored with the reversible encoding
MAX_PATH_LEN_IN_HGSTORE = 120
# number of characters kept from each directory level in hashed paths
DIR_PREFIX_LEN = 8
# upper bound for the combined length of the shortened directory levels
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4

def hybridencode(path):
    '''encode a store path, hashing it if it would become too long

    Paths that do not begin with 'data/' are returned unchanged.

    Default encoding (reversible):

    Directory names are escaped with encodedir, uppercase letters 'X'
    become '_x' and reserved or illegal characters become '~xx', where xx
    is the two digit hex code of the character (see encodefilename).
    Path components that are Windows reserved filenames are masked by
    encoding their third character ('aux' -> 'au~78', see auxencode).

    Hashed encoding (not reversible):

    Used when the default-encoded path is longer than
    MAX_PATH_LEN_IN_HGSTORE. The result starts with 'dh/' instead of
    'data/' and consists of:

    - up to DIR_PREFIX_LEN characters of each directory level of the
      lowerencoded path, stopping before the level that would make the
      shortened directories longer than _MAX_SHORTENED_DIRS_LEN; a
      trailing period or space of a shortened level is replaced by '_'
    - the filler: as many leading characters of the lowerencoded
      basename (everything after the last path separator) as fit without
      exceeding MAX_PATH_LEN_IN_HGSTORE
    - the sha digest of the full (directory-escaped) path
    - the extension of the lowerencoded path (e.g. '.i' or '.d')
    '''
    if not path.startswith('data/'):
        return path

    # escape directories ending with .i and .d
    path = encodedir(path)
    tail = path[len('data/'):]
    plain = 'data/' + auxencode(encodefilename(tail))
    if len(plain) <= MAX_PATH_LEN_IN_HGSTORE:
        return plain

    digest = _sha(path).hexdigest()
    lowered = auxencode(lowerencode(tail))
    ext = os.path.splitext(lowered)[1]
    components = lowered.split('/')
    basename = components.pop()

    shortdirs = []
    for comp in components:
        prefix = comp[:DIR_PREFIX_LEN]
        if prefix[-1] in '. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + '_'
        # the length check counts one separator per level, including a
        # leading one
        candidate = '/'.join(shortdirs) + '/' + prefix
        if len(candidate) > _MAX_SHORTENED_DIRS_LEN:
            break
        shortdirs.append(prefix)
    dirs = ''.join([d + '/' for d in shortdirs])

    # use whatever room is left for the beginning of the basename
    room = MAX_PATH_LEN_IN_HGSTORE - len('dh/' + dirs + digest + ext)
    filler = ''
    if room > 0:
        filler = basename[:room]
    return 'dh/' + dirs + filler + digest + ext
|
|
154
|
|
155 def _calcmode(path):
|
|
156 try:
|
|
157 # files in .hg/ will be created using this mode
|
|
158 mode = os.stat(path).st_mode
|
|
159 # avoid some useless chmods
|
|
160 if (0777 & ~util.umask) == (0777 & mode):
|
|
161 mode = None
|
|
162 except OSError:
|
|
163 mode = None
|
|
164 return mode
|
|
165
|
|
# names in the store that copylist() reports, separated by spaces
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode

        def storeopener(f, *args, **kw):
            # directory names are escaped before touching the filesystem
            return rawopener(encodedir(f), *args, **kw)
        self.opener = storeopener

    def join(self, f):
        '''return the filesystem path of store file f'''
        return self.pathjoiner(self.path, encodedir(f))

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size)

        Lists the regular files ending in '.d' or '.i' below relpath
        (relative to the store path); subdirectories are only entered if
        recurse is true. The result is empty if relpath is not a
        directory.
        '''
        root = self.pathjoiner(self.path, relpath)
        if not os.path.isdir(root):
            return sorted([])
        # length of the store path plus separator, to make names relative
        striplen = len(self.path) + len(os.sep)
        found = []
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = self.pathjoiner(current, name)
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        return sorted(found)

    def datafiles(self):
        '''return the entries found below 'data', see _walk'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in reversed(self._walk('', False)):
            yield entry

    def copylist(self):
        '''return the list of names reported for copying'''
        return ['requires'] + _data.split()
|
|
213
|
|
class encodedstore(basicstore):
    '''store living in the 'store' subdirectory, using encodefilename'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode

        def storeopener(f, *args, **kw):
            # names are fully encoded before touching the filesystem
            return rawopener(encodefilename(f), *args, **kw)
        self.opener = storeopener

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below 'data'

        The unencoded name is None for files whose name cannot be
        decoded by decodefilename.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the filesystem path of store file f'''
        return self.pathjoiner(self.path, encodefilename(f))

    def copylist(self):
        '''return the list of names reported for copying'''
        names = ['requires', '00changelog.i']
        for f in _data.split():
            names.append(self.pathjoiner('store', f))
        return names
|
|
237
|
|
class fncache(object):
    '''set of store file names, backed by the 'fncache' file

    The file is read through the opener given to the constructor and
    holds one name per line. Loading is lazy: self.entries stays None
    until the first lookup, iteration or add.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        self.entries = None

    def _load(self):
        '''fill the entries from the fncache file

        A missing file yields an empty set. Raises util.Abort if a line
        is empty or not terminated by a newline.
        '''
        self.entries = set()
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            return
        try:
            for n, line in enumerate(fp):
                if (len(line) < 2) or (line[-1] != '\n'):
                    t = _('invalid entry in fncache, line %s') % (n + 1)
                    raise util.Abort(t)
                self.entries.add(decodedir(line[:-1]))
        finally:
            # close the file even when aborting on an invalid entry
            fp.close()

    def rewrite(self, files):
        '''replace the content of the fncache file with files'''
        # files is walked twice below, so make sure it can be
        files = list(files)
        fp = self.opener('fncache', mode='wb')
        try:
            for p in files:
                fp.write(encodedir(p) + '\n')
        finally:
            fp.close()
        self.entries = set(files)

    def add(self, fn):
        '''append fn to the fncache file and remember it'''
        if self.entries is None:
            self._load()
        fp = self.opener('fncache', 'ab')
        try:
            fp.write(encodedir(fn) + '\n')
        finally:
            fp.close()
        # keep the in-memory set in sync; otherwise 'fn in self' stays
        # False and callers append the same name over and over
        self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
|
|
281
|
|
class fncachestore(basicstore):
    '''store in the 'store' subdirectory using hybridencode and a fncache'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode
        cache = fncache(rawopener)
        self.fncache = cache

        def fncacheopener(path, mode='r', *args, **kw):
            # register 'data/' files in the fncache the first time they
            # are opened with a mode other than 'r' or 'rb'
            writing = mode not in ('r', 'rb')
            if writing and path.startswith('data/') and path not in cache:
                cache.add(path)
            return rawopener(hybridencode(path), mode, *args, **kw)
        self.opener = fncacheopener

    def join(self, f):
        '''return the filesystem path of store file f'''
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        '''yields (unencoded, encoded, size) for each fncache entry

        Entries whose file cannot be stat'ed are skipped; if there were
        any, the fncache is rewritten without them once iteration ends.
        '''
        joinpath = self.pathjoiner
        storepath = self.path
        stale = False
        present = []
        for name in self.fncache:
            encoded = hybridencode(name)
            try:
                info = os.stat(joinpath(storepath, encoded))
                yield name, encoded, info.st_size
                present.append(name)
            except OSError:
                # nonexistent entry
                stale = True
        if stale:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(present)

    def copylist(self):
        '''return the list of names reported for copying'''
        names = ['requires', '00changelog.i']
        for f in (_data + ' dh fncache').split():
            names.append(self.pathjoiner('store', f))
        return names
|
|
326
|
|
def store(requirements, path, opener, pathjoiner=None):
    '''return the store object matching the repository requirements

    - 'store' and 'fncache' in requirements: fncachestore
    - only 'store' in requirements: encodedstore
    - otherwise: basicstore

    pathjoiner defaults to os.path.join.
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        storetype = basicstore
    elif 'fncache' in requirements:
        storetype = fncachestore
    else:
        storetype = encodedstore
    return storetype(path, opener, pathjoiner)
|