121
|
1 # match.py - filename matching
|
|
2 #
|
|
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
|
|
4 #
|
|
5 # This software may be used and distributed according to the terms of the
|
|
6 # GNU General Public License version 2, incorporated herein by reference.
|
|
7
|
|
8 import util, re
|
|
9
|
|
class match(object):
    """callable file name matcher built from patterns plus include/exclude
    filters"""

    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False):
        """set up a matcher for a collection of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include
        exclude - patterns to exclude
        default - kind assumed for entries of patterns that carry no
                  explicit kind prefix (include and exclude always fall
                  back to 'glob')
        exact - patterns are actually literals

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to canonroot
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        '<something>' - a pattern of the specified default type
        """
        # NOTE: the list defaults of include/exclude are only read here,
        # never modified, so sharing them between calls is harmless.
        self._root = root
        self._cwd = cwd
        self._files = []
        self._anypats = bool(include or exclude)

        # Tests a file name has to pass, in evaluation order: include
        # patterns first, then exclude patterns, then the main patterns.
        checks = []
        if include:
            included = _buildmatch(_normalize(include, 'glob', root, cwd),
                                   '(?:/|$)')
            checks.append(included)
        if exclude:
            excluded = _buildmatch(_normalize(exclude, 'glob', root, cwd),
                                   '(?:/|$)')
            checks.append(lambda f: not excluded(f))
        if exact:
            # literal names: membership in _fmap decides (see exact())
            self._files = patterns
            checks.append(self.exact)
        elif patterns:
            pats = _normalize(patterns, default, root, cwd)
            self._files = _roots(pats)
            self._anypats = self._anypats or _anypats(pats)
            checks.append(_buildmatch(pats, '$'))

        # Chain the collected tests with "and"; with no test at all every
        # name matches.
        if not checks:
            matchfn = lambda f: True
        elif len(checks) == 1:
            matchfn = checks[0]
        elif len(checks) == 2:
            first, second = checks
            matchfn = lambda f: first(f) and second(f)
        else:
            first, second, third = checks
            matchfn = lambda f: first(f) and second(f) and third(f)

        self.matchfn = matchfn
        self._fmap = set(self._files)

    def __call__(self, fn):
        """apply the combined match function to fn"""
        return self.matchfn(fn)

    def __iter__(self):
        """iterate over the entries of files()"""
        for name in self._files:
            yield name

    def bad(self, f, msg):
        '''callback for each explicit file that can't be
        found/accessed, with an error message
        '''
        pass

    def dir(self, f):
        """hook that does nothing here (TODO confirm with callers when it
        is invoked)"""
        pass

    def missing(self, f):
        """hook that does nothing here (TODO confirm with callers when it
        is invoked)"""
        pass

    def exact(self, f):
        """tell whether f is one of the stored file names"""
        return f in self._fmap

    def rel(self, f):
        """hand f to util.pathto together with the matcher's root and cwd"""
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        """return the stored file list: the literal names in exact mode,
        otherwise the roots derived from the patterns"""
        return self._files

    def anypats(self):
        """return the flag recording whether include/exclude filters or
        non-literal pattern kinds were given"""
        return self._anypats
|
|
100
|
|
class exact(match):
    """matcher whose file list is taken literally (match with exact=True)"""

    def __init__(self, root, cwd, files):
        """build a matcher accepting only the names listed in files"""
        super(exact, self).__init__(root, cwd, files, exact=True)
|
|
104
|
|
class always(match):
    """matcher built from an empty pattern list, without include or exclude
    filters"""

    def __init__(self, root, cwd):
        """build the matcher; no patterns are passed on to match"""
        super(always, self).__init__(root, cwd, [])
|
|
108
|
|
def patkind(pat):
    """return the explicit kind prefix of pat, or None when it has none"""
    kind, _rest = _patsplit(pat, None)
    return kind
|
|
111
|
|
def _patsplit(pat, default):
    """Separate a recognised 'kind:' prefix from the rest of a pattern.

    Returns (kind, pattern).  When pat has no colon, or the text before
    its first colon is not a known pattern kind, the result is
    (default, pat) with pat left untouched."""
    if ':' not in pat:
        return default, pat
    cut = pat.index(':')
    prefix, rest = pat[:cut], pat[cut + 1:]
    if prefix in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
        return prefix, rest
    return default, pat
|
|
120
|
|
def _globre(pat):
    """translate a glob pattern into regular expression source text

    '*' becomes '[^/]*' and '**' becomes '.*'; '?' becomes '.'; '[...]'
    becomes a character class (a leading '!' negates it); '{a,b}'
    becomes the group '(?:a|b)'; a backslash quotes the character after
    it.  Everything else is passed through re.escape."""
    quote = re.escape
    length = len(pat)
    pos = 0
    depth = 0   # number of '{' groups currently open
    out = []    # regexp fragments, joined at the end
    while pos < length:
        ch = pat[pos]
        pos += 1
        if ch == '*':
            if pos < length and pat[pos] == '*':
                pos += 1
                out.append('.*')
            else:
                out.append('[^/]*')
        elif ch == '?':
            out.append('.')
        elif ch == '[':
            # look for the closing ']'; a '!' or ']' directly after the
            # '[' is skipped over before the search starts
            end = pos
            if end < length and pat[end] in '!]':
                end += 1
            while end < length and pat[end] != ']':
                end += 1
            if end >= length:
                # no closing bracket: the '[' is an ordinary character
                out.append('\\[')
            else:
                chars = pat[pos:end].replace('\\', '\\\\')
                pos = end + 1
                if chars[0] == '!':
                    chars = '^' + chars[1:]
                elif chars[0] == '^':
                    # a literal '^' must not negate the class
                    chars = '\\' + chars
                out.append('[%s]' % chars)
        elif ch == '{':
            depth += 1
            out.append('(?:')
        elif ch == '}' and depth:
            out.append(')')
            depth -= 1
        elif ch == ',' and depth:
            out.append('|')
        elif ch == '\\' and pos < length:
            # quote the character following the backslash
            out.append(quote(pat[pos]))
            pos += 1
        else:
            # ordinary characters, '}' or ',' outside a group, and a
            # trailing backslash
            out.append(quote(ch))
    return ''.join(out)
|
|
175
|
|
def _regex(kind, name, tail):
    '''turn one (kind, name) pattern into regular expression source

    tail is appended only to the glob based kinds ('relglob' and any kind
    not handled explicitly below).  An empty name gives an empty
    expression.'''
    if not name:
        return ''
    if kind == 're':
        return name
    if kind == 'relre':
        # unanchored unless the expression anchors itself
        if name.startswith('^'):
            return name
        return '.*' + name
    if kind == 'path' or kind == 'relpath':
        # literal name that must end at a '/' or at the end of the string;
        # only 'path' gets an explicit '^' in front
        literal = re.escape(name) + '(?:/|$)'
        if kind == 'path':
            return '^' + literal
        return literal
    globbed = _globre(name) + tail
    if kind == 'relglob':
        # allow any leading directories in front of the glob
        return '(?:|.*/)' + globbed
    return globbed
|
|
193
|
|
def _buildmatch(pats, tail):
    """turn a list of (kind, name) patterns into a matching function

    All patterns are joined into one alternation and compiled; the result
    is the compiled expression's match method.  If the joined expression
    is longer than 20000 characters, or compiling it overflows, the list
    is split in half and the two halves are combined with "or".

    Raises util.Abort when a pattern is not a valid regular expression."""
    try:
        alternatives = [_regex(kind, name, tail) for (kind, name) in pats]
        combined = '(?:%s)' % '|'.join(alternatives)
        if len(combined) > 20000:
            raise OverflowError()
        return re.compile(combined).match
    except OverflowError:
        # The expression is too big for this Python's regex engine (or
        # for our own size limit): halve the pattern list until each
        # piece compiles.  A single pattern cannot be split any further.
        count = len(pats)
        if count < 2:
            raise
        half = count // 2
        first = _buildmatch(pats[:half], tail)
        second = _buildmatch(pats[half:], tail)
        return lambda s: first(s) or second(s)
    except re.error:
        # compile the patterns one at a time to name the offending one
        for kind, name in pats:
            try:
                re.compile('(?:%s)' % _regex(kind, name, tail))
            except re.error:
                raise util.Abort("invalid pattern (%s): %s" % (kind, name))
        raise util.Abort("invalid pattern")
|
|
217
|
|
def _normalize(names, default, root, cwd):
    """split every entry of names into (kind, name) and tidy up the name

    Entries without an explicit kind get default.  'glob' and 'relpath'
    names go through util.canonpath(root, cwd, name); 'relglob' and 'path'
    names go through util.normpath; other kinds are left as they are.
    Returns the list of (kind, name) pairs."""
    split = [_patsplit(entry, default) for entry in names]
    normalized = []
    for kind, name in split:
        if kind == 'glob' or kind == 'relpath':
            # these kinds are documented as relative to cwd; canonpath
            # presumably rewrites them relative to root (confirm in util)
            name = util.canonpath(root, cwd, name)
        elif kind == 'relglob' or kind == 'path':
            name = util.normpath(name)
        normalized.append((kind, name))
    return normalized
|
|
228
|
|
def _roots(patterns):
    """list the literal starting point of each (kind, name) pattern

    'relglob' gives '.', 'path' and 'relpath' give the name itself, and
    'glob' gives the leading path components free of glob characters; an
    empty result is replaced by '.'.  Other kinds add nothing."""
    roots = []
    for kind, name in patterns:
        if kind == 'relglob':
            roots.append('.')
        elif kind == 'path' or kind == 'relpath':
            roots.append(name or '.')
        elif kind == 'glob':
            # count the leading components without glob characters
            segments = name.split('/')
            keep = 0
            while keep < len(segments):
                seg = segments[keep]
                if '[' in seg or '{' in seg or '*' in seg or '?' in seg:
                    break
                keep += 1
            roots.append('/'.join(segments[:keep]) or '.')
    return roots
|
|
244
|
|
def _anypats(patterns):
    """tell whether any (kind, name) pair uses a non-literal pattern kind

    Returns True as soon as a 'glob', 're', 'relglob' or 'relre' entry is
    found and False otherwise, so the result is always a bool (the
    function used to fall off the end and return None)."""
    for kind, name in patterns:
        if kind in ('glob', 're', 'relglob', 'relre'):
            return True
    return False
|