28
|
1 # url.py - HTTP handling for mercurial
|
|
2 #
|
|
3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
|
|
4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
|
|
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
|
|
6 #
|
|
7 # This software may be used and distributed according to the terms of the
|
|
8 # GNU General Public License version 2, incorporated herein by reference.
|
|
9
|
|
10 import urllib, urllib2, urlparse, httplib, os, re, socket, cStringIO
|
|
11 from i18n import _
|
|
12 import keepalive, util
|
|
13
|
|
def hidepassword(url):
    '''Return url with any password in its netloc replaced by "***".

    Only a netloc of the form "user:password@host" is rewritten; the
    url is otherwise reassembled from its parsed components.
    '''
    parts = list(urlparse.urlparse(url))
    # parts[1] is the netloc component of the parsed 6-tuple
    parts[1] = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', parts[1])
    return urlparse.urlunparse(tuple(parts))
|
|
19
|
|
def removeauth(url):
    '''Return url with the "user[:passwd]@" part of its netloc dropped.'''
    parts = list(urlparse.urlparse(url))
    hostport = parts[1]
    at = hostport.find('@')
    # find() yields -1 when there is no '@', so the slice keeps everything
    parts[1] = hostport[at + 1:]
    return urlparse.urlunparse(tuple(parts))
|
|
25
|
|
def netlocsplit(netloc):
    '''split [user[:passwd]@]host[:port] into 4-tuple.

    Returns (host, port, user, passwd). Components that are absent are
    None; user and passwd are URL-unquoted and port is kept as a string.
    A bracketed IPv6 literal such as "[::1]:8080" keeps its brackets in
    host, and the colons inside the brackets are not taken for the port
    separator.
    '''
    user = passwd = None
    a = netloc.find('@')
    if a != -1:
        userpass, netloc = netloc[:a], netloc[a+1:]
        c = userpass.find(':')
        if c == -1:
            user = urllib.unquote(userpass)
        else:
            user = urllib.unquote(userpass[:c])
            passwd = urllib.unquote(userpass[c+1:])

    # only look for the port separator after the closing bracket of an
    # IPv6 literal; an unterminated '[' falls back to searching from 0
    start = 0
    if netloc.startswith('['):
        start = netloc.find(']') + 1
    c = netloc.find(':', start)
    if c == -1:
        host, port = netloc, None
    else:
        host, port = netloc[:c], netloc[c+1:]
    return host, port, user, passwd
|
|
46
|
|
def netlocunsplit(host, port, user=None, passwd=None):
    '''Build [user[:passwd]@]host[:port] from its components.

    user and passwd are URL-quoted. A false port, user or passwd is
    left out, and passwd is ignored when user is false.
    '''
    pieces = []
    if user:
        pieces.append(urllib.quote(user))
        if passwd:
            pieces.append(':' + urllib.quote(passwd))
        pieces.append('@')
    pieces.append(host)
    if port:
        pieces.append(':' + port)
    return ''.join(pieces)
|
|
60
|
|
# characters that quotepath() never escapes
_safe = ('abcdefghijklmnopqrstuvwxyz'
         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         '0123456789' '_.-/')
# lookup sets, filled in by the first quotepath() call
_safeset = None
_hex = None
def quotepath(path):
    '''quote the path part of a URL

    Works like urllib.quote, except that a '%' already followed by two
    hex digits is taken to be an existing escape and left untouched, so
    quoting is not applied twice (inspired by wget):

    >>> quotepath('abc def')
    'abc%20def'
    >>> quotepath('abc%20def')
    'abc%20def'
    >>> quotepath('abc%20 def')
    'abc%20%20def'
    >>> quotepath('abc def%20')
    'abc%20def%20'
    >>> quotepath('abc def%2')
    'abc%20def%252'
    >>> quotepath('abc def%')
    'abc%20def%25'
    '''
    global _safeset, _hex
    if _safeset is None:
        _safeset = set(_safe)
        _hex = set('abcdefABCDEF0123456789')
    chars = list(path)
    total = len(chars)
    out = []
    for pos, ch in enumerate(chars):
        # an existing %XX escape needs two hex digits after the '%'
        escape = (ch == '%' and pos + 2 < total and
                  chars[pos+1] in _hex and chars[pos+2] in _hex)
        if escape or ch in _safeset:
            out.append(ch)
        else:
            out.append('%%%02X' % ord(ch))
    return ''.join(out)
|
|
97
|
|
class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
    '''Password manager that falls back to the [auth] config section,
    and then to prompting through the ui, when no stored credentials
    are found.'''

    def __init__(self, ui):
        urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
        self.ui = ui

    def find_user_password(self, realm, authuri):
        '''Return a (user, passwd) pair for realm and authuri.

        Credentials already stored in this manager win. Otherwise the
        best matching [auth] group is consulted, and whatever is still
        missing is asked for through the ui. The result is stored with
        add_password() before being returned.

        Raises util.Abort when credentials are missing and the ui is
        not interactive.
        '''
        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(
            self, realm, authuri)
        user, passwd = authinfo
        if user and passwd:
            self._writedebug(user, passwd)
            return (user, passwd)

        if not user:
            auth = self.readauthtoken(authuri)
            if auth:
                user, passwd = auth.get('username'), auth.get('password')
        if not user or not passwd:
            if not self.ui.interactive():
                raise util.Abort(_('http authorization required'))

            self.ui.write(_("http authorization required\n"))
            self.ui.status(_("realm: %s\n") % realm)
            if user:
                self.ui.status(_("user: %s\n") % user)
            else:
                user = self.ui.prompt(_("user:"), default=None)

            if not passwd:
                passwd = self.ui.getpass()

        self.add_password(realm, authuri, user, passwd)
        self._writedebug(user, passwd)
        return (user, passwd)

    def _writedebug(self, user, passwd):
        '''Log the user name and a masked password to the debug output.'''
        msg = _('http auth: user %s, password %s\n')
        self.ui.debug(msg % (user, passwd and '*' * len(passwd) or 'not set'))

    def readauthtoken(self, uri):
        '''Return the [auth] group whose prefix best matches uri.

        The group is returned as a dict of its settings; None is
        returned when no group matches or when uri has no "scheme://"
        part. The longest matching prefix wins, '*' matches any uri,
        and the uri scheme must be listed for the group (taken from the
        prefix itself, else from "schemes", else "https").
        '''
        # Read configuration: entries are named "group.setting"
        config = dict()
        for key, val in self.ui.configitems('auth'):
            if '.' not in key:
                # no group name to file this under; skip it rather than
                # fail on the unpack below
                self.ui.debug(_("ignoring invalid [auth] key '%s'\n") % key)
                continue
            group, setting = key.split('.', 1)
            gdict = config.setdefault(group, dict())
            gdict[setting] = val

        # Some urllib2 versions look passwords up by netloc only, so uri
        # is not guaranteed to carry a scheme; nothing can match then.
        if '://' not in uri:
            return None

        # Find the best match
        scheme, hostpath = uri.split('://', 1)
        bestlen = 0
        bestauth = None
        for auth in config.values():
            prefix = auth.get('prefix')
            if not prefix:
                continue
            p = prefix.split('://', 1)
            if len(p) > 1:
                schemes, prefix = [p[0]], p[1]
            else:
                schemes = (auth.get('schemes') or 'https').split()
            if (prefix == '*' or hostpath.startswith(prefix)) and \
                len(prefix) > bestlen and scheme in schemes:
                bestlen = len(prefix)
                bestauth = auth
        return bestauth
|
|
162
|
|
class proxyhandler(urllib2.ProxyHandler):
    '''ProxyHandler set up from the [http_proxy] config section, with the
    http_proxy and no_proxy environment variables as fallback.'''

    def __init__(self, ui):
        target = ui.config("http_proxy", "host") or os.getenv('http_proxy')
        # XXX proxyauthinfo = None
        proxies = {}

        if target:
            # proxy can be proper url or host[:port]
            isurl = target.startswith('http:') or target.startswith('https:')
            if not isurl:
                target = 'http://' + target + '/'
            scheme, netloc, path, query, frag = urlparse.urlsplit(target)
            host, port, user, passwd = netlocsplit(netloc)
            if not user:
                # credentials in the url take precedence over the config
                user = ui.config("http_proxy", "user")
                passwd = ui.config("http_proxy", "passwd")

            # hosts that are reached directly instead of via the proxy
            bypass = ["localhost", "127.0.0.1"]
            for entry in ui.configlist("http_proxy", "no"):
                bypass.append(entry.lower())
            for entry in os.getenv("no_proxy", '').split(','):
                entry = entry.strip()
                if entry:
                    bypass.append(entry.lower())
            # "http_proxy.always" config is for running tests on localhost
            if ui.configbool("http_proxy", "always"):
                bypass = []
            self.no_list = bypass

            netloc = netlocunsplit(host, port, user, passwd or '')
            target = urlparse.urlunsplit((scheme, netloc, path, query, frag))
            proxies = {'http': target, 'https': target}
            ui.debug(_('proxying through http://%s:%s\n') % (host, port))

        # urllib2 takes proxy values from the environment and those
        # will take precedence if found, so drop them
        for name in ("HTTP_PROXY", "http_proxy", "no_proxy"):
            try:
                if name in os.environ:
                    del os.environ[name]
            except OSError:
                pass

        urllib2.ProxyHandler.__init__(self, proxies)
        self.ui = ui

    def proxy_open(self, req, proxy, type_):
        '''Open req through proxy, unless its host is in the bypass list.

        Returns None for a bypassed host; otherwise returns whatever
        urllib2.ProxyHandler.proxy_open returns.
        '''
        target = req.get_host().split(':')[0]
        if target in self.no_list:
            return None

        # work around a bug in Python < 2.4.2
        # (it leaves a "\n" at the end of Proxy-authorization headers)
        origclass = req.__class__
        class _request(origclass):
            def add_header(self, key, val):
                if key.lower() == 'proxy-authorization':
                    val = val.strip()
                return origclass.add_header(self, key, val)
        req.__class__ = _request

        return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
|
|
233
|
|
class httpsendfile(file):
    '''File object whose len() is the size of the underlying file.'''
    def __len__(self):
        # ask the OS about the open descriptor rather than seeking
        stats = os.fstat(self.fileno())
        return stats.st_size
|
|
237
|
|
def _gen_sendfile(connection):
    '''Return a send() method for a subclass of connection.

    The returned method streams an httpsendfile argument in chunks via
    connection.send and passes any other data to connection.send as is.
    '''
    def _sendfile(self, data):
        if not isinstance(data, httpsendfile):
            connection.send(self, data)
            return
        # send a file
        # if auth required, some data sent twice, so rewind here
        data.seek(0)
        for chunk in util.filechunkiter(data):
            connection.send(self, chunk)
    return _sendfile
|
|
249
|
|
# HTTPS is usable only if urllib2 exposes an HTTPSHandler
has_https = hasattr(urllib2, 'HTTPSHandler')
if has_https:
    try:
        import ssl
    except ImportError:
        # no ssl module: fall back to socket.ssl wrapped in a FakeSocket
        def _ssl_wrap_socket(sock, key_file, cert_file):
            sslobj = socket.ssl(sock, key_file, cert_file)
            return httplib.FakeSocket(sock, sslobj)
    else:
        # avoid using deprecated/broken FakeSocket in python 2.6
        _ssl_wrap_socket = ssl.wrap_socket
|
|
260
|
|
class httpconnection(keepalive.HTTPConnection):
    '''keepalive HTTP connection that can stream file bodies and tunnel
    HTTPS through a proxy using CONNECT.

    connect() and _proxytunnel() read the realhost, realport and headers
    attributes, which must be set on the instance beforehand.
    '''
    # must be able to send big bundle as stream.
    send = _gen_sendfile(keepalive.HTTPConnection)

    def _proxytunnel(self):
        '''Send a CONNECT request for realhost:realport to the proxy.

        Returns True when the proxy answers 200, after consuming the
        rest of its response header. Otherwise returns False; unless
        the reply was HTTP/0.9, the parsed proxy response is stored in
        self.proxyres for getresponse() to return.

        Raises httplib.UnknownProtocol for an unrecognised HTTP version.
        '''
        # only proxy-* headers are forwarded to the proxy itself
        proxyheaders = dict(
                [(x, self.headers[x]) for x in self.headers
                 if x.lower().startswith('proxy-')])
        self._set_hostport(self.host, self.port)
        self.send('CONNECT %s:%d HTTP/1.0\r\n' % (self.realhost, self.realport))
        for header in proxyheaders.iteritems():
            self.send('%s: %s\r\n' % header)
        self.send('\r\n')

        # majority of the following code is duplicated from
        # httplib.HTTPConnection as there are no adequate places to
        # override functions to provide the needed functionality
        res = self.response_class(self.sock,
                                  strict=self.strict,
                                  method=self._method)

        while True:
            version, status, reason = res._read_status()
            if status != httplib.CONTINUE:
                break
            # skip the header of the 100 Continue response
            while True:
                skip = res.fp.readline().strip()
                if not skip:
                    break
        res.status = status
        res.reason = reason.strip()

        if res.status == 200:
            # discard the remaining header lines of the proxy's reply
            while True:
                line = res.fp.readline()
                # '' is EOF: stop instead of looping forever when the
                # proxy closes the connection without a blank line
                if line in ('\r\n', '\n', ''):
                    break
            return True

        if version == 'HTTP/1.0':
            res.version = 10
        elif version.startswith('HTTP/1.'):
            res.version = 11
        elif version == 'HTTP/0.9':
            res.version = 9
        else:
            raise httplib.UnknownProtocol(version)

        if res.version == 9:
            res.length = None
            res.chunked = 0
            res.will_close = 1
            res.msg = httplib.HTTPMessage(cStringIO.StringIO())
            return False

        res.msg = httplib.HTTPMessage(res.fp)
        res.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        trenc = res.msg.getheader('transfer-encoding')
        if trenc and trenc.lower() == "chunked":
            res.chunked = 1
            res.chunk_left = None
        else:
            res.chunked = 0

        # will the connection close at the end of the response?
        res.will_close = res._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = res.msg.getheader('content-length')
        if length and not res.chunked:
            try:
                res.length = int(length)
            except ValueError:
                res.length = None
            else:
                if res.length < 0:  # ignore nonsensical negative lengths
                    res.length = None
        else:
            res.length = None

        # does the body have a fixed length? (of zero)
        if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or
            100 <= status < 200 or # 1xx codes
            res._method == 'HEAD'):
            res.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not res.will_close and
           not res.chunked and
           res.length is None):
            res.will_close = 1

        self.proxyres = res

        return False

    def connect(self):
        '''Connect to the proxy and set up the tunnel when realhost is
        set; otherwise connect the way keepalive.HTTPConnection does.'''
        if has_https and self.realhost: # use CONNECT proxy
            self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.sock.connect((self.host, self.port))
            if self._proxytunnel():
                # we do not support client x509 certificates
                self.sock = _ssl_wrap_socket(self.sock, None, None)
        else:
            keepalive.HTTPConnection.connect(self)

    def getresponse(self):
        '''Return the stored failed-CONNECT response if there is one,
        else the regular response.'''
        proxyres = getattr(self, 'proxyres', None)
        if proxyres:
            if proxyres.will_close:
                self.close()
            # hand the proxy response out only once
            self.proxyres = None
            return proxyres
        return keepalive.HTTPConnection.getresponse(self)
|
|
380
|
|
class httphandler(keepalive.HTTPHandler):
    '''HTTP handler that opens httpconnection objects and tells them
    when an https request has to be tunnelled through a proxy.'''

    def http_open(self, req):
        return self.do_open(httpconnection, req)

    def _start_transaction(self, h, req):
        '''Set realhost, realport and headers on connection h, then start
        the transaction.

        For an https request going through a proxy they describe the
        CONNECT target (port 443 unless the url names one); in every
        other case all three are set to None.
        '''
        if req.get_selector() == req.get_full_url(): # has proxy
            urlparts = urlparse.urlparse(req.get_selector())
            if urlparts[0] == 'https': # only use CONNECT for HTTPS
                netloc = urlparts[1]
                # a bracketed IPv6 literal contains colons of its own, so
                # only a ':' following the ']' separates a port there
                if ':' in netloc and '[' not in netloc or ']:' in netloc:
                    realhost, realport = netloc.rsplit(':', 1)
                    realport = int(realport)
                else:
                    realhost = netloc
                    realport = 443

                h.realhost = realhost
                h.realport = realport
                h.headers = req.headers.copy()
                h.headers.update(self.parent.addheaders)
                return keepalive.HTTPHandler._start_transaction(self, h, req)

        h.realhost = None
        h.realport = None
        h.headers = None
        return keepalive.HTTPHandler._start_transaction(self, h, req)

    def __del__(self):
        self.close_all()
|
|
409
|
|
if has_https:
    class httpsconnection(httplib.HTTPSConnection):
        '''HTTPS connection that uses keepalive responses and can stream
        file bodies.'''
        response_class = keepalive.HTTPResponse
        # must be able to send big bundle as stream.
        send = _gen_sendfile(httplib.HTTPSConnection)

    class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
        '''HTTPS handler that takes client key/cert files from the
        matching [auth] group when one names both.'''

        def __init__(self, ui):
            keepalive.KeepAliveHandler.__init__(self)
            urllib2.HTTPSHandler.__init__(self)
            self.ui = ui
            self.pwmgr = passwordmgr(self.ui)

        def https_open(self, req):
            # remember the [auth] group for _makeconnection
            self.auth = self.pwmgr.readauthtoken(req.get_full_url())
            return self.do_open(self._makeconnection, req)

        def _makeconnection(self, host, port=443, *args, **kwargs):
            '''Return an httpsconnection for host.

            The first two extra positional arguments, if given, are the
            key file and the cert file. A "key" and "cert" pair from the
            current [auth] group overrides them, and a port embedded in
            host overrides port.
            '''
            keyfile = None
            certfile = None

            # *args arrives as a tuple, which has no pop(); work on a list
            args = list(args)
            if args: # key_file
                keyfile = args.pop(0)
            if args: # cert_file
                certfile = args.pop(0)

            # if the user has specified different key/cert files in
            # hgrc, we prefer these
            if self.auth and 'key' in self.auth and 'cert' in self.auth:
                keyfile = self.auth['key']
                certfile = self.auth['cert']

            # let host port take precedence
            if ':' in host and '[' not in host or ']:' in host:
                host, port = host.rsplit(':', 1)
                port = int(port)
                if '[' in host:
                    # strip the brackets of an IPv6 literal
                    host = host[1:-1]

            return httpsconnection(host, port, keyfile, certfile, *args, **kwargs)
|
|
450
|
|
451 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
|
|
452 # it doesn't know about the auth type requested. This can happen if
|
|
453 # somebody is using BasicAuth and types a bad password.
|
|
454 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
|
|
455 def http_error_auth_reqed(self, auth_header, host, req, headers):
|
|
456 try:
|
|
457 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
|
|
458 self, auth_header, host, req, headers)
|
|
459 except ValueError, inst:
|
|
460 arg = inst.args[0]
|
|
461 if arg.startswith("AbstractDigestAuthHandler doesn't know "):
|
|
462 return
|
|
463 raise
|
|
464
|
|
def getauthinfo(path):
    '''Separate the credentials from a url.

    Returns (url, authinfo). url is path without user and password,
    with an empty path replaced by '/' and, for schemes other than
    file, the path component passed through quotepath(). authinfo is
    None when path names no user; otherwise it is the tuple
    (None, (url, netloc), user, passwd) with passwd '' if absent.
    '''
    scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
    urlpath = urlpath or '/'
    if scheme != 'file':
        # XXX: why are we quoting the path again with some smart
        # heuristic here? Anyway, it cannot be done with file://
        # urls since path encoding is os/fs dependent (see
        # urllib.pathname2url() for details).
        urlpath = quotepath(urlpath)
    host, port, user, passwd = netlocsplit(netloc)

    # urllib cannot handle URLs with embedded user or passwd
    hostport = netlocunsplit(host, port)
    url = urlparse.urlunsplit((scheme, hostport, urlpath, query, frag))
    if not user:
        return url, None
    # Python < 2.4.3 uses only the netloc to search for a password
    return url, (None, (url, hostport), user, passwd or '')
|
|
489
|
|
def opener(ui, authinfo=None):
    '''
    Build the urllib2 opener used for talking to servers.

    ui is handed to the handlers and the password manager. authinfo,
    when not None, is the argument tuple for add_password() and is
    registered with the password manager.
    '''
    handlers = [httphandler()]
    if has_https:
        handlers.append(httpshandler(ui))
    handlers.append(proxyhandler(ui))

    passmgr = passwordmgr(ui)
    if authinfo is not None:
        passmgr.add_password(*authinfo)
        # log the user name and a masked password
        passmgr._writedebug(authinfo[2], authinfo[3])

    handlers += [urllib2.HTTPBasicAuthHandler(passmgr),
                 httpdigestauthhandler(passmgr)]
    built = urllib2.build_opener(*handlers)

    # 1.0 here is the _protocol_ version
    built.addheaders = [('User-agent', 'mercurial/proto-1.0'),
                        ('Accept', 'application/mercurial-0.1')]
    return built
|
|
516
|
|
# Matches a leading "scheme://". The '-' sits last in the character class
# so that it is a literal: written as "+-." it formed a range that also
# accepted ','.
scheme_re = re.compile(r'^([a-zA-Z0-9+.-]+)://')
|
|
518
|
|
def open(ui, url, data=None):
    '''Open url, optionally sending data, and return the result of the
    urllib2 opener's open().

    A url without a "scheme://" prefix is treated as a local path and
    turned into a file:// url; otherwise any credentials embedded in
    the url are split off and handed to the opener.
    '''
    if scheme_re.search(url):
        url, authinfo = getauthinfo(url)
    else:
        authinfo = None
        localpath = util.normpath(os.path.abspath(url))
        url = 'file://' + urllib.pathname2url(localpath)
    return opener(ui, authinfo).open(url, data)
|