121
|
1 # This library is free software; you can redistribute it and/or
|
|
2 # modify it under the terms of the GNU Lesser General Public
|
|
3 # License as published by the Free Software Foundation; either
|
|
4 # version 2.1 of the License, or (at your option) any later version.
|
|
5 #
|
|
6 # This library is distributed in the hope that it will be useful,
|
|
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
9 # Lesser General Public License for more details.
|
|
10 #
|
|
11 # You should have received a copy of the GNU Lesser General Public
|
|
12 # License along with this library; if not, write to the
|
|
13 # Free Software Foundation, Inc.,
|
|
14 # 59 Temple Place, Suite 330,
|
|
15 # Boston, MA 02111-1307 USA
|
|
16
|
|
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
|
|
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
|
|
19
|
|
20 # Modified by Benoit Boissinot:
|
|
21 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
|
|
22 # Modified by Dirkjan Ochtman:
|
|
23 # - import md5 function from a local util module
|
|
24 # Modified by Martin Geisler:
|
|
25 # - moved md5 function from local util module to this module
|
|
26
|
|
27 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
|
|
28
|
|
29 >>> import urllib2
|
|
30 >>> from keepalive import HTTPHandler
|
|
31 >>> keepalive_handler = HTTPHandler()
|
|
32 >>> opener = urllib2.build_opener(keepalive_handler)
|
|
33 >>> urllib2.install_opener(opener)
|
|
34 >>>
|
|
35 >>> fo = urllib2.urlopen('http://www.python.org')
|
|
36
|
|
37 If a connection to a given host is requested, and all of the existing
|
|
38 connections are still in use, another connection will be opened. If
|
|
39 the handler tries to use an existing connection but it fails in some
|
|
40 way, it will be closed and removed from the pool.
|
|
41
|
|
42 To remove the handler, simply re-run build_opener with no arguments, and
|
|
43 install that opener.
|
|
44
|
|
45 You can explicitly close connections by using the close_connection()
|
|
46 method of the returned file-like object (described below) or you can
|
|
47 use the handler methods:
|
|
48
|
|
49 close_connection(host)
|
|
50 close_all()
|
|
51 open_connections()
|
|
52
|
|
53 NOTE: using the close_connection and close_all methods of the handler
|
|
54 should be done with care when using multiple threads.
|
|
55 * there is nothing that prevents another thread from creating new
|
|
56 connections immediately after connections are closed
|
|
57 * no checks are done to prevent in-use connections from being closed
|
|
58
|
|
59 >>> keepalive_handler.close_all()
|
|
60
|
|
61 EXTRA ATTRIBUTES AND METHODS
|
|
62
|
|
63 Upon a status of 200, the object returned has a few additional
|
|
64 attributes and methods, which should not be used if you want to
|
|
65 remain consistent with the normal urllib2-returned objects:
|
|
66
|
|
67 close_connection() - close the connection to the host
|
|
68 readlines() - you know, readlines()
|
|
69 status - the return status (e.g. 404)
|
|
70 reason - English translation of status (e.g. 'File not found')
|
|
71
|
|
72 If you want the best of both worlds, use this inside an
|
|
73 AttributeError-catching try:
|
|
74
|
|
75 >>> try: status = fo.status
|
|
76 >>> except AttributeError: status = None
|
|
77
|
|
78 Unfortunately, these are ONLY there if status == 200, so it's not
|
|
79 easy to distinguish between non-200 responses. The reason is that
|
|
80 urllib2 tries to do clever things with error codes 301, 302, 401,
|
|
81 and 407, and it wraps the object upon return.
|
|
82
|
|
83 For python versions earlier than 2.4, you can avoid this fancy error
|
|
84 handling by setting the module-level global HANDLE_ERRORS to zero.
|
|
85 You see, prior to 2.4, it's the HTTP Handler's job to determine what
|
|
86 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
|
|
87 means "pass everything up". In python 2.4, however, this job no
|
|
88 longer belongs to the HTTP Handler and is now done by a NEW handler,
|
|
89 HTTPErrorProcessor. Here's the bottom line:
|
|
90
|
|
91 python version < 2.4
|
|
92 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
|
|
93 errors
|
|
94 HANDLE_ERRORS == 0 pass everything up, error processing is
|
|
95 left to the calling code
|
|
96 python version >= 2.4
|
|
97 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
|
|
98 HANDLE_ERRORS == 0 (default) pass everything up, let the
|
|
99 other handlers (specifically,
|
|
100 HTTPErrorProcessor) decide what to do
|
|
101
|
|
102 In practice, setting the variable either way makes little difference
|
|
103 in python 2.4, so for the most consistent behavior across versions,
|
|
104 you probably just want to use the defaults, which will give you
|
|
105 exceptions on errors.
|
|
106
|
|
107 """
|
|
108
|
|
109 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
|
|
110
|
|
111 import urllib2
|
|
112 import httplib
|
|
113 import socket
|
|
114 import thread
|
|
115
|
|
# Optional logger-like object.  When set, the handler calls its info() and
# error() methods; None disables all debug output.
DEBUG = None

import sys
# From Python 2.4 on the handler passes every response up (0); on older
# interpreters it treats anything but a 200 as an error (1).
if sys.version_info >= (2, 4):
    HANDLE_ERRORS = 0
else:
    HANDLE_ERRORS = 1
|
|
121
|
|
class ConnectionManager:
    """
    Registry of open connections, guarded by a single lock.

    The connection manager must be able to:
      * keep track of all existing connections, grouped by host
      * remember, for each connection, which host it belongs to
      * remember whether each connection is ready (idle) or in use
    """
    def __init__(self):
        self._lock = thread.allocate_lock()
        self._hostmap = {} # map hosts to a list of connections
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        """Register connection under host with the given ready state."""
        self._lock.acquire()
        try:
            if host not in self._hostmap:
                self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready
        finally:
            self._lock.release()

    def remove(self, connection):
        """Forget connection; unknown connections are silently ignored."""
        self._lock.acquire()
        try:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                # drop the host entry once its last connection is gone
                if not self._hostmap[host]:
                    del self._hostmap[host]
        finally:
            self._lock.release()

    def set_ready(self, connection, ready):
        """Update the ready state of a connection that is still registered.

        Connections that were already removed are ignored.  A plain
        assignment would silently re-create their entry in the ready map
        and keep the dead connection object alive.
        """
        self._lock.acquire()
        try:
            if connection in self._readymap:
                self._readymap[connection] = ready
        finally:
            self._lock.release()

    def get_ready_conn(self, host):
        """Return a ready connection to host, marking it in use, or None."""
        conn = None
        self._lock.acquire()
        try:
            if host in self._hostmap:
                for c in self._hostmap[host]:
                    if self._readymap[c]:
                        # claim it while still holding the lock
                        self._readymap[c] = 0
                        conn = c
                        break
        finally:
            self._lock.release()
        return conn

    def get_all(self, host=None):
        """Return a snapshot of the registered connections.

        With a host, return a list of its connections (empty if unknown).
        Without one, return a dict mapping every host to a list of its
        connections.  All returned lists are copies, so callers may call
        remove() while iterating over them.
        """
        self._lock.acquire()
        try:
            if host:
                return list(self._hostmap.get(host, []))
            return dict((h, list(conns))
                        for h, conns in self._hostmap.items())
        finally:
            self._lock.release()
|
|
181
|
|
class KeepAliveHandler:
    """Handler mix-in that re-uses HTTP connections between requests.

    Connections are tracked in a ConnectionManager.  A request first tries
    every ready connection to the target host and only opens a new one
    when none of them works.
    """
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each. [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().items():
            # iterate over a copy: remove() may mutate the very list we
            # were handed, which would make the loop skip connections
            for h in list(conns):
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        """Forget connection, closing it first when close is true."""
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        """Open req over a (possibly re-used) HTTPConnection."""
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        """Send req and return the response object.

        http_class is called with the host to create a connection when no
        ready connection works.  Raises urllib2.URLError when the request
        names no host or when a socket/httplib error occurs.  Unless the
        status is 200 or HANDLE_ERRORS is false, the response is handed to
        self.parent.error() and its result is returned instead.
        """
        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done. Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found. Create a new one.
                h = http_class(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                self._cm.add(host, h, 0)
                try:
                    self._start_transaction(h, req)
                    r = h.getresponse()
                except:
                    # don't leave a dead connection registered as "in
                    # use" forever; drop it before reporting the error
                    self._cm.remove(h)
                    h.close()
                    raise
        except (socket.error, httplib.HTTPException):
            raise urllib2.URLError(sys.exc_info()[1])

        # if not a persistent connection, don't try to reuse it
        if r.will_close:
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        # expose the urllib2-style attributes: headers holds the parsed
        # message, msg the reason phrase
        r.headers = r.msg
        r.msg = r.reason

        if r.status == 200 or not HANDLE_ERRORS:
            return r
        else:
            return self.parent.error('http', req, r,
                                     r.status, r.msg, r.headers)

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns. However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked. We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except:
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first. We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open. On the next try, the
            # same exception was raised, etc. The tradeoff is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back. This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        """Write the request line, headers and body of req to connection h.

        Raises urllib2.URLError if a socket error occurs while the request
        line (or the implied POST headers) is being sent.
        """
        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix.
        headers = req.headers.copy()
        if sys.version_info >= (2, 4):
            headers.update(req.unredirected_hdrs)
        headers.update(self.parent.addheaders)
        headers = dict((n.lower(), v) for n, v in headers.items())
        # tell putrequest() not to emit headers we are going to send anyway
        skipheaders = {}
        for n in ('host', 'accept-encoding'):
            if n in headers:
                skipheaders['skip_' + n.replace('-', '_')] = 1
        try:
            if req.has_data():
                data = req.get_data()
                h.putrequest('POST', req.get_selector(), **skipheaders)
                if 'content-type' not in headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'content-length' not in headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', req.get_selector(), **skipheaders)
        except socket.error:
            raise urllib2.URLError(sys.exc_info()[1])
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            h.send(data)
|
|
341
|
|
class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
    """urllib2 HTTP handler combined with the keepalive behaviour.

    KeepAliveHandler is listed first, so its methods take precedence over
    the ones inherited from urllib2.HTTPHandler.
    """
|
|
344
|
|
class HTTPResponse(httplib.HTTPResponse):
    """httplib response with buffered line reading and keepalive hooks."""
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer. example: readline must read a buffer and then spit back
    # one line at a time. The only real alternative is to read one
    # BYTE at a time (ick). Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        """Create the response and initialise the keepalive bookkeeping.

        strict and method are forwarded by keyword.  Passing method as the
        third positional argument would land in httplib's strict
        parameter and the request method would never reach httplib.
        """
        httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
                                      strict=strict, method=method)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''
        self._rbufsize = 8096
        self._handler = None # inserted by the handler later
        self._host = None # (same)
        self._url = None # (same)
        self._connection = None # (same)

    # unbuffered read of the base class; read() and readline() build on it
    _raw_read = httplib.HTTPResponse.read

    def close(self):
        """Close the response and report the connection as reusable."""
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        """Close the underlying connection and drop it from the handler."""
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        """Return the headers attribute set by the handler."""
        return self.headers

    def geturl(self):
        """Return the URL recorded for this response by the handler."""
        return self._url

    def read(self, amt=None):
        """Read up to amt bytes (everything if None), buffer first."""
        # the _rbuf test is only in this first if for speed. It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                # the buffer covers only part of the request; fetch the
                # remainder with a raw read below
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        chunk_left = self.chunk_left
        value = ''

        # XXX This accumulates chunks by repeated string concatenation,
        # which is not efficient as the number or size of chunks gets big.
        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(value)
                if chunk_left == 0:
                    break
            if amt is None:
                value += self._safe_read(chunk_left)
            elif amt < chunk_left:
                value += self._safe_read(amt)
                self.chunk_left = chunk_left - amt
                return value
            elif amt == chunk_left:
                value += self._safe_read(amt)
                self._safe_read(2) # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return value
            else:
                value += self._safe_read(chunk_left)
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2) # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return value

    def readline(self, limit=-1):
        """Return the next line, at most limit characters if limit >= 0."""
        i = self._rbuf.find('\n')
        # keep filling the buffer until it holds a newline, holds at
        # least limit characters, or the stream is exhausted
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            new = self._raw_read(self._rbufsize)
            if not new:
                break
            i = new.find('\n')
            if i >= 0:
                i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0:
            i = len(self._rbuf)
        else:
            i = i + 1
        # apply the limit only when it cuts the line short; comparing it
        # against the whole buffer would return data past the newline
        if 0 <= limit < i:
            i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data

    def readlines(self, sizehint = 0):
        """Return the remaining lines, stopping early after sizehint bytes."""
        total = 0
        lines = []
        while 1:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return lines
|
|
497
|
|
498
|
|
class HTTPConnection(httplib.HTTPConnection):
    """httplib connection whose responses use this module's HTTPResponse."""
    # use the modified response class
    response_class = HTTPResponse
|
|
502
|
|
503 #########################################################################
|
|
504 ##### TEST FUNCTIONS
|
|
505 #########################################################################
|
|
506
|
|
def error_handler(url):
    """Fetch url with HANDLE_ERRORS set to 0 and then to 1, printing results.

    Installs a keepalive opener, prints the status and reason of each
    fetch (None when the returned object lacks those attributes) and
    finally the list of open connections.  An IOError raised by a fetch is
    printed and re-raised.  HANDLE_ERRORS is restored and the handler's
    connections are closed even when that happens.
    """
    global HANDLE_ERRORS
    orig = HANDLE_ERRORS
    keepalive_handler = HTTPHandler()
    opener = urllib2.build_opener(keepalive_handler)
    urllib2.install_opener(opener)
    pos = {0: 'off', 1: 'on'}
    try:
        try:
            for i in (0, 1):
                print(" fancy error handling %s (HANDLE_ERRORS = %i)"
                      % (pos[i], i))
                HANDLE_ERRORS = i
                try:
                    fo = urllib2.urlopen(url)
                    fo.read()
                    fo.close()
                    try:
                        status, reason = fo.status, fo.reason
                    except AttributeError:
                        status, reason = None, None
                except IOError:
                    print(" EXCEPTION: %s" % sys.exc_info()[1])
                    raise
                else:
                    print(" status = %s, reason = %s" % (status, reason))
        finally:
            # never leave the module-level switch in a test state
            HANDLE_ERRORS = orig
        hosts = keepalive_handler.open_connections()
        print("open connections: %s" % (hosts,))
    finally:
        keepalive_handler.close_all()
|
|
532
|
|
def md5(s):
    """Return an md5 hash object for s.

    The first call looks up the real constructor (hashlib's, or the one
    from the legacy md5 module when hashlib cannot be imported) and
    rebinds the module-level name md5 to it, so later calls through the
    module go straight to that constructor.
    """
    global md5
    try:
        from hashlib import md5 as hash_factory
    except ImportError:
        from md5 import md5 as hash_factory
    md5 = hash_factory
    return hash_factory(s)
|
|
541
|
|
def continuity(url):
    """Print the md5 digest of url fetched three different ways.

    The document is read with the default urllib2 opener, with the
    keepalive handler using read(), and with the keepalive handler using
    readline(); matching digests mean the data was not corrupted.
    """
    fmt = '%25s: %s'

    # first fetch the file with the normal http handler
    opener = urllib2.build_opener()
    urllib2.install_opener(opener)
    fo = urllib2.urlopen(url)
    foo = fo.read()
    fo.close()
    # md5 is this module's function, not the legacy md5 module, so it is
    # called directly (there is no md5.new here)
    m = md5(foo)
    print(fmt % ('normal urllib', m.hexdigest()))

    # now install the keepalive handler and try again
    opener = urllib2.build_opener(HTTPHandler())
    urllib2.install_opener(opener)

    fo = urllib2.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(fmt % ('keepalive read', m.hexdigest()))

    fo = urllib2.urlopen(url)
    lines = []
    while 1:
        f = fo.readline()
        if not f:
            break
        lines.append(f)
    fo.close()
    m = md5(''.join(lines))
    print(fmt % ('keepalive readline', m.hexdigest()))
|
|
573
|
|
def comp(N, url):
    """Time N fetches of url with and without the keepalive handler.

    Prints both timings and their ratio (plain time / keepalive time).
    """
    print(' making %i connections to:\n %s' % (N, url))

    # baseline: the stock urllib2 opener
    sys.stdout.write(' first using the normal urllib handlers')
    urllib2.install_opener(urllib2.build_opener())
    plain_time = fetch(N, url)
    print(' TIME: %.3f s' % plain_time)

    # same fetches again through the keepalive handler
    sys.stdout.write(' now using the keepalive handler ')
    urllib2.install_opener(urllib2.build_opener(HTTPHandler()))
    keepalive_time = fetch(N, url)
    print(' TIME: %.3f s' % keepalive_time)
    print(' improvement factor: %.2f' % (plain_time/keepalive_time, ))
|
|
591
|
|
def fetch(N, url, delay=0):
    """Fetch url N times and return the elapsed wall-clock time in seconds.

    When delay is non-zero, sleep that many seconds before every fetch
    except the first.  A warning is printed for each read whose length
    differs from that of the first read.
    """
    import time
    sizes = []
    began = time.time()
    for attempt in range(N):
        if attempt > 0 and delay:
            time.sleep(delay)
        response = urllib2.urlopen(url)
        body = response.read()
        response.close()
        sizes.append(len(body))
    elapsed = time.time() - began

    # compare every later read against the first one
    for offset, size in enumerate(sizes[1:]):
        if size != sizes[0]:
            print("WARNING: inconsistent length on read %i: %i"
                  % (offset + 1, size))

    return elapsed
|
|
611
|
|
def test_timeout(url):
    """Fetch url, wait 20 seconds, fetch it again and compare the data.

    While the test runs, DEBUG is replaced by a logger that prints every
    message; the previous value is restored afterwards, even if a fetch
    raises.
    """
    global DEBUG
    # imported here so the function also works when this module is
    # imported rather than run as a script
    import time
    dbbackup = DEBUG
    class FakeLogger:
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug
    DEBUG = FakeLogger()
    try:
        print(" fetching the file to establish a connection")
        fo = urllib2.urlopen(url)
        data1 = fo.read()
        fo.close()

        i = 20
        print(" waiting %i seconds for the server to close the connection"
              % i)
        while i > 0:
            sys.stdout.write('\r %2i' % i)
            sys.stdout.flush()
            time.sleep(1)
            i -= 1
        # return the cursor on the stream the countdown was written to
        sys.stdout.write('\r')

        print(" fetching the file a second time")
        fo = urllib2.urlopen(url)
        data2 = fo.read()
        fo.close()

        if data1 == data2:
            print(' data are identical')
        else:
            print(' ERROR: DATA DIFFER')
    finally:
        DEBUG = dbbackup
|
|
644
|
|
645
|
|
def test(url, N=10):
    """Run every self-test against url, using N fetches for the timing run.

    Exits the interpreter if the error-handler check raises IOError, since
    the remaining tests would fail the same way.
    """
    print("checking error hander (do this on a non-200)")
    try:
        error_handler(url)
    except IOError:
        print("exiting - exception will prevent further tests")
        sys.exit()

    # remaining stages: a blank line, a banner, then the check itself
    stages = (
        ("performing continuity test (making sure stuff isn't corrupted)",
         lambda: continuity(url)),
        ("performing speed comparison",
         lambda: comp(N, url)),
        ("performing dropped-connection check",
         lambda: test_timeout(url)),
    )
    for banner, run_stage in stages:
        print("")
        print(banner)
        run_stage()
|
|
661
|
|
if __name__ == '__main__':
    # Script entry point: keepalive.py <integer> <url>
    # time is imported at module scope on purpose: the test functions
    # look it up as a global
    import time
    import sys
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        # missing argument or non-numeric count: show usage.  Anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)
|