eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/mercurial/keepalive.py
changeset 69 c6bca38c1cbf
equal deleted inserted replaced
68:5ff1fc726848 69:c6bca38c1cbf
       
     1 #   This library is free software; you can redistribute it and/or
       
     2 #   modify it under the terms of the GNU Lesser General Public
       
     3 #   License as published by the Free Software Foundation; either
       
     4 #   version 2.1 of the License, or (at your option) any later version.
       
     5 #
       
     6 #   This library is distributed in the hope that it will be useful,
       
     7 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
       
     8 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
     9 #   Lesser General Public License for more details.
       
    10 #
       
    11 #   You should have received a copy of the GNU Lesser General Public
       
    12 #   License along with this library; if not, write to the
       
    13 #      Free Software Foundation, Inc.,
       
    14 #      59 Temple Place, Suite 330,
       
    15 #      Boston, MA  02111-1307  USA
       
    16 
       
    17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
       
    18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
       
    19 
       
    20 # Modified by Benoit Boissinot:
       
    21 #  - fix for digest auth (inspired from urllib2.py @ Python v2.4)
       
    22 # Modified by Dirkjan Ochtman:
       
    23 #  - import md5 function from a local util module
       
    24 # Modified by Martin Geisler:
       
    25 #  - moved md5 function from local util module to this module
       
    26 # Modified by Augie Fackler:
       
    27 #  - add safesend method and use it to prevent broken pipe errors
       
    28 #    on large POST requests
       
    29 
       
    30 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
       
    31 
       
    32 >>> import urllib2
       
    33 >>> from keepalive import HTTPHandler
       
    34 >>> keepalive_handler = HTTPHandler()
       
    35 >>> opener = urllib2.build_opener(keepalive_handler)
       
    36 >>> urllib2.install_opener(opener)
       
    37 >>>
       
    38 >>> fo = urllib2.urlopen('http://www.python.org')
       
    39 
       
    40 If a connection to a given host is requested, and all of the existing
       
    41 connections are still in use, another connection will be opened.  If
       
    42 the handler tries to use an existing connection but it fails in some
       
    43 way, it will be closed and removed from the pool.
       
    44 
       
    45 To remove the handler, simply re-run build_opener with no arguments, and
       
    46 install that opener.
       
    47 
       
    48 You can explicitly close connections by using the close_connection()
       
    49 method of the returned file-like object (described below) or you can
       
    50 use the handler methods:
       
    51 
       
    52   close_connection(host)
       
    53   close_all()
       
    54   open_connections()
       
    55 
       
    56 NOTE: using the close_connection and close_all methods of the handler
       
    57 should be done with care when using multiple threads.
       
    58   * there is nothing that prevents another thread from creating new
       
    59     connections immediately after connections are closed
       
    60   * no checks are done to prevent in-use connections from being closed
       
    61 
       
    62 >>> keepalive_handler.close_all()
       
    63 
       
    64 EXTRA ATTRIBUTES AND METHODS
       
    65 
       
    66   Upon a status of 200, the object returned has a few additional
       
    67   attributes and methods, which should not be used if you want to
       
    68   remain consistent with the normal urllib2-returned objects:
       
    69 
       
    70     close_connection()  -  close the connection to the host
       
    71     readlines()         -  you know, readlines()
       
    72     status              -  the return status (ie 404)
       
    73     reason              -  english translation of status (ie 'File not found')
       
    74 
       
    75   If you want the best of both worlds, use this inside an
       
    76   AttributeError-catching try:
       
    77 
       
    78   >>> try: status = fo.status
       
    79   >>> except AttributeError: status = None
       
    80 
       
    81   Unfortunately, these are ONLY there if status == 200, so it's not
       
    82   easy to distinguish between non-200 responses.  The reason is that
       
    83   urllib2 tries to do clever things with error codes 301, 302, 401,
       
    84   and 407, and it wraps the object upon return.
       
    85 
       
    86   For python versions earlier than 2.4, you can avoid this fancy error
       
    87   handling by setting the module-level global HANDLE_ERRORS to zero.
       
    88   You see, prior to 2.4, it's the HTTP Handler's job to determine what
       
    89   to handle specially, and what to just pass up.  HANDLE_ERRORS == 0
       
    90   means "pass everything up".  In python 2.4, however, this job no
       
    91   longer belongs to the HTTP Handler and is now done by a NEW handler,
       
    92   HTTPErrorProcessor.  Here's the bottom line:
       
    93 
       
    94     python version < 2.4
       
    95         HANDLE_ERRORS == 1  (default) pass up 200, treat the rest as
       
    96                             errors
       
    97         HANDLE_ERRORS == 0  pass everything up, error processing is
       
    98                             left to the calling code
       
    99     python version >= 2.4
       
   100         HANDLE_ERRORS == 1  pass up 200, treat the rest as errors
       
   101         HANDLE_ERRORS == 0  (default) pass everything up, let the
       
   102                             other handlers (specifically,
       
   103                             HTTPErrorProcessor) decide what to do
       
   104 
       
   105   In practice, setting the variable either way makes little difference
       
   106   in python 2.4, so for the most consistent behavior across versions,
       
   107   you probably just want to use the defaults, which will give you
       
   108   exceptions on errors.
       
   109 
       
   110 """
       
   111 
       
   112 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
       
   113 
       
   114 import errno
       
   115 import httplib
       
   116 import socket
       
   117 import thread
       
   118 import urllib2
       
   119 
       
   120 DEBUG = None
       
   121 
       
   122 import sys
       
   123 if sys.version_info < (2, 4):
       
   124     HANDLE_ERRORS = 1
       
   125 else: HANDLE_ERRORS = 0
       
   126 
       
   127 class ConnectionManager:
       
   128     """
       
   129     The connection manager must be able to:
       
   130       * keep track of all existing
       
   131       """
       
   132     def __init__(self):
       
   133         self._lock = thread.allocate_lock()
       
   134         self._hostmap = {} # map hosts to a list of connections
       
   135         self._connmap = {} # map connections to host
       
   136         self._readymap = {} # map connection to ready state
       
   137 
       
   138     def add(self, host, connection, ready):
       
   139         self._lock.acquire()
       
   140         try:
       
   141             if not host in self._hostmap:
       
   142                 self._hostmap[host] = []
       
   143             self._hostmap[host].append(connection)
       
   144             self._connmap[connection] = host
       
   145             self._readymap[connection] = ready
       
   146         finally:
       
   147             self._lock.release()
       
   148 
       
   149     def remove(self, connection):
       
   150         self._lock.acquire()
       
   151         try:
       
   152             try:
       
   153                 host = self._connmap[connection]
       
   154             except KeyError:
       
   155                 pass
       
   156             else:
       
   157                 del self._connmap[connection]
       
   158                 del self._readymap[connection]
       
   159                 self._hostmap[host].remove(connection)
       
   160                 if not self._hostmap[host]: del self._hostmap[host]
       
   161         finally:
       
   162             self._lock.release()
       
   163 
       
   164     def set_ready(self, connection, ready):
       
   165         try:
       
   166             self._readymap[connection] = ready
       
   167         except KeyError:
       
   168             pass
       
   169 
       
   170     def get_ready_conn(self, host):
       
   171         conn = None
       
   172         self._lock.acquire()
       
   173         try:
       
   174             if host in self._hostmap:
       
   175                 for c in self._hostmap[host]:
       
   176                     if self._readymap[c]:
       
   177                         self._readymap[c] = 0
       
   178                         conn = c
       
   179                         break
       
   180         finally:
       
   181             self._lock.release()
       
   182         return conn
       
   183 
       
   184     def get_all(self, host=None):
       
   185         if host:
       
   186             return list(self._hostmap.get(host, []))
       
   187         else:
       
   188             return dict(self._hostmap)
       
   189 
       
   190 class KeepAliveHandler:
       
   191     def __init__(self):
       
   192         self._cm = ConnectionManager()
       
   193 
       
   194     #### Connection Management
       
   195     def open_connections(self):
       
   196         """return a list of connected hosts and the number of connections
       
   197         to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
       
   198         return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
       
   199 
       
   200     def close_connection(self, host):
       
   201         """close connection(s) to <host>
       
   202         host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
       
   203         no error occurs if there is no connection to that host."""
       
   204         for h in self._cm.get_all(host):
       
   205             self._cm.remove(h)
       
   206             h.close()
       
   207 
       
   208     def close_all(self):
       
   209         """close all open connections"""
       
   210         for host, conns in self._cm.get_all().iteritems():
       
   211             for h in conns:
       
   212                 self._cm.remove(h)
       
   213                 h.close()
       
   214 
       
   215     def _request_closed(self, request, host, connection):
       
   216         """tells us that this request is now closed and the the
       
   217         connection is ready for another request"""
       
   218         self._cm.set_ready(connection, 1)
       
   219 
       
   220     def _remove_connection(self, host, connection, close=0):
       
   221         if close:
       
   222             connection.close()
       
   223         self._cm.remove(connection)
       
   224 
       
   225     #### Transaction Execution
       
   226     def http_open(self, req):
       
   227         return self.do_open(HTTPConnection, req)
       
   228 
       
   229     def do_open(self, http_class, req):
       
   230         host = req.get_host()
       
   231         if not host:
       
   232             raise urllib2.URLError('no host given')
       
   233 
       
   234         try:
       
   235             h = self._cm.get_ready_conn(host)
       
   236             while h:
       
   237                 r = self._reuse_connection(h, req, host)
       
   238 
       
   239                 # if this response is non-None, then it worked and we're
       
   240                 # done.  Break out, skipping the else block.
       
   241                 if r:
       
   242                     break
       
   243 
       
   244                 # connection is bad - possibly closed by server
       
   245                 # discard it and ask for the next free connection
       
   246                 h.close()
       
   247                 self._cm.remove(h)
       
   248                 h = self._cm.get_ready_conn(host)
       
   249             else:
       
   250                 # no (working) free connections were found.  Create a new one.
       
   251                 h = http_class(host)
       
   252                 if DEBUG:
       
   253                     DEBUG.info("creating new connection to %s (%d)",
       
   254                                host, id(h))
       
   255                 self._cm.add(host, h, 0)
       
   256                 self._start_transaction(h, req)
       
   257                 r = h.getresponse()
       
   258         except (socket.error, httplib.HTTPException), err:
       
   259             raise urllib2.URLError(err)
       
   260 
       
   261         # if not a persistent connection, don't try to reuse it
       
   262         if r.will_close:
       
   263             self._cm.remove(h)
       
   264 
       
   265         if DEBUG:
       
   266             DEBUG.info("STATUS: %s, %s", r.status, r.reason)
       
   267         r._handler = self
       
   268         r._host = host
       
   269         r._url = req.get_full_url()
       
   270         r._connection = h
       
   271         r.code = r.status
       
   272         r.headers = r.msg
       
   273         r.msg = r.reason
       
   274 
       
   275         if r.status == 200 or not HANDLE_ERRORS:
       
   276             return r
       
   277         else:
       
   278             return self.parent.error('http', req, r,
       
   279                                      r.status, r.msg, r.headers)
       
   280 
       
   281     def _reuse_connection(self, h, req, host):
       
   282         """start the transaction with a re-used connection
       
   283         return a response object (r) upon success or None on failure.
       
   284         This DOES not close or remove bad connections in cases where
       
   285         it returns.  However, if an unexpected exception occurs, it
       
   286         will close and remove the connection before re-raising.
       
   287         """
       
   288         try:
       
   289             self._start_transaction(h, req)
       
   290             r = h.getresponse()
       
   291             # note: just because we got something back doesn't mean it
       
   292             # worked.  We'll check the version below, too.
       
   293         except (socket.error, httplib.HTTPException):
       
   294             r = None
       
   295         except:
       
   296             # adding this block just in case we've missed
       
   297             # something we will still raise the exception, but
       
   298             # lets try and close the connection and remove it
       
   299             # first.  We previously got into a nasty loop
       
   300             # where an exception was uncaught, and so the
       
   301             # connection stayed open.  On the next try, the
       
   302             # same exception was raised, etc.  The tradeoff is
       
   303             # that it's now possible this call will raise
       
   304             # a DIFFERENT exception
       
   305             if DEBUG:
       
   306                 DEBUG.error("unexpected exception - closing "
       
   307                             "connection to %s (%d)", host, id(h))
       
   308             self._cm.remove(h)
       
   309             h.close()
       
   310             raise
       
   311 
       
   312         if r is None or r.version == 9:
       
   313             # httplib falls back to assuming HTTP 0.9 if it gets a
       
   314             # bad header back.  This is most likely to happen if
       
   315             # the socket has been closed by the server since we
       
   316             # last used the connection.
       
   317             if DEBUG:
       
   318                 DEBUG.info("failed to re-use connection to %s (%d)",
       
   319                            host, id(h))
       
   320             r = None
       
   321         else:
       
   322             if DEBUG:
       
   323                 DEBUG.info("re-using connection to %s (%d)", host, id(h))
       
   324 
       
   325         return r
       
   326 
       
   327     def _start_transaction(self, h, req):
       
   328         # What follows mostly reimplements HTTPConnection.request()
       
   329         # except it adds self.parent.addheaders in the mix.
       
   330         headers = req.headers.copy()
       
   331         if sys.version_info >= (2, 4):
       
   332             headers.update(req.unredirected_hdrs)
       
   333         headers.update(self.parent.addheaders)
       
   334         headers = dict((n.lower(), v) for n, v in headers.items())
       
   335         skipheaders = {}
       
   336         for n in ('host', 'accept-encoding'):
       
   337             if n in headers:
       
   338                 skipheaders['skip_' + n.replace('-', '_')] = 1
       
   339         try:
       
   340             if req.has_data():
       
   341                 data = req.get_data()
       
   342                 h.putrequest('POST', req.get_selector(), **skipheaders)
       
   343                 if 'content-type' not in headers:
       
   344                     h.putheader('Content-type',
       
   345                                 'application/x-www-form-urlencoded')
       
   346                 if 'content-length' not in headers:
       
   347                     h.putheader('Content-length', '%d' % len(data))
       
   348             else:
       
   349                 h.putrequest('GET', req.get_selector(), **skipheaders)
       
   350         except (socket.error), err:
       
   351             raise urllib2.URLError(err)
       
   352         for k, v in headers.items():
       
   353             h.putheader(k, v)
       
   354         h.endheaders()
       
   355         if req.has_data():
       
   356             h.send(data)
       
   357 
       
class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
    # Concrete handler: combines the keepalive pooling logic above with
    # urllib2.HTTPHandler's machinery (scheme registration, parent
    # plumbing).  urllib2 dispatches http requests to http_open(),
    # which KeepAliveHandler provides.
    pass
       
   360 
       
   361 class HTTPResponse(httplib.HTTPResponse):
       
   362     # we need to subclass HTTPResponse in order to
       
   363     # 1) add readline() and readlines() methods
       
   364     # 2) add close_connection() methods
       
   365     # 3) add info() and geturl() methods
       
   366 
       
   367     # in order to add readline(), read must be modified to deal with a
       
   368     # buffer.  example: readline must read a buffer and then spit back
       
   369     # one line at a time.  The only real alternative is to read one
       
   370     # BYTE at a time (ick).  Once something has been read, it can't be
       
   371     # put back (ok, maybe it can, but that's even uglier than this),
       
   372     # so if you THEN do a normal read, you must first take stuff from
       
   373     # the buffer.
       
   374 
       
   375     # the read method wraps the original to accomodate buffering,
       
   376     # although read() never adds to the buffer.
       
   377     # Both readline and readlines have been stolen with almost no
       
   378     # modification from socket.py
       
   379 
       
   380 
       
   381     def __init__(self, sock, debuglevel=0, strict=0, method=None):
       
   382         if method: # the httplib in python 2.3 uses the method arg
       
   383             httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
       
   384         else: # 2.2 doesn't
       
   385             httplib.HTTPResponse.__init__(self, sock, debuglevel)
       
   386         self.fileno = sock.fileno
       
   387         self.code = None
       
   388         self._rbuf = ''
       
   389         self._rbufsize = 8096
       
   390         self._handler = None # inserted by the handler later
       
   391         self._host = None    # (same)
       
   392         self._url = None     # (same)
       
   393         self._connection = None # (same)
       
   394 
       
   395     _raw_read = httplib.HTTPResponse.read
       
   396 
       
   397     def close(self):
       
   398         if self.fp:
       
   399             self.fp.close()
       
   400             self.fp = None
       
   401             if self._handler:
       
   402                 self._handler._request_closed(self, self._host,
       
   403                                               self._connection)
       
   404 
       
   405     def close_connection(self):
       
   406         self._handler._remove_connection(self._host, self._connection, close=1)
       
   407         self.close()
       
   408 
       
   409     def info(self):
       
   410         return self.headers
       
   411 
       
   412     def geturl(self):
       
   413         return self._url
       
   414 
       
   415     def read(self, amt=None):
       
   416         # the _rbuf test is only in this first if for speed.  It's not
       
   417         # logically necessary
       
   418         if self._rbuf and not amt is None:
       
   419             L = len(self._rbuf)
       
   420             if amt > L:
       
   421                 amt -= L
       
   422             else:
       
   423                 s = self._rbuf[:amt]
       
   424                 self._rbuf = self._rbuf[amt:]
       
   425                 return s
       
   426 
       
   427         s = self._rbuf + self._raw_read(amt)
       
   428         self._rbuf = ''
       
   429         return s
       
   430 
       
   431     # stolen from Python SVN #68532 to fix issue1088
       
   432     def _read_chunked(self, amt):
       
   433         chunk_left = self.chunk_left
       
   434         value = ''
       
   435 
       
   436         # XXX This accumulates chunks by repeated string concatenation,
       
   437         # which is not efficient as the number or size of chunks gets big.
       
   438         while True:
       
   439             if chunk_left is None:
       
   440                 line = self.fp.readline()
       
   441                 i = line.find(';')
       
   442                 if i >= 0:
       
   443                     line = line[:i] # strip chunk-extensions
       
   444                 try:
       
   445                     chunk_left = int(line, 16)
       
   446                 except ValueError:
       
   447                     # close the connection as protocol synchronisation is
       
   448                     # probably lost
       
   449                     self.close()
       
   450                     raise httplib.IncompleteRead(value)
       
   451                 if chunk_left == 0:
       
   452                     break
       
   453             if amt is None:
       
   454                 value += self._safe_read(chunk_left)
       
   455             elif amt < chunk_left:
       
   456                 value += self._safe_read(amt)
       
   457                 self.chunk_left = chunk_left - amt
       
   458                 return value
       
   459             elif amt == chunk_left:
       
   460                 value += self._safe_read(amt)
       
   461                 self._safe_read(2)  # toss the CRLF at the end of the chunk
       
   462                 self.chunk_left = None
       
   463                 return value
       
   464             else:
       
   465                 value += self._safe_read(chunk_left)
       
   466                 amt -= chunk_left
       
   467 
       
   468             # we read the whole chunk, get another
       
   469             self._safe_read(2)      # toss the CRLF at the end of the chunk
       
   470             chunk_left = None
       
   471 
       
   472         # read and discard trailer up to the CRLF terminator
       
   473         ### note: we shouldn't have any trailers!
       
   474         while True:
       
   475             line = self.fp.readline()
       
   476             if not line:
       
   477                 # a vanishingly small number of sites EOF without
       
   478                 # sending the trailer
       
   479                 break
       
   480             if line == '\r\n':
       
   481                 break
       
   482 
       
   483         # we read everything; close the "file"
       
   484         self.close()
       
   485 
       
   486         return value
       
   487 
       
   488     def readline(self, limit=-1):
       
   489         i = self._rbuf.find('\n')
       
   490         while i < 0 and not (0 < limit <= len(self._rbuf)):
       
   491             new = self._raw_read(self._rbufsize)
       
   492             if not new:
       
   493                 break
       
   494             i = new.find('\n')
       
   495             if i >= 0:
       
   496                 i = i + len(self._rbuf)
       
   497             self._rbuf = self._rbuf + new
       
   498         if i < 0:
       
   499             i = len(self._rbuf)
       
   500         else:
       
   501             i = i + 1
       
   502         if 0 <= limit < len(self._rbuf):
       
   503             i = limit
       
   504         data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
       
   505         return data
       
   506 
       
   507     def readlines(self, sizehint = 0):
       
   508         total = 0
       
   509         list = []
       
   510         while 1:
       
   511             line = self.readline()
       
   512             if not line:
       
   513                 break
       
   514             list.append(line)
       
   515             total += len(line)
       
   516             if sizehint and total >= sizehint:
       
   517                 break
       
   518         return list
       
   519 
       
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.

    (The `str' parameter deliberately shadows the builtin: it keeps the
    signature of the httplib method this function replaces.)
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected()

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    #       ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print "send:", repr(str)
    try:
        blocksize = 8192
        if hasattr(str, 'read'):
            # file-like payload: stream it out in fixed-size blocks
            if self.debuglevel > 0:
                print "sendIng a read()able"
            data = str.read(blocksize)
            while data:
                self.sock.sendall(data)
                data = str.read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error, v:
        reraise = True
        if v[0] == errno.EPIPE:      # Broken pipe
            # the peer hung up mid-send; if our request went out
            # completely, capture any response the server managed to
            # deliver so the wrapped getresponse() can return it
            # (note: self._HTTPConnection__state is the name-mangled
            # private state attribute of httplib.HTTPConnection)
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise
       
   569 
       
   570 def wrapgetresponse(cls):
       
   571     """Wraps getresponse in cls with a broken-pipe sane version.
       
   572     """
       
   573     def safegetresponse(self):
       
   574         # In safesend() we might set the _broken_pipe_resp
       
   575         # attribute, in which case the socket has already
       
   576         # been closed and we just need to give them the response
       
   577         # back. Otherwise, we use the normal response path.
       
   578         r = getattr(self, '_broken_pipe_resp', None)
       
   579         if r is not None:
       
   580             return r
       
   581         return cls.getresponse(self)
       
   582     safegetresponse.__doc__ = cls.getresponse.__doc__
       
   583     return safegetresponse
       
   584 
       
class HTTPConnection(httplib.HTTPConnection):
    # use the modified response class
    response_class = HTTPResponse
    # broken-pipe-tolerant send (see safesend above) ...
    send = safesend
    # ... paired with a getresponse that can return the response
    # safesend stashed when the server hung up early
    getresponse = wrapgetresponse(httplib.HTTPConnection)
       
   590 
       
   591 
       
   592 #########################################################################
       
   593 #####   TEST FUNCTIONS
       
   594 #########################################################################
       
   595 
       
def error_handler(url):
    """Test helper: fetch url with the keepalive handler installed,
    once with HANDLE_ERRORS off and once with it on, printing the
    status/reason observed for each pass.  Restores HANDLE_ERRORS and
    closes all pooled connections before returning."""
    global HANDLE_ERRORS
    orig = HANDLE_ERRORS
    keepalive_handler = HTTPHandler()
    opener = urllib2.build_opener(keepalive_handler)
    urllib2.install_opener(opener)
    pos = {0: 'off', 1: 'on'}
    for i in (0, 1):
        print "  fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
        HANDLE_ERRORS = i
        try:
            fo = urllib2.urlopen(url)
            fo.read()
            fo.close()
            try:
                status, reason = fo.status, fo.reason
            except AttributeError:
                # wrapped (non-200) responses may lack these attributes
                status, reason = None, None
        except IOError, e:
            print "  EXCEPTION: %s" % e
            raise
        else:
            print "  status = %s, reason = %s" % (status, reason)
    HANDLE_ERRORS = orig
    hosts = keepalive_handler.open_connections()
    print "open connections:", hosts
    keepalive_handler.close_all()
       
   623 
       
   624 def md5(s):
       
   625     try:
       
   626         from hashlib import md5 as _md5
       
   627     except ImportError:
       
   628         from md5 import md5 as _md5
       
   629     global md5
       
   630     md5 = _md5
       
   631     return _md5(s)
       
   632 
       
   633 def continuity(url):
       
   634     format = '%25s: %s'
       
   635 
       
   636     # first fetch the file with the normal http handler
       
   637     opener = urllib2.build_opener()
       
   638     urllib2.install_opener(opener)
       
   639     fo = urllib2.urlopen(url)
       
   640     foo = fo.read()
       
   641     fo.close()
       
   642     m = md5.new(foo)
       
   643     print format % ('normal urllib', m.hexdigest())
       
   644 
       
   645     # now install the keepalive handler and try again
       
   646     opener = urllib2.build_opener(HTTPHandler())
       
   647     urllib2.install_opener(opener)
       
   648 
       
   649     fo = urllib2.urlopen(url)
       
   650     foo = fo.read()
       
   651     fo.close()
       
   652     m = md5.new(foo)
       
   653     print format % ('keepalive read', m.hexdigest())
       
   654 
       
   655     fo = urllib2.urlopen(url)
       
   656     foo = ''
       
   657     while 1:
       
   658         f = fo.readline()
       
   659         if f:
       
   660             foo = foo + f
       
   661         else: break
       
   662     fo.close()
       
   663     m = md5.new(foo)
       
   664     print format % ('keepalive readline', m.hexdigest())
       
   665 
       
   666 def comp(N, url):
       
   667     print '  making %i connections to:\n  %s' % (N, url)
       
   668 
       
   669     sys.stdout.write('  first using the normal urllib handlers')
       
   670     # first use normal opener
       
   671     opener = urllib2.build_opener()
       
   672     urllib2.install_opener(opener)
       
   673     t1 = fetch(N, url)
       
   674     print '  TIME: %.3f s' % t1
       
   675 
       
   676     sys.stdout.write('  now using the keepalive handler       ')
       
   677     # now install the keepalive handler and try again
       
   678     opener = urllib2.build_opener(HTTPHandler())
       
   679     urllib2.install_opener(opener)
       
   680     t2 = fetch(N, url)
       
   681     print '  TIME: %.3f s' % t2
       
   682     print '  improvement factor: %.2f' % (t1 / t2)
       
   683 
       
   684 def fetch(N, url, delay=0):
       
   685     import time
       
   686     lens = []
       
   687     starttime = time.time()
       
   688     for i in range(N):
       
   689         if delay and i > 0:
       
   690             time.sleep(delay)
       
   691         fo = urllib2.urlopen(url)
       
   692         foo = fo.read()
       
   693         fo.close()
       
   694         lens.append(len(foo))
       
   695     diff = time.time() - starttime
       
   696 
       
   697     j = 0
       
   698     for i in lens[1:]:
       
   699         j = j + 1
       
   700         if not i == lens[0]:
       
   701             print "WARNING: inconsistent length on read %i: %i" % (j, i)
       
   702 
       
   703     return diff
       
   704 
       
def test_timeout(url):
    """Check recovery from a server-side close of a keepalive connection.

    Fetches *url*, waits 20 seconds for the server to time out the idle
    connection, fetches again, and reports whether the two bodies match.
    Temporarily replaces the module-global DEBUG logger with one that
    prints to stdout so connection events are visible, restoring it
    before returning.
    """
    global DEBUG
    dbbackup = DEBUG
    class FakeLogger:
        # minimal stand-in logger: every level just prints the
        # %-formatted message
        def debug(self, msg, *args):
            print msg % args
        info = warning = error = debug
    DEBUG = FakeLogger()
    print "  fetching the file to establish a connection"
    fo = urllib2.urlopen(url)
    data1 = fo.read()
    fo.close()

    # countdown display while the server's keepalive timeout elapses
    i = 20
    print "  waiting %i seconds for the server to close the connection" % i
    while i > 0:
        sys.stdout.write('\r  %2i' % i)
        sys.stdout.flush()
        time.sleep(1)
        i -= 1
    sys.stderr.write('\r')

    print "  fetching the file a second time"
    fo = urllib2.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print '  data are identical'
    else:
        print '  ERROR: DATA DIFFER'

    DEBUG = dbbackup
       
   738 
       
   739 
       
   740 def test(url, N=10):
       
   741     print "checking error hander (do this on a non-200)"
       
   742     try: error_handler(url)
       
   743     except IOError:
       
   744         print "exiting - exception will prevent further tests"
       
   745         sys.exit()
       
   746     print
       
   747     print "performing continuity test (making sure stuff isn't corrupted)"
       
   748     continuity(url)
       
   749     print
       
   750     print "performing speed comparison"
       
   751     comp(N, url)
       
   752     print
       
   753     print "performing dropped-connection check"
       
   754     test_timeout(url)
       
   755 
       
   756 if __name__ == '__main__':
       
   757     import time
       
   758     import sys
       
   759     try:
       
   760         N = int(sys.argv[1])
       
   761         url = sys.argv[2]
       
   762     except:
       
   763         print "%s <integer> <url>" % sys.argv[0]
       
   764     else:
       
   765         test(url, N)