eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/mercurial/byterange.py
changeset 69 c6bca38c1cbf
equal deleted inserted replaced
68:5ff1fc726848 69:c6bca38c1cbf
       
     1 #   This library is free software; you can redistribute it and/or
       
     2 #   modify it under the terms of the GNU Lesser General Public
       
     3 #   License as published by the Free Software Foundation; either
       
     4 #   version 2.1 of the License, or (at your option) any later version.
       
     5 #
       
     6 #   This library is distributed in the hope that it will be useful,
       
     7 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
       
     8 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
     9 #   Lesser General Public License for more details.
       
    10 #
       
    11 #   You should have received a copy of the GNU Lesser General Public
       
    12 #   License along with this library; if not, write to the
       
    13 #      Free Software Foundation, Inc.,
       
    14 #      59 Temple Place, Suite 330,
       
    15 #      Boston, MA  02111-1307  USA
       
    16 
       
    17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
       
    18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
       
    19 
       
    20 # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
       
    21 
       
    22 import os
       
    23 import stat
       
    24 import urllib
       
    25 import urllib2
       
    26 import email.Utils
       
    27 
       
    28 class RangeError(IOError):
       
    29     """Error raised when an unsatisfiable range is requested."""
       
    30     pass
       
    31 
       
    32 class HTTPRangeHandler(urllib2.BaseHandler):
       
    33     """Handler that enables HTTP Range headers.
       
    34 
       
    35     This was extremely simple. The Range header is a HTTP feature to
       
    36     begin with so all this class does is tell urllib2 that the
       
    37     "206 Partial Content" reponse from the HTTP server is what we
       
    38     expected.
       
    39 
       
    40     Example:
       
    41         import urllib2
       
    42         import byterange
       
    43 
       
    44         range_handler = range.HTTPRangeHandler()
       
    45         opener = urllib2.build_opener(range_handler)
       
    46 
       
    47         # install it
       
    48         urllib2.install_opener(opener)
       
    49 
       
    50         # create Request and set Range header
       
    51         req = urllib2.Request('http://www.python.org/')
       
    52         req.header['Range'] = 'bytes=30-50'
       
    53         f = urllib2.urlopen(req)
       
    54     """
       
    55 
       
    56     def http_error_206(self, req, fp, code, msg, hdrs):
       
    57         # 206 Partial Content Response
       
    58         r = urllib.addinfourl(fp, hdrs, req.get_full_url())
       
    59         r.code = code
       
    60         r.msg = msg
       
    61         return r
       
    62 
       
    63     def http_error_416(self, req, fp, code, msg, hdrs):
       
    64         # HTTP's Range Not Satisfiable error
       
    65         raise RangeError('Requested Range Not Satisfiable')
       
    66 
       
    67 class RangeableFileObject:
       
    68     """File object wrapper to enable raw range handling.
       
    69     This was implemented primarilary for handling range
       
    70     specifications for file:// urls. This object effectively makes
       
    71     a file object look like it consists only of a range of bytes in
       
    72     the stream.
       
    73 
       
    74     Examples:
       
    75         # expose 10 bytes, starting at byte position 20, from
       
    76         # /etc/aliases.
       
    77         >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
       
    78         # seek seeks within the range (to position 23 in this case)
       
    79         >>> fo.seek(3)
       
    80         # tell tells where your at _within the range_ (position 3 in
       
    81         # this case)
       
    82         >>> fo.tell()
       
    83         # read EOFs if an attempt is made to read past the last
       
    84         # byte in the range. the following will return only 7 bytes.
       
    85         >>> fo.read(30)
       
    86     """
       
    87 
       
    88     def __init__(self, fo, rangetup):
       
    89         """Create a RangeableFileObject.
       
    90         fo       -- a file like object. only the read() method need be
       
    91                     supported but supporting an optimized seek() is
       
    92                     preferable.
       
    93         rangetup -- a (firstbyte,lastbyte) tuple specifying the range
       
    94                     to work over.
       
    95         The file object provided is assumed to be at byte offset 0.
       
    96         """
       
    97         self.fo = fo
       
    98         (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup)
       
    99         self.realpos = 0
       
   100         self._do_seek(self.firstbyte)
       
   101 
       
   102     def __getattr__(self, name):
       
   103         """This effectively allows us to wrap at the instance level.
       
   104         Any attribute not found in _this_ object will be searched for
       
   105         in self.fo.  This includes methods."""
       
   106         if hasattr(self.fo, name):
       
   107             return getattr(self.fo, name)
       
   108         raise AttributeError(name)
       
   109 
       
   110     def tell(self):
       
   111         """Return the position within the range.
       
   112         This is different from fo.seek in that position 0 is the
       
   113         first byte position of the range tuple. For example, if
       
   114         this object was created with a range tuple of (500,899),
       
   115         tell() will return 0 when at byte position 500 of the file.
       
   116         """
       
   117         return (self.realpos - self.firstbyte)
       
   118 
       
   119     def seek(self, offset, whence=0):
       
   120         """Seek within the byte range.
       
   121         Positioning is identical to that described under tell().
       
   122         """
       
   123         assert whence in (0, 1, 2)
       
   124         if whence == 0:   # absolute seek
       
   125             realoffset = self.firstbyte + offset
       
   126         elif whence == 1: # relative seek
       
   127             realoffset = self.realpos + offset
       
   128         elif whence == 2: # absolute from end of file
       
   129             # XXX: are we raising the right Error here?
       
   130             raise IOError('seek from end of file not supported.')
       
   131 
       
   132         # do not allow seek past lastbyte in range
       
   133         if self.lastbyte and (realoffset >= self.lastbyte):
       
   134             realoffset = self.lastbyte
       
   135 
       
   136         self._do_seek(realoffset - self.realpos)
       
   137 
       
   138     def read(self, size=-1):
       
   139         """Read within the range.
       
   140         This method will limit the size read based on the range.
       
   141         """
       
   142         size = self._calc_read_size(size)
       
   143         rslt = self.fo.read(size)
       
   144         self.realpos += len(rslt)
       
   145         return rslt
       
   146 
       
   147     def readline(self, size=-1):
       
   148         """Read lines within the range.
       
   149         This method will limit the size read based on the range.
       
   150         """
       
   151         size = self._calc_read_size(size)
       
   152         rslt = self.fo.readline(size)
       
   153         self.realpos += len(rslt)
       
   154         return rslt
       
   155 
       
   156     def _calc_read_size(self, size):
       
   157         """Handles calculating the amount of data to read based on
       
   158         the range.
       
   159         """
       
   160         if self.lastbyte:
       
   161             if size > -1:
       
   162                 if ((self.realpos + size) >= self.lastbyte):
       
   163                     size = (self.lastbyte - self.realpos)
       
   164             else:
       
   165                 size = (self.lastbyte - self.realpos)
       
   166         return size
       
   167 
       
   168     def _do_seek(self, offset):
       
   169         """Seek based on whether wrapped object supports seek().
       
   170         offset is relative to the current position (self.realpos).
       
   171         """
       
   172         assert offset >= 0
       
   173         if not hasattr(self.fo, 'seek'):
       
   174             self._poor_mans_seek(offset)
       
   175         else:
       
   176             self.fo.seek(self.realpos + offset)
       
   177         self.realpos += offset
       
   178 
       
   179     def _poor_mans_seek(self, offset):
       
   180         """Seek by calling the wrapped file objects read() method.
       
   181         This is used for file like objects that do not have native
       
   182         seek support. The wrapped objects read() method is called
       
   183         to manually seek to the desired position.
       
   184         offset -- read this number of bytes from the wrapped
       
   185                   file object.
       
   186         raise RangeError if we encounter EOF before reaching the
       
   187         specified offset.
       
   188         """
       
   189         pos = 0
       
   190         bufsize = 1024
       
   191         while pos < offset:
       
   192             if (pos + bufsize) > offset:
       
   193                 bufsize = offset - pos
       
   194             buf = self.fo.read(bufsize)
       
   195             if len(buf) != bufsize:
       
   196                 raise RangeError('Requested Range Not Satisfiable')
       
   197             pos += bufsize
       
   198 
       
   199 class FileRangeHandler(urllib2.FileHandler):
       
   200     """FileHandler subclass that adds Range support.
       
   201     This class handles Range headers exactly like an HTTP
       
   202     server would.
       
   203     """
       
   204     def open_local_file(self, req):
       
   205         import mimetypes
       
   206         import email
       
   207         host = req.get_host()
       
   208         file = req.get_selector()
       
   209         localfile = urllib.url2pathname(file)
       
   210         stats = os.stat(localfile)
       
   211         size = stats[stat.ST_SIZE]
       
   212         modified = email.Utils.formatdate(stats[stat.ST_MTIME])
       
   213         mtype = mimetypes.guess_type(file)[0]
       
   214         if host:
       
   215             host, port = urllib.splitport(host)
       
   216             if port or socket.gethostbyname(host) not in self.get_names():
       
   217                 raise urllib2.URLError('file not on local host')
       
   218         fo = open(localfile,'rb')
       
   219         brange = req.headers.get('Range', None)
       
   220         brange = range_header_to_tuple(brange)
       
   221         assert brange != ()
       
   222         if brange:
       
   223             (fb, lb) = brange
       
   224             if lb == '':
       
   225                 lb = size
       
   226             if fb < 0 or fb > size or lb > size:
       
   227                 raise RangeError('Requested Range Not Satisfiable')
       
   228             size = (lb - fb)
       
   229             fo = RangeableFileObject(fo, (fb, lb))
       
   230         headers = email.message_from_string(
       
   231             'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' %
       
   232             (mtype or 'text/plain', size, modified))
       
   233         return urllib.addinfourl(fo, headers, 'file:'+file)
       
   234 
       
   235 
       
   236 # FTP Range Support
       
   237 # Unfortunately, a large amount of base FTP code had to be copied
       
   238 # from urllib and urllib2 in order to insert the FTP REST command.
       
   239 # Code modifications for range support have been commented as
       
   240 # follows:
       
   241 # -- range support modifications start/end here
       
   242 
       
   243 from urllib import splitport, splituser, splitpasswd, splitattr, \
       
   244                    unquote, addclosehook, addinfourl
       
   245 import ftplib
       
   246 import socket
       
   247 import sys
       
   248 import mimetypes
       
   249 import email
       
   250 
       
   251 class FTPRangeHandler(urllib2.FTPHandler):
       
   252     def ftp_open(self, req):
       
   253         host = req.get_host()
       
   254         if not host:
       
   255             raise IOError('ftp error', 'no host given')
       
   256         host, port = splitport(host)
       
   257         if port is None:
       
   258             port = ftplib.FTP_PORT
       
   259         else:
       
   260             port = int(port)
       
   261 
       
   262         # username/password handling
       
   263         user, host = splituser(host)
       
   264         if user:
       
   265             user, passwd = splitpasswd(user)
       
   266         else:
       
   267             passwd = None
       
   268         host = unquote(host)
       
   269         user = unquote(user or '')
       
   270         passwd = unquote(passwd or '')
       
   271 
       
   272         try:
       
   273             host = socket.gethostbyname(host)
       
   274         except socket.error, msg:
       
   275             raise urllib2.URLError(msg)
       
   276         path, attrs = splitattr(req.get_selector())
       
   277         dirs = path.split('/')
       
   278         dirs = map(unquote, dirs)
       
   279         dirs, file = dirs[:-1], dirs[-1]
       
   280         if dirs and not dirs[0]:
       
   281             dirs = dirs[1:]
       
   282         try:
       
   283             fw = self.connect_ftp(user, passwd, host, port, dirs)
       
   284             type = file and 'I' or 'D'
       
   285             for attr in attrs:
       
   286                 attr, value = splitattr(attr)
       
   287                 if attr.lower() == 'type' and \
       
   288                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
       
   289                     type = value.upper()
       
   290 
       
   291             # -- range support modifications start here
       
   292             rest = None
       
   293             range_tup = range_header_to_tuple(req.headers.get('Range', None))
       
   294             assert range_tup != ()
       
   295             if range_tup:
       
   296                 (fb, lb) = range_tup
       
   297                 if fb > 0:
       
   298                     rest = fb
       
   299             # -- range support modifications end here
       
   300 
       
   301             fp, retrlen = fw.retrfile(file, type, rest)
       
   302 
       
   303             # -- range support modifications start here
       
   304             if range_tup:
       
   305                 (fb, lb) = range_tup
       
   306                 if lb == '':
       
   307                     if retrlen is None or retrlen == 0:
       
   308                         raise RangeError('Requested Range Not Satisfiable due'
       
   309                                          ' to unobtainable file length.')
       
   310                     lb = retrlen
       
   311                     retrlen = lb - fb
       
   312                     if retrlen < 0:
       
   313                         # beginning of range is larger than file
       
   314                         raise RangeError('Requested Range Not Satisfiable')
       
   315                 else:
       
   316                     retrlen = lb - fb
       
   317                     fp = RangeableFileObject(fp, (0, retrlen))
       
   318             # -- range support modifications end here
       
   319 
       
   320             headers = ""
       
   321             mtype = mimetypes.guess_type(req.get_full_url())[0]
       
   322             if mtype:
       
   323                 headers += "Content-Type: %s\n" % mtype
       
   324             if retrlen is not None and retrlen >= 0:
       
   325                 headers += "Content-Length: %d\n" % retrlen
       
   326             headers = email.message_from_string(headers)
       
   327             return addinfourl(fp, headers, req.get_full_url())
       
   328         except ftplib.all_errors, msg:
       
   329             raise IOError('ftp error', msg), sys.exc_info()[2]
       
   330 
       
   331     def connect_ftp(self, user, passwd, host, port, dirs):
       
   332         fw = ftpwrapper(user, passwd, host, port, dirs)
       
   333         return fw
       
   334 
       
   335 class ftpwrapper(urllib.ftpwrapper):
       
   336     # range support note:
       
   337     # this ftpwrapper code is copied directly from
       
   338     # urllib. The only enhancement is to add the rest
       
   339     # argument and pass it on to ftp.ntransfercmd
       
   340     def retrfile(self, file, type, rest=None):
       
   341         self.endtransfer()
       
   342         if type in ('d', 'D'):
       
   343             cmd = 'TYPE A'
       
   344             isdir = 1
       
   345         else:
       
   346             cmd = 'TYPE ' + type
       
   347             isdir = 0
       
   348         try:
       
   349             self.ftp.voidcmd(cmd)
       
   350         except ftplib.all_errors:
       
   351             self.init()
       
   352             self.ftp.voidcmd(cmd)
       
   353         conn = None
       
   354         if file and not isdir:
       
   355             # Use nlst to see if the file exists at all
       
   356             try:
       
   357                 self.ftp.nlst(file)
       
   358             except ftplib.error_perm, reason:
       
   359                 raise IOError('ftp error', reason), sys.exc_info()[2]
       
   360             # Restore the transfer mode!
       
   361             self.ftp.voidcmd(cmd)
       
   362             # Try to retrieve as a file
       
   363             try:
       
   364                 cmd = 'RETR ' + file
       
   365                 conn = self.ftp.ntransfercmd(cmd, rest)
       
   366             except ftplib.error_perm, reason:
       
   367                 if str(reason).startswith('501'):
       
   368                     # workaround for REST not supported error
       
   369                     fp, retrlen = self.retrfile(file, type)
       
   370                     fp = RangeableFileObject(fp, (rest,''))
       
   371                     return (fp, retrlen)
       
   372                 elif not str(reason).startswith('550'):
       
   373                     raise IOError('ftp error', reason), sys.exc_info()[2]
       
   374         if not conn:
       
   375             # Set transfer mode to ASCII!
       
   376             self.ftp.voidcmd('TYPE A')
       
   377             # Try a directory listing
       
   378             if file:
       
   379                 cmd = 'LIST ' + file
       
   380             else:
       
   381                 cmd = 'LIST'
       
   382             conn = self.ftp.ntransfercmd(cmd)
       
   383         self.busy = 1
       
   384         # Pass back both a suitably decorated object and a retrieval length
       
   385         return (addclosehook(conn[0].makefile('rb'),
       
   386                             self.endtransfer), conn[1])
       
   387 
       
   388 
       
   389 ####################################################################
       
   390 # Range Tuple Functions
       
   391 # XXX: These range tuple functions might go better in a class.
       
   392 
       
   393 _rangere = None
       
   394 def range_header_to_tuple(range_header):
       
   395     """Get a (firstbyte,lastbyte) tuple from a Range header value.
       
   396 
       
   397     Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
       
   398     function pulls the firstbyte and lastbyte values and returns
       
   399     a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
       
   400     the header value, it is returned as an empty string in the
       
   401     tuple.
       
   402 
       
   403     Return None if range_header is None
       
   404     Return () if range_header does not conform to the range spec
       
   405     pattern.
       
   406 
       
   407     """
       
   408     global _rangere
       
   409     if range_header is None:
       
   410         return None
       
   411     if _rangere is None:
       
   412         import re
       
   413         _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
       
   414     match = _rangere.match(range_header)
       
   415     if match:
       
   416         tup = range_tuple_normalize(match.group(1, 2))
       
   417         if tup and tup[1]:
       
   418             tup = (tup[0], tup[1]+1)
       
   419         return tup
       
   420     return ()
       
   421 
       
   422 def range_tuple_to_header(range_tup):
       
   423     """Convert a range tuple to a Range header value.
       
   424     Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None
       
   425     if no range is needed.
       
   426     """
       
   427     if range_tup is None:
       
   428         return None
       
   429     range_tup = range_tuple_normalize(range_tup)
       
   430     if range_tup:
       
   431         if range_tup[1]:
       
   432             range_tup = (range_tup[0], range_tup[1] - 1)
       
   433         return 'bytes=%s-%s' % range_tup
       
   434 
       
   435 def range_tuple_normalize(range_tup):
       
   436     """Normalize a (first_byte,last_byte) range tuple.
       
   437     Return a tuple whose first element is guaranteed to be an int
       
   438     and whose second element will be '' (meaning: the last byte) or
       
   439     an int. Finally, return None if the normalized tuple == (0,'')
       
   440     as that is equivelant to retrieving the entire file.
       
   441     """
       
   442     if range_tup is None:
       
   443         return None
       
   444     # handle first byte
       
   445     fb = range_tup[0]
       
   446     if fb in (None, ''):
       
   447         fb = 0
       
   448     else:
       
   449         fb = int(fb)
       
   450     # handle last byte
       
   451     try:
       
   452         lb = range_tup[1]
       
   453     except IndexError:
       
   454         lb = ''
       
   455     else:
       
   456         if lb is None:
       
   457             lb = ''
       
   458         elif lb != '':
       
   459             lb = int(lb)
       
   460     # check if range is over the entire file
       
   461     if (fb, lb) == (0, ''):
       
   462         return None
       
   463     # check that the range is valid
       
   464     if lb < fb:
       
   465         raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
       
   466     return (fb, lb)