diff -r 5ff1fc726848 -r c6bca38c1cbf eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/mercurial/byterange.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/mercurial/byterange.py Sat Jan 08 11:20:57 2011 +0530 @@ -0,0 +1,466 @@ +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, +# Boston, MA 02111-1307 USA + +# This file is part of urlgrabber, a high-level cross-protocol url-grabber +# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko + +# $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $ + +import os +import stat +import urllib +import urllib2 +import email.Utils + +class RangeError(IOError): + """Error raised when an unsatisfiable range is requested.""" + pass + +class HTTPRangeHandler(urllib2.BaseHandler): + """Handler that enables HTTP Range headers. + + This was extremely simple. The Range header is a HTTP feature to + begin with so all this class does is tell urllib2 that the + "206 Partial Content" reponse from the HTTP server is what we + expected. + + Example: + import urllib2 + import byterange + + range_handler = range.HTTPRangeHandler() + opener = urllib2.build_opener(range_handler) + + # install it + urllib2.install_opener(opener) + + # create Request and set Range header + req = urllib2.Request('http://www.python.org/') + req.header['Range'] = 'bytes=30-50' + f = urllib2.urlopen(req) + """ + + def http_error_206(self, req, fp, code, msg, hdrs): + # 206 Partial Content Response + r = urllib.addinfourl(fp, hdrs, req.get_full_url()) + r.code = code + r.msg = msg + return r + + def http_error_416(self, req, fp, code, msg, hdrs): + # HTTP's Range Not Satisfiable error + raise RangeError('Requested Range Not Satisfiable') + +class RangeableFileObject: + """File object wrapper to enable raw range handling. + This was implemented primarilary for handling range + specifications for file:// urls. This object effectively makes + a file object look like it consists only of a range of bytes in + the stream. + + Examples: + # expose 10 bytes, starting at byte position 20, from + # /etc/aliases. + >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30)) + # seek seeks within the range (to position 23 in this case) + >>> fo.seek(3) + # tell tells where your at _within the range_ (position 3 in + # this case) + >>> fo.tell() + # read EOFs if an attempt is made to read past the last + # byte in the range. the following will return only 7 bytes. + >>> fo.read(30) + """ + + def __init__(self, fo, rangetup): + """Create a RangeableFileObject. + fo -- a file like object. only the read() method need be + supported but supporting an optimized seek() is + preferable. + rangetup -- a (firstbyte,lastbyte) tuple specifying the range + to work over. + The file object provided is assumed to be at byte offset 0. + """ + self.fo = fo + (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup) + self.realpos = 0 + self._do_seek(self.firstbyte) + + def __getattr__(self, name): + """This effectively allows us to wrap at the instance level. + Any attribute not found in _this_ object will be searched for + in self.fo. This includes methods.""" + if hasattr(self.fo, name): + return getattr(self.fo, name) + raise AttributeError(name) + + def tell(self): + """Return the position within the range. + This is different from fo.seek in that position 0 is the + first byte position of the range tuple. For example, if + this object was created with a range tuple of (500,899), + tell() will return 0 when at byte position 500 of the file. + """ + return (self.realpos - self.firstbyte) + + def seek(self, offset, whence=0): + """Seek within the byte range. + Positioning is identical to that described under tell(). + """ + assert whence in (0, 1, 2) + if whence == 0: # absolute seek + realoffset = self.firstbyte + offset + elif whence == 1: # relative seek + realoffset = self.realpos + offset + elif whence == 2: # absolute from end of file + # XXX: are we raising the right Error here? + raise IOError('seek from end of file not supported.') + + # do not allow seek past lastbyte in range + if self.lastbyte and (realoffset >= self.lastbyte): + realoffset = self.lastbyte + + self._do_seek(realoffset - self.realpos) + + def read(self, size=-1): + """Read within the range. + This method will limit the size read based on the range. + """ + size = self._calc_read_size(size) + rslt = self.fo.read(size) + self.realpos += len(rslt) + return rslt + + def readline(self, size=-1): + """Read lines within the range. + This method will limit the size read based on the range. + """ + size = self._calc_read_size(size) + rslt = self.fo.readline(size) + self.realpos += len(rslt) + return rslt + + def _calc_read_size(self, size): + """Handles calculating the amount of data to read based on + the range. + """ + if self.lastbyte: + if size > -1: + if ((self.realpos + size) >= self.lastbyte): + size = (self.lastbyte - self.realpos) + else: + size = (self.lastbyte - self.realpos) + return size + + def _do_seek(self, offset): + """Seek based on whether wrapped object supports seek(). + offset is relative to the current position (self.realpos). + """ + assert offset >= 0 + if not hasattr(self.fo, 'seek'): + self._poor_mans_seek(offset) + else: + self.fo.seek(self.realpos + offset) + self.realpos += offset + + def _poor_mans_seek(self, offset): + """Seek by calling the wrapped file objects read() method. + This is used for file like objects that do not have native + seek support. The wrapped objects read() method is called + to manually seek to the desired position. + offset -- read this number of bytes from the wrapped + file object. + raise RangeError if we encounter EOF before reaching the + specified offset. + """ + pos = 0 + bufsize = 1024 + while pos < offset: + if (pos + bufsize) > offset: + bufsize = offset - pos + buf = self.fo.read(bufsize) + if len(buf) != bufsize: + raise RangeError('Requested Range Not Satisfiable') + pos += bufsize + +class FileRangeHandler(urllib2.FileHandler): + """FileHandler subclass that adds Range support. + This class handles Range headers exactly like an HTTP + server would. + """ + def open_local_file(self, req): + import mimetypes + import email + host = req.get_host() + file = req.get_selector() + localfile = urllib.url2pathname(file) + stats = os.stat(localfile) + size = stats[stat.ST_SIZE] + modified = email.Utils.formatdate(stats[stat.ST_MTIME]) + mtype = mimetypes.guess_type(file)[0] + if host: + host, port = urllib.splitport(host) + if port or socket.gethostbyname(host) not in self.get_names(): + raise urllib2.URLError('file not on local host') + fo = open(localfile,'rb') + brange = req.headers.get('Range', None) + brange = range_header_to_tuple(brange) + assert brange != () + if brange: + (fb, lb) = brange + if lb == '': + lb = size + if fb < 0 or fb > size or lb > size: + raise RangeError('Requested Range Not Satisfiable') + size = (lb - fb) + fo = RangeableFileObject(fo, (fb, lb)) + headers = email.message_from_string( + 'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' % + (mtype or 'text/plain', size, modified)) + return urllib.addinfourl(fo, headers, 'file:'+file) + + +# FTP Range Support +# Unfortunately, a large amount of base FTP code had to be copied +# from urllib and urllib2 in order to insert the FTP REST command. +# Code modifications for range support have been commented as +# follows: +# -- range support modifications start/end here + +from urllib import splitport, splituser, splitpasswd, splitattr, \ + unquote, addclosehook, addinfourl +import ftplib +import socket +import sys +import mimetypes +import email + +class FTPRangeHandler(urllib2.FTPHandler): + def ftp_open(self, req): + host = req.get_host() + if not host: + raise IOError('ftp error', 'no host given') + host, port = splitport(host) + if port is None: + port = ftplib.FTP_PORT + else: + port = int(port) + + # username/password handling + user, host = splituser(host) + if user: + user, passwd = splitpasswd(user) + else: + passwd = None + host = unquote(host) + user = unquote(user or '') + passwd = unquote(passwd or '') + + try: + host = socket.gethostbyname(host) + except socket.error, msg: + raise urllib2.URLError(msg) + path, attrs = splitattr(req.get_selector()) + dirs = path.split('/') + dirs = map(unquote, dirs) + dirs, file = dirs[:-1], dirs[-1] + if dirs and not dirs[0]: + dirs = dirs[1:] + try: + fw = self.connect_ftp(user, passwd, host, port, dirs) + type = file and 'I' or 'D' + for attr in attrs: + attr, value = splitattr(attr) + if attr.lower() == 'type' and \ + value in ('a', 'A', 'i', 'I', 'd', 'D'): + type = value.upper() + + # -- range support modifications start here + rest = None + range_tup = range_header_to_tuple(req.headers.get('Range', None)) + assert range_tup != () + if range_tup: + (fb, lb) = range_tup + if fb > 0: + rest = fb + # -- range support modifications end here + + fp, retrlen = fw.retrfile(file, type, rest) + + # -- range support modifications start here + if range_tup: + (fb, lb) = range_tup + if lb == '': + if retrlen is None or retrlen == 0: + raise RangeError('Requested Range Not Satisfiable due' + ' to unobtainable file length.') + lb = retrlen + retrlen = lb - fb + if retrlen < 0: + # beginning of range is larger than file + raise RangeError('Requested Range Not Satisfiable') + else: + retrlen = lb - fb + fp = RangeableFileObject(fp, (0, retrlen)) + # -- range support modifications end here + + headers = "" + mtype = mimetypes.guess_type(req.get_full_url())[0] + if mtype: + headers += "Content-Type: %s\n" % mtype + if retrlen is not None and retrlen >= 0: + headers += "Content-Length: %d\n" % retrlen + headers = email.message_from_string(headers) + return addinfourl(fp, headers, req.get_full_url()) + except ftplib.all_errors, msg: + raise IOError('ftp error', msg), sys.exc_info()[2] + + def connect_ftp(self, user, passwd, host, port, dirs): + fw = ftpwrapper(user, passwd, host, port, dirs) + return fw + +class ftpwrapper(urllib.ftpwrapper): + # range support note: + # this ftpwrapper code is copied directly from + # urllib. The only enhancement is to add the rest + # argument and pass it on to ftp.ntransfercmd + def retrfile(self, file, type, rest=None): + self.endtransfer() + if type in ('d', 'D'): + cmd = 'TYPE A' + isdir = 1 + else: + cmd = 'TYPE ' + type + isdir = 0 + try: + self.ftp.voidcmd(cmd) + except ftplib.all_errors: + self.init() + self.ftp.voidcmd(cmd) + conn = None + if file and not isdir: + # Use nlst to see if the file exists at all + try: + self.ftp.nlst(file) + except ftplib.error_perm, reason: + raise IOError('ftp error', reason), sys.exc_info()[2] + # Restore the transfer mode! + self.ftp.voidcmd(cmd) + # Try to retrieve as a file + try: + cmd = 'RETR ' + file + conn = self.ftp.ntransfercmd(cmd, rest) + except ftplib.error_perm, reason: + if str(reason).startswith('501'): + # workaround for REST not supported error + fp, retrlen = self.retrfile(file, type) + fp = RangeableFileObject(fp, (rest,'')) + return (fp, retrlen) + elif not str(reason).startswith('550'): + raise IOError('ftp error', reason), sys.exc_info()[2] + if not conn: + # Set transfer mode to ASCII! + self.ftp.voidcmd('TYPE A') + # Try a directory listing + if file: + cmd = 'LIST ' + file + else: + cmd = 'LIST' + conn = self.ftp.ntransfercmd(cmd) + self.busy = 1 + # Pass back both a suitably decorated object and a retrieval length + return (addclosehook(conn[0].makefile('rb'), + self.endtransfer), conn[1]) + + +#################################################################### +# Range Tuple Functions +# XXX: These range tuple functions might go better in a class. + +_rangere = None +def range_header_to_tuple(range_header): + """Get a (firstbyte,lastbyte) tuple from a Range header value. + + Range headers have the form "bytes=-". This + function pulls the firstbyte and lastbyte values and returns + a (firstbyte,lastbyte) tuple. If lastbyte is not specified in + the header value, it is returned as an empty string in the + tuple. + + Return None if range_header is None + Return () if range_header does not conform to the range spec + pattern. + + """ + global _rangere + if range_header is None: + return None + if _rangere is None: + import re + _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)') + match = _rangere.match(range_header) + if match: + tup = range_tuple_normalize(match.group(1, 2)) + if tup and tup[1]: + tup = (tup[0], tup[1]+1) + return tup + return () + +def range_tuple_to_header(range_tup): + """Convert a range tuple to a Range header value. + Return a string of the form "bytes=-" or None + if no range is needed. + """ + if range_tup is None: + return None + range_tup = range_tuple_normalize(range_tup) + if range_tup: + if range_tup[1]: + range_tup = (range_tup[0], range_tup[1] - 1) + return 'bytes=%s-%s' % range_tup + +def range_tuple_normalize(range_tup): + """Normalize a (first_byte,last_byte) range tuple. + Return a tuple whose first element is guaranteed to be an int + and whose second element will be '' (meaning: the last byte) or + an int. Finally, return None if the normalized tuple == (0,'') + as that is equivelant to retrieving the entire file. + """ + if range_tup is None: + return None + # handle first byte + fb = range_tup[0] + if fb in (None, ''): + fb = 0 + else: + fb = int(fb) + # handle last byte + try: + lb = range_tup[1] + except IndexError: + lb = '' + else: + if lb is None: + lb = '' + elif lb != '': + lb = int(lb) + # check if range is over the entire file + if (fb, lb) == (0, ''): + return None + # check that the range is valid + if lb < fb: + raise RangeError('Invalid byte range: %s-%s' % (fb, lb)) + return (fb, lb)