eggs/zc.buildout-1.5.2-py2.6.egg/zc/buildout/download.py
changeset 69 c6bca38c1cbf
equal deleted inserted replaced
68:5ff1fc726848 69:c6bca38c1cbf
       
     1 ##############################################################################
       
     2 #
       
     3 # Copyright (c) 2009 Zope Corporation and Contributors.
       
     4 # All Rights Reserved.
       
     5 #
       
     6 # This software is subject to the provisions of the Zope Public License,
       
     7 # Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
       
     8 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
       
     9 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
       
    10 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
       
    11 # FOR A PARTICULAR PURPOSE.
       
    12 #
       
    13 ##############################################################################
       
    14 """Buildout download infrastructure"""
       
    15 
       
    16 try:
       
    17     from hashlib import md5
       
    18 except ImportError:
       
    19     from md5 import new as md5
       
    20 from zc.buildout.easy_install import realpath
       
    21 import logging
       
    22 import os
       
    23 import os.path
       
    24 import re
       
    25 import shutil
       
    26 import tempfile
       
    27 import urllib
       
    28 import urlparse
       
    29 import zc.buildout
       
    30 
       
    31 
       
class URLOpener(urllib.FancyURLopener):
    """FancyURLopener variant using the plain URLopener's HTTP error default.

    The only customization is that ``http_error_default`` is taken from
    ``urllib.URLopener`` instead of ``urllib.FancyURLopener``.
    NOTE(review): per the urllib documentation the base handler raises
    IOError for unhandled HTTP errors rather than returning the error page
    as if it were the requested data -- confirm against the targeted Python
    version.

    """
    http_error_default = urllib.URLopener.http_error_default
       
    34 
       
    35 
       
class ChecksumError(zc.buildout.UserError):
    """Raised when a file's MD5 checksum does not match the expected one."""
    pass
       
    38 
       
    39 
       
# Shared opener instance; Download.download() installs it as urllib's
# module-level opener (urllib._urlopener) before retrieving a URL.
url_opener = URLOpener()
       
    41 
       
    42 
       
    43 class Download(object):
       
    44     """Configurable download utility.
       
    45 
       
    46     Handles the download cache and offline mode.
       
    47 
       
    48     Download(options=None, cache=None, namespace=None, hash_name=False)
       
    49 
       
    50     options: mapping of buildout options (e.g. a ``buildout`` config section)
       
    51     cache: path to the download cache (excluding namespaces)
       
    52     namespace: namespace directory to use inside the cache
       
    53     hash_name: whether to use a hash of the URL as cache file name
       
    54     logger: an optional logger to receive download-related log messages
       
    55 
       
    56     """
       
    57 
       
    58     def __init__(self, options={}, cache=-1, namespace=None,
       
    59                  offline=-1, fallback=False, hash_name=False, logger=None):
       
    60         self.directory = options.get('directory', '')
       
    61         self.cache = cache
       
    62         if cache == -1:
       
    63             self.cache = options.get('download-cache')
       
    64         self.namespace = namespace
       
    65         self.offline = offline
       
    66         if offline == -1:
       
    67             self.offline = (options.get('offline') == 'true'
       
    68                             or options.get('install-from-cache') == 'true')
       
    69         self.fallback = fallback
       
    70         self.hash_name = hash_name
       
    71         self.logger = logger or logging.getLogger('zc.buildout')
       
    72 
       
    73     @property
       
    74     def download_cache(self):
       
    75         if self.cache is not None:
       
    76             return realpath(os.path.join(self.directory, self.cache))
       
    77 
       
    78     @property
       
    79     def cache_dir(self):
       
    80         if self.download_cache is not None:
       
    81             return os.path.join(self.download_cache, self.namespace or '')
       
    82 
       
    83     def __call__(self, url, md5sum=None, path=None):
       
    84         """Download a file according to the utility's configuration.
       
    85 
       
    86         url: URL to download
       
    87         md5sum: MD5 checksum to match
       
    88         path: where to place the downloaded file
       
    89 
       
    90         Returns the path to the downloaded file.
       
    91 
       
    92         """
       
    93         if self.cache:
       
    94             local_path, is_temp = self.download_cached(url, md5sum)
       
    95         else:
       
    96             local_path, is_temp = self.download(url, md5sum, path)
       
    97 
       
    98         return locate_at(local_path, path), is_temp
       
    99 
       
   100     def download_cached(self, url, md5sum=None):
       
   101         """Download a file from a URL using the cache.
       
   102 
       
   103         This method assumes that the cache has been configured. Optionally, it
       
   104         raises a ChecksumError if a cached copy of a file has an MD5 mismatch,
       
   105         but will not remove the copy in that case.
       
   106 
       
   107         """
       
   108         if not os.path.exists(self.download_cache):
       
   109             raise zc.buildout.UserError(
       
   110                 'The directory:\n'
       
   111                 '%r\n'
       
   112                 "to be used as a download cache doesn't exist.\n"
       
   113                 % self.download_cache)
       
   114         cache_dir = self.cache_dir
       
   115         if not os.path.exists(cache_dir):
       
   116             os.mkdir(cache_dir)
       
   117         cache_key = self.filename(url)
       
   118         cached_path = os.path.join(cache_dir, cache_key)
       
   119 
       
   120         self.logger.debug('Searching cache at %s' % cache_dir)
       
   121         if os.path.isfile(cached_path):
       
   122             is_temp = False
       
   123             if self.fallback:
       
   124                 try:
       
   125                     _, is_temp = self.download(url, md5sum, cached_path)
       
   126                 except ChecksumError:
       
   127                     raise
       
   128                 except Exception:
       
   129                     pass
       
   130 
       
   131             if not check_md5sum(cached_path, md5sum):
       
   132                 raise ChecksumError(
       
   133                     'MD5 checksum mismatch for cached download '
       
   134                     'from %r at %r' % (url, cached_path))
       
   135             self.logger.debug('Using cache file %s' % cached_path)
       
   136         else:
       
   137             self.logger.debug('Cache miss; will cache %s as %s' %
       
   138                               (url, cached_path))
       
   139             _, is_temp = self.download(url, md5sum, cached_path)
       
   140 
       
   141         return cached_path, is_temp
       
   142 
       
   143     def download(self, url, md5sum=None, path=None):
       
   144         """Download a file from a URL to a given or temporary path.
       
   145 
       
   146         An online resource is always downloaded to a temporary file and moved
       
   147         to the specified path only after the download is complete and the
       
   148         checksum (if given) matches. If path is None, the temporary file is
       
   149         returned and the client code is responsible for cleaning it up.
       
   150 
       
   151         """
       
   152         if re.match(r"^[A-Za-z]:\\", url):
       
   153             url = 'file:' + url
       
   154         parsed_url = urlparse.urlparse(url, 'file')
       
   155         url_scheme, _, url_path = parsed_url[:3]
       
   156         if url_scheme == 'file':
       
   157             self.logger.debug('Using local resource %s' % url)
       
   158             if not check_md5sum(url_path, md5sum):
       
   159                 raise ChecksumError(
       
   160                     'MD5 checksum mismatch for local resource at %r.' %
       
   161                     url_path)
       
   162             return locate_at(url_path, path), False
       
   163 
       
   164         if self.offline:
       
   165             raise zc.buildout.UserError(
       
   166                 "Couldn't download %r in offline mode." % url)
       
   167 
       
   168         self.logger.info('Downloading %s' % url)
       
   169         urllib._urlopener = url_opener
       
   170         handle, tmp_path = tempfile.mkstemp(prefix='buildout-')
       
   171         try:
       
   172             try:
       
   173                 tmp_path, headers = urllib.urlretrieve(url, tmp_path)
       
   174                 if not check_md5sum(tmp_path, md5sum):
       
   175                     raise ChecksumError(
       
   176                         'MD5 checksum mismatch downloading %r' % url)
       
   177             finally:
       
   178                 os.close(handle)
       
   179         except:
       
   180             os.remove(tmp_path)
       
   181             raise
       
   182 
       
   183         if path:
       
   184             shutil.move(tmp_path, path)
       
   185             return path, False
       
   186         else:
       
   187             return tmp_path, True
       
   188 
       
   189     def filename(self, url):
       
   190         """Determine a file name from a URL according to the configuration.
       
   191 
       
   192         """
       
   193         if self.hash_name:
       
   194             return md5(url).hexdigest()
       
   195         else:
       
   196             if re.match(r"^[A-Za-z]:\\", url):
       
   197                 url = 'file:' + url
       
   198             parsed = urlparse.urlparse(url, 'file')
       
   199             url_path = parsed[2]
       
   200 
       
   201             if parsed[0] == 'file':
       
   202                 while True:
       
   203                     url_path, name = os.path.split(url_path)
       
   204                     if name:
       
   205                         return name
       
   206                     if not url_path:
       
   207                         break
       
   208             else:
       
   209                 for name in reversed(url_path.split('/')):
       
   210                     if name:
       
   211                         return name
       
   212 
       
   213             url_host, url_port = parsed[-2:]
       
   214             return '%s:%s' % (url_host, url_port)
       
   215 
       
   216 
       
   217 def check_md5sum(path, md5sum):
       
   218     """Tell whether the MD5 checksum of the file at path matches.
       
   219 
       
   220     No checksum being given is considered a match.
       
   221 
       
   222     """
       
   223     if md5sum is None:
       
   224         return True
       
   225 
       
   226     f = open(path, 'rb')
       
   227     checksum = md5()
       
   228     try:
       
   229         chunk = f.read(2**16)
       
   230         while chunk:
       
   231             checksum.update(chunk)
       
   232             chunk = f.read(2**16)
       
   233         return checksum.hexdigest() == md5sum
       
   234     finally:
       
   235         f.close()
       
   236 
       
   237 
       
   238 def remove(path):
       
   239     if os.path.exists(path):
       
   240         os.remove(path)
       
   241 
       
   242 
       
   243 def locate_at(source, dest):
       
   244     if dest is None or realpath(dest) == realpath(source):
       
   245         return source
       
   246 
       
   247     try:
       
   248         os.link(source, dest)
       
   249     except (AttributeError, OSError):
       
   250         shutil.copyfile(source, dest)
       
   251     return dest