|
1 ############################################################################## |
|
2 # |
|
3 # Copyright (c) 2009 Zope Corporation and Contributors. |
|
4 # All Rights Reserved. |
|
5 # |
|
6 # This software is subject to the provisions of the Zope Public License, |
|
7 # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. |
|
8 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED |
|
9 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
10 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS |
|
11 # FOR A PARTICULAR PURPOSE. |
|
12 # |
|
13 ############################################################################## |
|
14 """Buildout download infrastructure""" |
|
15 |
|
16 try: |
|
17 from hashlib import md5 |
|
18 except ImportError: |
|
19 from md5 import new as md5 |
|
20 from zc.buildout.easy_install import realpath |
|
21 import logging |
|
22 import os |
|
23 import os.path |
|
24 import re |
|
25 import shutil |
|
26 import tempfile |
|
27 import urllib |
|
28 import urlparse |
|
29 import zc.buildout |
|
30 |
|
31 |
|
class URLOpener(urllib.FancyURLopener):
    """URL opener used for all buildout downloads.

    Behaves like ``urllib.FancyURLopener`` except for the handling of HTTP
    errors that no more specific handler takes care of: the implementation
    of the plain ``urllib.URLopener`` is used for those.

    """

    # Take the handler from the base URLopener class instead of the one
    # FancyURLopener overrides it with, so that an unhandled HTTP error is
    # reported to the caller rather than passed off as a response.
    http_error_default = urllib.URLopener.http_error_default
|
34 |
|
35 |
|
class ChecksumError(zc.buildout.UserError):
    """Raised when the MD5 checksum of a file doesn't match the expected one.

    """
|
38 |
|
39 |
|
# Module-wide opener instance; Download.download() installs it as urllib's
# global opener before retrieving a URL.
url_opener = URLOpener()
|
41 |
|
42 |
|
class Download(object):
    """Configurable download utility.

    Handles the download cache and offline mode.

    Download(options=None, cache=-1, namespace=None, offline=-1,
             fallback=False, hash_name=False, logger=None)

    options: mapping of buildout options (e.g. a ``buildout`` config section)
    cache: path to the download cache (excluding namespaces); if left at -1,
        the ``download-cache`` option is used
    namespace: namespace directory to use inside the cache
    offline: whether downloading online resources is forbidden; if left at
        -1, this is the case when either the ``offline`` or the
        ``install-from-cache`` option is 'true'
    fallback: whether a cached copy is only a fallback, i.e. a fresh
        download is attempted first even if the file is already cached
    hash_name: whether to use a hash of the URL as cache file name
    logger: an optional logger to receive download-related log messages

    """

    def __init__(self, options=None, cache=-1, namespace=None,
                 offline=-1, fallback=False, hash_name=False, logger=None):
        # Use a fresh mapping per call rather than a shared mutable default.
        if options is None:
            options = {}
        self.directory = options.get('directory', '')
        self.cache = cache
        if cache == -1:
            # -1 means "not given"; None is a meaningful value (no cache).
            self.cache = options.get('download-cache')
        self.namespace = namespace
        self.offline = offline
        if offline == -1:
            self.offline = (options.get('offline') == 'true'
                            or options.get('install-from-cache') == 'true')
        self.fallback = fallback
        self.hash_name = hash_name
        self.logger = logger or logging.getLogger('zc.buildout')

    @property
    def download_cache(self):
        """Path of the download cache, or None if no cache is configured.

        The cache path is joined onto the ``directory`` option and passed
        through realpath().

        """
        if self.cache is not None:
            return realpath(os.path.join(self.directory, self.cache))

    @property
    def cache_dir(self):
        """Path of the namespace directory inside the download cache.

        This is the download cache itself if there is no namespace, and None
        if no cache is configured.

        """
        if self.download_cache is not None:
            return os.path.join(self.download_cache, self.namespace or '')

    def __call__(self, url, md5sum=None, path=None):
        """Download a file according to the utility's configuration.

        url: URL to download
        md5sum: MD5 checksum to match
        path: where to place the downloaded file

        Returns a tuple ``(path, is_temp)``: the path to the downloaded file
        and a flag telling whether that file is a temporary one which the
        client code is responsible for cleaning up.

        """
        if self.cache:
            local_path, is_temp = self.download_cached(url, md5sum)
        else:
            local_path, is_temp = self.download(url, md5sum, path)

        return locate_at(local_path, path), is_temp

    def download_cached(self, url, md5sum=None):
        """Download a file from a URL using the cache.

        This method assumes that the cache has been configured. Optionally, it
        raises a ChecksumError if a cached copy of a file has an MD5 mismatch,
        but will not remove the copy in that case.

        Returns a tuple ``(cached_path, is_temp)``. Raises a UserError if the
        download cache directory doesn't exist.

        """
        if not os.path.exists(self.download_cache):
            raise zc.buildout.UserError(
                'The directory:\n'
                '%r\n'
                "to be used as a download cache doesn't exist.\n"
                % self.download_cache)
        cache_dir = self.cache_dir
        if not os.path.exists(cache_dir):
            # Only the namespace level is created; the cache itself must
            # already exist (checked above).
            os.mkdir(cache_dir)
        cache_key = self.filename(url)
        cached_path = os.path.join(cache_dir, cache_key)

        self.logger.debug('Searching cache at %s' % cache_dir)
        if os.path.isfile(cached_path):
            is_temp = False
            if self.fallback:
                # Try to refresh the cached copy. A checksum mismatch of the
                # fresh download is an error, any other failure is ignored
                # on purpose: the existing cached copy is used instead.
                try:
                    _, is_temp = self.download(url, md5sum, cached_path)
                except ChecksumError:
                    raise
                except Exception:
                    pass

            if not check_md5sum(cached_path, md5sum):
                raise ChecksumError(
                    'MD5 checksum mismatch for cached download '
                    'from %r at %r' % (url, cached_path))
            self.logger.debug('Using cache file %s' % cached_path)
        else:
            self.logger.debug('Cache miss; will cache %s as %s' %
                              (url, cached_path))
            _, is_temp = self.download(url, md5sum, cached_path)

        return cached_path, is_temp

    def download(self, url, md5sum=None, path=None):
        """Download a file from a URL to a given or temporary path.

        An online resource is always downloaded to a temporary file and moved
        to the specified path only after the download is complete and the
        checksum (if given) matches. If path is None, the temporary file is
        returned and the client code is responsible for cleaning it up.

        Returns a tuple ``(path, is_temp)``. Raises a ChecksumError if the
        checksum doesn't match, and a UserError if an online resource is
        requested in offline mode.

        """
        # A Windows path with a drive letter would be mistaken for a URL
        # whose scheme is the drive letter.
        if re.match(r"^[A-Za-z]:\\", url):
            url = 'file:' + url
        parsed_url = urlparse.urlparse(url, 'file')
        url_scheme, _, url_path = parsed_url[:3]
        if url_scheme == 'file':
            self.logger.debug('Using local resource %s' % url)
            if not check_md5sum(url_path, md5sum):
                raise ChecksumError(
                    'MD5 checksum mismatch for local resource at %r.' %
                    url_path)
            return locate_at(url_path, path), False

        if self.offline:
            raise zc.buildout.UserError(
                "Couldn't download %r in offline mode." % url)

        self.logger.info('Downloading %s' % url)
        # Make urlretrieve() use our opener.
        urllib._urlopener = url_opener
        handle, tmp_path = tempfile.mkstemp(prefix='buildout-')
        # Only the reserved file name is needed; urlretrieve() opens the file
        # by name itself, so don't keep a second descriptor open meanwhile.
        os.close(handle)
        try:
            tmp_path, _ = urllib.urlretrieve(url, tmp_path)
            if not check_md5sum(tmp_path, md5sum):
                raise ChecksumError(
                    'MD5 checksum mismatch downloading %r' % url)
        except:
            # Deliberately bare: whatever interrupts the download, don't
            # leave the temporary file behind. The exception is re-raised.
            os.remove(tmp_path)
            raise

        if path:
            shutil.move(tmp_path, path)
            return path, False
        else:
            return tmp_path, True

    def filename(self, url):
        """Determine a file name from a URL according to the configuration.

        If hash_name is set, the MD5 hex digest of the URL is returned.
        Otherwise the name is the last non-empty component of the URL's path
        or, if there is none, ``host:port`` as taken from the URL's network
        location (the port part is empty if the URL doesn't specify one).

        """
        if self.hash_name:
            return md5(url).hexdigest()

        if re.match(r"^[A-Za-z]:\\", url):
            url = 'file:' + url
        parsed = urlparse.urlparse(url, 'file')
        url_scheme, url_netloc, url_path = parsed[:3]

        if url_scheme == 'file':
            # Local paths are taken apart using the platform's rules.
            while url_path:
                parent, name = os.path.split(url_path)
                if name:
                    return name
                if parent == url_path:
                    # Reached a root such as '/': splitting makes no more
                    # progress and would otherwise go on forever.
                    break
                url_path = parent
        else:
            for name in reversed(url_path.split('/')):
                if name:
                    return name

        # No usable path component. Name the file after the network location
        # (element 1 of the parsed URL), leaving out any "user:password@".
        host_port = url_netloc.split('@')[-1]
        if ':' in host_port and not host_port.endswith(']'):
            url_host, url_port = host_port.rsplit(':', 1)
        else:
            # No port given (possibly a bracketed IPv6 address).
            url_host, url_port = host_port, ''
        return '%s:%s' % (url_host, url_port)
|
215 |
|
216 |
|
def check_md5sum(path, md5sum):
    """Tell whether the MD5 checksum of the file at path matches.

    No checksum being given is considered a match. The expected checksum is a
    hexadecimal digest; as the case of hexadecimal digits carries no meaning,
    it is compared case-insensitively, and surrounding whitespace is ignored.

    """
    if md5sum is None:
        return True

    expected = md5sum.strip().lower()
    checksum = md5()
    f = open(path, 'rb')
    try:
        # Feed the file in 64 KiB chunks instead of reading it in one go.
        chunk = f.read(2**16)
        while chunk:
            checksum.update(chunk)
            chunk = f.read(2**16)
    finally:
        f.close()
    return checksum.hexdigest() == expected
|
236 |
|
237 |
|
def remove(path):
    """Remove the file at path; do nothing if the path doesn't exist.

    """
    if not os.path.exists(path):
        return
    os.remove(path)
|
241 |
|
242 |
|
def locate_at(source, dest):
    """Make the file at source available at dest and return its location.

    If no dest is given, or if dest and source turn out to be the same
    location, nothing is done and source is returned. Otherwise source is
    hard-linked to dest or, where that isn't possible, copied there, and
    dest is returned.

    """
    if dest is None:
        return source
    if realpath(dest) == realpath(source):
        return source

    try:
        os.link(source, dest)
    except (AttributeError, OSError):
        # Either os.link isn't available or linking failed; copy instead.
        shutil.copyfile(source, dest)
    return dest