|
1 #!/usr/bin/env python |
|
2 # |
|
3 # Copyright 2007 Google Inc. |
|
4 # |
|
5 # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 # you may not use this file except in compliance with the License. |
|
7 # You may obtain a copy of the License at |
|
8 # |
|
9 # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 # |
|
11 # Unless required by applicable law or agreed to in writing, software |
|
12 # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 # See the License for the specific language governing permissions and |
|
15 # limitations under the License. |
|
16 # |
|
17 |
|
"""URL downloading API.

Methods defined in this module:
   Fetch(): fetches a given URL using an HTTP GET, POST, HEAD, PUT, or DELETE
"""
|
23 |
|
24 |
|
25 |
|
26 |
|
27 |
|
28 import UserDict |
|
29 |
|
30 from google.appengine.api import apiproxy_stub_map |
|
31 from google.appengine.api import urlfetch_service_pb |
|
32 from google.appengine.api.urlfetch_errors import * |
|
33 from google.appengine.runtime import apiproxy_errors |
|
34 |
|
35 |
|
# Integer codes identifying the HTTP methods that fetch() accepts.
GET, POST, HEAD, PUT, DELETE = 1, 2, 3, 4, 5


# Upper-case method name -> integer method code; fetch() uses this to accept
# method names given as strings.
_URL_STRING_MAP = dict(GET=GET, POST=POST, HEAD=HEAD, PUT=PUT, DELETE=DELETE)


# Every integer method code that fetch() treats as valid.
_VALID_METHODS = frozenset((GET, POST, HEAD, PUT, DELETE))
|
53 |
|
54 |
|
class _CaselessDict(UserDict.IterableUserDict):
    """Case insensitive dictionary.

    This class was lifted from os.py and slightly modified.

    Items live in ``self.data`` under the key casing that was most recently
    used to set them; ``self.caseless_keys`` maps each lower-cased key to that
    stored casing.  Every mutator has to keep the two dictionaries in step.
    """

    def __init__(self):
        UserDict.IterableUserDict.__init__(self)
        self.caseless_keys = {}

    def __setitem__(self, key, item):
        """Set dictionary item.

        Args:
          key: Key of new item.  Key is case insensitive, so "d['Key'] = value"
            will replace previous values set by "d['key'] = old_value".
          item: Item to store.
        """
        caseless_key = key.lower()
        if caseless_key in self.caseless_keys:
            # Drop the entry stored under the previous casing so that only one
            # casing of the key is ever present in self.data.
            del self.data[self.caseless_keys[caseless_key]]
        self.caseless_keys[caseless_key] = key
        self.data[key] = item

    def __getitem__(self, key):
        """Get dictionary item.

        Args:
          key: Key of item to get.  Key is case insensitive, so "d['Key']" is
            the same as "d['key']".

        Returns:
          Item associated with key.

        Raises:
          KeyError: If no casing of key is present.
        """
        return self.data[self.caseless_keys[key.lower()]]

    def __delitem__(self, key):
        """Remove item from dictionary.

        Args:
          key: Key of item to remove.  Key is case insensitive, so
            "del d['Key']" is the same as "del d['key']".

        Raises:
          KeyError: If no casing of key is present.
        """
        caseless_key = key.lower()
        del self.data[self.caseless_keys[caseless_key]]
        del self.caseless_keys[caseless_key]

    def has_key(self, key):
        """Determine if dictionary has item with specific key.

        Args:
          key: Key to check for presence.  Key is case insensitive, so
            "d.has_key('Key')" evaluates to the same value as
            "d.has_key('key')".

        Returns:
          True if dictionary contains key, else False.
        """
        return key.lower() in self.caseless_keys

    def __contains__(self, key):
        """Same as 'has_key', but used for the 'in' operator."""
        return self.has_key(key)

    def get(self, key, failobj=None):
        """Get dictionary item, defaulting to another value if it is absent.

        Args:
          key: Key of item to get.  Key is case insensitive, so "d.get('Key')"
            is the same as "d.get('key')".
          failobj: Value to return if key not in dictionary.

        Returns:
          Item associated with key, or failobj when the key is not present.
        """
        try:
            cased_key = self.caseless_keys[key.lower()]
        except KeyError:
            return failobj
        return self.data[cased_key]

    def update(self, dict=None, **kwargs):
        """Update dictionary using values from another dictionary and keywords.

        Args:
          dict: Dictionary to update from.  An object without a keys() method
            is iterated as a sequence of (key, value) pairs instead.
          kwargs: Keyword arguments to update from.
        """
        if dict:
            try:
                keys = dict.keys()
            except AttributeError:
                # Not a mapping: treat it as an iterable of (key, value) pairs.
                for k, v in dict:
                    self[k] = v
            else:
                for k in keys:
                    self[k] = dict[k]
        if kwargs:
            self.update(kwargs)

    def clear(self):
        """Remove every item.

        Overridden because the inherited Python 2 UserDict.clear() empties
        only self.data, which would leave stale entries in caseless_keys.
        """
        self.data.clear()
        self.caseless_keys.clear()

    def pop(self, key, *args):
        """Remove an item and return its value.

        Overridden because the inherited Python 2 UserDict.pop() looks the key
        up case-sensitively in self.data and never updates caseless_keys.

        Args:
          key: Key of item to remove.  Key is case insensitive.
          *args: Optional single default returned when the key is absent.

        Returns:
          The removed item, or the default when the key is absent.

        Raises:
          KeyError: If the key is absent and no default was given.
          TypeError: If more than one default is given.
        """
        if len(args) > 1:
            raise TypeError('pop expected at most 2 arguments, got %d'
                            % (len(args) + 1))
        try:
            cased_key = self.caseless_keys.pop(key.lower())
        except KeyError:
            if args:
                return args[0]
            raise KeyError(key)
        return self.data.pop(cased_key)

    def popitem(self):
        """Remove and return an arbitrary (key, item) pair.

        Overridden because the inherited Python 2 UserDict.popitem() removes
        the pair from self.data only.

        Raises:
          KeyError: If the dictionary is empty.
        """
        key, item = self.data.popitem()
        del self.caseless_keys[key.lower()]
        return key, item

    def copy(self):
        """Make a shallow, case sensitive copy of self.

        Returns:
          A plain dict keyed by the stored casing of each key.
        """
        return dict(self)
|
154 |
|
155 |
|
def fetch(url, payload=None, method=GET, headers=None, allow_truncated=False):
    """Fetches the given HTTP URL, blocking until the result is returned.

    Other optional parameters are:
       method: GET, POST, HEAD, PUT, or DELETE, given either as one of the
         module constants or as a case-insensitive string name
       payload: POST or PUT payload; it is only sent when it is non-empty and
         the method is POST or PUT
       headers: dictionary of HTTP headers to send with the request; None
         (the default) sends no additional headers
       allow_truncated: if true, truncate large responses and return them
         without error. otherwise, ResponseTooLargeError will be thrown when a
         response is truncated.

    We use a HTTP/1.1 compliant proxy to fetch the result.

    The returned data structure has the following fields:
       content: string containing the response from the server
       status_code: HTTP status code returned by the server
       headers: dictionary of headers returned by the server

    If the URL is an empty string or obviously invalid, we throw an
    urlfetch.InvalidURLError. If the server cannot be contacted, we throw a
    urlfetch.DownloadError.  Note that HTTP errors are returned as a part
    of the returned structure, so HTTP errors like 404 do not result in an
    exception.

    Raises:
      InvalidMethodError: if method is not one of the supported methods.
      InvalidURLError: if the service reports INVALID_URL.
      DownloadError: if the service reports UNSPECIFIED_ERROR or FETCH_ERROR.
      ResponseTooLargeError: if the service reports RESPONSE_TOO_LARGE, or if
        the response was truncated and allow_truncated is false.
    """
    request = urlfetch_service_pb.URLFetchRequest()
    response = urlfetch_service_pb.URLFetchResponse()
    request.set_url(url)

    # Accept method names as strings ('get', 'POST', ...) as well as the
    # integer module constants.
    if isinstance(method, basestring):
        method = method.upper()
    method = _URL_STRING_MAP.get(method, method)
    if method not in _VALID_METHODS:
        raise InvalidMethodError('Invalid method %s.' % str(method))

    # Translate the module-level constant into the protocol buffer's value.
    request_pb = urlfetch_service_pb.URLFetchRequest
    request.set_method({
        GET: request_pb.GET,
        POST: request_pb.POST,
        HEAD: request_pb.HEAD,
        PUT: request_pb.PUT,
        DELETE: request_pb.DELETE,
    }[method])

    if payload and method in (POST, PUT):
        request.set_payload(payload)

    # headers defaults to None rather than a shared mutable {} literal.
    if headers is not None:
        for key, value in headers.iteritems():
            header_proto = request.add_header()
            header_proto.set_key(key)
            header_proto.set_value(value)

    try:
        apiproxy_stub_map.MakeSyncCall('urlfetch', 'Fetch', request, response)
    except apiproxy_errors.ApplicationError as e:
        error_pb = urlfetch_service_pb.URLFetchServiceError
        error_code = e.application_error
        if error_code == error_pb.INVALID_URL:
            raise InvalidURLError(str(e))
        if error_code in (error_pb.UNSPECIFIED_ERROR, error_pb.FETCH_ERROR):
            raise DownloadError(str(e))
        if error_code == error_pb.RESPONSE_TOO_LARGE:
            raise ResponseTooLargeError(None)
        # Unrecognized error code: re-raise unchanged, keeping the original
        # traceback.
        raise
    result = _URLFetchResult(response)

    if not allow_truncated and response.contentwastruncated():
        raise ResponseTooLargeError(result)

    return result


# Alias so the function is also reachable as Fetch(), the name used in the
# module docstring.
Fetch = fetch
|
232 |
|
233 |
|
class _URLFetchResult(object):
    """A Pythonic representation of our fetch response protocol buffer.

    Attributes:
      content: value of the response's content() accessor.
      status_code: value of the response's statuscode() accessor.
      content_was_truncated: value of the response's contentwastruncated()
        accessor.
      headers: _CaselessDict built from the key()/value() of every entry in
        the response's header_list().
    """

    def __init__(self, response_proto):
        """Copies the fields of response_proto onto plain attributes.

        Args:
          response_proto: fetch response protocol buffer to wrap.
        """
        # The raw protocol buffer is kept in a name-mangled private attribute.
        self.__pb = response_proto
        self.content = response_proto.content()
        self.status_code = response_proto.statuscode()
        self.content_was_truncated = response_proto.contentwastruncated()

        response_headers = _CaselessDict()
        for entry in response_proto.header_list():
            response_headers[entry.key()] = entry.value()
        self.headers = response_headers