import httplib
import logging
import socket
import urllib
import urlparse

from google.appengine.api import apiproxy_stub
from google.appengine.api import urlfetch
from google.appengine.api import urlfetch_errors
from google.appengine.api import urlfetch_service_pb
from google.appengine.runtime import apiproxy_errors

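# Fetch limits used by the stub: MAX_RESPONSE_SIZE caps how much of a response
# body is returned, MAX_REDIRECTS bounds the redirect loop in _RetrieveURL,
# and REDIRECT_STATUSES lists the status codes treated as redirects.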
MAX_RESPONSE_SIZE = 2 ** 24

MAX_REDIRECTS = urlfetch.MAX_REDIRECTS

REDIRECT_STATUSES = frozenset([
  httplib.MOVED_PERMANENTLY,
  httplib.FOUND,
  httplib.SEE_OTHER,
  httplib.TEMPORARY_REDIRECT,
])
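
# Ports the production urlfetch service is willing to contact. The stub only
# logs a warning (in _RetrieveURL) when a URL names any other port.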
PORTS_ALLOWED_IN_PRODUCTION = (
    None, '80', '443', '4443', '8080', '8081', '8082', '8083', '8084', '8085',
    '8086', '8087', '8088', '8089', '8188', '8444', '8990')

_API_CALL_DEADLINE = 5.0

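
# Request headers the application may not supply itself; _SanitizeHttpHeaders
# strips these from every outgoing request.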
_UNTRUSTED_REQUEST_HEADERS = frozenset([
  'content-length',
  'host',
  'referer',
  'user-agent',
  'vary',
  'via',
  'x-forwarded-for',
])

class URLFetchServiceStub(apiproxy_stub.APIProxyStub):
  """Stub version of the urlfetch API to be used with apiproxy_stub_map."""

  def __init__(self, service_name='urlfetch'):
    """Initializer.

    Args:
      service_name: Service name expected for all calls.
    """
    super(URLFetchServiceStub, self).__init__(service_name)

  def _Dynamic_Fetch(self, request, response):
    """Trivial implementation of URLFetchService::Fetch().

    Args:
      request: the fetch to perform, a URLFetchRequest
      response: the fetch response, a URLFetchResponse
    """
    (protocol, host, path, parameters, query,
     fragment) = urlparse.urlparse(request.url())

    payload = ''
    if request.has_payload():
      payload = request.payload()

    method = request.method()
    if method == urlfetch_service_pb.URLFetchRequest.GET:
      method = 'GET'
    elif method == urlfetch_service_pb.URLFetchRequest.POST:
      method = 'POST'
    elif method == urlfetch_service_pb.URLFetchRequest.HEAD:
      method = 'HEAD'
    elif method == urlfetch_service_pb.URLFetchRequest.PUT:
      method = 'PUT'
    elif method == urlfetch_service_pb.URLFetchRequest.DELETE:
      method = 'DELETE'
    else:
      logging.error('Invalid method: %s', method)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.UNSPECIFIED_ERROR)

    if not (protocol == 'http' or protocol == 'https'):
      logging.error('Invalid protocol: %s', protocol)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

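    # Strip request headers the application is not allowed to supply before
    # performing the fetch.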
    sanitized_headers = self._SanitizeHttpHeaders(_UNTRUSTED_REQUEST_HEADERS,
                                                  request.header_list())
    request.clear_header()
    request.header_list().extend(sanitized_headers)

    self._RetrieveURL(request.url(), payload, method,
                      request.header_list(), response,
                      follow_redirects=request.followredirects())

  def _RetrieveURL(self, url, payload, method, headers, response,
                   follow_redirects=True):
    """Retrieves a URL.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any.
      method: HTTP method to use (e.g., 'GET').
      headers: List of additional header objects to use for the request.
      response: Response object to populate.
      follow_redirects: Optional setting (defaulting to True) for whether to
        transparently follow redirects (up to MAX_REDIRECTS).

    Raises:
      apiproxy_errors.ApplicationError with FETCH_ERROR in cases of failure.
    """
    last_protocol = ''
    last_host = ''

    for redirect_number in xrange(MAX_REDIRECTS + 1):
      parsed = urlparse.urlparse(url)
      protocol, host, path, parameters, query, fragment = parsed
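
      # A redirect Location may omit the scheme and host; remember the last
      # ones seen so such URLs can be resolved in the fallback below.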

      port = urllib.splitport(urllib.splituser(host)[1])[1]

      if port not in PORTS_ALLOWED_IN_PRODUCTION:
        logging.warning(
          'urlfetch received %s ; port %s is not allowed in production!' %
          (url, port))
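      # The stub only warns and still performs the fetch; production refuses
      # fetches to ports outside PORTS_ALLOWED_IN_PRODUCTION.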

      if host == '' and protocol == '':
        host = last_host
        protocol = last_protocol

      adjusted_headers = {
        'Content-Length': len(payload),
        'Host': host,
        'Accept': '*/*',
      }
      if method == 'POST' and payload:
        adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

      for header in headers:
        adjusted_headers[header.key().title()] = header.value()

      logging.debug('Making HTTP request: host = %s, '
                    'url = %s, payload = %s, headers = %s',
                    host, url, payload, adjusted_headers)
      try:
        if protocol == 'http':
          connection = httplib.HTTPConnection(host)
        elif protocol == 'https':
          connection = httplib.HTTPSConnection(host)
        else:
          error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg)

        last_protocol = protocol
        last_host = host

        if query != '':
          full_path = path + '?' + query
        else:
          full_path = path

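        # httplib here offers no per-request timeout, so the API deadline is
        # enforced by temporarily lowering the process-wide socket default
        # timeout and restoring it afterwards.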
        orig_timeout = socket.getdefaulttimeout()
        try:
          socket.setdefaulttimeout(_API_CALL_DEADLINE)
          connection.request(method, full_path, payload, adjusted_headers)
          http_response = connection.getresponse()
          http_response_data = http_response.read()
        finally:
          socket.setdefaulttimeout(orig_timeout)
          connection.close()
      except (httplib.error, socket.error, IOError), e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))

      if http_response.status in REDIRECT_STATUSES and follow_redirects:
        url = http_response.getheader('Location', None)
        if url is None:
          error_msg = 'Redirecting response was missing "Location" header'
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg)
      else:
        response.set_statuscode(http_response.status)
        response.set_content(http_response_data[:MAX_RESPONSE_SIZE])
        for header_key, header_value in http_response.getheaders():
          header_proto = response.add_header()
          header_proto.set_key(header_key)
          header_proto.set_value(header_value)

        if len(http_response_data) > MAX_RESPONSE_SIZE:
          response.set_contentwastruncated(True)

        break
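
    # for/else: the else clause below runs only if the loop completed without
    # break, i.e. every attempt produced yet another redirect.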
    else:
      error_msg = 'Too many repeated redirects'
      logging.error(error_msg)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg)

  def _SanitizeHttpHeaders(self, untrusted_headers, headers):
    """Cleans "unsafe" headers from the HTTP request.

    Args:
      untrusted_headers: Set of untrusted header names, all lowercase.
      headers: List of header protocol buffers, each with key() and value()
        accessors.

    Returns:
      A generator over the headers whose names are not in untrusted_headers.
    """
    return (h for h in headers if h.key().lower() not in untrusted_headers)
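

# A hedged smoke-test sketch (assumes the stub was registered under 'urlfetch'
# as shown above and that outbound sockets are available):
#
#   from google.appengine.api import urlfetch
#   result = urlfetch.fetch('http://example.com/')
#   print result.status_code, len(result.content)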