thirdparty/google_appengine/google/appengine/api/urlfetch_stub.py
changeset 686 df109be0567c
parent 149 f2e327a7c5de
child 828 f5fd65cc3bf3
--- a/thirdparty/google_appengine/google/appengine/api/urlfetch_stub.py	685:a440ced9a75f
+++ b/thirdparty/google_appengine/google/appengine/api/urlfetch_stub.py	686:df109be0567c
@@ -20,12 +20,14 @@
 
 
 import httplib
 import logging
 import socket
+import urllib
 import urlparse
 
+from google.appengine.api import apiproxy_stub
 from google.appengine.api import urlfetch
 from google.appengine.api import urlfetch_errors
 from google.appengine.api import urlfetch_service_pb
 from google.appengine.runtime import apiproxy_errors
 
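Both new imports are consumed later in this changeset: urllib supplies the splituser/splitport helpers behind the new port check in _RetrieveURL, and apiproxy_stub provides the APIProxyStub base class that replaces the hand-written MakeSyncCall below.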
@@ -39,29 +41,37 @@
   httplib.FOUND,
   httplib.SEE_OTHER,
   httplib.TEMPORARY_REDIRECT,
 ])
 
-
-class URLFetchServiceStub(object):
+PORTS_ALLOWED_IN_PRODUCTION = (
+    None, '80', '443', '4443', '8080', '8081', '8082', '8083', '8084', '8085',
+    '8086', '8087', '8088', '8089', '8188', '8444', '8990')
+
+_API_CALL_DEADLINE = 5.0
+
+
+_UNTRUSTED_REQUEST_HEADERS = frozenset([
+  'content-length',
+  'host',
+  'referer',
+  'user-agent',
+  'vary',
+  'via',
+  'x-forwarded-for',
+])
+
+class URLFetchServiceStub(apiproxy_stub.APIProxyStub):
   """Stub version of the urlfetch API to be used with apiproxy_stub_map."""
 
-  def MakeSyncCall(self, service, call, request, response):
-    """The main RPC entry point.
-
-    Arg:
-      service: Must be 'urlfetch'.
-      call: A string representing the rpc to make.  Must be part of
-        URLFetchService.
-      request: A protocol buffer of the type corresponding to 'call'.
-      response: A protocol buffer of the type corresponding to 'call'.
-    """
-    assert service == 'urlfetch'
-    assert request.IsInitialized()
-
-    attr = getattr(self, '_Dynamic_' + call)
-    attr(request, response)
+  def __init__(self, service_name='urlfetch'):
+    """Initializer.
+
+    Args:
+      service_name: Service name expected for all calls.
+    """
+    super(URLFetchServiceStub, self).__init__(service_name)
 
   def _Dynamic_Fetch(self, request, response):
     """Trivial implementation of URLFetchService::Fetch().
 
     Args:
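With MakeSyncCall gone, dispatch is inherited from apiproxy_stub.APIProxyStub, which checks the service name passed to __init__ and routes each call to the matching _Dynamic_<Call> method. A minimal sketch of how the stub is typically registered (the registration itself happens elsewhere in the SDK, so treat this as an assumed usage pattern rather than part of this changeset):

    from google.appengine.api import apiproxy_stub_map
    from google.appengine.api import urlfetch_stub

    # Install a fresh stub map and register the stub under the default
    # service name from __init__.
    apiproxy_stub_map.apiproxy = apiproxy_stub_map.APIProxyStubMap()
    apiproxy_stub_map.apiproxy.RegisterStub(
        'urlfetch', urlfetch_stub.URLFetchServiceStub())

    # MakeSyncCall('urlfetch', 'Fetch', request, response) now reaches
    # _Dynamic_Fetch through the APIProxyStub base class.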
@@ -91,10 +101,15 @@
     if not (protocol == 'http' or protocol == 'https'):
       logging.error('Invalid protocol: %s', protocol)
       raise apiproxy_errors.ApplicationError(
         urlfetch_service_pb.URLFetchServiceError.INVALID_URL)
 
+    sanitized_headers = self._SanitizeHttpHeaders(_UNTRUSTED_REQUEST_HEADERS,
+                                                  request.header_list())
+    request.clear_header()
+    request.header_list().extend(sanitized_headers)
+
     self._RetrieveURL(request.url(), payload, method,
                       request.header_list(), response,
                       follow_redirects=request.followredirects())
 
   def _RetrieveURL(self, url, payload, method, headers, response,
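The sanitize/clear/extend sequence above is order-sensitive: _SanitizeHttpHeaders (added at the bottom of this changeset) returns a lazy generator over request.header_list(), so it only survives request.clear_header() if clearing rebinds the proto's internal list rather than emptying it in place; the generator keeps its reference to the original list object. A stand-alone demonstration of that assumption, using a plain list in place of the header protos:

    original = ['host', 'accept']                # stands in for request.header_list()
    kept = (h for h in original if h != 'host')  # lazy, as _SanitizeHttpHeaders is

    original = []                                # stands in for request.clear_header()
    print list(kept)                             # ['accept']: the generator still
                                                 # iterates the old list object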
@@ -118,11 +133,19 @@
     """
     last_protocol = ''
     last_host = ''
 
     for redirect_number in xrange(MAX_REDIRECTS + 1):
-      (protocol, host, path, parameters, query, fragment) = urlparse.urlparse(url)
+      parsed = urlparse.urlparse(url)
+      protocol, host, path, parameters, query, fragment = parsed
+
+      port = urllib.splitport(urllib.splituser(host)[1])[1]
+
+      if port not in PORTS_ALLOWED_IN_PRODUCTION:
+        logging.warning(
+          'urlfetch received %s ; port %s is not allowed in production!' %
+          (url, port))
 
       if host == '' and protocol == '':
         host = last_host
         protocol = last_protocol
 
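The new port check relies on two Python 2 urllib helpers: splituser() strips any user:pass@ prefix from the netloc, and splitport() separates host from port, returning None when no explicit port is present, which is why None leads the PORTS_ALLOWED_IN_PRODUCTION tuple. For example:

    import urllib

    netloc = 'user:pass@example.com:8188'
    userinfo, hostport = urllib.splituser(netloc)  # ('user:pass', 'example.com:8188')
    host, port = urllib.splitport(hostport)        # ('example.com', '8188')

    urllib.splitport('example.com')                # ('example.com', None); a URL with
                                                   # no explicit port passes the check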
@@ -157,15 +180,18 @@
         if query != '':
           full_path = path + '?' + query
         else:
           full_path = path
 
+        orig_timeout = socket.getdefaulttimeout()
         try:
+          socket.setdefaulttimeout(_API_CALL_DEADLINE)
           connection.request(method, full_path, payload, adjusted_headers)
           http_response = connection.getresponse()
           http_response_data = http_response.read()
         finally:
+          socket.setdefaulttimeout(orig_timeout)
           connection.close()
       except (httplib.error, socket.error, IOError), e:
         raise apiproxy_errors.ApplicationError(
           urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))
 
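Wrapping the request in setdefaulttimeout() is how the deadline is imposed here, presumably because the SDK targets Python versions whose httplib.HTTPConnection takes no timeout argument. httplib connects lazily, so the socket created inside connection.request() inherits the temporary default, and the finally block restores the old value so the process-wide change cannot leak. The pattern in isolation:

    import socket

    orig_timeout = socket.getdefaulttimeout()  # usually None (block forever)
    try:
      socket.setdefaulttimeout(5.0)            # _API_CALL_DEADLINE
      # any socket created in here, e.g. by connection.request(),
      # inherits the 5-second timeout
    finally:
      socket.setdefaulttimeout(orig_timeout)   # always restore the global default

Because the default is process-wide, any unrelated socket opened during the fetch picks it up too; that is tolerable in a single-threaded development stub.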
@@ -174,12 +200,10 @@
         if url is None:
           error_msg = 'Redirecting response was missing "Location" header'
           logging.error(error_msg)
           raise apiproxy_errors.ApplicationError(
               urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg)
-        else:
-          method = 'GET'
       else:
         response.set_statuscode(http_response.status)
         response.set_content(http_response_data[:MAX_RESPONSE_SIZE])
         for header_key, header_value in http_response.getheaders():
           header_proto = response.add_header()
@@ -193,5 +217,14 @@
     else:
       error_msg = 'Too many repeated redirects'
       logging.error(error_msg)
       raise apiproxy_errors.ApplicationError(
           urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg)
+
+  def _SanitizeHttpHeaders(self, untrusted_headers, headers):
+    """Cleans "unsafe" headers from the HTTP request/response.
+
+    Args:
+      untrusted_headers: set of untrusted header names
+      headers: list of pairs; the first is the header name, the second its value
+    """
+    return (h for h in headers if h.key().lower() not in untrusted_headers)
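The returned generator filters case-insensitively on the header protos' key() accessor and yields the surviving protos unchanged. A minimal stand-in (the Header class here is illustrative, not the SDK's proto type):

    class Header(object):
      def __init__(self, key):
        self._key = key
      def key(self):
        return self._key

    headers = [Header('X-Forwarded-For'), Header('Accept')]
    kept = [h.key() for h in headers
            if h.key().lower() not in _UNTRUSTED_REQUEST_HEADERS]
    # kept == ['Accept']: 'x-forwarded-for' is in the untrusted set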