thirdparty/google_appengine/google/appengine/api/urlfetch.py
changeset 2309 be1b94099f2d
parent 686 df109be0567c
child 2413 d0b7dac5325c
equal deleted inserted replaced
2307:81c128f487e6 2309:be1b94099f2d
    28 import os
    28 import os
    29 import UserDict
    29 import UserDict
    30 import urllib2
    30 import urllib2
    31 import urlparse
    31 import urlparse
    32 
    32 
       
    33 from google.appengine.api import apiproxy_rpc
    33 from google.appengine.api import apiproxy_stub_map
    34 from google.appengine.api import apiproxy_stub_map
    34 from google.appengine.api import urlfetch_service_pb
    35 from google.appengine.api import urlfetch_service_pb
    35 from google.appengine.api.urlfetch_errors import *
    36 from google.appengine.api.urlfetch_errors import *
    36 from google.appengine.runtime import apiproxy_errors
    37 from google.appengine.runtime import apiproxy_errors
    37 
    38 
   184       return True
   185       return True
   185 
   186 
   186   return False
   187   return False
   187 
   188 
   188 
   189 
       
   190 def __create_rpc(deadline=None, callback=None):
       
   191   """DO NOT USE.  WILL CHANGE AND BREAK YOUR CODE.
       
   192 
       
   193   Creates an RPC object for use with the urlfetch API.
       
   194 
       
   195   Args:
       
   196     deadline: deadline in seconds for the operation.
       
   197     callback: callable to invoke on completion.
       
   198 
       
   199   Returns:
       
   200     A _URLFetchRPC object.
       
   201   """
       
   202   return _URLFetchRPC(deadline, callback)
       
   203 
       
   204 
   189 def fetch(url, payload=None, method=GET, headers={}, allow_truncated=False,
   205 def fetch(url, payload=None, method=GET, headers={}, allow_truncated=False,
   190           follow_redirects=True):
   206           follow_redirects=True, deadline=None):
   191   """Fetches the given HTTP URL, blocking until the result is returned.
   207   """Fetches the given HTTP URL, blocking until the result is returned.
   192 
   208 
   193   Other optional parameters are:
   209   Other optional parameters are:
   194      method: GET, POST, HEAD, PUT, or DELETE
   210      method: GET, POST, HEAD, PUT, or DELETE
   195      payload: POST or PUT payload (implies method is not GET, HEAD, or DELETE)
   211      payload: POST or PUT payload (implies method is not GET, HEAD, or DELETE).
       
   212        this is ignored if the method is not POST or PUT.
   196      headers: dictionary of HTTP headers to send with the request
   213      headers: dictionary of HTTP headers to send with the request
   197      allow_truncated: if true, truncate large responses and return them without
   214      allow_truncated: if true, truncate large responses and return them without
   198        error. otherwise, ResponseTooLargeError will be thrown when a response is
   215        error. otherwise, ResponseTooLargeError will be thrown when a response is
   199        truncated.
   216        truncated.
   200      follow_redirects: if true (the default), redirects are
   217      follow_redirects: if true (the default), redirects are
   202        redirects) contains the final destination's payload and the
   219        redirects) contains the final destination's payload and the
   203        response status is 200.  You lose, however, the redirect chain
   220        response status is 200.  You lose, however, the redirect chain
   204        information.  If false, you see the HTTP response yourself,
   221        information.  If false, you see the HTTP response yourself,
   205        including the 'Location' header, and redirects are not
   222        including the 'Location' header, and redirects are not
   206        followed.
   223        followed.
       
   224      deadline: deadline in seconds for the operation.
   207 
   225 
   208   We use a HTTP/1.1 compliant proxy to fetch the result.
   226   We use a HTTP/1.1 compliant proxy to fetch the result.
   209 
   227 
   210   The returned data structure has the following fields:
   228   The returned data structure has the following fields:
   211      content: string containing the response from the server
   229      content: string containing the response from the server
   216   urlfetch.InvalidURLError. If the server cannot be contacted, we throw a
   234   urlfetch.InvalidURLError. If the server cannot be contacted, we throw a
   217   urlfetch.DownloadError.  Note that HTTP errors are returned as a part
   235   urlfetch.DownloadError.  Note that HTTP errors are returned as a part
   218   of the returned structure, so HTTP errors like 404 do not result in an
   236   of the returned structure, so HTTP errors like 404 do not result in an
   219   exception.
   237   exception.
   220   """
   238   """
   221   if isinstance(method, basestring):
   239   rpc = __create_rpc(deadline=deadline)
   222     method = method.upper()
   240   rpc.make_call(url, payload, method, headers, follow_redirects)
   223   method = _URL_STRING_MAP.get(method, method)
   241   return rpc.get_result(allow_truncated)
   224   if method not in _VALID_METHODS:
   242 
   225     raise InvalidMethodError('Invalid method %s.' % str(method))
   243 
   226 
   244 class _URLFetchRPC(object):
   227   if _is_fetching_self(url, method):
   245   """A RPC object that manages the urlfetch RPC.
   228     raise InvalidURLError("App cannot fetch the same URL as the one used for "
   246 
   229                           "the request.")
   247   Its primary functions are the following:
   230 
   248   1. Convert error codes to the URLFetchServiceError namespace and raise them
   231   request = urlfetch_service_pb.URLFetchRequest()
   249      when get_result is called.
   232   response = urlfetch_service_pb.URLFetchResponse()
   250   2. Wrap the urlfetch response with a _URLFetchResult object.
   233   request.set_url(url)
   251   """
   234 
   252 
   235   if method == GET:
   253   def __init__(self, deadline=None, callback=None):
   236     request.set_method(urlfetch_service_pb.URLFetchRequest.GET)
   254     """Construct a new url fetch RPC.
   237   elif method == POST:
   255 
   238     request.set_method(urlfetch_service_pb.URLFetchRequest.POST)
   256     Args:
   239   elif method == HEAD:
   257       deadline: deadline in seconds for the operation.
   240     request.set_method(urlfetch_service_pb.URLFetchRequest.HEAD)
   258       callback: callable to invoke on completion.
   241   elif method == PUT:
   259     """
   242     request.set_method(urlfetch_service_pb.URLFetchRequest.PUT)
   260     self.__rpc = apiproxy_stub_map.CreateRPC('urlfetch')
   243   elif method == DELETE:
   261     self.__rpc.deadline = deadline
   244     request.set_method(urlfetch_service_pb.URLFetchRequest.DELETE)
   262     self.__rpc.callback = callback
   245 
   263     self.__called_hooks = False
   246   if payload and (method == POST or method == PUT):
   264 
   247     request.set_payload(payload)
   265   def make_call(self, url, payload=None, method=GET, headers={},
   248 
   266                 follow_redirects=True):
   249   for key, value in headers.iteritems():
   267     """Executes the RPC call to fetch a given HTTP URL.
   250     header_proto = request.add_header()
   268 
   251     header_proto.set_key(key)
   269     See urlfetch.fetch for a thorough description of arguments.
   252     header_proto.set_value(str(value))
   270     """
   253 
   271     assert self.__rpc.state is apiproxy_rpc.RPC.IDLE
   254   request.set_followredirects(follow_redirects)
   272     if isinstance(method, basestring):
   255 
   273       method = method.upper()
   256   try:
   274     method = _URL_STRING_MAP.get(method, method)
   257     apiproxy_stub_map.MakeSyncCall('urlfetch', 'Fetch', request, response)
   275     if method not in _VALID_METHODS:
   258   except apiproxy_errors.ApplicationError, e:
   276       raise InvalidMethodError('Invalid method %s.' % str(method))
   259     if (e.application_error ==
   277 
   260         urlfetch_service_pb.URLFetchServiceError.INVALID_URL):
   278     if _is_fetching_self(url, method):
   261       raise InvalidURLError(str(e))
   279       raise InvalidURLError("App cannot fetch the same URL as the one used for "
   262     if (e.application_error ==
   280                             "the request.")
   263         urlfetch_service_pb.URLFetchServiceError.UNSPECIFIED_ERROR):
   281 
   264       raise DownloadError(str(e))
   282     self.__request = urlfetch_service_pb.URLFetchRequest()
   265     if (e.application_error ==
   283     self.__response = urlfetch_service_pb.URLFetchResponse()
   266         urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR):
   284     self.__result = None
   267       raise DownloadError(str(e))
   285     self.__request.set_url(url)
   268     if (e.application_error ==
   286 
   269         urlfetch_service_pb.URLFetchServiceError.RESPONSE_TOO_LARGE):
   287     if method == GET:
   270       raise ResponseTooLargeError(None)
   288       self.__request.set_method(urlfetch_service_pb.URLFetchRequest.GET)
   271     if (e.application_error ==
   289     elif method == POST:
   272         urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED):
   290       self.__request.set_method(urlfetch_service_pb.URLFetchRequest.POST)
   273       raise DownloadError(str(e))
   291     elif method == HEAD:
   274     raise e
   292       self.__request.set_method(urlfetch_service_pb.URLFetchRequest.HEAD)
   275   result = _URLFetchResult(response)
   293     elif method == PUT:
   276 
   294       self.__request.set_method(urlfetch_service_pb.URLFetchRequest.PUT)
   277   if not allow_truncated and response.contentwastruncated():
   295     elif method == DELETE:
   278     raise ResponseTooLargeError(result)
   296       self.__request.set_method(urlfetch_service_pb.URLFetchRequest.DELETE)
   279 
   297 
   280   return result
   298     if payload and (method == POST or method == PUT):
       
   299       self.__request.set_payload(payload)
       
   300 
       
   301     for key, value in headers.iteritems():
       
   302       header_proto = self.__request.add_header()
       
   303       header_proto.set_key(key)
       
   304       header_proto.set_value(str(value))
       
   305 
       
   306     self.__request.set_followredirects(follow_redirects)
       
   307     if self.__rpc.deadline:
       
   308       self.__request.set_deadline(self.__rpc.deadline)
       
   309 
       
   310     apiproxy_stub_map.apiproxy.GetPreCallHooks().Call(
       
   311         'urlfetch', 'Fetch', self.__request, self.__response)
       
   312     self.__rpc.MakeCall('urlfetch', 'Fetch', self.__request, self.__response)
       
   313 
       
   314   def wait(self):
       
   315     """Waits for the urlfetch RPC to finish.  Idempotent.
       
   316     """
       
   317     assert self.__rpc.state is not apiproxy_rpc.RPC.IDLE
       
   318     if self.__rpc.state is apiproxy_rpc.RPC.RUNNING:
       
   319       self.__rpc.Wait()
       
   320 
       
   321   def check_success(self, allow_truncated=False):
       
   322     """Check success and convert RPC exceptions to urlfetch exceptions.
       
   323 
       
   324     This method waits for the RPC if it has not yet finished, and calls the
       
   325     post-call hooks on the first invocation.
       
   326 
       
   327     Args:
       
   328       allow_truncated: if False, an error is raised if the response was
       
   329         truncated.
       
   330 
       
   331     Raises:
       
   332       InvalidURLError if the url was invalid.
       
   333       DownloadError if there was a problem fetching the url.
       
   334       ResponseTooLargeError if the response was either truncated (and
       
   335         allow_truncated is false) or if it was too big for us to download.
       
   336     """
       
   337     assert self.__rpc.state is not apiproxy_rpc.RPC.IDLE
       
   338     if self.__rpc.state is apiproxy_rpc.RPC.RUNNING:
       
   339       self.wait()
       
   340 
       
   341     try:
       
   342       self.__rpc.CheckSuccess()
       
   343       if not self.__called_hooks:
       
   344         self.__called_hooks = True
       
   345         apiproxy_stub_map.apiproxy.GetPostCallHooks().Call(
       
   346             'urlfetch', 'Fetch', self.__request, self.__response)
       
   347     except apiproxy_errors.ApplicationError, e:
       
   348       if (e.application_error ==
       
   349           urlfetch_service_pb.URLFetchServiceError.INVALID_URL):
       
   350         raise InvalidURLError(str(e))
       
   351       if (e.application_error ==
       
   352           urlfetch_service_pb.URLFetchServiceError.UNSPECIFIED_ERROR):
       
   353         raise DownloadError(str(e))
       
   354       if (e.application_error ==
       
   355           urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR):
       
   356         raise DownloadError(str(e))
       
   357       if (e.application_error ==
       
   358           urlfetch_service_pb.URLFetchServiceError.RESPONSE_TOO_LARGE):
       
   359         raise ResponseTooLargeError(None)
       
   360       if (e.application_error ==
       
   361           urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED):
       
   362         raise DownloadError(str(e))
       
   363       raise e
       
   364 
       
   365     if self.__response.contentwastruncated() and not allow_truncated:
       
   366       raise ResponseTooLargeError(_URLFetchResult(self.__response))
       
   367 
       
   368   def get_result(self, allow_truncated=False):
       
   369     """Returns the RPC result or raises an exception if the rpc failed.
       
   370 
       
   371     This method waits for the RPC if not completed, and checks success.
       
   372 
       
   373     Args:
       
   374       allow_truncated: if False, an error is raised if the response was
       
   375         truncated.
       
   376 
       
   377     Returns:
       
   378       The urlfetch result.
       
   379 
       
   380     Raises:
       
   381       Error if the rpc has not yet finished.
       
   382       InvalidURLError if the url was invalid.
       
   383       DownloadError if there was a problem fetching the url.
       
   384       ResponseTooLargeError if the response was either truncated (and
       
   385         allow_truncated is false) or if it was too big for us to download.
       
   386     """
       
   387     if self.__result is None:
       
   388       self.check_success(allow_truncated)
       
   389       self.__result = _URLFetchResult(self.__response)
       
   390     return self.__result
       
   391 
   281 
   392 
   282 Fetch = fetch
   393 Fetch = fetch
   283 
   394 
   284 
   395 
   285 class _URLFetchResult(object):
   396 class _URLFetchResult(object):
   286   """A Pythonic representation of our fetch response protocol buffer."""
   397   """A Pythonic representation of our fetch response protocol buffer.
       
   398   """
       
   399 
   287   def __init__(self, response_proto):
   400   def __init__(self, response_proto):
       
   401     """Constructor.
       
   402 
       
   403     Args:
       
   404       response_proto: the URLFetchResponse proto buffer to wrap.
       
   405     """
   288     self.__pb = response_proto
   406     self.__pb = response_proto
   289     self.content = response_proto.content()
   407     self.content = response_proto.content()
   290     self.status_code = response_proto.statuscode()
   408     self.status_code = response_proto.statuscode()
   291     self.content_was_truncated = response_proto.contentwastruncated()
   409     self.content_was_truncated = response_proto.contentwastruncated()
   292     self.headers = _CaselessDict()
   410     self.headers = _CaselessDict()