thirdparty/google_appengine/google/appengine/ext/bulkload/__init__.py
changeset 149 f2e327a7c5de
parent 109 620f9b141567
child 297 35211afcd563
equal deleted inserted replaced
148:37505d64e57b 149:f2e327a7c5de
    35   bulkload.main(person)
    35   bulkload.main(person)
    36 
    36 
    37 See the Loader class for more information. Then, add a handler for it in your
    37 See the Loader class for more information. Then, add a handler for it in your
    38 app.yaml, e.g.:
    38 app.yaml, e.g.:
    39 
    39 
    40   urlmap:
    40   handlers:
    41   - regex: /load
    41   - url: /load
    42     handler:
    42     script: bulkload.py
    43       type: 1
    43     login: admin
    44       path: bulkload.py
    44 
    45       requires_login: true
    45 Finally, deploy your app and run bulkloader.py. For example, to load the
    46       admin_only: true
       
    47 
       
    48 Finally, deploy your app and run bulkload_client.py. For example, to load the
       
    49 file people.csv into a dev_appserver running on your local machine:
    46 file people.csv into a dev_appserver running on your local machine:
    50 
    47 
    51 ./bulkload_client.py --filename people.csv --kind Person --cookie ... \
    48 ./bulkloader.py --filename people.csv --kind Person --cookie ... \
    52                      --url http://localhost:8080/load
    49                      --url http://localhost:8080/load
    53 
    50 
    54 The kind parameter is used to look up the Loader instance that will be used.
    51 The kind parameter is used to look up the Loader instance that will be used.
    55 The bulkload handler should usually be admin_only, so that non-admins can't use
    52 The bulkload handler should usually be admin_only, so that non-admins can't use
    56 this handler to modify your app's data. The bulkload client uses the cookie
    53 this handler to modify your app's data. The bulkload client uses the cookie
   108 import httplib
   105 import httplib
   109 import os
   106 import os
   110 import sys
   107 import sys
   111 import traceback
   108 import traceback
   112 import types
   109 import types
       
   110 import struct
   113 
   111 
   114 
   112 
   115 import google
   113 import google
   116 import wsgiref.handlers
   114 import wsgiref.handlers
   117 
   115 
   137     raise TypeError('Expected a %s, but received %s (a %s).' %
   135     raise TypeError('Expected a %s, but received %s (a %s).' %
   138                     (type, value, value.__class__))
   136                     (type, value, value.__class__))
   139 
   137 
   140 
   138 
   141 class Loader(object):
   139 class Loader(object):
   142   """ A base class for creating datastore entities from CSV input data.
   140   """A base class for creating datastore entities from input data.
   143 
   141 
   144   To add a handler for bulk loading a new entity kind into your datastore,
   142   To add a handler for bulk loading a new entity kind into your datastore,
   145   write a subclass of this class that calls Loader.__init__ from your
   143   write a subclass of this class that calls Loader.__init__ from your
   146   class's __init__.
   144   class's __init__.
   147 
   145 
   148   If you need to run extra code to convert entities from CSV, create new
   146   If you need to run extra code to convert entities from the input
   149   properties, or otherwise modify the entities before they're inserted,
   147   data, create new properties, or otherwise modify the entities before
   150   override HandleEntity.
   148   they're inserted, override HandleEntity.
       
   149 
       
   150   See the CreateEntity method for the creation of entities from the
       
   151   (parsed) input data.
   151   """
   152   """
   152 
   153 
   153   __loaders = {}
   154   __loaders = {}
   154   __kind = None
   155   __kind = None
   155   __properties = None
   156   __properties = None
   198   def kind(self):
   199   def kind(self):
   199     """ Return the entity kind that this Loader handles.
   200     """ Return the entity kind that this Loader handles.
   200     """
   201     """
   201     return self.__kind
   202     return self.__kind
   202 
   203 
   203 
   204   def CreateEntity(self, values, key_name=None):
   204   def CreateEntity(self, values):
       
   205     """ Creates an entity from a list of property values.
   205     """ Creates an entity from a list of property values.
   206 
   206 
   207     Args:
   207     Args:
   208       values: list of str
   208       values: list/tuple of str
       
   209       key_name: if provided, the name for the (single) resulting Entity
   209 
   210 
   210     Returns:
   211     Returns:
   211       list of datastore.Entity
   212       list of datastore.Entity
   212 
   213 
   213       The returned entities are populated with the property values from the
   214       The returned entities are populated with the property values from the
   214       argument, converted to native types using the properties map given in
   215       argument, converted to native types using the properties map given in
   215       the constructor, and passed through HandleEntity. They're ready to be
   216       the constructor, and passed through HandleEntity. They're ready to be
   216       inserted.
   217       inserted.
   217 
   218 
   218     Raises an AssertionError if the number of values doesn't match the number
   219     Raises:
   219     of properties in the properties map.
   220       AssertionError if the number of values doesn't match the number
   220     """
   221         of properties in the properties map.
   221     Validate(values, list)
   222     """
       
   223     Validate(values, (list, tuple))
   222     assert len(values) == len(self.__properties), (
   224     assert len(values) == len(self.__properties), (
   223       'Expected %d CSV columns, found %d.' %
   225       'Expected %d CSV columns, found %d.' %
   224       (len(self.__properties), len(values)))
   226       (len(self.__properties), len(values)))
   225 
   227 
   226     entity = datastore.Entity(self.__kind)
   228     entity = datastore.Entity(self.__kind, name=key_name)
   227     for (name, converter), val in zip(self.__properties, values):
   229     for (name, converter), val in zip(self.__properties, values):
   228       entity[name] = converter(val)
   230       entity[name] = converter(val)
   229 
   231 
   230     entities = self.HandleEntity(entity)
   232     entities = self.HandleEntity(entity)
   231 
   233 
   232     if entities is not None:
   234     if entities is not None:
   233       if not isinstance(entities, list):
   235       if not isinstance(entities, (list, tuple)):
   234         entities = [entities]
   236         entities = [entities]
   235 
   237 
   236       for entity in entities:
   238       for entity in entities:
   237         if not isinstance(entity, datastore.Entity):
   239         if not isinstance(entity, datastore.Entity):
   238           raise TypeError('Expected a datastore.Entity, received %s (a %s).' %
   240           raise TypeError('Expected a datastore.Entity, received %s (a %s).' %
   267     """
   269     """
   268     return dict(Loader.__loaders)
   270     return dict(Loader.__loaders)
   269 
   271 
   270 
   272 
   271 class BulkLoad(webapp.RequestHandler):
   273 class BulkLoad(webapp.RequestHandler):
   272   """ A handler for bulk load requests.
   274   """A handler for bulk load requests.
       
   275 
       
   276   This class contains handlers for the bulkloading process. One for
       
   277   GET to provide cookie information for the upload script, and one
       
   278   handler for a POST request to upload the entities.
       
   279 
       
   280   In the POST request, the body contains the data representing the
       
   281   entities' property values. The original format was a sequence of
       
   282   lines of comma-separated values (and is handled by the Load
       
   283   method). The current (version 1) format is a binary format described
       
   284   in the Tools and Libraries section of the documentation, and is
       
   285   handled by the LoadV1 method.
   273   """
   286   """
   274 
   287 
   275   def get(self):
   288   def get(self):
   276     """ Handle a GET. Just show an info page.
   289     """ Handle a GET. Just show an info page.
   277     """
   290     """
   281 
   294 
   282   def post(self):
   295   def post(self):
   283     """ Handle a POST. Reads CSV data, converts to entities, and stores them.
   296     """ Handle a POST. Reads CSV data, converts to entities, and stores them.
   284     """
   297     """
   285     self.response.headers['Content-Type'] = 'text/plain'
   298     self.response.headers['Content-Type'] = 'text/plain'
   286     response, output = self.Load(self.request.get(constants.KIND_PARAM),
   299     version = self.request.headers.get('GAE-Uploader-Version', '0')
   287                                  self.request.get(constants.CSV_PARAM))
   300     if version == '1':
       
   301       kind = self.request.headers.get('GAE-Uploader-Kind')
       
   302       response, output = self.LoadV1(kind, self.request.body)
       
   303     else:
       
   304       response, output = self.Load(self.request.get(constants.KIND_PARAM),
       
   305                                    self.request.get(constants.CSV_PARAM))
   288     self.response.set_status(response)
   306     self.response.set_status(response)
   289     self.response.out.write(output)
   307     self.response.out.write(output)
   290 
   308 
   291 
   309 
   292   def InfoPage(self, uri):
   310   def InfoPage(self, uri):
   367           new_entities = loader.CreateEntity(columns)
   385           new_entities = loader.CreateEntity(columns)
   368           if new_entities:
   386           if new_entities:
   369             entities.extend(new_entities)
   387             entities.extend(new_entities)
   370           output.append('done.')
   388           output.append('done.')
   371         except:
   389         except:
   372           exc_info = sys.exc_info()
   390           stacktrace = traceback.format_exc()
   373           stacktrace = traceback.format_exception(*exc_info)
       
   374           output.append('error:\n%s' % stacktrace)
   391           output.append('error:\n%s' % stacktrace)
   375           return (httplib.BAD_REQUEST, ''.join(output))
   392           return (httplib.BAD_REQUEST, ''.join(output))
   376 
   393 
   377       line_num += 1
   394       line_num += 1
   378 
   395 
   379     for entity in entities:
   396     for entity in entities:
   380       datastore.Put(entity)
   397       datastore.Put(entity)
   381 
   398 
   382     return (httplib.OK, ''.join(output))
   399     return (httplib.OK, ''.join(output))
       
   400 
       
   401   def LoadV1(self, kind, data):
       
   402     """Parses version-1 format data, converts to entities, and stores them.
       
   403 
       
   404     On error, fails fast. Returns a "bad request" HTTP response code and
       
   405     includes the traceback in the output.
       
   406 
       
   407     Args:
       
   408       kind: a string containing the entity kind that this loader handles
       
   409       data: a string containing the (v1 format) data to load
       
   410 
       
   411     Returns:
       
   412       tuple (response code, output) where:
       
   413         response code: integer HTTP response code to return
       
   414         output: string containing the HTTP response body
       
   415     """
       
   416     Validate(kind, basestring)
       
   417     Validate(data, basestring)
       
   418     output = []
       
   419 
       
   420     try:
       
   421       loader = Loader.RegisteredLoaders()[kind]
       
   422     except KeyError:
       
   423       output.append('Error: no Loader defined for kind %s.' % kind)
       
   424       return httplib.BAD_REQUEST, ''.join(output)
       
   425 
       
   426     entities = []
       
   427 
       
   428     column_count, = struct.unpack_from('!i', data)
       
   429 
       
   430     offset = 4
       
   431 
       
   432     lengths_format = '!%di' % (column_count,)
       
   433 
       
   434     while offset < len(data):
       
   435       id_num = struct.unpack_from('!i', data, offset=offset)
       
   436       offset += 4
       
   437 
       
   438       key_name = 'i%010d' % id_num
       
   439 
       
   440       value_lengths = struct.unpack_from(lengths_format, data, offset=offset)
       
   441       offset += 4 * column_count
       
   442 
       
   443       columns = struct.unpack_from(''.join('%ds' % length
       
   444                                            for length in value_lengths), data,
       
   445                                    offset=offset)
       
   446       offset += sum(value_lengths)
       
   447 
       
   448       try:
       
   449         output.append('Loading key_name=%s... ' % key_name)
       
   450         new_entities = loader.CreateEntity(columns, key_name=key_name)
       
   451         if new_entities:
       
   452           entities.extend(new_entities)
       
   453         output.append('done.\n')
       
   454       except:
       
   455         stacktrace = traceback.format_exc()
       
   456         output.append('error:\n%s' % stacktrace)
       
   457         return httplib.BAD_REQUEST, ''.join(output)
       
   458 
       
   459     for entity in entities:
       
   460       datastore.Put(entity)
       
   461 
       
   462     return httplib.OK, ''.join(output)
   383 
   463 
   384 
   464 
   385 def main(*loaders):
   465 def main(*loaders):
   386   """Starts bulk upload.
   466   """Starts bulk upload.
   387 
   467