thirdparty/google_appengine/google/appengine/ext/bulkload/__init__.py
changeset 109 620f9b141567
child 149 f2e327a7c5de
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/thirdparty/google_appengine/google/appengine/ext/bulkload/__init__.py	Tue Aug 26 21:49:54 2008 +0000
@@ -0,0 +1,404 @@
+#!/usr/bin/env python
+#
+# Copyright 2007 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""A mix-in handler for bulk loading data into an application.
+
+For complete documentation, see the Tools and Libraries section of the
+documentation.
+
+To use this in your app, first write a script, e.g. bulkload.py, that
+instantiates a Loader for each entity kind you want to import and calls
+bulkload.main(instance). For example:
+
+person = bulkload.Loader(
+  'Person',
+  [('name', str),
+   ('email', datastore_types.Email),
+   ('birthdate', lambda x: datetime.datetime.fromtimestamp(float(x))),
+  ])
+
+if __name__ == '__main__':
+  bulkload.main(person)
+
+See the Loader class for more information. Then, add a handler for it in your
+app.yaml, e.g.:
+
+  urlmap:
+  - regex: /load
+    handler:
+      type: 1
+      path: bulkload.py
+      requires_login: true
+      admin_only: true
+
+Finally, deploy your app and run bulkload_client.py. For example, to load the
+file people.csv into a dev_appserver running on your local machine:
+
+./bulkload_client.py --filename people.csv --kind Person --cookie ... \
+                     --url http://localhost:8080/load
+
+The kind parameter is used to look up the Loader instance that will be used.
+The bulkload handler should usually be admin_only, so that non-admins can't use
+it to modify your app's data. The bulkload client uses the cookie
+parameter to piggyback its HTTP requests on your login session. A GET request
+to the URL specified for your bulkload script will give you a cookie parameter
+you can use (/load in the example above).  If your bulkload handler is not
+admin_only, you may omit the cookie parameter.
+
+If you want to do extra processing before the entities are stored, you can
+subclass Loader and override HandleEntity. HandleEntity is called once with
+each entity that is imported from the CSV data. You can return one or more
+entities from HandleEntity to be stored in its place, or None if nothing
+should be stored.
+
+For example, this loads calendar events and stores them as
+datastore_entities.Event entities. It also populates their author field with a
+reference to the corresponding datastore_entities.Contact entity. If no Contact
+entity exists yet for the given author, it creates one and stores it first.
+
+class EventLoader(bulkload.Loader):
+  def __init__(self):
+    bulkload.Loader.__init__(self, 'Event',
+                             [('title', str),
+                              ('creator', str),
+                              ('where', str),
+                              ('startTime', lambda x:
+                                datetime.datetime.fromtimestamp(float(x))),
+                              ])
+
+  def HandleEntity(self, entity):
+    event = datastore_entities.Event(entity.title)
+    event.update(entity)
+
+    creator = event['creator']
+    if creator:
+      contact = datastore.Query('Contact', {'title': creator}).Get(1)
+      if not contact:
+        contact = [datastore_entities.Contact(creator)]
+        datastore.Put(contact[0])
+      event['author'] = contact[0].key()
+
+    return event
+
+if __name__ == '__main__':
+  bulkload.main(EventLoader())
+"""
+
+
+
+
+
+import Cookie
+import StringIO
+import csv
+import httplib
+import os
+import sys
+import traceback
+import types
+
+
+import google
+import wsgiref.handlers
+
+from google.appengine.api import datastore
+from google.appengine.api import datastore_types
+from google.appengine.ext import webapp
+from google.appengine.ext.bulkload import constants
+
+
+def Validate(value, type):
+  """ Checks that value is non-empty and of the right type.
+
+  Args:
+    value: any value
+    type: a type or tuple of types, as accepted by isinstance()
+
+  Raises:
+    ValueError: if value is falsy (e.g. None, an empty string or an empty
+      container).
+    TypeError: if value is not an instance of the given type(s).
+  """
+  if not value:
+    # Wrap value in a one-element tuple: a bare empty tuple would be taken as
+    # the (empty) list of format arguments and raise TypeError instead of the
+    # documented ValueError.
+    raise ValueError('Value should not be empty; received %s.' % (value,))
+  elif not isinstance(value, type):
+    raise TypeError('Expected a %s, but received %s (a %s).' %
+                    (type, value, value.__class__))
+
+
+class Loader(object):
+  """ Base class that turns rows of CSV input into datastore entities.
+
+  To support bulk loading of a new entity kind, either instantiate this class
+  directly or write a subclass whose __init__ calls Loader.__init__.
+
+  Override HandleEntity when entities need extra processing (converting
+  values, adding properties, creating related entities) before they are
+  inserted.
+  """
+
+  # Registry shared by all Loader instances, keyed by entity kind. A later
+  # Loader for the same kind replaces the earlier one.
+  __loaders = {}
+  # Per-instance state, populated by __init__.
+  __kind = None
+  __properties = None
+
+  def __init__(self, kind, properties):
+    """ Constructor.
+
+    Stores the kind and the properties list on this Loader and registers the
+    instance under its kind, so that instantiating a Loader is all that is
+    needed for the bulkload handler to find it.
+
+    Args:
+      kind: a non-empty string, the entity kind that this loader handles
+
+      properties: non-empty list of (name, converter) tuples.
+
+      Each converter is a callable taking one argument, a string value from
+      the CSV file, and returning the correctly typed property value to
+      store. The tuples must appear in the same order as the columns of the
+      CSV file.
+
+      For example:
+        [('name', str),
+         ('id_number', int),
+         ('email', datastore_types.Email),
+         ('user', users.User),
+         ('birthdate', lambda x: datetime.datetime.fromtimestamp(float(x))),
+         ('description', datastore_types.Text),
+         ]
+
+    Raises ValueError or TypeError (from Validate) if kind, properties or a
+    property name is empty or of the wrong type, and AssertionError if a
+    converter is not callable.
+    """
+    Validate(kind, basestring)
+    self.__kind = kind
+
+    Validate(properties, list)
+    for prop_name, convert in properties:
+      Validate(prop_name, basestring)
+      assert callable(convert), (
+        'Conversion function %s for property %s is not callable.' %
+        (convert, prop_name))
+
+    self.__properties = properties
+
+    # Make this instance discoverable through RegisteredLoaders().
+    Loader.__loaders[kind] = self
+
+
+  def kind(self):
+    """ Return the entity kind that this Loader handles.
+    """
+    return self.__kind
+
+
+  def CreateEntity(self, values):
+    """ Builds the entities for one CSV row.
+
+    Each value is converted with the converter at the same position in the
+    properties list given to the constructor and stored on a new entity of
+    this Loader's kind. That entity is then passed through HandleEntity.
+
+    Args:
+      values: non-empty list of str, one per property
+
+    Returns:
+      list of datastore.Entity, ready to be inserted, or None if HandleEntity
+      returned None. A single entity returned by HandleEntity is wrapped in a
+      list.
+
+    Raises an AssertionError if the number of values doesn't match the number
+    of properties, and a TypeError if HandleEntity returns anything that is
+    not a datastore.Entity.
+    """
+    Validate(values, list)
+    expected = len(self.__properties)
+    assert len(values) == expected, (
+      'Expected %d CSV columns, found %d.' % (expected, len(values)))
+
+    entity = datastore.Entity(self.__kind)
+    for index, (name, converter) in enumerate(self.__properties):
+      entity[name] = converter(values[index])
+
+    result = self.HandleEntity(entity)
+    if result is None:
+      return result
+
+    # Normalize a single returned entity to a one-element list.
+    if not isinstance(result, list):
+      result = [result]
+
+    for item in result:
+      if not isinstance(item, datastore.Entity):
+        raise TypeError('Expected a datastore.Entity, received %s (a %s).' %
+                        (item, item.__class__))
+
+    return result
+
+
+  def HandleEntity(self, entity):
+    """ Hook for custom per-entity processing; returns the entity unchanged.
+
+    CreateEntity calls this once for each entity, after its properties have
+    been populated from CSV. Subclasses can override it to modify, replace
+    or drop the entity.
+
+    Return the entity to be inserted, a list of entities if several should be
+    inserted, or None or [] if nothing should be inserted.
+
+    Args:
+      entity: datastore.Entity
+
+    Returns:
+      datastore.Entity or list of datastore.Entity
+    """
+    return entity
+
+
+  @staticmethod
+  def RegisteredLoaders():
+    """ Returns a dict mapping entity kind to the Loader registered for it.
+
+    The dict is a copy, so modifying it does not affect the registry.
+    """
+    return Loader.__loaders.copy()
+
+
+class BulkLoad(webapp.RequestHandler):
+  """ A handler for bulk load requests.
+
+  GET requests render an information page. POST requests carry an entity kind
+  and CSV data, which are converted to entities by the Loader registered for
+  that kind and then stored.
+  """
+
+  def get(self):
+    """ Handle a GET. Just show an info page.
+    """
+    page = self.InfoPage(self.request.uri)
+    self.response.out.write(page)
+
+
+  def post(self):
+    """ Handle a POST. Reads CSV data, converts to entities, and stores them.
+
+    The kind and the CSV data are read from the request parameters named by
+    constants.KIND_PARAM and constants.CSV_PARAM. The response is plain text
+    with the status code and body returned by Load.
+    """
+    self.response.headers['Content-Type'] = 'text/plain'
+    response, output = self.Load(self.request.get(constants.KIND_PARAM),
+                                 self.request.get(constants.CSV_PARAM))
+    self.response.set_status(response)
+    self.response.out.write(output)
+
+
+  def InfoPage(self, uri):
+    """ Renders an information page with the POST endpoint and cookie flag.
+
+    The cookie flag is taken from the first of the ACSID and
+    dev_appserver_login cookies found in the HTTP_COOKIE environment
+    variable. If neither is present, the page says so.
+
+    Args:
+      uri: a string containing the request URI
+    Returns:
+      A string with the contents of the info page to be displayed
+    """
+    # Imported here because only this method needs it.
+    import cgi
+
+    page = """
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html><head>
+<title>Bulk Loader</title>
+</head><body>"""
+
+    # The URI and the cookie value both come from the client, so escape them
+    # before embedding them in HTML.
+    safe_uri = cgi.escape(uri, True)
+    page += ('The bulk load endpoint is: <a href="%s">%s</a><br />\n' %
+            (safe_uri, safe_uri))
+
+    cookie_found = False
+    cookies = os.environ.get('HTTP_COOKIE', None)
+    if cookies:
+      cookie = Cookie.BaseCookie(cookies)
+      for param in ['ACSID', 'dev_appserver_login']:
+        value = cookie.get(param)
+        if value:
+          page += ("Pass this flag to the client: --cookie='%s=%s'\n" %
+                   (param, cgi.escape(value.value, True)))
+          cookie_found = True
+          break
+
+    # Also covers the case where cookies were sent but none of them is one of
+    # the login cookies looked for above.
+    if not cookie_found:
+      page += 'No cookie found!\n'
+
+    page += '</body></html>'
+    return page
+
+
+  def Load(self, kind, data):
+    """ Parses CSV data, uses a Loader to convert to entities, and stores them.
+
+    On a conversion error, fails fast: returns a "bad request" HTTP response
+    code, includes the traceback in the output, and stores nothing. Entities
+    are only stored after every row has been converted successfully.
+
+    Args:
+      kind: a string containing the entity kind that this loader handles
+      data: a string containing the CSV data to load
+
+    Returns:
+      tuple (response code, output) where:
+        response code: integer HTTP response code to return
+        output: string containing the HTTP response body
+
+    Raises ValueError or TypeError (from Validate) if kind or data is empty
+    or not a string.
+    """
+    Validate(kind, basestring)
+    Validate(data, basestring)
+    output = []
+
+    try:
+      loader = Loader.RegisteredLoaders()[kind]
+    except KeyError:
+      output.append('Error: no Loader defined for kind %s.' % kind)
+      return (httplib.BAD_REQUEST, ''.join(output))
+
+    csv_buffer = StringIO.StringIO(data)
+    reader = csv.reader(csv_buffer, skipinitialspace=True)
+
+    # Raise the csv module's field size limit where that function exists; the
+    # AttributeError fallback presumably covers Python versions that lack it.
+    try:
+      csv.field_size_limit(800000)
+    except AttributeError:
+      pass
+
+    entities = []
+
+    # Counts rows yielded by the reader (including empty ones), starting at 1.
+    line_num = 1
+    for columns in reader:
+      if columns:
+        try:
+          output.append('\nLoading from line %d...' % line_num)
+          new_entities = loader.CreateEntity(columns)
+          if new_entities:
+            entities.extend(new_entities)
+          output.append('done.')
+        except:
+          # Deliberately broad: any failure in user-supplied converters or
+          # HandleEntity code is reported back to the client.
+          exc_info = sys.exc_info()
+          # format_exception returns a list of strings; join them so the
+          # client sees a readable traceback rather than the list's repr.
+          stacktrace = ''.join(traceback.format_exception(*exc_info))
+          output.append('error:\n%s' % stacktrace)
+          return (httplib.BAD_REQUEST, ''.join(output))
+
+      line_num += 1
+
+    for entity in entities:
+      datastore.Put(entity)
+
+    return (httplib.OK, ''.join(output))
+
+
+def main(*loaders):
+  """Starts bulk upload.
+
+  Checks the given loaders, then serves the BulkLoad handler for every URL
+  path through a CGI handler. The loaders are only validated here; BulkLoad
+  looks them up through Loader.RegisteredLoaders().
+
+  Args:
+    loaders: One or more Loader instances.
+
+  Raises:
+    TypeError: if no argument is given, or if any argument is not a Loader
+      instance.
+  """
+  if not loaders:
+    raise TypeError('Expected at least one argument.')
+
+  for loader in loaders:
+    if not isinstance(loader, Loader):
+      # Wrap loader in a one-element tuple so that a tuple argument is shown
+      # with %r instead of being unpacked as the format arguments.
+      raise TypeError('Expected a Loader instance; received %r' % (loader,))
+
+  application = webapp.WSGIApplication([('.*', BulkLoad)])
+  wsgiref.handlers.CGIHandler().run(application)
+
+# NOTE(review): main() is called without any loaders here, so running this
+# module directly raises TypeError('Expected at least one argument.').
+if __name__ == '__main__':
+  main()