thirdparty/google_appengine/google/appengine/tools/bulkload_client.py
changeset 109 620f9b141567
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/thirdparty/google_appengine/google/appengine/tools/bulkload_client.py	Tue Aug 26 21:49:54 2008 +0000
@@ -0,0 +1,297 @@
+#!/usr/bin/env python
+#
+# Copyright 2007 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Imports CSV data over HTTP.
+
+Usage:
+  %s [flags]
+
+    --debug             Show debugging information. (Optional)
+    --cookie=<string>   Whole Cookie header to supply to the server, including
+                        the parameter name (e.g., "ACSID=..."). (Optional)
+    --url=<string>      URL endpoint to post to for importing data. (Required)
+    --batch_size=<int>  Number of Entity objects to include in each post to
+                        the URL endpoint. The more data per row/Entity, the
+                        smaller the batch size should be. (Default 10)
+    --filename=<path>   Path to the CSV file to import. (Required)
+    --kind=<string>     Name of the Entity object kind to put in the datastore.
+                        (Required)
+
+The exit status will be 0 on success, non-zero on import failure.
+
+Works with the bulkload mix-in library for google.appengine.ext.bulkload.
+Please look there for documentation about how to setup the server side.
+"""
+
+
+import StringIO
+import httplib
+import logging
+import csv
+import getopt
+import socket
+import sys
+import urllib
+import urlparse
+
+from google.appengine.ext.bulkload import constants
+
+
+
+class Error(Exception):
+  """Base-class for exceptions in this module."""
+
+
+class PostError(Error):
+  """An error has occured while trying to post data to the server."""
+
+
+class BadServerStatusError(PostError):
+  """The server has returned an error while importing data."""
+
+
+def ContentGenerator(csv_file,
+                     batch_size,
+                     create_csv_reader=csv.reader,
+                     create_csv_writer=csv.writer):
+  """Retrieves CSV data up to a batch size at a time.
+
+  Args:
+    csv_file: A file-like object for reading CSV data.
+    batch_size: Maximum number of CSV rows to yield on each iteration.
+    create_csv_reader, create_csv_writer: Used for dependency injection.
+
+  Yields:
+    Tuple (entity_count, csv_content) where:
+      entity_count: Number of entities contained in the csv_content. Will be
+        less than or equal to the batch_size and greater than 0.
+      csv_content: String containing the CSV content containing the next
+        entity_count entities.
+  """
+  try:
+    csv.field_size_limit(800000)
+  except AttributeError:
+    pass
+
+  reader = create_csv_reader(csv_file, skipinitialspace=True)
+  exhausted = False
+
+  while not exhausted:
+    rows_written = 0
+    content = StringIO.StringIO()
+    writer = create_csv_writer(content)
+    try:
+      for i in xrange(batch_size):
+        row = reader.next()
+        writer.writerow(row)
+        rows_written += 1
+    except StopIteration:
+      exhausted = True
+
+    if rows_written > 0:
+      yield rows_written, content.getvalue()
+
+
+def PostEntities(host_port, uri, cookie, kind, content):
+  """Posts Entity records to a remote endpoint over HTTP.
+
+  Args:
+   host_port: String containing the "host:port" pair; the port is optional.
+   uri: Relative URI to access on the remote host (e.g., '/bulkload').
+   cookie: String containing the Cookie header to use, if any.
+   kind: Kind of the Entity records being posted.
+   content: String containing the CSV data for the entities.
+
+  Raises:
+    BadServerStatusError if the server was contactable but returns an error.
+    PostError If an error occurred while connecting to the server or reading
+    or writing data.
+  """
+  logging.debug('Connecting to %s', host_port)
+  try:
+    body = urllib.urlencode({
+      constants.KIND_PARAM: kind,
+      constants.CSV_PARAM: content,
+    })
+    headers = {
+      'Content-Type': 'application/x-www-form-urlencoded',
+      'Content-Length': len(body),
+      'Cookie': cookie,
+    }
+
+    logging.debug('Posting %d bytes to http://%s%s', len(body), host_port, uri)
+    connection = httplib.HTTPConnection(host_port)
+    try:
+      connection.request('POST', uri, body, headers)
+      response = connection.getresponse()
+
+      status = response.status
+      reason = response.reason
+      content = response.read()
+      logging.debug('Received response code %d: %s', status, reason)
+      if status != httplib.OK:
+        raise BadServerStatusError('Received code %d: %s\n%s' % (
+                                   status, reason, content))
+    finally:
+      connection.close()
+  except (IOError, httplib.HTTPException, socket.error), e:
+    logging.debug('Encountered exception accessing HTTP server: %s', e)
+    raise PostError(e)
+
+
+def SplitURL(url):
+  """Splits an HTTP URL into pieces.
+
+  Args:
+    url: String containing a full URL string (e.g.,
+      'http://blah.com:8080/stuff?param=1#foo')
+
+  Returns:
+    Tuple (netloc, uri) where:
+      netloc: String containing the host/port combination from the URL. The
+        port is optional. (e.g., 'blah.com:8080').
+      uri: String containing the relative URI of the URL. (e.g., '/stuff').
+  """
+  scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
+  return netloc, path
+
+
+def ImportCSV(filename,
+              post_url,
+              cookie,
+              batch_size,
+              kind,
+              split_url=SplitURL,
+              openfile=file,
+              create_content_generator=ContentGenerator,
+              post_entities=PostEntities):
+  """Imports CSV data using a series of HTTP posts.
+
+  Args:
+    filename: File on disk containing CSV data.
+    post_url: URL to post the Entity data to.
+    cookie: Full cookie header to use while connecting.
+    batch_size: Maximum number of Entity objects to post with each request.
+    kind: Entity kind of the objects being posted.
+    split_url, openfile, create_content_generator, post_entities: Used for
+      dependency injection.
+
+  Returns:
+    True if all entities were imported successfully; False otherwise.
+  """
+  host_port, uri = split_url(post_url)
+  csv_file = openfile(filename, 'r')
+  try:
+    content_gen = create_content_generator(csv_file, batch_size)
+    logging.info('Starting import; maximum %d entities per post', batch_size)
+    for num_entities, content in content_gen:
+      logging.info('Importing %d entities in %d bytes',
+                   num_entities, len(content))
+      try:
+        content = post_entities(host_port, uri, cookie, kind, content)
+      except PostError, e:
+        logging.error('An error occurred while importing: %s', e)
+        return False
+  finally:
+    csv_file.close()
+  return True
+
+
+def PrintUsageExit(code):
+  """Prints usage information and exits with a status code.
+
+  Args:
+    code: Status code to pass to sys.exit() after displaying usage information.
+  """
+  print sys.modules['__main__'].__doc__ % sys.argv[0]
+  sys.stdout.flush()
+  sys.stderr.flush()
+  sys.exit(code)
+
+
+def ParseArguments(argv):
+  """Parses command-line arguments.
+
+  Prints out a help message if -h or --help is supplied.
+
+  Args:
+    argv: List of command-line arguments.
+
+  Returns:
+    Tuple (url, filename, cookie, batch_size, kind) containing the values from
+    each corresponding command-line flag.
+  """
+  opts, args = getopt.getopt(
+    argv[1:],
+    'h',
+    ['debug',
+     'help',
+     'url=',
+     'filename=',
+     'cookie=',
+     'batch_size=',
+     'kind='])
+
+  url = None
+  filename = None
+  cookie = ''
+  batch_size = 10
+  kind = None
+  encoding = None
+
+  for option, value in opts:
+    if option == '--debug':
+      logging.getLogger().setLevel(logging.DEBUG)
+    if option in ('-h', '--help'):
+      PrintUsageExit(0)
+    if option == '--url':
+      url = value
+    if option == '--filename':
+      filename = value
+    if option == '--cookie':
+      cookie = value
+    if option == '--batch_size':
+      batch_size = int(value)
+      if batch_size <= 0:
+        print >>sys.stderr, 'batch_size must be 1 or larger'
+        PrintUsageExit(1)
+    if option == '--kind':
+      kind = value
+
+  return (url, filename, cookie, batch_size, kind)
+
+
+def main(argv):
+  """Runs the importer."""
+  logging.basicConfig(
+    level=logging.INFO,
+    format='%(levelname)-8s %(asctime)s %(filename)s] %(message)s')
+
+  args = ParseArguments(argv)
+  if [arg for arg in args if arg is None]:
+    print >>sys.stderr, 'Invalid arguments'
+    PrintUsageExit(1)
+
+  url, filename, cookie, batch_size, kind = args
+  if ImportCSV(filename, url, cookie, batch_size, kind):
+    logging.info('Import succcessful')
+    return 0
+  logging.error('Import failed')
+  return 1
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv))