--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/thirdparty/google_appengine/google/appengine/tools/bulkload_client.py Tue Aug 26 21:49:54 2008 +0000
@@ -0,0 +1,297 @@
+#!/usr/bin/env python
+# Copyright 2007 Google Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Imports CSV data over HTTP.
+ %s [flags]
+ --debug Show debugging information. (Optional)
+ --cookie=<string> Whole Cookie header to supply to the server, including
+ the parameter name (e.g., "ACSID=..."). (Optional)
+ --url=<string> URL endpoint to post to for importing data. (Required)
+ --batch_size=<int> Number of Entity objects to include in each post to
+ the URL endpoint. The more data per row/Entity, the
+ smaller the batch size should be. (Default 10)
+ --filename=<path> Path to the CSV file to import. (Required)
+ --kind=<string> Name of the Entity object kind to put in the datastore.
+ (Required)
+The exit status will be 0 on success, non-zero on import failure.
+Works with the bulkload mix-in library for google.appengine.ext.bulkload.
+Please look there for documentation about how to setup the server side.
+"""
+import StringIO
+import httplib
+import logging
+import csv
+import getopt
+import socket
+import sys
+import urllib
+import urlparse
+from google.appengine.ext.bulkload import constants
+class Error(Exception):
+  """Root of the exception hierarchy defined by this module."""
+class PostError(Error):
+  """Raised when posting data to the server fails."""
+class BadServerStatusError(PostError):
+  """Raised when the server answers an import request with an error status."""
+def ContentGenerator(csv_file,
+                     batch_size,
+                     create_csv_reader=csv.reader,
+                     create_csv_writer=csv.writer):
+  """Re-serializes the rows of a CSV file in batches.
+
+  Rows are read with skipinitialspace=True and written back out through a
+  fresh CSV writer for every batch.
+
+  Args:
+    csv_file: A file-like object for reading CSV data.
+    batch_size: Maximum number of CSV rows to place in each yielded batch.
+    create_csv_reader, create_csv_writer: Used for dependency injection.
+
+  Yields:
+    Tuple (entity_count, csv_content) where:
+      entity_count: Number of rows held in csv_content. Always greater than 0
+        and never more than batch_size.
+      csv_content: String containing those rows as CSV text.
+  """
+  # Raise the per-field size limit when the csv module offers the knob; the
+  # attribute is presumably absent on older Python releases.
+  if hasattr(csv, 'field_size_limit'):
+    csv.field_size_limit(800000)
+
+  source_rows = create_csv_reader(csv_file, skipinitialspace=True)
+  while True:
+    buf = StringIO.StringIO()
+    batch_writer = create_csv_writer(buf)
+    count = 0
+    reached_end = False
+    try:
+      for _ in xrange(batch_size):
+        batch_writer.writerow(source_rows.next())
+        count += 1
+    except StopIteration:
+      # The reader ran dry part-way through (or right at the start of) a batch.
+      reached_end = True
+
+    # Never emit an empty batch, e.g. when the input ends on a batch boundary.
+    if count > 0:
+      yield count, buf.getvalue()
+    if reached_end:
+      return
+def PostEntities(host_port, uri, cookie, kind, content):
+  """Sends one batch of CSV entity data to the server in an HTTP POST.
+
+  The kind and CSV content are form-encoded under constants.KIND_PARAM and
+  constants.CSV_PARAM respectively.
+
+  Args:
+    host_port: String containing the "host:port" pair; the port is optional.
+    uri: Relative URI to access on the remote host (e.g., '/bulkload').
+    cookie: String containing the Cookie header to use, if any.
+    kind: Kind of the Entity records being posted.
+    content: String containing the CSV data for the entities.
+
+  Raises:
+    BadServerStatusError: The server responded, but with a status other than
+      httplib.OK.
+    PostError: An IOError, httplib.HTTPException or socket.error occurred
+      while preparing, sending or reading the request.
+  """
+  logging.debug('Connecting to %s', host_port)
+  try:
+    form_fields = {
+        constants.KIND_PARAM: kind,
+        constants.CSV_PARAM: content,
+    }
+    body = urllib.urlencode(form_fields)
+    body_length = len(body)
+    headers = {
+        'Content-Type': 'application/x-www-form-urlencoded',
+        'Content-Length': body_length,
+        'Cookie': cookie,
+    }
+
+    logging.debug('Posting %d bytes to http://%s%s',
+                  body_length, host_port, uri)
+    connection = httplib.HTTPConnection(host_port)
+    try:
+      connection.request('POST', uri, body, headers)
+      response = connection.getresponse()
+      status, reason = response.status, response.reason
+      response_body = response.read()
+      logging.debug('Received response code %d: %s', status, reason)
+      if status != httplib.OK:
+        # Not one of the exception types handled below, so this propagates to
+        # the caller unchanged once the connection has been closed.
+        failure = 'Received code %d: %s\n%s' % (
+            status, reason, response_body)
+        raise BadServerStatusError(failure)
+    finally:
+      connection.close()
+  except (IOError, httplib.HTTPException, socket.error), e:
+    logging.debug('Encountered exception accessing HTTP server: %s', e)
+    raise PostError(e)
+def SplitURL(url):
+  """Extracts the network location and path from an HTTP URL.
+
+  The scheme, query string and fragment of the URL are discarded.
+
+  Args:
+    url: String containing a full URL string (e.g.,
+      'http://blah.com:8080/stuff?param=1#foo')
+
+  Returns:
+    Tuple (netloc, uri) where:
+      netloc: String containing the host/port combination from the URL. The
+        port is optional. (e.g., 'blah.com:8080').
+      uri: String containing the path component of the URL. (e.g., '/stuff').
+  """
+  # urlsplit yields (scheme, netloc, path, query, fragment).
+  pieces = urlparse.urlsplit(url)
+  netloc, path = pieces[1], pieces[2]
+  return netloc, path
+def ImportCSV(filename,
+              post_url,
+              cookie,
+              batch_size,
+              kind,
+              split_url=SplitURL,
+              openfile=file,
+              create_content_generator=ContentGenerator,
+              post_entities=PostEntities):
+  """Uploads a CSV file to a URL as a sequence of batched HTTP posts.
+
+  The file is opened in 'r' mode and is always closed before returning.
+  Posting stops at the first batch that raises PostError.
+
+  Args:
+    filename: File on disk containing CSV data.
+    post_url: URL to post the Entity data to.
+    cookie: Full cookie header to use while connecting.
+    batch_size: Maximum number of Entity objects to post with each request.
+    kind: Entity kind of the objects being posted.
+    split_url, openfile, create_content_generator, post_entities: Used for
+      dependency injection.
+
+  Returns:
+    True if every batch was posted without a PostError; False otherwise.
+  """
+  host_port, uri = split_url(post_url)
+  csv_file = openfile(filename, 'r')
+  succeeded = True
+  try:
+    batches = create_content_generator(csv_file, batch_size)
+    logging.info('Starting import; maximum %d entities per post', batch_size)
+    for num_entities, content in batches:
+      logging.info('Importing %d entities in %d bytes',
+                   num_entities, len(content))
+      try:
+        post_entities(host_port, uri, cookie, kind, content)
+      except PostError, e:
+        logging.error('An error occurred while importing: %s', e)
+        succeeded = False
+        break
+  finally:
+    csv_file.close()
+  return succeeded
+def PrintUsageExit(code):
+  """Prints the usage text and terminates the process.
+
+  The usage text is the docstring of the __main__ module with sys.argv[0]
+  substituted into it by %-formatting. Both standard streams are flushed
+  before exiting.
+
+  Args:
+    code: Status code to pass to sys.exit() after displaying usage information.
+  """
+  usage_template = sys.modules['__main__'].__doc__
+  print usage_template % sys.argv[0]
+  for stream in (sys.stdout, sys.stderr):
+    stream.flush()
+  sys.exit(code)
+def ParseArguments(argv):
+  """Parses command-line arguments.
+
+  Prints out a help message and exits with status 0 if -h or --help is
+  supplied. Unknown flags, a non-integer --batch_size, or a --batch_size
+  smaller than 1 print an error to stderr followed by the usage text and exit
+  with status 1. --debug switches the root logger to DEBUG level.
+
+  Args:
+    argv: List of command-line arguments; argv[0] (the program name) is
+      skipped.
+
+  Returns:
+    Tuple (url, filename, cookie, batch_size, kind) containing the values from
+    each corresponding command-line flag. Flags that were not supplied keep
+    their defaults: None for url, filename and kind, '' for cookie and 10 for
+    batch_size.
+  """
+  try:
+    opts, unused_args = getopt.getopt(
+        argv[1:],
+        'h',
+        ['debug',
+         'help',
+         'url=',
+         'filename=',
+         'cookie=',
+         'batch_size=',
+         'kind='])
+  except getopt.GetoptError, e:
+    # Report unknown or malformed flags through the usage text rather than
+    # letting the exception escape as a traceback.
+    print >>sys.stderr, 'Invalid arguments: %s' % e
+    PrintUsageExit(1)
+
+  url = None
+  filename = None
+  cookie = ''
+  batch_size = 10
+  kind = None
+
+  for option, value in opts:
+    if option == '--debug':
+      logging.getLogger().setLevel(logging.DEBUG)
+    elif option in ('-h', '--help'):
+      PrintUsageExit(0)
+    elif option == '--url':
+      url = value
+    elif option == '--filename':
+      filename = value
+    elif option == '--cookie':
+      cookie = value
+    elif option == '--batch_size':
+      try:
+        batch_size = int(value)
+      except ValueError:
+        print >>sys.stderr, 'batch_size must be an integer'
+        PrintUsageExit(1)
+      if batch_size <= 0:
+        print >>sys.stderr, 'batch_size must be 1 or larger'
+        PrintUsageExit(1)
+    elif option == '--kind':
+      kind = value
+
+  return (url, filename, cookie, batch_size, kind)
+def main(argv):
+  """Runs the importer.
+
+  Configures logging at INFO level, parses the command line, and performs the
+  import. If any parsed argument is still None, an error and the usage text
+  are printed and the process exits with status 1.
+
+  Args:
+    argv: List of command-line arguments, including the program name.
+
+  Returns:
+    0 if the import succeeded, 1 if it failed.
+  """
+  logging.basicConfig(
+      level=logging.INFO,
+      format='%(levelname)-8s %(asctime)s %(filename)s] %(message)s')
+
+  args = ParseArguments(argv)
+  missing = [arg for arg in args if arg is None]
+  if missing:
+    print >>sys.stderr, 'Invalid arguments'
+    PrintUsageExit(1)
+
+  url, filename, cookie, batch_size, kind = args
+  if not ImportCSV(filename, url, cookie, batch_size, kind):
+    logging.error('Import failed')
+    return 1
+  logging.info('Import succcessful')
+  return 0
+# Script entry point: run main() with the process arguments and use its return
+# value as the exit status.
+if __name__ == '__main__':
+ sys.exit(main(sys.argv))