35 bulkload.main(person) |
35 bulkload.main(person) |
36 |
36 |
37 See the Loader class for more information. Then, add a handler for it in your |
37 See the Loader class for more information. Then, add a handler for it in your |
38 app.yaml, e.g.: |
38 app.yaml, e.g.: |
39 |
39 |
40 urlmap: |
40 handlers: |
41 - regex: /load |
41 - url: /load |
42 handler: |
42 script: bulkload.py |
43 type: 1 |
43 login: admin |
44 path: bulkload.py |
44 |
45 requires_login: true |
45 Finally, deploy your app and run bulkloader.py. For example, to load the |
46 admin_only: true |
|
47 |
|
48 Finally, deploy your app and run bulkload_client.py. For example, to load the |
|
49 file people.csv into a dev_appserver running on your local machine: |
46 file people.csv into a dev_appserver running on your local machine: |
50 |
47 |
51 ./bulkload_client.py --filename people.csv --kind Person --cookie ... \ |
48 ./bulkloader.py --filename people.csv --kind Person --cookie ... \ |
52 --url http://localhost:8080/load |
49 --url http://localhost:8080/load |
53 |
50 |
54 The kind parameter is used to look up the Loader instance that will be used. |
51 The kind parameter is used to look up the Loader instance that will be used. |
55 The bulkload handler should usually be admin_only, so that non-admins can't use |
52 The bulkload handler should usually be admin_only, so that non-admins can't use |
56 the shell to modify your app's data. The bulkload client uses the cookie |
53 the shell to modify your app's data. The bulkload client uses the cookie |
137 raise TypeError('Expected a %s, but received %s (a %s).' % |
135 raise TypeError('Expected a %s, but received %s (a %s).' % |
138 (type, value, value.__class__)) |
136 (type, value, value.__class__)) |
139 |
137 |
140 |
138 |
141 class Loader(object): |
139 class Loader(object): |
142 """ A base class for creating datastore entities from CSV input data. |
140 """A base class for creating datastore entities from input data. |
143 |
141 |
144 To add a handler for bulk loading a new entity kind into your datastore, |
142 To add a handler for bulk loading a new entity kind into your datastore, |
145 write a subclass of this class that calls Loader.__init__ from your |
143 write a subclass of this class that calls Loader.__init__ from your |
146 class's __init__. |
144 class's __init__. |
147 |
145 |
148 If you need to run extra code to convert entities from CSV, create new |
146 If you need to run extra code to convert entities from the input |
149 properties, or otherwise modify the entities before they're inserted, |
147 data, create new properties, or otherwise modify the entities before |
150 override HandleEntity. |
148 they're inserted, override HandleEntity. |
|
149 |
|
150 See the CreateEntity method for the creation of entities from the |
|
151 (parsed) input data. |
151 """ |
152 """ |
152 |
153 |
153 __loaders = {} |
154 __loaders = {} |
154 __kind = None |
155 __kind = None |
155 __properties = None |
156 __properties = None |
198 def kind(self): |
199 def kind(self): |
199 """ Return the entity kind that this Loader handles. |
200 """ Return the entity kind that this Loader handles. |
200 """ |
201 """ |
201 return self.__kind |
202 return self.__kind |
202 |
203 |
203 |
204 def CreateEntity(self, values, key_name=None): |
204 def CreateEntity(self, values): |
|
205 """ Creates an entity from a list of property values. |
205 """ Creates an entity from a list of property values. |
206 |
206 |
207 Args: |
207 Args: |
208 values: list of str |
208 values: list/tuple of str |
|
209 key_name: if provided, the name for the (single) resulting Entity |
209 |
210 |
210 Returns: |
211 Returns: |
211 list of datastore.Entity |
212 list of datastore.Entity |
212 |
213 |
213 The returned entities are populated with the property values from the |
214 The returned entities are populated with the property values from the |
214 argument, converted to native types using the properties map given in |
215 argument, converted to native types using the properties map given in |
215 the constructor, and passed through HandleEntity. They're ready to be |
216 the constructor, and passed through HandleEntity. They're ready to be |
216 inserted. |
217 inserted. |
217 |
218 |
218 Raises an AssertionError if the number of values doesn't match the number |
219 Raises: |
219 of properties in the properties map. |
220 AssertionError if the number of values doesn't match the number |
220 """ |
221 of properties in the properties map. |
221 Validate(values, list) |
222 """ |
|
223 Validate(values, (list, tuple)) |
222 assert len(values) == len(self.__properties), ( |
224 assert len(values) == len(self.__properties), ( |
223 'Expected %d CSV columns, found %d.' % |
225 'Expected %d CSV columns, found %d.' % |
224 (len(self.__properties), len(values))) |
226 (len(self.__properties), len(values))) |
225 |
227 |
226 entity = datastore.Entity(self.__kind) |
228 entity = datastore.Entity(self.__kind, name=key_name) |
227 for (name, converter), val in zip(self.__properties, values): |
229 for (name, converter), val in zip(self.__properties, values): |
228 entity[name] = converter(val) |
230 entity[name] = converter(val) |
229 |
231 |
230 entities = self.HandleEntity(entity) |
232 entities = self.HandleEntity(entity) |
231 |
233 |
232 if entities is not None: |
234 if entities is not None: |
233 if not isinstance(entities, list): |
235 if not isinstance(entities, (list, tuple)): |
234 entities = [entities] |
236 entities = [entities] |
235 |
237 |
236 for entity in entities: |
238 for entity in entities: |
237 if not isinstance(entity, datastore.Entity): |
239 if not isinstance(entity, datastore.Entity): |
238 raise TypeError('Expected a datastore.Entity, received %s (a %s).' % |
240 raise TypeError('Expected a datastore.Entity, received %s (a %s).' % |
267 """ |
269 """ |
268 return dict(Loader.__loaders) |
270 return dict(Loader.__loaders) |
269 |
271 |
270 |
272 |
271 class BulkLoad(webapp.RequestHandler): |
273 class BulkLoad(webapp.RequestHandler): |
272 """ A handler for bulk load requests. |
274 """A handler for bulk load requests. |
|
275 |
|
276 This class contains handlers for the bulkloading process. One for |
|
277 GET to provide cookie information for the upload script, and one |
|
278 handler for a POST request to upload the entities. |
|
279 |
|
280 In the POST request, the body contains the data representing the |
|
281 entities' property values. The original format was a sequence of |
|
282 lines of comma-separated values (and is handled by the Load |
|
283 method). The current (version 1) format is a binary format described |
|
284 in the Tools and Libraries section of the documentation, and is |
|
285 handled by the LoadV1 method. |
273 """ |
286 """ |
274 |
287 |
275 def get(self): |
288 def get(self): |
276 """ Handle a GET. Just show an info page. |
289 """ Handle a GET. Just show an info page. |
277 """ |
290 """ |
281 |
294 |
282 def post(self): |
295 def post(self): |
283 """ Handle a POST. Reads CSV data, converts to entities, and stores them. |
296 """ Handle a POST. Reads CSV data, converts to entities, and stores them. |
284 """ |
297 """ |
285 self.response.headers['Content-Type'] = 'text/plain' |
298 self.response.headers['Content-Type'] = 'text/plain' |
286 response, output = self.Load(self.request.get(constants.KIND_PARAM), |
299 version = self.request.headers.get('GAE-Uploader-Version', '0') |
287 self.request.get(constants.CSV_PARAM)) |
300 if version == '1': |
|
301 kind = self.request.headers.get('GAE-Uploader-Kind') |
|
302 response, output = self.LoadV1(kind, self.request.body) |
|
303 else: |
|
304 response, output = self.Load(self.request.get(constants.KIND_PARAM), |
|
305 self.request.get(constants.CSV_PARAM)) |
288 self.response.set_status(response) |
306 self.response.set_status(response) |
289 self.response.out.write(output) |
307 self.response.out.write(output) |
290 |
308 |
291 |
309 |
292 def InfoPage(self, uri): |
310 def InfoPage(self, uri): |
367 new_entities = loader.CreateEntity(columns) |
385 new_entities = loader.CreateEntity(columns) |
368 if new_entities: |
386 if new_entities: |
369 entities.extend(new_entities) |
387 entities.extend(new_entities) |
370 output.append('done.') |
388 output.append('done.') |
371 except: |
389 except: |
372 exc_info = sys.exc_info() |
390 stacktrace = traceback.format_exc() |
373 stacktrace = traceback.format_exception(*exc_info) |
|
374 output.append('error:\n%s' % stacktrace) |
391 output.append('error:\n%s' % stacktrace) |
375 return (httplib.BAD_REQUEST, ''.join(output)) |
392 return (httplib.BAD_REQUEST, ''.join(output)) |
376 |
393 |
377 line_num += 1 |
394 line_num += 1 |
378 |
395 |
379 for entity in entities: |
396 for entity in entities: |
380 datastore.Put(entity) |
397 datastore.Put(entity) |
381 |
398 |
382 return (httplib.OK, ''.join(output)) |
399 return (httplib.OK, ''.join(output)) |
|
400 |
|
401 def LoadV1(self, kind, data): |
|
402 """Parses version-1 format data, converts to entities, and stores them. |
|
403 |
|
404 On error, fails fast. Returns a "bad request" HTTP response code and |
|
405 includes the traceback in the output. |
|
406 |
|
407 Args: |
|
408 kind: a string containing the entity kind that this loader handles |
|
409 data: a string containing the (v1 format) data to load |
|
410 |
|
411 Returns: |
|
412 tuple (response code, output) where: |
|
413 response code: integer HTTP response code to return |
|
414 output: string containing the HTTP response body |
|
415 """ |
|
416 Validate(kind, basestring) |
|
417 Validate(data, basestring) |
|
418 output = [] |
|
419 |
|
420 try: |
|
421 loader = Loader.RegisteredLoaders()[kind] |
|
422 except KeyError: |
|
423 output.append('Error: no Loader defined for kind %s.' % kind) |
|
424 return httplib.BAD_REQUEST, ''.join(output) |
|
425 |
|
426 entities = [] |
|
427 |
|
428 column_count, = struct.unpack_from('!i', data) |
|
429 |
|
430 offset = 4 |
|
431 |
|
432 lengths_format = '!%di' % (column_count,) |
|
433 |
|
434 while offset < len(data): |
|
435 id_num = struct.unpack_from('!i', data, offset=offset) |
|
436 offset += 4 |
|
437 |
|
438 key_name = 'i%010d' % id_num |
|
439 |
|
440 value_lengths = struct.unpack_from(lengths_format, data, offset=offset) |
|
441 offset += 4 * column_count |
|
442 |
|
443 columns = struct.unpack_from(''.join('%ds' % length |
|
444 for length in value_lengths), data, |
|
445 offset=offset) |
|
446 offset += sum(value_lengths) |
|
447 |
|
448 try: |
|
449 output.append('Loading key_name=%s... ' % key_name) |
|
450 new_entities = loader.CreateEntity(columns, key_name=key_name) |
|
451 if new_entities: |
|
452 entities.extend(new_entities) |
|
453 output.append('done.\n') |
|
454 except: |
|
455 stacktrace = traceback.format_exc() |
|
456 output.append('error:\n%s' % stacktrace) |
|
457 return httplib.BAD_REQUEST, ''.join(output) |
|
458 |
|
459 for entity in entities: |
|
460 datastore.Put(entity) |
|
461 |
|
462 return httplib.OK, ''.join(output) |
383 |
463 |
384 |
464 |
385 def main(*loaders): |
465 def main(*loaders): |
386 """Starts bulk upload. |
466 """Starts bulk upload. |
387 |
467 |