diff -r 6641e941ef1e -r ff1a9aa48cfd app/django/contrib/gis/utils/layermapping.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/django/contrib/gis/utils/layermapping.py Tue Oct 14 16:00:59 2008 +0000 @@ -0,0 +1,684 @@ +# LayerMapping -- A Django Model/OGR Layer Mapping Utility +""" + The LayerMapping class provides a way to map the contents of OGR + vector files (e.g. SHP files) to Geographic-enabled Django models. + + This grew out of my personal needs, specifically the code repetition + that went into pulling geometries and fields out of an OGR layer, + converting to another coordinate system (e.g. WGS84), and then inserting + into a GeoDjango model. + + Please report any bugs encountered using this utility. + + Requirements: OGR C Library (from GDAL) required. + + Usage: + lm = LayerMapping(model, source_file, mapping) where, + + model: + GeoDjango model (not an instance) + + data: + OGR-supported data source file (e.g. a shapefile) or + gdal.DataSource instance + + mapping: + A python dictionary, keys are strings corresponding + to the GeoDjango model field, and values correspond to + string field names for the OGR feature, or if the model field + is a geographic then it should correspond to the OGR + geometry type, e.g. 'POINT', 'LINESTRING', 'POLYGON'. + + Keyword Args: + layer: + The index of the layer to use from the Data Source (defaults to 0) + + source_srs: + Use this to specify the source SRS manually (for example, + some shapefiles don't come with a '.prj' file). An integer SRID, + a string WKT, and SpatialReference objects are valid parameters. + + encoding: + Specifies the encoding of the string in the OGR data source. + For example, 'latin-1', 'utf-8', and 'cp437' are all valid + encoding parameters. + + transaction_mode: + May be 'commit_on_success' (default) or 'autocommit'. + + transform: + Setting this to False will disable all coordinate transformations. + + unique: + Setting this to the name, or a tuple of names, from the given + model will create models unique only to the given name(s). + Geometries will from each feature will be added into the collection + associated with the unique model. Forces transaction mode to + be 'autocommit'. + +Example: + + 1. You need a GDAL-supported data source, like a shapefile. + + Assume we're using the test_poly SHP file: + >>> from django.contrib.gis.gdal import DataSource + >>> ds = DataSource('test_poly.shp') + >>> layer = ds[0] + >>> print layer.fields # Exploring the fields in the layer, we only want the 'str' field. + ['float', 'int', 'str'] + >>> print len(layer) # getting the number of features in the layer (should be 3) + 3 + >>> print layer.geom_type # Should be 3 (a Polygon) + 3 + >>> print layer.srs # WGS84 + GEOGCS["GCS_WGS_1984", + DATUM["WGS_1984", + SPHEROID["WGS_1984",6378137,298.257223563]], + PRIMEM["Greenwich",0], + UNIT["Degree",0.017453292519943295]] + + 2. Now we define our corresponding Django model (make sure to use syncdb): + + from django.contrib.gis.db import models + class TestGeo(models.Model, models.GeoMixin): + name = models.CharField(maxlength=25) # corresponds to the 'str' field + poly = models.PolygonField(srid=4269) # we want our model in a different SRID + objects = models.GeoManager() + def __str__(self): + return 'Name: %s' % self.name + + 3. Use LayerMapping to extract all the features and place them in the database: + + >>> from django.contrib.gis.utils import LayerMapping + >>> from geoapp.models import TestGeo + >>> mapping = {'name' : 'str', # The 'name' model field maps to the 'str' layer field. + 'poly' : 'POLYGON', # For geometry fields use OGC name. + } # The mapping is a dictionary + >>> lm = LayerMapping(TestGeo, 'test_poly.shp', mapping) + >>> lm.save(verbose=True) # Save the layermap, imports the data. + Saved: Name: 1 + Saved: Name: 2 + Saved: Name: 3 + + LayerMapping just transformed the three geometries from the SHP file from their + source spatial reference system (WGS84) to the spatial reference system of + the GeoDjango model (NAD83). If no spatial reference system is defined for + the layer, use the `source_srs` keyword with a SpatialReference object to + specify one. +""" +import sys +from datetime import date, datetime +from decimal import Decimal +from django.core.exceptions import ObjectDoesNotExist +from django.contrib.gis.db.models import GeometryField +from django.contrib.gis.db.backend import SpatialBackend +from django.contrib.gis.gdal import CoordTransform, DataSource, \ + OGRException, OGRGeometry, OGRGeomType, SpatialReference +from django.contrib.gis.gdal.field import \ + OFTDate, OFTDateTime, OFTInteger, OFTReal, OFTString, OFTTime +from django.contrib.gis.models import GeometryColumns, SpatialRefSys +from django.db import models, transaction +from django.contrib.localflavor.us.models import USStateField + +# LayerMapping exceptions. +class LayerMapError(Exception): pass +class InvalidString(LayerMapError): pass +class InvalidDecimal(LayerMapError): pass +class InvalidInteger(LayerMapError): pass +class MissingForeignKey(LayerMapError): pass + +class LayerMapping(object): + "A class that maps OGR Layers to GeoDjango Models." + + # Acceptable 'base' types for a multi-geometry type. + MULTI_TYPES = {1 : OGRGeomType('MultiPoint'), + 2 : OGRGeomType('MultiLineString'), + 3 : OGRGeomType('MultiPolygon'), + } + + # Acceptable Django field types and corresponding acceptable OGR + # counterparts. + FIELD_TYPES = { + models.AutoField : OFTInteger, + models.IntegerField : (OFTInteger, OFTReal, OFTString), + models.FloatField : (OFTInteger, OFTReal), + models.DateField : OFTDate, + models.DateTimeField : OFTDateTime, + models.EmailField : OFTString, + models.TimeField : OFTTime, + models.DecimalField : (OFTInteger, OFTReal), + models.CharField : OFTString, + models.SlugField : OFTString, + models.TextField : OFTString, + models.URLField : OFTString, + USStateField : OFTString, + models.XMLField : OFTString, + models.SmallIntegerField : (OFTInteger, OFTReal, OFTString), + models.PositiveSmallIntegerField : (OFTInteger, OFTReal, OFTString), + } + + # The acceptable transaction modes. + TRANSACTION_MODES = {'autocommit' : transaction.autocommit, + 'commit_on_success' : transaction.commit_on_success, + } + + def __init__(self, model, data, mapping, layer=0, + source_srs=None, encoding=None, + transaction_mode='commit_on_success', + transform=True, unique=None): + """ + A LayerMapping object is initialized using the given Model (not an instance), + a DataSource (or string path to an OGR-supported data file), and a mapping + dictionary. See the module level docstring for more details and keyword + argument usage. + """ + # Getting the DataSource and the associated Layer. + if isinstance(data, basestring): + self.ds = DataSource(data) + else: + self.ds = data + self.layer = self.ds[layer] + + # Setting the mapping & model attributes. + self.mapping = mapping + self.model = model + + # Checking the layer -- intitialization of the object will fail if + # things don't check out before hand. + self.check_layer() + + # Getting the geometry column associated with the model (an + # exception will be raised if there is no geometry column). + self.geo_col = self.geometry_column() + + # Checking the source spatial reference system, and getting + # the coordinate transformation object (unless the `transform` + # keyword is set to False) + if transform: + self.source_srs = self.check_srs(source_srs) + self.transform = self.coord_transform() + else: + self.transform = transform + + # Setting the encoding for OFTString fields, if specified. + if encoding: + # Making sure the encoding exists, if not a LookupError + # exception will be thrown. + from codecs import lookup + lookup(encoding) + self.encoding = encoding + else: + self.encoding = None + + if unique: + self.check_unique(unique) + transaction_mode = 'autocommit' # Has to be set to autocommit. + self.unique = unique + else: + self.unique = None + + # Setting the transaction decorator with the function in the + # transaction modes dictionary. + if transaction_mode in self.TRANSACTION_MODES: + self.transaction_decorator = self.TRANSACTION_MODES[transaction_mode] + self.transaction_mode = transaction_mode + else: + raise LayerMapError('Unrecognized transaction mode: %s' % transaction_mode) + + #### Checking routines used during initialization #### + def check_fid_range(self, fid_range): + "This checks the `fid_range` keyword." + if fid_range: + if isinstance(fid_range, (tuple, list)): + return slice(*fid_range) + elif isinstance(fid_range, slice): + return fid_range + else: + raise TypeError + else: + return None + + def check_layer(self): + """ + This checks the Layer metadata, and ensures that it is compatible + with the mapping information and model. Unlike previous revisions, + there is no need to increment through each feature in the Layer. + """ + # The geometry field of the model is set here. + # TODO: Support more than one geometry field / model. However, this + # depends on the GDAL Driver in use. + self.geom_field = False + self.fields = {} + + # Getting lists of the field names and the field types available in + # the OGR Layer. + ogr_fields = self.layer.fields + ogr_field_types = self.layer.field_types + + # Function for determining if the OGR mapping field is in the Layer. + def check_ogr_fld(ogr_map_fld): + try: + idx = ogr_fields.index(ogr_map_fld) + except ValueError: + raise LayerMapError('Given mapping OGR field "%s" not found in OGR Layer.' % ogr_map_fld) + return idx + + # No need to increment through each feature in the model, simply check + # the Layer metadata against what was given in the mapping dictionary. + for field_name, ogr_name in self.mapping.items(): + # Ensuring that a corresponding field exists in the model + # for the given field name in the mapping. + try: + model_field = self.model._meta.get_field(field_name) + except models.fields.FieldDoesNotExist: + raise LayerMapError('Given mapping field "%s" not in given Model fields.' % field_name) + + # Getting the string name for the Django field class (e.g., 'PointField'). + fld_name = model_field.__class__.__name__ + + if isinstance(model_field, GeometryField): + if self.geom_field: + raise LayerMapError('LayerMapping does not support more than one GeometryField per model.') + + try: + gtype = OGRGeomType(ogr_name) + except OGRException: + raise LayerMapError('Invalid mapping for GeometryField "%s".' % field_name) + + # Making sure that the OGR Layer's Geometry is compatible. + ltype = self.layer.geom_type + if not (gtype == ltype or self.make_multi(ltype, model_field)): + raise LayerMapError('Invalid mapping geometry; model has %s, feature has %s.' % (fld_name, gtype)) + + # Setting the `geom_field` attribute w/the name of the model field + # that is a Geometry. + self.geom_field = field_name + fields_val = model_field + elif isinstance(model_field, models.ForeignKey): + if isinstance(ogr_name, dict): + # Is every given related model mapping field in the Layer? + rel_model = model_field.rel.to + for rel_name, ogr_field in ogr_name.items(): + idx = check_ogr_fld(ogr_field) + try: + rel_field = rel_model._meta.get_field(rel_name) + except models.fields.FieldDoesNotExist: + raise LayerMapError('ForeignKey mapping field "%s" not in %s fields.' % + (rel_name, rel_model.__class__.__name__)) + fields_val = rel_model + else: + raise TypeError('ForeignKey mapping must be of dictionary type.') + else: + # Is the model field type supported by LayerMapping? + if not model_field.__class__ in self.FIELD_TYPES: + raise LayerMapError('Django field type "%s" has no OGR mapping (yet).' % fld_name) + + # Is the OGR field in the Layer? + idx = check_ogr_fld(ogr_name) + ogr_field = ogr_field_types[idx] + + # Can the OGR field type be mapped to the Django field type? + if not issubclass(ogr_field, self.FIELD_TYPES[model_field.__class__]): + raise LayerMapError('OGR field "%s" (of type %s) cannot be mapped to Django %s.' % + (ogr_field, ogr_field.__name__, fld_name)) + fields_val = model_field + + self.fields[field_name] = fields_val + + def check_srs(self, source_srs): + "Checks the compatibility of the given spatial reference object." + if isinstance(source_srs, SpatialReference): + sr = source_srs + elif isinstance(source_srs, SpatialRefSys): + sr = source_srs.srs + elif isinstance(source_srs, (int, basestring)): + sr = SpatialReference(source_srs) + else: + # Otherwise just pulling the SpatialReference from the layer + sr = self.layer.srs + + if not sr: + raise LayerMapError('No source reference system defined.') + else: + return sr + + def check_unique(self, unique): + "Checks the `unique` keyword parameter -- may be a sequence or string." + if isinstance(unique, (list, tuple)): + # List of fields to determine uniqueness with + for attr in unique: + if not attr in self.mapping: raise ValueError + elif isinstance(unique, basestring): + # Only a single field passed in. + if unique not in self.mapping: raise ValueError + else: + raise TypeError('Unique keyword argument must be set with a tuple, list, or string.') + + #### Keyword argument retrieval routines #### + def feature_kwargs(self, feat): + """ + Given an OGR Feature, this will return a dictionary of keyword arguments + for constructing the mapped model. + """ + # The keyword arguments for model construction. + kwargs = {} + + # Incrementing through each model field and OGR field in the + # dictionary mapping. + for field_name, ogr_name in self.mapping.items(): + model_field = self.fields[field_name] + + if isinstance(model_field, GeometryField): + # Verify OGR geometry. + val = self.verify_geom(feat.geom, model_field) + elif isinstance(model_field, models.base.ModelBase): + # The related _model_, not a field was passed in -- indicating + # another mapping for the related Model. + val = self.verify_fk(feat, model_field, ogr_name) + else: + # Otherwise, verify OGR Field type. + val = self.verify_ogr_field(feat[ogr_name], model_field) + + # Setting the keyword arguments for the field name with the + # value obtained above. + kwargs[field_name] = val + + return kwargs + + def unique_kwargs(self, kwargs): + """ + Given the feature keyword arguments (from `feature_kwargs`) this routine + will construct and return the uniqueness keyword arguments -- a subset + of the feature kwargs. + """ + if isinstance(self.unique, basestring): + return {self.unique : kwargs[self.unique]} + else: + return dict((fld, kwargs[fld]) for fld in self.unique) + + #### Verification routines used in constructing model keyword arguments. #### + def verify_ogr_field(self, ogr_field, model_field): + """ + Verifies if the OGR Field contents are acceptable to the Django + model field. If they are, the verified value is returned, + otherwise the proper exception is raised. + """ + if (isinstance(ogr_field, OFTString) and + isinstance(model_field, (models.CharField, models.TextField))): + if self.encoding: + # The encoding for OGR data sources may be specified here + # (e.g., 'cp437' for Census Bureau boundary files). + val = unicode(ogr_field.value, self.encoding) + else: + val = ogr_field.value + if len(val) > model_field.max_length: + raise InvalidString('%s model field maximum string length is %s, given %s characters.' % + (model_field.name, model_field.max_length, len(val))) + elif isinstance(ogr_field, OFTReal) and isinstance(model_field, models.DecimalField): + try: + # Creating an instance of the Decimal value to use. + d = Decimal(str(ogr_field.value)) + except: + raise InvalidDecimal('Could not construct decimal from: %s' % ogr_field.value) + + # Getting the decimal value as a tuple. + dtup = d.as_tuple() + digits = dtup[1] + d_idx = dtup[2] # index where the decimal is + + # Maximum amount of precision, or digits to the left of the decimal. + max_prec = model_field.max_digits - model_field.decimal_places + + # Getting the digits to the left of the decimal place for the + # given decimal. + if d_idx < 0: + n_prec = len(digits[:d_idx]) + else: + n_prec = len(digits) + d_idx + + # If we have more than the maximum digits allowed, then throw an + # InvalidDecimal exception. + if n_prec > max_prec: + raise InvalidDecimal('A DecimalField with max_digits %d, decimal_places %d must round to an absolute value less than 10^%d.' % + (model_field.max_digits, model_field.decimal_places, max_prec)) + val = d + elif isinstance(ogr_field, (OFTReal, OFTString)) and isinstance(model_field, models.IntegerField): + # Attempt to convert any OFTReal and OFTString value to an OFTInteger. + try: + val = int(ogr_field.value) + except: + raise InvalidInteger('Could not construct integer from: %s' % ogr_field.value) + else: + val = ogr_field.value + return val + + def verify_fk(self, feat, rel_model, rel_mapping): + """ + Given an OGR Feature, the related model and its dictionary mapping, + this routine will retrieve the related model for the ForeignKey + mapping. + """ + # TODO: It is expensive to retrieve a model for every record -- + # explore if an efficient mechanism exists for caching related + # ForeignKey models. + + # Constructing and verifying the related model keyword arguments. + fk_kwargs = {} + for field_name, ogr_name in rel_mapping.items(): + fk_kwargs[field_name] = self.verify_ogr_field(feat[ogr_name], rel_model._meta.get_field(field_name)) + + # Attempting to retrieve and return the related model. + try: + return rel_model.objects.get(**fk_kwargs) + except ObjectDoesNotExist: + raise MissingForeignKey('No ForeignKey %s model found with keyword arguments: %s' % (rel_model.__name__, fk_kwargs)) + + def verify_geom(self, geom, model_field): + """ + Verifies the geometry -- will construct and return a GeometryCollection + if necessary (for example if the model field is MultiPolygonField while + the mapped shapefile only contains Polygons). + """ + if self.make_multi(geom.geom_type, model_field): + # Constructing a multi-geometry type to contain the single geometry + multi_type = self.MULTI_TYPES[geom.geom_type.num] + g = OGRGeometry(multi_type) + g.add(geom) + else: + g = geom + + # Transforming the geometry with our Coordinate Transformation object, + # but only if the class variable `transform` is set w/a CoordTransform + # object. + if self.transform: g.transform(self.transform) + + # Returning the WKT of the geometry. + return g.wkt + + #### Other model methods #### + def coord_transform(self): + "Returns the coordinate transformation object." + try: + # Getting the target spatial reference system + target_srs = SpatialRefSys.objects.get(srid=self.geo_col.srid).srs + + # Creating the CoordTransform object + return CoordTransform(self.source_srs, target_srs) + except Exception, msg: + raise LayerMapError('Could not translate between the data source and model geometry: %s' % msg) + + def geometry_column(self): + "Returns the GeometryColumn model associated with the geographic column." + # Getting the GeometryColumn object. + try: + db_table = self.model._meta.db_table + geo_col = self.geom_field + if SpatialBackend.name == 'oracle': + # Making upper case for Oracle. + db_table = db_table.upper() + geo_col = geo_col.upper() + gc_kwargs = {GeometryColumns.table_name_col() : db_table, + GeometryColumns.geom_col_name() : geo_col, + } + return GeometryColumns.objects.get(**gc_kwargs) + except Exception, msg: + raise LayerMapError('Geometry column does not exist for model. (did you run syncdb?):\n %s' % msg) + + def make_multi(self, geom_type, model_field): + """ + Given the OGRGeomType for a geometry and its associated GeometryField, + determine whether the geometry should be turned into a GeometryCollection. + """ + return (geom_type.num in self.MULTI_TYPES and + model_field.__class__.__name__ == 'Multi%s' % geom_type.django) + + def save(self, verbose=False, fid_range=False, step=False, + progress=False, silent=False, stream=sys.stdout, strict=False): + """ + Saves the contents from the OGR DataSource Layer into the database + according to the mapping dictionary given at initialization. + + Keyword Parameters: + verbose: + If set, information will be printed subsequent to each model save + executed on the database. + + fid_range: + May be set with a slice or tuple of (begin, end) feature ID's to map + from the data source. In other words, this keyword enables the user + to selectively import a subset range of features in the geographic + data source. + + step: + If set with an integer, transactions will occur at every step + interval. For example, if step=1000, a commit would occur after + the 1,000th feature, the 2,000th feature etc. + + progress: + When this keyword is set, status information will be printed giving + the number of features processed and sucessfully saved. By default, + progress information will pe printed every 1000 features processed, + however, this default may be overridden by setting this keyword with an + integer for the desired interval. + + stream: + Status information will be written to this file handle. Defaults to + using `sys.stdout`, but any object with a `write` method is supported. + + silent: + By default, non-fatal error notifications are printed to stdout, but + this keyword may be set to disable these notifications. + + strict: + Execution of the model mapping will cease upon the first error + encountered. The default behavior is to attempt to continue. + """ + # Getting the default Feature ID range. + default_range = self.check_fid_range(fid_range) + + # Setting the progress interval, if requested. + if progress: + if progress is True or not isinstance(progress, int): + progress_interval = 1000 + else: + progress_interval = progress + + # Defining the 'real' save method, utilizing the transaction + # decorator created during initialization. + @self.transaction_decorator + def _save(feat_range=default_range, num_feat=0, num_saved=0): + if feat_range: + layer_iter = self.layer[feat_range] + else: + layer_iter = self.layer + + for feat in layer_iter: + num_feat += 1 + # Getting the keyword arguments + try: + kwargs = self.feature_kwargs(feat) + except LayerMapError, msg: + # Something borked the validation + if strict: raise + elif not silent: + stream.write('Ignoring Feature ID %s because: %s\n' % (feat.fid, msg)) + else: + # Constructing the model using the keyword args + is_update = False + if self.unique: + # If we want unique models on a particular field, handle the + # geometry appropriately. + try: + # Getting the keyword arguments and retrieving + # the unique model. + u_kwargs = self.unique_kwargs(kwargs) + m = self.model.objects.get(**u_kwargs) + is_update = True + + # Getting the geometry (in OGR form), creating + # one from the kwargs WKT, adding in additional + # geometries, and update the attribute with the + # just-updated geometry WKT. + geom = getattr(m, self.geom_field).ogr + new = OGRGeometry(kwargs[self.geom_field]) + for g in new: geom.add(g) + setattr(m, self.geom_field, geom.wkt) + except ObjectDoesNotExist: + # No unique model exists yet, create. + m = self.model(**kwargs) + else: + m = self.model(**kwargs) + + try: + # Attempting to save. + m.save() + num_saved += 1 + if verbose: stream.write('%s: %s\n' % (is_update and 'Updated' or 'Saved', m)) + except SystemExit: + raise + except Exception, msg: + if self.transaction_mode == 'autocommit': + # Rolling back the transaction so that other model saves + # will work. + transaction.rollback_unless_managed() + if strict: + # Bailing out if the `strict` keyword is set. + if not silent: + stream.write('Failed to save the feature (id: %s) into the model with the keyword arguments:\n' % feat.fid) + stream.write('%s\n' % kwargs) + raise + elif not silent: + stream.write('Failed to save %s:\n %s\nContinuing\n' % (kwargs, msg)) + + # Printing progress information, if requested. + if progress and num_feat % progress_interval == 0: + stream.write('Processed %d features, saved %d ...\n' % (num_feat, num_saved)) + + # Only used for status output purposes -- incremental saving uses the + # values returned here. + return num_saved, num_feat + + nfeat = self.layer.num_feat + if step and isinstance(step, int) and step < nfeat: + # Incremental saving is requested at the given interval (step) + if default_range: + raise LayerMapError('The `step` keyword may not be used in conjunction with the `fid_range` keyword.') + beg, num_feat, num_saved = (0, 0, 0) + indices = range(step, nfeat, step) + n_i = len(indices) + + for i, end in enumerate(indices): + # Constructing the slice to use for this step; the last slice is + # special (e.g, [100:] instead of [90:100]). + if i+1 == n_i: step_slice = slice(beg, None) + else: step_slice = slice(beg, end) + + try: + num_feat, num_saved = _save(step_slice, num_feat, num_saved) + beg = end + except: + stream.write('%s\nFailed to save slice: %s\n' % ('=-' * 20, step_slice)) + raise + else: + # Otherwise, just calling the previously defined _save() function. + _save()