summaryrefslogtreecommitdiff
path: root/django/contrib/gis/utils/layermapping.py
blob: 2ff23fc38be0003787f9db622c39fe26ee03a283 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
# LayerMapping -- A Django Model/OGR Layer Mapping Utility
"""
 The LayerMapping class provides a way to map the contents of OGR
 vector files (e.g. SHP files) to Geographic-enabled Django models.

 For more information, please consult the GeoDjango documentation:
   http://geodjango.org/docs/layermapping.html
"""
import sys
from decimal import Decimal, InvalidOperation as DecimalInvalidOperation
from django.core.exceptions import FieldDoesNotExist, ObjectDoesNotExist
from django.db import connections, router
from django.contrib.gis.db.models import GeometryField
from django.contrib.gis.gdal import (CoordTransform, DataSource,
    GDALException, OGRGeometry, OGRGeomType, SpatialReference)
from django.contrib.gis.gdal.field import (
    OFTDate, OFTDateTime, OFTInteger, OFTReal, OFTString, OFTTime)
from django.db import models, transaction
from django.utils import six
from django.utils.encoding import force_text


# LayerMapping exceptions.
class LayerMapError(Exception):
    pass


class InvalidString(LayerMapError):
    pass


class InvalidDecimal(LayerMapError):
    pass


class InvalidInteger(LayerMapError):
    pass


class MissingForeignKey(LayerMapError):
    pass


class LayerMapping(object):
    "A class that maps OGR Layers to GeoDjango Models."

    # Acceptable 'base' types for a multi-geometry type.
    MULTI_TYPES = {1: OGRGeomType('MultiPoint'),
                   2: OGRGeomType('MultiLineString'),
                   3: OGRGeomType('MultiPolygon'),
                   OGRGeomType('Point25D').num: OGRGeomType('MultiPoint25D'),
                   OGRGeomType('LineString25D').num: OGRGeomType('MultiLineString25D'),
                   OGRGeomType('Polygon25D').num: OGRGeomType('MultiPolygon25D'),
                   }

    # Acceptable Django field types and corresponding acceptable OGR
    # counterparts.
    FIELD_TYPES = {
        models.AutoField: OFTInteger,
        models.IntegerField: (OFTInteger, OFTReal, OFTString),
        models.FloatField: (OFTInteger, OFTReal),
        models.DateField: OFTDate,
        models.DateTimeField: OFTDateTime,
        models.EmailField: OFTString,
        models.TimeField: OFTTime,
        models.DecimalField: (OFTInteger, OFTReal),
        models.CharField: OFTString,
        models.SlugField: OFTString,
        models.TextField: OFTString,
        models.URLField: OFTString,
        models.BigIntegerField: (OFTInteger, OFTReal, OFTString),
        models.SmallIntegerField: (OFTInteger, OFTReal, OFTString),
        models.PositiveSmallIntegerField: (OFTInteger, OFTReal, OFTString),
    }

    def __init__(self, model, data, mapping, layer=0,
                 source_srs=None, encoding='utf-8',
                 transaction_mode='commit_on_success',
                 transform=True, unique=None, using=None):
        """
        A LayerMapping object is initialized using the given Model (not an instance),
        a DataSource (or string path to an OGR-supported data file), and a mapping
        dictionary.  See the module level docstring for more details and keyword
        argument usage.
        """
        # Getting the DataSource and the associated Layer.
        if isinstance(data, six.string_types):
            self.ds = DataSource(data, encoding=encoding)
        else:
            self.ds = data
        self.layer = self.ds[layer]

        self.using = using if using is not None else router.db_for_write(model)
        self.spatial_backend = connections[self.using].ops

        # Setting the mapping & model attributes.
        self.mapping = mapping
        self.model = model

        # Checking the layer -- initialization of the object will fail if
        # things don't check out before hand.
        self.check_layer()

        # Getting the geometry column associated with the model (an
        # exception will be raised if there is no geometry column).
        if connections[self.using].features.supports_transform:
            self.geo_field = self.geometry_field()
        else:
            transform = False

        # Checking the source spatial reference system, and getting
        # the coordinate transformation object (unless the `transform`
        # keyword is set to False)
        if transform:
            self.source_srs = self.check_srs(source_srs)
            self.transform = self.coord_transform()
        else:
            self.transform = transform

        # Setting the encoding for OFTString fields, if specified.
        if encoding:
            # Making sure the encoding exists, if not a LookupError
            # exception will be thrown.
            from codecs import lookup
            lookup(encoding)
            self.encoding = encoding
        else:
            self.encoding = None

        if unique:
            self.check_unique(unique)
            transaction_mode = 'autocommit'  # Has to be set to autocommit.
            self.unique = unique
        else:
            self.unique = None

        # Setting the transaction decorator with the function in the
        # transaction modes dictionary.
        self.transaction_mode = transaction_mode
        if transaction_mode == 'autocommit':
            self.transaction_decorator = None
        elif transaction_mode == 'commit_on_success':
            self.transaction_decorator = transaction.atomic
        else:
            raise LayerMapError('Unrecognized transaction mode: %s' % transaction_mode)

    #### Checking routines used during initialization ####
    def check_fid_range(self, fid_range):
        "This checks the `fid_range` keyword."
        if fid_range:
            if isinstance(fid_range, (tuple, list)):
                return slice(*fid_range)
            elif isinstance(fid_range, slice):
                return fid_range
            else:
                raise TypeError
        else:
            return None

    def check_layer(self):
        """
        This checks the Layer metadata, and ensures that it is compatible
        with the mapping information and model.  Unlike previous revisions,
        there is no need to increment through each feature in the Layer.
        """
        # The geometry field of the model is set here.
        # TODO: Support more than one geometry field / model.  However, this
        # depends on the GDAL Driver in use.
        self.geom_field = False
        self.fields = {}

        # Getting lists of the field names and the field types available in
        # the OGR Layer.
        ogr_fields = self.layer.fields
        ogr_field_types = self.layer.field_types

        # Function for determining if the OGR mapping field is in the Layer.
        def check_ogr_fld(ogr_map_fld):
            try:
                idx = ogr_fields.index(ogr_map_fld)
            except ValueError:
                raise LayerMapError('Given mapping OGR field "%s" not found in OGR Layer.' % ogr_map_fld)
            return idx

        # No need to increment through each feature in the model, simply check
        # the Layer metadata against what was given in the mapping dictionary.
        for field_name, ogr_name in self.mapping.items():
            # Ensuring that a corresponding field exists in the model
            # for the given field name in the mapping.
            try:
                model_field = self.model._meta.get_field(field_name)
            except FieldDoesNotExist:
                raise LayerMapError('Given mapping field "%s" not in given Model fields.' % field_name)

            # Getting the string name for the Django field class (e.g., 'PointField').
            fld_name = model_field.__class__.__name__

            if isinstance(model_field, GeometryField):
                if self.geom_field:
                    raise LayerMapError('LayerMapping does not support more than one GeometryField per model.')

                # Getting the coordinate dimension of the geometry field.
                coord_dim = model_field.dim

                try:
                    if coord_dim == 3:
                        gtype = OGRGeomType(ogr_name + '25D')
                    else:
                        gtype = OGRGeomType(ogr_name)
                except GDALException:
                    raise LayerMapError('Invalid mapping for GeometryField "%s".' % field_name)

                # Making sure that the OGR Layer's Geometry is compatible.
                ltype = self.layer.geom_type
                if not (ltype.name.startswith(gtype.name) or self.make_multi(ltype, model_field)):
                    raise LayerMapError('Invalid mapping geometry; model has %s%s, '
                                        'layer geometry type is %s.' %
                                        (fld_name, '(dim=3)' if coord_dim == 3 else '', ltype))

                # Setting the `geom_field` attribute w/the name of the model field
                # that is a Geometry.  Also setting the coordinate dimension
                # attribute.
                self.geom_field = field_name
                self.coord_dim = coord_dim
                fields_val = model_field
            elif isinstance(model_field, models.ForeignKey):
                if isinstance(ogr_name, dict):
                    # Is every given related model mapping field in the Layer?
                    rel_model = model_field.rel.to
                    for rel_name, ogr_field in ogr_name.items():
                        idx = check_ogr_fld(ogr_field)
                        try:
                            rel_model._meta.get_field(rel_name)
                        except FieldDoesNotExist:
                            raise LayerMapError('ForeignKey mapping field "%s" not in %s fields.' %
                                                (rel_name, rel_model.__class__.__name__))
                    fields_val = rel_model
                else:
                    raise TypeError('ForeignKey mapping must be of dictionary type.')
            else:
                # Is the model field type supported by LayerMapping?
                if model_field.__class__ not in self.FIELD_TYPES:
                    raise LayerMapError('Django field type "%s" has no OGR mapping (yet).' % fld_name)

                # Is the OGR field in the Layer?
                idx = check_ogr_fld(ogr_name)
                ogr_field = ogr_field_types[idx]

                # Can the OGR field type be mapped to the Django field type?
                if not issubclass(ogr_field, self.FIELD_TYPES[model_field.__class__]):
                    raise LayerMapError('OGR field "%s" (of type %s) cannot be mapped to Django %s.' %
                                        (ogr_field, ogr_field.__name__, fld_name))
                fields_val = model_field

            self.fields[field_name] = fields_val

    def check_srs(self, source_srs):
        "Checks the compatibility of the given spatial reference object."

        if isinstance(source_srs, SpatialReference):
            sr = source_srs
        elif isinstance(source_srs, self.spatial_backend.spatial_ref_sys()):
            sr = source_srs.srs
        elif isinstance(source_srs, (int, six.string_types)):
            sr = SpatialReference(source_srs)
        else:
            # Otherwise just pulling the SpatialReference from the layer
            sr = self.layer.srs

        if not sr:
            raise LayerMapError('No source reference system defined.')
        else:
            return sr

    def check_unique(self, unique):
        "Checks the `unique` keyword parameter -- may be a sequence or string."
        if isinstance(unique, (list, tuple)):
            # List of fields to determine uniqueness with
            for attr in unique:
                if attr not in self.mapping:
                    raise ValueError
        elif isinstance(unique, six.string_types):
            # Only a single field passed in.
            if unique not in self.mapping:
                raise ValueError
        else:
            raise TypeError('Unique keyword argument must be set with a tuple, list, or string.')

    # Keyword argument retrieval routines ####
    def feature_kwargs(self, feat):
        """
        Given an OGR Feature, this will return a dictionary of keyword arguments
        for constructing the mapped model.
        """
        # The keyword arguments for model construction.
        kwargs = {}

        # Incrementing through each model field and OGR field in the
        # dictionary mapping.
        for field_name, ogr_name in self.mapping.items():
            model_field = self.fields[field_name]

            if isinstance(model_field, GeometryField):
                # Verify OGR geometry.
                try:
                    val = self.verify_geom(feat.geom, model_field)
                except GDALException:
                    raise LayerMapError('Could not retrieve geometry from feature.')
            elif isinstance(model_field, models.base.ModelBase):
                # The related _model_, not a field was passed in -- indicating
                # another mapping for the related Model.
                val = self.verify_fk(feat, model_field, ogr_name)
            else:
                # Otherwise, verify OGR Field type.
                val = self.verify_ogr_field(feat[ogr_name], model_field)

            # Setting the keyword arguments for the field name with the
            # value obtained above.
            kwargs[field_name] = val

        return kwargs

    def unique_kwargs(self, kwargs):
        """
        Given the feature keyword arguments (from `feature_kwargs`) this routine
        will construct and return the uniqueness keyword arguments -- a subset
        of the feature kwargs.
        """
        if isinstance(self.unique, six.string_types):
            return {self.unique: kwargs[self.unique]}
        else:
            return {fld: kwargs[fld] for fld in self.unique}

    #### Verification routines used in constructing model keyword arguments. ####
    def verify_ogr_field(self, ogr_field, model_field):
        """
        Verifies if the OGR Field contents are acceptable to the Django
        model field.  If they are, the verified value is returned,
        otherwise the proper exception is raised.
        """
        if (isinstance(ogr_field, OFTString) and
                isinstance(model_field, (models.CharField, models.TextField))):
            if self.encoding:
                # The encoding for OGR data sources may be specified here
                # (e.g., 'cp437' for Census Bureau boundary files).
                val = force_text(ogr_field.value, self.encoding)
            else:
                val = ogr_field.value
            if model_field.max_length and len(val) > model_field.max_length:
                raise InvalidString('%s model field maximum string length is %s, given %s characters.' %
                                    (model_field.name, model_field.max_length, len(val)))
        elif isinstance(ogr_field, OFTReal) and isinstance(model_field, models.DecimalField):
            try:
                # Creating an instance of the Decimal value to use.
                d = Decimal(str(ogr_field.value))
            except DecimalInvalidOperation:
                raise InvalidDecimal('Could not construct decimal from: %s' % ogr_field.value)

            # Getting the decimal value as a tuple.
            dtup = d.as_tuple()
            digits = dtup[1]
            d_idx = dtup[2]  # index where the decimal is

            # Maximum amount of precision, or digits to the left of the decimal.
            max_prec = model_field.max_digits - model_field.decimal_places

            # Getting the digits to the left of the decimal place for the
            # given decimal.
            if d_idx < 0:
                n_prec = len(digits[:d_idx])
            else:
                n_prec = len(digits) + d_idx

            # If we have more than the maximum digits allowed, then throw an
            # InvalidDecimal exception.
            if n_prec > max_prec:
                raise InvalidDecimal(
                    'A DecimalField with max_digits %d, decimal_places %d must '
                    'round to an absolute value less than 10^%d.' %
                    (model_field.max_digits, model_field.decimal_places, max_prec)
                )
            val = d
        elif isinstance(ogr_field, (OFTReal, OFTString)) and isinstance(model_field, models.IntegerField):
            # Attempt to convert any OFTReal and OFTString value to an OFTInteger.
            try:
                val = int(ogr_field.value)
            except ValueError:
                raise InvalidInteger('Could not construct integer from: %s' % ogr_field.value)
        else:
            val = ogr_field.value
        return val

    def verify_fk(self, feat, rel_model, rel_mapping):
        """
        Given an OGR Feature, the related model and its dictionary mapping,
        this routine will retrieve the related model for the ForeignKey
        mapping.
        """
        # TODO: It is expensive to retrieve a model for every record --
        #  explore if an efficient mechanism exists for caching related
        #  ForeignKey models.

        # Constructing and verifying the related model keyword arguments.
        fk_kwargs = {}
        for field_name, ogr_name in rel_mapping.items():
            fk_kwargs[field_name] = self.verify_ogr_field(feat[ogr_name], rel_model._meta.get_field(field_name))

        # Attempting to retrieve and return the related model.
        try:
            return rel_model.objects.using(self.using).get(**fk_kwargs)
        except ObjectDoesNotExist:
            raise MissingForeignKey(
                'No ForeignKey %s model found with keyword arguments: %s' %
                (rel_model.__name__, fk_kwargs)
            )

    def verify_geom(self, geom, model_field):
        """
        Verifies the geometry -- will construct and return a GeometryCollection
        if necessary (for example if the model field is MultiPolygonField while
        the mapped shapefile only contains Polygons).
        """
        # Downgrade a 3D geom to a 2D one, if necessary.
        if self.coord_dim != geom.coord_dim:
            geom.coord_dim = self.coord_dim

        if self.make_multi(geom.geom_type, model_field):
            # Constructing a multi-geometry type to contain the single geometry
            multi_type = self.MULTI_TYPES[geom.geom_type.num]
            g = OGRGeometry(multi_type)
            g.add(geom)
        else:
            g = geom

        # Transforming the geometry with our Coordinate Transformation object,
        # but only if the class variable `transform` is set w/a CoordTransform
        # object.
        if self.transform:
            g.transform(self.transform)

        # Returning the WKT of the geometry.
        return g.wkt

    #### Other model methods ####
    def coord_transform(self):
        "Returns the coordinate transformation object."
        SpatialRefSys = self.spatial_backend.spatial_ref_sys()
        try:
            # Getting the target spatial reference system
            target_srs = SpatialRefSys.objects.using(self.using).get(srid=self.geo_field.srid).srs

            # Creating the CoordTransform object
            return CoordTransform(self.source_srs, target_srs)
        except Exception as msg:
            new_msg = 'Could not translate between the data source and model geometry: %s' % msg
            six.reraise(LayerMapError, LayerMapError(new_msg), sys.exc_info()[2])

    def geometry_field(self):
        "Returns the GeometryField instance associated with the geographic column."
        # Use `get_field()` on the model's options so that we
        # get the correct field instance if there's model inheritance.
        opts = self.model._meta
        return opts.get_field(self.geom_field)

    def make_multi(self, geom_type, model_field):
        """
        Given the OGRGeomType for a geometry and its associated GeometryField,
        determine whether the geometry should be turned into a GeometryCollection.
        """
        return (geom_type.num in self.MULTI_TYPES and
                model_field.__class__.__name__ == 'Multi%s' % geom_type.django)

    def save(self, verbose=False, fid_range=False, step=False,
             progress=False, silent=False, stream=sys.stdout, strict=False):
        """
        Saves the contents from the OGR DataSource Layer into the database
        according to the mapping dictionary given at initialization.

        Keyword Parameters:
         verbose:
           If set, information will be printed subsequent to each model save
           executed on the database.

         fid_range:
           May be set with a slice or tuple of (begin, end) feature ID's to map
           from the data source.  In other words, this keyword enables the user
           to selectively import a subset range of features in the geographic
           data source.

         step:
           If set with an integer, transactions will occur at every step
           interval. For example, if step=1000, a commit would occur after
           the 1,000th feature, the 2,000th feature etc.

         progress:
           When this keyword is set, status information will be printed giving
           the number of features processed and successfully saved.  By default,
           progress information will pe printed every 1000 features processed,
           however, this default may be overridden by setting this keyword with an
           integer for the desired interval.

         stream:
           Status information will be written to this file handle.  Defaults to
           using `sys.stdout`, but any object with a `write` method is supported.

         silent:
           By default, non-fatal error notifications are printed to stdout, but
           this keyword may be set to disable these notifications.

         strict:
           Execution of the model mapping will cease upon the first error
           encountered.  The default behavior is to attempt to continue.
        """
        # Getting the default Feature ID range.
        default_range = self.check_fid_range(fid_range)

        # Setting the progress interval, if requested.
        if progress:
            if progress is True or not isinstance(progress, int):
                progress_interval = 1000
            else:
                progress_interval = progress

        def _save(feat_range=default_range, num_feat=0, num_saved=0):
            if feat_range:
                layer_iter = self.layer[feat_range]
            else:
                layer_iter = self.layer

            for feat in layer_iter:
                num_feat += 1
                # Getting the keyword arguments
                try:
                    kwargs = self.feature_kwargs(feat)
                except LayerMapError as msg:
                    # Something borked the validation
                    if strict:
                        raise
                    elif not silent:
                        stream.write('Ignoring Feature ID %s because: %s\n' % (feat.fid, msg))
                else:
                    # Constructing the model using the keyword args
                    is_update = False
                    if self.unique:
                        # If we want unique models on a particular field, handle the
                        # geometry appropriately.
                        try:
                            # Getting the keyword arguments and retrieving
                            # the unique model.
                            u_kwargs = self.unique_kwargs(kwargs)
                            m = self.model.objects.using(self.using).get(**u_kwargs)
                            is_update = True

                            # Getting the geometry (in OGR form), creating
                            # one from the kwargs WKT, adding in additional
                            # geometries, and update the attribute with the
                            # just-updated geometry WKT.
                            geom = getattr(m, self.geom_field).ogr
                            new = OGRGeometry(kwargs[self.geom_field])
                            for g in new:
                                geom.add(g)
                            setattr(m, self.geom_field, geom.wkt)
                        except ObjectDoesNotExist:
                            # No unique model exists yet, create.
                            m = self.model(**kwargs)
                    else:
                        m = self.model(**kwargs)

                    try:
                        # Attempting to save.
                        m.save(using=self.using)
                        num_saved += 1
                        if verbose:
                            stream.write('%s: %s\n' % ('Updated' if is_update else 'Saved', m))
                    except Exception as msg:
                        if strict:
                            # Bailing out if the `strict` keyword is set.
                            if not silent:
                                stream.write(
                                    'Failed to save the feature (id: %s) into the '
                                    'model with the keyword arguments:\n' % feat.fid
                                )
                                stream.write('%s\n' % kwargs)
                            raise
                        elif not silent:
                            stream.write('Failed to save %s:\n %s\nContinuing\n' % (kwargs, msg))

                # Printing progress information, if requested.
                if progress and num_feat % progress_interval == 0:
                    stream.write('Processed %d features, saved %d ...\n' % (num_feat, num_saved))

            # Only used for status output purposes -- incremental saving uses the
            # values returned here.
            return num_saved, num_feat

        if self.transaction_decorator is not None:
            _save = self.transaction_decorator(_save)

        nfeat = self.layer.num_feat
        if step and isinstance(step, int) and step < nfeat:
            # Incremental saving is requested at the given interval (step)
            if default_range:
                raise LayerMapError('The `step` keyword may not be used in conjunction with the `fid_range` keyword.')
            beg, num_feat, num_saved = (0, 0, 0)
            indices = range(step, nfeat, step)
            n_i = len(indices)

            for i, end in enumerate(indices):
                # Constructing the slice to use for this step; the last slice is
                # special (e.g, [100:] instead of [90:100]).
                if i + 1 == n_i:
                    step_slice = slice(beg, None)
                else:
                    step_slice = slice(beg, end)

                try:
                    num_feat, num_saved = _save(step_slice, num_feat, num_saved)
                    beg = end
                except:  # Deliberately catch everything
                    stream.write('%s\nFailed to save slice: %s\n' % ('=-' * 20, step_slice))
                    raise
        else:
            # Otherwise, just calling the previously defined _save() function.
            _save()