From 730b1f6d7a51c6605961de708c0674687424ad77 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 10 Jan 2008 17:11:02 +0000 Subject: [PATCH] gis: `LayerMapping`: Added the `fid_range` and `step` keywords to `save()`; moved the `silent`, `strict`, and `pipe` (now `stream`) keywords from `__init__()` to `save()`. git-svn-id: http://code.djangoproject.com/svn/django/branches/gis@7013 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/contrib/gis/tests/layermap/tests.py | 103 +++++++-- django/contrib/gis/utils/layermapping.py | 255 ++++++++++++--------- 2 files changed, 228 insertions(+), 130 deletions(-) diff --git a/django/contrib/gis/tests/layermap/tests.py b/django/contrib/gis/tests/layermap/tests.py index 382cef56ce..278c1a2998 100644 --- a/django/contrib/gis/tests/layermap/tests.py +++ b/django/contrib/gis/tests/layermap/tests.py @@ -11,6 +11,11 @@ city_shp = os.path.join(shp_path, 'cities/cities.shp') co_shp = os.path.join(shp_path, 'counties/counties.shp') inter_shp = os.path.join(shp_path, 'interstates/interstates.shp') +# Dictionaries to hold what's expected in the county shapefile. +NAMES = ['Bexar', 'Galveston', 'Harris', 'Honolulu', 'Pueblo'] +NUMS = [1, 2, 1, 19, 1] # Number of polygons for each. +STATES = ['Texas', 'Texas', 'Texas', 'Hawaii', 'Colorado'] + class LayerMapTest(unittest.TestCase): def test01_init(self): @@ -77,17 +82,16 @@ class LayerMapTest(unittest.TestCase): # When the `strict` keyword is set an error encountered will force # the importation to stop. try: - lm = LayerMapping(Interstate, inter_shp, inter_mapping, - strict=True, silent=True) - lm.save() + lm = LayerMapping(Interstate, inter_shp, inter_mapping) + lm.save(silent=True, strict=True) except InvalidDecimal: pass else: self.fail('Should have failed on strict import with invalid decimal values.') # This LayerMapping should work b/c `strict` is not set. 
- lm = LayerMapping(Interstate, inter_shp, inter_mapping, silent=True) - lm.save() + lm = LayerMapping(Interstate, inter_shp, inter_mapping) + lm.save(silent=True) # Two interstate should have imported correctly. self.assertEqual(2, Interstate.objects.count()) @@ -111,6 +115,20 @@ class LayerMapTest(unittest.TestCase): self.assertAlmostEqual(p1[0], p2[0], 6) self.assertAlmostEqual(p1[1], p2[1], 6) + def county_helper(self, county_feat=True): + "Helper function for ensuring the integrity of the mapped County models." + + for name, n, st in zip(NAMES, NUMS, STATES): + # Should only be one record b/c of `unique` keyword. + c = County.objects.get(name=name) + self.assertEqual(n, len(c.mpoly)) + self.assertEqual(st, c.state.name) # Checking ForeignKey mapping. + + # Multiple records because `unique` was not set. + if county_feat: + qs = CountyFeat.objects.filter(name=name) + self.assertEqual(n, qs.count()) + def test04_layermap_unique_multigeometry_fk(self): "Testing the `unique`, and `transform`, geometry collection conversion, and ForeignKey mappings." # All the following should work. @@ -145,8 +163,8 @@ class LayerMapTest(unittest.TestCase): # There exist no State models for the ForeignKey mapping to work -- should raise # a MissingForeignKey exception (this error would be ignored if the `strict` # keyword is not set). - lm = LayerMapping(County, co_shp, co_mapping, transform=False, unique='name', silent=True, strict=True) - self.assertRaises(MissingForeignKey, lm.save) + lm = LayerMapping(County, co_shp, co_mapping, transform=False, unique='name') + self.assertRaises(MissingForeignKey, lm.save, silent=True, strict=True) # Now creating the state models so the ForeignKey mapping may work. co, hi, tx = State(name='Colorado'), State(name='Hawaii'), State(name='Texas') @@ -165,29 +183,66 @@ class LayerMapTest(unittest.TestCase): # appended to the geometry collection of the unique model. 
Thus, # all of the various islands in Honolulu county will be in in one # database record with a MULTIPOLYGON type. - lm = LayerMapping(County, co_shp, co_mapping, transform=False, unique='name', silent=True, strict=True) - lm.save() + lm = LayerMapping(County, co_shp, co_mapping, transform=False, unique='name') + lm.save(silent=True, strict=True) # A reference that doesn't use the unique keyword; a new database record will # created for each polygon. - lm = LayerMapping(CountyFeat, co_shp, cofeat_mapping, transform=False, silent=True, strict=True) - lm.save() + lm = LayerMapping(CountyFeat, co_shp, cofeat_mapping, transform=False) + lm.save(silent=True, strict=True) - # Dictionary to hold what's expected in the shapefile. - names = ('Bexar', 'Galveston', 'Harris', 'Honolulu', 'Pueblo') - nums = (1, 2, 1, 19, 1) # Number of polygons for each. - states = ('Texas', 'Texas', 'Texas', 'Hawaii', 'Colorado') + # The county helper is called to ensure integrity of County models. + self.county_helper() - for name, n, st in zip(names, nums, states): - # Should only be one record b/c of `unique` keyword. - c = County.objects.get(name=name) - self.assertEqual(n, len(c.mpoly)) - self.assertEqual(st, c.state.name) # Checking ForeignKey mapping. + def test05_test_fid_range_step(self): + "Tests the `fid_range` keyword and the `step` keyword of .save()." + + # Function for clearing out all the counties before testing. + def clear_counties(): County.objects.all().delete() + + # Initializing the LayerMapping object to use in these tests. + lm = LayerMapping(County, co_shp, co_mapping, transform=False, unique='name') + + # Bad feature id ranges should raise a type error. + clear_counties() + bad_ranges = (5.0, 'foo', co_shp) + for bad in bad_ranges: + self.assertRaises(TypeError, lm.save, fid_range=bad) + + # Step keyword should not be allowed w/`fid_range`. 
+ fr = (3, 5) # layer[3:5] + self.assertRaises(LayerMapError, lm.save, fid_range=fr, step=10) + lm.save(fid_range=fr) + + # Feature IDs 3 & 4 are for Galveston County, Texas -- only + # one model is returned because the `unique` keyword was set. + qs = County.objects.all() + self.assertEqual(1, qs.count()) + self.assertEqual('Galveston', qs[0].name) + + # Feature IDs 5 and beyond are for Honolulu County, Hawaii, and + # FID 0 is for Pueblo County, Colorado. + clear_counties() + lm.save(fid_range=slice(5, None), silent=True, strict=True) # layer[5:] + lm.save(fid_range=slice(None, 1), silent=True, strict=True) # layer[:1] + + # Only Pueblo & Honolulu counties should be present because of + # the `unique` keyword. + qs = County.objects.all() + self.assertEqual(2, qs.count()) + hi, co = tuple(qs) + hi_idx, co_idx = tuple(map(NAMES.index, ('Honolulu', 'Pueblo'))) + self.assertEqual('Pueblo', co.name); self.assertEqual(NUMS[co_idx], len(co.mpoly)) + self.assertEqual('Honolulu', hi.name); self.assertEqual(NUMS[hi_idx], len(hi.mpoly)) + + # Testing the `step` keyword -- should get the same counties + # regardless of whether we use a step that divides equally, that is odd, + # or that is larger than the dataset. + for st in (4,7,1000): + clear_counties() + lm.save(step=st, strict=True) + self.county_helper(county_feat=False) - # Multiple records because `unique` was not set. - qs = CountyFeat.objects.filter(name=name) - self.assertEqual(n, qs.count()) - def suite(): s = unittest.TestSuite() s.addTest(unittest.makeSuite(LayerMapTest)) diff --git a/django/contrib/gis/utils/layermapping.py b/django/contrib/gis/utils/layermapping.py index ec102f24b6..c8aae59102 100644 --- a/django/contrib/gis/utils/layermapping.py +++ b/django/contrib/gis/utils/layermapping.py @@ -44,24 +44,6 @@ For example, 'latin-1', 'utf-8', and 'cp437' are all valid encoding parameters. - check: - Due to optimizations, this keyword argument is deprecated and will - be removed in future revisions.
- - pipe: - Status information will be written to this file handle. Defaults - to using `sys.stdout`, but any object with a `write` method is - supported. - - silent: - By default, non-fatal error notifications are printed to stdout; this - keyword may be set in order to disable these notifications. - - strict: - Setting this keyword to True will instruct the save() method to - cease execution on the first error encountered. The default behavior - is to attempt to continue even if errors are encountered. - transaction_mode: May be 'commit_on_success' (default) or 'autocommit'. @@ -175,10 +157,9 @@ class LayerMapping(object): } def __init__(self, model, data, mapping, layer=0, - source_srs=None, encoding=None, check=True, pipe=sys.stdout, - progress=False, interval=1000, strict=False, silent=False, - transaction_mode='commit_on_success', transform=True, - unique=False): + source_srs=None, encoding=None, + transaction_mode='commit_on_success', + transform=True, unique=None): """ A LayerMapping object is initialized using the given Model (not an instance), a DataSource (or string path to an OGR-supported data file), and a mapping @@ -214,15 +195,6 @@ class LayerMapping(object): # things don't check out before hand. self.check_layer() - # The strict flag -- if it is set, exceptions will be propagated up. - self.strict = strict - - # Setting the keyword arguments related to status printing. - self.silent = silent - self.progress = progress - self.pipe = pipe - self.interval = interval - # Setting the encoding for OFTString fields, if specified. if encoding: # Making sure the encoding exists, if not a LookupError @@ -249,6 +221,18 @@ class LayerMapping(object): raise LayerMapError('Unrecognized transaction mode: %s' % transaction_mode) #### Checking routines used during initialization #### + def check_fid_range(self, fid_range): + "This checks the `fid_range` keyword." 
+ if fid_range: + if isinstance(fid_range, (tuple, list)): + return slice(*fid_range) + elif isinstance(fid_range, slice): + return fid_range + else: + raise TypeError + else: + return None + def check_layer(self): """ This checks the Layer metadata, and ensures that it is compatible @@ -367,17 +351,11 @@ class LayerMapping(object): def feature_kwargs(self, feat): """ Given an OGR Feature, this will return a dictionary of keyword arguments - for constructing the mapped model. Also returned is the `all_prepped` - flag, which is used to signal that a model corresponding to a ForeignKey - mapping does not exist. + for constructing the mapped model. """ # The keyword arguments for model construction. kwargs = {} - # The all_prepped flagged, will be set to False if there's a - # problem w/a ForeignKey that doesn't exist. - all_prepped = True - # Incrementing through each model field and OGR field in the # dictionary mapping. for field_name, ogr_name in self.mapping.items(): @@ -390,7 +368,6 @@ class LayerMapping(object): # The related _model_, not a field was passed in -- indicating # another mapping for the related Model. val = self.verify_fk(feat, model_field, ogr_name) - if not val: all_prepped = False else: # Otherwise, verify OGR Field type. val = self.verify_ogr_field(feat[ogr_name], model_field) @@ -399,7 +376,7 @@ class LayerMapping(object): # value obtained above. 
kwargs[field_name] = val - return kwargs, all_prepped + return kwargs def unique_kwargs(self, kwargs): """ @@ -480,9 +457,8 @@ class LayerMapping(object): try: return rel_model.objects.get(**fk_kwargs) except ObjectDoesNotExist: - if self.strict: raise MissingForeignKey('No %s model found with keyword arguments: %s' % (rel_model.__name__, fk_kwargs)) - else: return None - + raise MissingForeignKey('No ForeignKey %s model found with keyword arguments: %s' % (rel_model.__name__, fk_kwargs)) + def verify_geom(self, geom, model_field): """ Verifies the geometry -- will construct and return a GeometryCollection @@ -536,84 +512,151 @@ class LayerMapping(object): return (geom_type.num in self.MULTI_TYPES and model_field.__class__.__name__ == 'Multi%s' % geom_type.django) - def save(self, verbose=False): + def save(self, verbose=False, fid_range=False, step=False, + progress=False, silent=False, stream=sys.stdout, strict=False): """ Saves the contents from the OGR DataSource Layer into the database - according to the mapping dictionary given at initialization. If - the `verbose` keyword is set, information will be printed subsequent - to each model save executed on the database. + according to the mapping dictionary given at initialization. + + Keyword Parameters: + verbose: + If set, information will be printed subsequent to each model save + executed on the database. + + fid_range: + May be set with a slice or tuple of (begin, end) feature ID's to map + from the data source. In other words, this keyword enables the user + to selectively import a subset range of features in the geographic + data source. + + step: + If set with an integer, transactions will occur at every step + interval. For example, if step=1000, a commit would occur after + the 1,000th feature, the 2,000th feature etc. + + progress: + When this keyword is set, status information will be printed giving + the number of features processed and successfully saved.
By default, + progress information will be printed every 1000 features processed, + however, this default may be overridden by setting this keyword with an + integer for the desired interval. + + stream: + Status information will be written to this file handle. Defaults to + using `sys.stdout`, but any object with a `write` method is supported. + + silent: + By default, non-fatal error notifications are printed to stdout, but + this keyword may be set to disable these notifications. + + strict: + Execution of the model mapping will cease upon the first error + encountered. The default behavior is to attempt to continue. """ + # Getting the default Feature ID range. + default_range = self.check_fid_range(fid_range) + + # Setting the progress interval, if requested. + if progress: + if progress is True or not isinstance(progress, int): + progress_interval = 1000 + else: + progress_interval = progress + # Defining the 'real' save method, utilizing the transaction # decorator created during initialization. @self.transaction_decorator - def _save(): - num_feat = 0 - num_saved = 0 + def _save(feat_range=default_range, num_feat=0, num_saved=0): + if feat_range: + layer_iter = self.layer[feat_range] + else: + layer_iter = self.layer - for feat in self.layer: + for feat in layer_iter: num_feat += 1 # Getting the keyword arguments try: - kwargs, all_prepped = self.feature_kwargs(feat) + kwargs = self.feature_kwargs(feat) except LayerMapError, msg: # Something borked the validation - if self.strict: raise - elif not self.silent: - self.pipe.write('Ignoring Feature ID %s because: %s\n' % (feat.fid, msg)) + if strict: raise + elif not silent: + stream.write('Ignoring Feature ID %s because: %s\n' % (feat.fid, msg)) else: # Constructing the model using the keyword args - if all_prepped: - if self.unique: - # If we want unique models on a particular field, handle the - # geometry appropriately. - try: - # Getting the keyword arguments and retrieving - # the unique model.
- u_kwargs = self.unique_kwargs(kwargs) - m = self.model.objects.get(**u_kwargs) - - # Getting the geometry (in OGR form), creating - # one from the kwargs WKT, adding in additional - # geometries, and update the attribute with the - # just-updated geometry WKT. - geom = getattr(m, self.geom_field).ogr - new = OGRGeometry(kwargs[self.geom_field]) - for g in new: geom.add(g) - setattr(m, self.geom_field, geom.wkt) - except ObjectDoesNotExist: - # No unique model exists yet, create. - m = self.model(**kwargs) - else: - m = self.model(**kwargs) - + if self.unique: + # If we want unique models on a particular field, handle the + # geometry appropriately. try: - # Attempting to save. - m.save() - num_saved += 1 - if verbose: self.pipe.write('Saved: %s\n' % m) - except SystemExit: - raise - except Exception, msg: - if self.transaction_mode == 'autocommit': - # Rolling back the transaction so that other model saves - # will work. - transaction.rollback_unless_managed() - if self.strict: - # Bailing out if the `strict` keyword is set. - if not self.silent: - self.pipe.write('Failed to save the feature (id: %s) into the model with the keyword arguments:\n' % feat.fid) - self.pipe.write('%s\n' % kwargs) - raise - elif not self.silent: - self.pipe.write('Failed to save %s:\n %s\nContinuing\n' % (kwargs, msg)) + # Getting the keyword arguments and retrieving + # the unique model. + u_kwargs = self.unique_kwargs(kwargs) + m = self.model.objects.get(**u_kwargs) + + # Getting the geometry (in OGR form), creating + # one from the kwargs WKT, adding in additional + # geometries, and update the attribute with the + # just-updated geometry WKT. + geom = getattr(m, self.geom_field).ogr + new = OGRGeometry(kwargs[self.geom_field]) + for g in new: geom.add(g) + setattr(m, self.geom_field, geom.wkt) + except ObjectDoesNotExist: + # No unique model exists yet, create. 
+ m = self.model(**kwargs) else: - if not self.silent: self.pipe.write('Skipping due to missing relation:\n%s\n' % kwargs) - + m = self.model(**kwargs) + + try: + # Attempting to save. + m.save() + num_saved += 1 + if verbose: stream.write('Saved: %s\n' % m) + except SystemExit: + raise + except Exception, msg: + if self.transaction_mode == 'autocommit': + # Rolling back the transaction so that other model saves + # will work. + transaction.rollback_unless_managed() + if strict: + # Bailing out if the `strict` keyword is set. + if not silent: + stream.write('Failed to save the feature (id: %s) into the model with the keyword arguments:\n' % feat.fid) + stream.write('%s\n' % kwargs) + raise + elif not silent: + stream.write('Failed to save %s:\n %s\nContinuing\n' % (kwargs, msg)) # Printing progress information, if requested. - if self.progress and num_feat % self.interval == 0: - self.pipe.write('Processed %d features, saved %d ...\n' % (num_feat, num_saved)) - - # Calling our defined function, which will use the specified - # trasaction mode. - _save() + if progress and num_feat % progress_interval == 0: + stream.write('Processed %d features, saved %d ...\n' % (num_feat, num_saved)) + + # Only used for status output purposes -- incremental saving uses the + # values returned here. + return num_saved, num_feat + + nfeat = self.layer.num_feat + if step and isinstance(step, int) and step < nfeat: + # Incremental saving is requested at the given interval (step) + if default_range: + raise LayerMapError('The `step` keyword may not be used in conjunction with the `fid_range` keyword.') + beg, num_feat, num_saved = (0, 0, 0) + indices = range(step, nfeat, step) + n_i = len(indices) + + for i, end in enumerate(indices): + # Constructing the slice to use for this step; the last slice is + # special (e.g, [100:] instead of [90:100]). 
+ if i+1 == n_i: step_slice = slice(beg, None) + else: step_slice = slice(beg, end) + + try: + num_feat, num_saved = _save(step_slice, num_feat, num_saved) + beg = end + except: + stream.write('%s\nFailed to save slice: %s\n' % ('=-' * 20, step_slice)) + raise + else: + # Otherwise, just calling the previously defined _save() function. + _save()