From 9a2bceed1aab52f65820c378f5ae1f608322b55c Mon Sep 17 00:00:00 2001
From: Claude Paroz <claude@2xlibre.net>
Date: Sat, 6 Oct 2012 22:56:47 +0200
Subject: [PATCH] Use smarter string decoding in GeoDjango

The first try to solve the Python 3 GIS encoding/decoding issue
was too naive. Using decode() on all read strings is bound to fail
as soon as a non-ascii string is concerned.
This patch is a little more clever, leaving ascii decoding when
plain ascii strings are expected, and allowing to specify a custom
encoding in DataSource hierarchy.
---
 django/contrib/gis/gdal/datasource.py             |   9 ++++++---
 django/contrib/gis/gdal/feature.py                |   9 +++++++--
 django/contrib/gis/gdal/field.py                  |   8 ++++++--
 django/contrib/gis/gdal/layer.py                  |  10 ++++++----
 django/contrib/gis/gdal/prototypes/ds.py          |   2 +-
 django/contrib/gis/gdal/prototypes/errcheck.py    |   9 ++++-----
 django/contrib/gis/gdal/prototypes/generation.py  |  14 ++++++++++----
 django/contrib/gis/gdal/prototypes/geom.py        |  10 +++++-----
 django/contrib/gis/gdal/prototypes/srs.py         |  14 +++++++-------
 django/contrib/gis/gdal/srs.py                    |   5 ++---
 django/contrib/gis/gdal/tests/test_ds.py          |   3 ++-
 django/contrib/gis/tests/data/ch-city/ch-city.dbf | Bin 0 -> 285 bytes
 django/contrib/gis/tests/data/ch-city/ch-city.prj |   1 +
 django/contrib/gis/tests/data/ch-city/ch-city.shp | Bin 0 -> 128 bytes
 django/contrib/gis/tests/data/ch-city/ch-city.shx | Bin 0 -> 108 bytes
 django/contrib/gis/tests/layermap/tests.py        |  10 +++++++++-
 django/contrib/gis/utils/layermapping.py          |   8 +++++---
 17 files changed, 71 insertions(+), 41 deletions(-)
 create mode 100644 django/contrib/gis/tests/data/ch-city/ch-city.dbf
 create mode 100644 django/contrib/gis/tests/data/ch-city/ch-city.prj
 create mode 100644 django/contrib/gis/tests/data/ch-city/ch-city.shp
 create mode 100644 django/contrib/gis/tests/data/ch-city/ch-city.shx

diff --git a/django/contrib/gis/gdal/datasource.py b/django/contrib/gis/gdal/datasource.py
index fa3d86afba..c92b2e170b 100644
--- a/django/contrib/gis/gdal/datasource.py
+++ b/django/contrib/gis/gdal/datasource.py
@@ -45,7 +45,7 @@ from django.contrib.gis.gdal.layer import Layer
 # Getting the ctypes prototypes for the DataSource.
 from django.contrib.gis.gdal.prototypes import ds as capi
 
-from django.utils.encoding import force_bytes
+from django.utils.encoding import force_bytes, force_text
 from django.utils import six
 from django.utils.six.moves import xrange
 
@@ -57,12 +57,14 @@ class DataSource(GDALBase):
     "Wraps an OGR Data Source object."
 
     #### Python 'magic' routines ####
-    def __init__(self, ds_input, ds_driver=False, write=False):
+    def __init__(self, ds_input, ds_driver=False, write=False, encoding='utf-8'):
         # The write flag.
         if write:
             self._write = 1
         else:
             self._write = 0
+        # See also http://trac.osgeo.org/gdal/wiki/rfc23_ogr_unicode
+        self.encoding = encoding
 
         # Registering all the drivers, this needs to be done
         #  _before_ we try to open up a data source.
@@ -129,4 +131,5 @@ class DataSource(GDALBase):
     @property
     def name(self):
         "Returns the name of the data source."
-        return capi.get_ds_name(self._ptr)
+        name = capi.get_ds_name(self._ptr)
+        return force_text(name, self.encoding, strings_only=True)
diff --git a/django/contrib/gis/gdal/feature.py b/django/contrib/gis/gdal/feature.py
index 6f338ad269..a11a6873c5 100644
--- a/django/contrib/gis/gdal/feature.py
+++ b/django/contrib/gis/gdal/feature.py
@@ -7,7 +7,7 @@ from django.contrib.gis.gdal.geometries import OGRGeometry, OGRGeomType
 # ctypes function prototypes
 from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api
 
-from django.utils.encoding import force_bytes
+from django.utils.encoding import force_bytes, force_text
 from django.utils import six
 from django.utils.six.moves import xrange
 
@@ -68,6 +68,10 @@ class Feature(GDALBase):
         return bool(capi.feature_equal(self.ptr, other._ptr))
 
     #### Feature Properties ####
+    @property
+    def encoding(self):
+        return self._layer._ds.encoding
+
     @property
     def fid(self):
         "Returns the feature identifier."
@@ -76,7 +80,8 @@ class Feature(GDALBase):
     @property
     def layer_name(self):
         "Returns the name of the layer for the feature."
-        return capi.get_feat_name(self._layer._ldefn)
+        name = capi.get_feat_name(self._layer._ldefn)
+        return force_text(name, self.encoding, strings_only=True)
 
     @property
     def num_fields(self):
diff --git a/django/contrib/gis/gdal/field.py b/django/contrib/gis/gdal/field.py
index 16230afdc8..2415f32b26 100644
--- a/django/contrib/gis/gdal/field.py
+++ b/django/contrib/gis/gdal/field.py
@@ -3,6 +3,8 @@ from datetime import date, datetime, time
 from django.contrib.gis.gdal.base import GDALBase
 from django.contrib.gis.gdal.error import OGRException
 from django.contrib.gis.gdal.prototypes import ds as capi
+from django.utils.encoding import force_text
+
 
 # For more information, see the OGR C API source code:
 #  http://www.gdal.org/ogr/ogr__api_8h.html
@@ -53,7 +55,8 @@ class Field(GDALBase):
 
     def as_string(self):
         "Retrieves the Field's value as a string."
-        return capi.get_field_as_string(self._feat.ptr, self._index)
+        string = capi.get_field_as_string(self._feat.ptr, self._index)
+        return force_text(string, encoding=self._feat.encoding, strings_only=True)
 
     def as_datetime(self):
         "Retrieves the Field's value as a tuple of date & time components."
@@ -70,7 +73,8 @@ class Field(GDALBase):
     @property
     def name(self):
         "Returns the name of this Field."
-        return capi.get_field_name(self.ptr)
+        name = capi.get_field_name(self.ptr)
+        return force_text(name, encoding=self._feat.encoding, strings_only=True)
 
     @property
     def precision(self):
diff --git a/django/contrib/gis/gdal/layer.py b/django/contrib/gis/gdal/layer.py
index da0f566969..7f935cd114 100644
--- a/django/contrib/gis/gdal/layer.py
+++ b/django/contrib/gis/gdal/layer.py
@@ -14,7 +14,7 @@ from django.contrib.gis.gdal.srs import SpatialReference
 # GDAL ctypes function prototypes.
 from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api, srs as srs_api
 
-from django.utils.encoding import force_bytes
+from django.utils.encoding import force_bytes, force_text
 from django.utils import six
 from django.utils.six.moves import xrange
 
@@ -103,7 +103,8 @@ class Layer(GDALBase):
     @property
     def name(self):
         "Returns the name of this layer in the Data Source."
-        return capi.get_fd_name(self._ldefn)
+        name = capi.get_fd_name(self._ldefn)
+        return force_text(name, self._ds.encoding, strings_only=True)
 
     @property
     def num_feat(self, force=1):
@@ -135,8 +136,9 @@ class Layer(GDALBase):
         Returns a list of string names corresponding to each of the Fields
         available in this Layer.
         """
-        return [capi.get_field_name(capi.get_field_defn(self._ldefn, i))
-                for i in xrange(self.num_fields) ]
+        return [force_text(capi.get_field_name(capi.get_field_defn(self._ldefn, i)),
+                           self._ds.encoding, strings_only=True)
+                for i in xrange(self.num_fields)]
 
     @property
     def field_types(self):
diff --git a/django/contrib/gis/gdal/prototypes/ds.py b/django/contrib/gis/gdal/prototypes/ds.py
index d8537bcaa4..f798069fd0 100644
--- a/django/contrib/gis/gdal/prototypes/ds.py
+++ b/django/contrib/gis/gdal/prototypes/ds.py
@@ -17,7 +17,7 @@ cleanup_all = void_output(lgdal.OGRCleanupAll, [], errcheck=False)
 get_driver = voidptr_output(lgdal.OGRGetDriver, [c_int])
 get_driver_by_name = voidptr_output(lgdal.OGRGetDriverByName, [c_char_p])
 get_driver_count = int_output(lgdal.OGRGetDriverCount, [])
-get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p])
+get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p], decoding='ascii')
 
 ### DataSource ###
 open_ds = voidptr_output(lgdal.OGROpen, [c_char_p, c_int, POINTER(c_void_p)])
diff --git a/django/contrib/gis/gdal/prototypes/errcheck.py b/django/contrib/gis/gdal/prototypes/errcheck.py
index 9103022896..2d2791124c 100644
--- a/django/contrib/gis/gdal/prototypes/errcheck.py
+++ b/django/contrib/gis/gdal/prototypes/errcheck.py
@@ -30,10 +30,9 @@ def check_const_string(result, func, cargs, offset=None):
     if offset:
         check_err(result)
         ptr = ptr_byref(cargs, offset)
-        return ptr.value.decode()
+        return ptr.value
     else:
-        if result is not None:
-            return result.decode()
+        return result
 
 def check_string(result, func, cargs, offset=-1, str_result=False):
     """
@@ -48,13 +47,13 @@ def check_string(result, func, cargs, offset=-1, str_result=False):
         # For routines that return a string.
         ptr = result
         if not ptr: s = None
-        else: s = string_at(result).decode()
+        else: s = string_at(result)
     else:
         # Error-code return specified.
         check_err(result)
         ptr = ptr_byref(cargs, offset)
         # Getting the string value
-        s = ptr.value.decode()
+        s = ptr.value
     # Correctly freeing the allocated memory beind GDAL pointer
     # w/the VSIFree routine.
     if ptr: lgdal.VSIFree(ptr)
diff --git a/django/contrib/gis/gdal/prototypes/generation.py b/django/contrib/gis/gdal/prototypes/generation.py
index 45cffd645a..577d29bbaa 100644
--- a/django/contrib/gis/gdal/prototypes/generation.py
+++ b/django/contrib/gis/gdal/prototypes/generation.py
@@ -57,7 +57,7 @@ def srs_output(func, argtypes):
     func.errcheck = check_srs
     return func
 
-def const_string_output(func, argtypes, offset=None):
+def const_string_output(func, argtypes, offset=None, decoding=None):
     func.argtypes = argtypes
     if offset:
         func.restype = c_int
@@ -65,12 +65,15 @@ def const_string_output(func, argtypes, offset=None):
         func.restype = c_char_p
 
     def _check_const(result, func, cargs):
-        return check_const_string(result, func, cargs, offset=offset)
+        res = check_const_string(result, func, cargs, offset=offset)
+        if res and decoding:
+            res = res.decode(decoding)
+        return res
     func.errcheck = _check_const
 
     return func
 
-def string_output(func, argtypes, offset=-1, str_result=False):
+def string_output(func, argtypes, offset=-1, str_result=False, decoding=None):
     """
     Generates a ctypes prototype for the given function with the
     given argument types that returns a string from a GDAL pointer.
@@ -90,8 +93,11 @@ def string_output(func, argtypes, offset=-1, str_result=False):
     # Dynamically defining our error-checking function with the
     # given offset.
     def _check_str(result, func, cargs):
-        return check_string(result, func, cargs,
+        res = check_string(result, func, cargs,
                             offset=offset, str_result=str_result)
+        if res and decoding:
+            res = res.decode(decoding)
+        return res
     func.errcheck = _check_str
     return func
 
diff --git a/django/contrib/gis/gdal/prototypes/geom.py b/django/contrib/gis/gdal/prototypes/geom.py
index f2c833d576..fa0b503c65 100644
--- a/django/contrib/gis/gdal/prototypes/geom.py
+++ b/django/contrib/gis/gdal/prototypes/geom.py
@@ -27,8 +27,8 @@ def topology_func(f):
 
 # GeoJSON routines.
 from_json = geom_output(lgdal.OGR_G_CreateGeometryFromJson, [c_char_p])
-to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True)
-to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True)
+to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True, decoding='ascii')
+to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True, decoding='ascii')
 
 # GetX, GetY, GetZ all return doubles.
 getx = pnt_func(lgdal.OGR_G_GetX)
@@ -57,8 +57,8 @@ destroy_geom = void_output(lgdal.OGR_G_DestroyGeometry, [c_void_p], errcheck=Fal
 
 # Geometry export routines.
 to_wkb = void_output(lgdal.OGR_G_ExportToWkb, None, errcheck=True) # special handling for WKB.
-to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)])
-to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True)
+to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)], decoding='ascii')
+to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True, decoding='ascii')
 get_wkbsize = int_output(lgdal.OGR_G_WkbSize, [c_void_p])
 
 # Geometry spatial-reference related routines.
@@ -73,7 +73,7 @@ get_coord_dim = int_output(lgdal.OGR_G_GetCoordinateDimension, [c_void_p])
 set_coord_dim = void_output(lgdal.OGR_G_SetCoordinateDimension, [c_void_p, c_int], errcheck=False)
 
 get_geom_count = int_output(lgdal.OGR_G_GetGeometryCount, [c_void_p])
-get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p])
+get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p], decoding='ascii')
 get_geom_type = int_output(lgdal.OGR_G_GetGeometryType, [c_void_p])
 get_point_count = int_output(lgdal.OGR_G_GetPointCount, [c_void_p])
 get_point = void_output(lgdal.OGR_G_GetPoint, [c_void_p, c_int, POINTER(c_double), POINTER(c_double), POINTER(c_double)], errcheck=False)
diff --git a/django/contrib/gis/gdal/prototypes/srs.py b/django/contrib/gis/gdal/prototypes/srs.py
index 66cf84c34f..58ceb75456 100644
--- a/django/contrib/gis/gdal/prototypes/srs.py
+++ b/django/contrib/gis/gdal/prototypes/srs.py
@@ -49,17 +49,17 @@ linear_units = units_func(lgdal.OSRGetLinearUnits)
 angular_units = units_func(lgdal.OSRGetAngularUnits)
 
 # For exporting to WKT, PROJ.4, "Pretty" WKT, and XML.
-to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)])
-to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)])
-to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2)
+to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
+to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
+to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2, decoding='ascii')
 
 # Memory leak fixed in GDAL 1.5; still exists in 1.4.
-to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2)
+to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2, decoding='ascii')
 
 # String attribute retrival routines.
-get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int])
-get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p])
-get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p])
+get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int], decoding='ascii')
+get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p], decoding='ascii')
+get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p], decoding='ascii')
 
 # SRS Properties
 isgeographic = int_output(lgdal.OSRIsGeographic, [c_void_p])
diff --git a/django/contrib/gis/gdal/srs.py b/django/contrib/gis/gdal/srs.py
index 1a110b0114..66a8d4ec93 100644
--- a/django/contrib/gis/gdal/srs.py
+++ b/django/contrib/gis/gdal/srs.py
@@ -34,7 +34,7 @@ from django.contrib.gis.gdal.error import SRSException
 from django.contrib.gis.gdal.prototypes import srs as capi
 
 from django.utils import six
-from django.utils.encoding import force_bytes, force_text
+from django.utils.encoding import force_bytes
 
 
 #### Spatial Reference class. ####
@@ -139,8 +139,7 @@ class SpatialReference(GDALBase):
         """
         if not isinstance(target, six.string_types) or not isinstance(index, int):
             raise TypeError
-        value = capi.get_attr_value(self.ptr, force_bytes(target), index)
-        return force_text(value, 'ascii', strings_only=True)
+        return capi.get_attr_value(self.ptr, force_bytes(target), index)
 
     def auth_name(self, target):
         "Returns the authority name for the given string target node."
diff --git a/django/contrib/gis/gdal/tests/test_ds.py b/django/contrib/gis/gdal/tests/test_ds.py
index 634f204b86..a87a1c6c35 100644
--- a/django/contrib/gis/gdal/tests/test_ds.py
+++ b/django/contrib/gis/gdal/tests/test_ds.py
@@ -167,7 +167,8 @@ class DataSourceTest(unittest.TestCase):
                         self.assertEqual(True, isinstance(feat[k], v))
 
                     # Testing Feature.__iter__
-                    for fld in feat: self.assertEqual(True, fld.name in source.fields.keys())
+                    for fld in feat:
+                        self.assertEqual(True, fld.name in source.fields.keys())
 
     def test05_geometries(self):
         "Testing Geometries from Data Source Features."
diff --git a/django/contrib/gis/tests/data/ch-city/ch-city.dbf b/django/contrib/gis/tests/data/ch-city/ch-city.dbf
new file mode 100644
index 0000000000000000000000000000000000000000..6ba9d698f71d22173e926533d44b0bbcf43b3122
GIT binary patch
literal 285
zcmZRMXP07RU|?9tPy-|}fnQ>7Dpb@NL<gWL3CJ%f%}FfD%+F)+14(hC$-AWH6=#-I
zf<(dc68xwNoQqNuOHxxnf-WF}14JMJUWKT`dx|oXGZd%_jLeJ-3=BX<niv_I=mFWt
Nz|hjt(!kWn7yts9BN_kz

literal 0
HcmV?d00001

diff --git a/django/contrib/gis/tests/data/ch-city/ch-city.prj b/django/contrib/gis/tests/data/ch-city/ch-city.prj
new file mode 100644
index 0000000000..a30c00a55d
--- /dev/null
+++ b/django/contrib/gis/tests/data/ch-city/ch-city.prj
@@ -0,0 +1 @@
+GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
\ No newline at end of file
diff --git a/django/contrib/gis/tests/data/ch-city/ch-city.shp b/django/contrib/gis/tests/data/ch-city/ch-city.shp
new file mode 100644
index 0000000000000000000000000000000000000000..e430020de09066c7bff31e952196cf556b5156c7
GIT binary patch
literal 128
ycmZQzQ0HR64jf)EGcYg$<+!b%mR*{(-huB@%#<aU%?_aRv1>;W1sTJI%`5<|Ga3B=

literal 0
HcmV?d00001

diff --git a/django/contrib/gis/tests/data/ch-city/ch-city.shx b/django/contrib/gis/tests/data/ch-city/ch-city.shx
new file mode 100644
index 0000000000000000000000000000000000000000..a1487ad82911e9af570d3a04af7163aa8976d89b
GIT binary patch
literal 108
wcmZQzQ0HR64$NLKGcYg$<+!b%mR*{(-huB@%#<aU%?_aRv1>;WH3IUu00pxV@&Et;

literal 0
HcmV?d00001

diff --git a/django/contrib/gis/tests/layermap/tests.py b/django/contrib/gis/tests/layermap/tests.py
index 557bdf9117..a976954d25 100644
--- a/django/contrib/gis/tests/layermap/tests.py
+++ b/django/contrib/gis/tests/layermap/tests.py
@@ -1,4 +1,5 @@
-from __future__ import absolute_import
+# coding: utf-8
+from __future__ import absolute_import, unicode_literals
 
 import os
 from copy import copy
@@ -286,6 +287,13 @@ class LayerMapTest(TestCase):
         self.assertEqual(City.objects.count(), 3)
         self.assertEqual(City.objects.all().order_by('name_txt')[0].name_txt, "Houston")
 
+    def test_encoded_name(self):
+        """ Test a layer containing utf-8-encoded name """
+        city_shp = os.path.join(shp_path, 'ch-city', 'ch-city.shp')
+        lm = LayerMapping(City, city_shp, city_mapping)
+        lm.save(silent=True, strict=True)
+        self.assertEqual(City.objects.count(), 1)
+        self.assertEqual(City.objects.all()[0].name, "Zürich")
 
 class OtherRouter(object):
     def db_for_read(self, model, **hints):
diff --git a/django/contrib/gis/utils/layermapping.py b/django/contrib/gis/utils/layermapping.py
index 9511815426..8a793b96c3 100644
--- a/django/contrib/gis/utils/layermapping.py
+++ b/django/contrib/gis/utils/layermapping.py
@@ -18,6 +18,8 @@ from django.contrib.gis.gdal.field import (
 from django.db import models, transaction
 from django.contrib.localflavor.us.models import USStateField
 from django.utils import six
+from django.utils.encoding import force_text
+
 
 # LayerMapping exceptions.
 class LayerMapError(Exception): pass
@@ -65,7 +67,7 @@ class LayerMapping(object):
                          }
 
     def __init__(self, model, data, mapping, layer=0,
-                 source_srs=None, encoding=None,
+                 source_srs=None, encoding='utf-8',
                  transaction_mode='commit_on_success',
                  transform=True, unique=None, using=None):
         """
@@ -76,7 +78,7 @@ class LayerMapping(object):
         """
         # Getting the DataSource and the associated Layer.
         if isinstance(data, six.string_types):
-            self.ds = DataSource(data)
+            self.ds = DataSource(data, encoding=encoding)
         else:
             self.ds = data
         self.layer = self.ds[layer]
@@ -330,7 +332,7 @@ class LayerMapping(object):
             if self.encoding:
                 # The encoding for OGR data sources may be specified here
                 # (e.g., 'cp437' for Census Bureau boundary files).
-                val = six.text_type(ogr_field.value, self.encoding)
+                val = force_text(ogr_field.value, self.encoding)
             else:
                 val = ogr_field.value
                 if model_field.max_length and len(val) > model_field.max_length: