From 9d001fa7f957f686d61b82dbf0d60c34ba0dd092 Mon Sep 17 00:00:00 2001 From: Malcolm Tredinnick Date: Tue, 15 May 2007 16:14:55 +0000 Subject: [PATCH] unicode: Made the serializers unicode-aware. Refs #3878, #4227. git-svn-id: http://code.djangoproject.com/svn/django/branches/unicode@5248 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/core/serializers/__init__.py | 16 ++-- django/core/serializers/base.py | 5 +- django/core/serializers/python.py | 49 +++++------ django/core/serializers/pyyaml.py | 4 +- django/core/serializers/xml_serializer.py | 82 ++++++++++--------- .../serializers_regress/tests.py | 49 +++++------ 6 files changed, 103 insertions(+), 102 deletions(-) diff --git a/django/core/serializers/__init__.py b/django/core/serializers/__init__.py index 494393f3cf..049edf7521 100644 --- a/django/core/serializers/__init__.py +++ b/django/core/serializers/__init__.py @@ -6,7 +6,7 @@ Usage:: >>> from django.core import serializers >>> json = serializers.serialize("json", some_query_set) >>> objects = list(serializers.deserialize("json", json)) - + To add your own serializers, use the SERIALIZATION_MODULES setting:: SERIALIZATION_MODULES = { @@ -30,19 +30,19 @@ try: import yaml BUILTIN_SERIALIZERS["yaml"] = "django.core.serializers.pyyaml" except ImportError: - pass + pass _serializers = {} - + def register_serializer(format, serializer_module): """Register a new serializer by passing in a module name.""" module = __import__(serializer_module, {}, {}, ['']) _serializers[format] = module - + def unregister_serializer(format): """Unregister a given serializer""" del _serializers[format] - + def get_serializer(format): if not _serializers: _load_serializers() @@ -52,12 +52,12 @@ def get_serializer_formats(): if not _serializers: _load_serializers() return _serializers.keys() - + def get_deserializer(format): if not _serializers: _load_serializers() return _serializers[format].Deserializer - + def serialize(format, queryset, **options): """ Serialize a queryset (or any iterator that returns database objects) using @@ -87,4 +87,4 @@ def _load_serializers(): register_serializer(format, BUILTIN_SERIALIZERS[format]) if hasattr(settings, "SERIALIZATION_MODULES"): for format in settings.SERIALIZATION_MODULES: - register_serializer(format, settings.SERIALIZATION_MODULES[format]) \ No newline at end of file + register_serializer(format, settings.SERIALIZATION_MODULES[format]) diff --git a/django/core/serializers/base.py b/django/core/serializers/base.py index 86d0037c17..1ef7bee472 100644 --- a/django/core/serializers/base.py +++ b/django/core/serializers/base.py @@ -7,6 +7,7 @@ try: except ImportError: from StringIO import StringIO from django.db import models +from django.utils.encoding import smart_str, smart_unicode class SerializationError(Exception): """Something bad happened during serialization.""" @@ -59,7 +60,7 @@ class Serializer(object): value = getattr(obj, "get_%s_url" % field.name, lambda: None)() else: value = field.flatten_data(follow=None, obj=obj).get(field.name, "") - return str(value) + return smart_unicode(value) def start_serialization(self): """ @@ -154,7 +155,7 @@ class DeserializedObject(object): self.m2m_data = m2m_data def __repr__(self): - return "" % str(self.object) + return "" % smart_str(self.object) def save(self, save_m2m=True): self.object.save() diff --git a/django/core/serializers/python.py b/django/core/serializers/python.py index 66dbbff335..2eb2beeb48 100644 --- a/django/core/serializers/python.py +++ b/django/core/serializers/python.py @@ -7,49 +7,50 @@ other serializers. from django.conf import settings from django.core.serializers import base from django.db import models +from django.utils.encoding import smart_unicode class Serializer(base.Serializer): """ Serializes a QuerySet to basic Python objects. """ - + def start_serialization(self): self._current = None self.objects = [] - + def end_serialization(self): pass - + def start_object(self, obj): self._current = {} - + def end_object(self, obj): self.objects.append({ - "model" : str(obj._meta), - "pk" : str(obj._get_pk_val()), + "model" : smart_unicode(obj._meta), + "pk" : smart_unicode(obj._get_pk_val()), "fields" : self._current }) self._current = None - + def handle_field(self, obj, field): self._current[field.name] = getattr(obj, field.name) - + def handle_fk_field(self, obj, field): related = getattr(obj, field.name) if related is not None: related = getattr(related, field.rel.field_name) self._current[field.name] = related - + def handle_m2m_field(self, obj, field): self._current[field.name] = [related._get_pk_val() for related in getattr(obj, field.name).iterator()] - + def getvalue(self): return self.objects def Deserializer(object_list, **options): """ Deserialize simple Python objects back into Django ORM instances. - + It's expected that you pass the Python objects themselves (instead of a stream or a string) to the constructor """ @@ -59,36 +60,30 @@ def Deserializer(object_list, **options): Model = _get_model(d["model"]) data = {Model._meta.pk.attname : Model._meta.pk.to_python(d["pk"])} m2m_data = {} - + # Handle each field for (field_name, field_value) in d["fields"].iteritems(): - if isinstance(field_value, unicode): - field_value = field_value.encode(options.get("encoding", settings.DEFAULT_CHARSET)) - + if isinstance(field_value, str): + field_value = smart_unicode(field_value, options.get("encoding", settings.DEFAULT_CHARSET)) + field = Model._meta.get_field(field_name) - + # Handle M2M relations if field.rel and isinstance(field.rel, models.ManyToManyRel): - pks = [] m2m_convert = field.rel.to._meta.pk.to_python - for pk in field_value: - if isinstance(pk, unicode): - pks.append(m2m_convert(pk.encode(options.get("encoding", settings.DEFAULT_CHARSET)))) - else: - pks.append(m2m_convert(pk)) - m2m_data[field.name] = pks - + m2m_data[field.name] = [m2m_convert(smart_unicode(pk)) for pk in field_value] + # Handle FK fields elif field.rel and isinstance(field.rel, models.ManyToOneRel): if field_value: data[field.attname] = field.rel.to._meta.get_field(field.rel.field_name).to_python(field_value) else: data[field.attname] = None - + # Handle all other fields else: data[field.name] = field.to_python(field_value) - + yield base.DeserializedObject(Model(**data), m2m_data) def _get_model(model_identifier): @@ -100,5 +95,5 @@ def _get_model(model_identifier): except TypeError: Model = None if Model is None: - raise base.DeserializationError("Invalid model identifier: '%s'" % model_identifier) + raise base.DeserializationError(u"Invalid model identifier: '%s'" % model_identifier) return Model diff --git a/django/core/serializers/pyyaml.py b/django/core/serializers/pyyaml.py index fa3dec984e..e239681912 100644 --- a/django/core/serializers/pyyaml.py +++ b/django/core/serializers/pyyaml.py @@ -19,7 +19,7 @@ class Serializer(PythonSerializer): """ def end_serialization(self): yaml.dump(self.objects, self.stream, **self.options) - + def getvalue(self): return self.stream.getvalue() @@ -33,4 +33,4 @@ def Deserializer(stream_or_string, **options): stream = stream_or_string for obj in PythonDeserializer(yaml.load(stream)): yield obj - + diff --git a/django/core/serializers/xml_serializer.py b/django/core/serializers/xml_serializer.py index 633001f5f0..35f18a8589 100644 --- a/django/core/serializers/xml_serializer.py +++ b/django/core/serializers/xml_serializer.py @@ -6,13 +6,14 @@ from django.conf import settings from django.core.serializers import base from django.db import models from django.utils.xmlutils import SimplerXMLGenerator +from django.utils.encoding import smart_unicode from xml.dom import pulldom class Serializer(base.Serializer): """ Serializes a QuerySet to XML. """ - + def indent(self, level): if self.options.get('indent', None) is not None: self.xml.ignorableWhitespace('\n' + ' ' * self.options.get('indent', None) * level) @@ -24,7 +25,7 @@ class Serializer(base.Serializer): self.xml = SimplerXMLGenerator(self.stream, self.options.get("encoding", settings.DEFAULT_CHARSET)) self.xml.startDocument() self.xml.startElement("django-objects", {"version" : "1.0"}) - + def end_serialization(self): """ End serialization -- end the document. @@ -32,27 +33,27 @@ class Serializer(base.Serializer): self.indent(0) self.xml.endElement("django-objects") self.xml.endDocument() - + def start_object(self, obj): """ Called as each object is handled. """ if not hasattr(obj, "_meta"): raise base.SerializationError("Non-model object (%s) encountered during serialization" % type(obj)) - + self.indent(1) self.xml.startElement("object", { - "pk" : str(obj._get_pk_val()), - "model" : str(obj._meta), + "pk" : smart_unicode(obj._get_pk_val()), + "model" : smart_unicode(obj._meta), }) - + def end_object(self, obj): """ Called after handling all fields for an object. """ self.indent(1) self.xml.endElement("object") - + def handle_field(self, obj, field): """ Called to handle each field on an object (except for ForeignKeys and @@ -63,17 +64,17 @@ class Serializer(base.Serializer): "name" : field.name, "type" : field.get_internal_type() }) - + # Get a "string version" of the object's data (this is handled by the - # serializer base class). + # serializer base class). if getattr(obj, field.name) is not None: value = self.get_string_value(obj, field) - self.xml.characters(str(value)) + self.xml.characters(smart_unicode(value)) else: self.xml.addQuickElement("None") self.xml.endElement("field") - + def handle_fk_field(self, obj, field): """ Called to handle a ForeignKey (we need to treat them slightly @@ -82,11 +83,11 @@ class Serializer(base.Serializer): self._start_relational_field(field) related = getattr(obj, field.name) if related is not None: - self.xml.characters(str(getattr(related, field.rel.field_name))) + self.xml.characters(smart_unicode(getattr(related, field.rel.field_name))) else: self.xml.addQuickElement("None") self.xml.endElement("field") - + def handle_m2m_field(self, obj, field): """ Called to handle a ManyToManyField. Related objects are only @@ -95,9 +96,9 @@ class Serializer(base.Serializer): """ self._start_relational_field(field) for relobj in getattr(obj, field.name).iterator(): - self.xml.addQuickElement("object", attrs={"pk" : str(relobj._get_pk_val())}) + self.xml.addQuickElement("object", attrs={"pk" : smart_unicode(relobj._get_pk_val())}) self.xml.endElement("field") - + def _start_relational_field(self, field): """ Helper to output the element for relational fields @@ -106,33 +107,33 @@ class Serializer(base.Serializer): self.xml.startElement("field", { "name" : field.name, "rel" : field.rel.__class__.__name__, - "to" : str(field.rel.to._meta), + "to" : smart_unicode(field.rel.to._meta), }) - + class Deserializer(base.Deserializer): """ Deserialize XML. """ - + def __init__(self, stream_or_string, **options): super(Deserializer, self).__init__(stream_or_string, **options) - self.encoding = self.options.get("encoding", settings.DEFAULT_CHARSET) - self.event_stream = pulldom.parse(self.stream) - + self.event_stream = pulldom.parse(self.stream) + def next(self): for event, node in self.event_stream: if event == "START_ELEMENT" and node.nodeName == "object": self.event_stream.expandNode(node) return self._handle_object(node) raise StopIteration - + def _handle_object(self, node): """ Convert an node to a DeserializedObject. """ - # Look up the model using the model loading mechanism. If this fails, bail. + # Look up the model using the model loading mechanism. If this fails, + # bail. Model = self._get_model_from_node(node, "model") - + # Start building a data dictionary from the object. If the node is # missing the pk attribute, bail. pk = node.getAttribute("pk") @@ -140,11 +141,11 @@ class Deserializer(base.Deserializer): raise base.DeserializationError(" node is missing the 'pk' attribute") data = {Model._meta.pk.attname : Model._meta.pk.to_python(pk)} - + # Also start building a dict of m2m data (this is saved as # {m2m_accessor_attribute : [list_of_related_objects]}) m2m_data = {} - + # Deseralize each field. for field_node in node.getElementsByTagName("field"): # If the field is missing the name attribute, bail (are you @@ -152,12 +153,12 @@ class Deserializer(base.Deserializer): field_name = field_node.getAttribute("name") if not field_name: raise base.DeserializationError(" node is missing the 'name' attribute") - + # Get the field from the Model. This will raise a # FieldDoesNotExist if, well, the field doesn't exist, which will # be propagated correctly. field = Model._meta.get_field(field_name) - + # As is usually the case, relation fields get the special treatment. if field.rel and isinstance(field.rel, models.ManyToManyRel): m2m_data[field.name] = self._handle_m2m_field_node(field_node, field) @@ -167,12 +168,12 @@ class Deserializer(base.Deserializer): if len(field_node.childNodes) == 1 and field_node.childNodes[0].nodeName == 'None': value = None else: - value = field.to_python(getInnerText(field_node).strip().encode(self.encoding)) + value = field.to_python(getInnerText(field_node).strip()) data[field.name] = value - + # Return a DeserializedObject so that the m2m data has a place to live. return base.DeserializedObject(Model(**data), m2m_data) - + def _handle_fk_field_node(self, node, field): """ Handle a node for a ForeignKey @@ -182,16 +183,16 @@ class Deserializer(base.Deserializer): return None else: return field.rel.to._meta.get_field(field.rel.field_name).to_python( - getInnerText(node).strip().encode(self.encoding)) - + getInnerText(node).strip()) + def _handle_m2m_field_node(self, node, field): """ - Handle a node for a ManyToManyField + Handle a node for a ManyToManyField. """ return [field.rel.to._meta.pk.to_python( - c.getAttribute("pk").encode(self.encoding)) + c.getAttribute("pk")) for c in node.getElementsByTagName("object")] - + def _get_model_from_node(self, node, attr): """ Helper to look up a model from a or a node has invalid model identifier: '%s'" % \ (node.nodeName, model_identifier)) return Model - - + + def getInnerText(node): """ Get all the inner text of a DOM node (recursively). @@ -226,4 +227,5 @@ def getInnerText(node): inner_text.extend(getInnerText(child)) else: pass - return "".join(inner_text) \ No newline at end of file + return u"".join(inner_text) + diff --git a/tests/regressiontests/serializers_regress/tests.py b/tests/regressiontests/serializers_regress/tests.py index 317739dac4..9da3467702 100644 --- a/tests/regressiontests/serializers_regress/tests.py +++ b/tests/regressiontests/serializers_regress/tests.py @@ -2,7 +2,7 @@ A test spanning all the capabilities of all the serializers. This class defines sample data and a dynamically generated -test case that is capable of testing the capabilities of +test case that is capable of testing the capabilities of the serializers. This includes all valid data values, plus forward, backwards and self references. """ @@ -22,7 +22,7 @@ from models import * def data_create(pk, klass, data): instance = klass(id=pk) instance.data = data - instance.save() + instance.save() return instance def generic_create(pk, klass, data): @@ -32,13 +32,13 @@ def generic_create(pk, klass, data): for tag in data[1:]: instance.tags.create(data=tag) return instance - + def fk_create(pk, klass, data): instance = klass(id=pk) setattr(instance, 'data_id', data) instance.save() return instance - + def m2m_create(pk, klass, data): instance = klass(id=pk) instance.save() @@ -61,14 +61,14 @@ def pk_create(pk, klass, data): # test data objects of various kinds def data_compare(testcase, pk, klass, data): instance = klass.objects.get(id=pk) - testcase.assertEqual(data, instance.data, + testcase.assertEqual(data, instance.data, "Objects with PK=%d not equal; expected '%s' (%s), got '%s' (%s)" % (pk,data, type(data), instance.data, type(instance.data))) def generic_compare(testcase, pk, klass, data): instance = klass.objects.get(id=pk) testcase.assertEqual(data[0], instance.data) testcase.assertEqual(data[1:], [t.data for t in instance.tags.all()]) - + def fk_compare(testcase, pk, klass, data): instance = klass.objects.get(id=pk) testcase.assertEqual(data, instance.data_id) @@ -84,7 +84,7 @@ def o2o_compare(testcase, pk, klass, data): def pk_compare(testcase, pk, klass, data): instance = klass.objects.get(data=data) testcase.assertEqual(data, instance.data) - + # Define some data types. Each data type is # actually a pair of functions; one to create # and one to compare objects of that type @@ -96,7 +96,7 @@ o2o_obj = (o2o_create, o2o_compare) pk_obj = (pk_create, pk_compare) test_data = [ - # Format: (data type, PK value, Model Class, data) + # Format: (data type, PK value, Model Class, data) (data_obj, 1, BooleanData, True), (data_obj, 2, BooleanData, False), (data_obj, 10, CharData, "Test Char Data"), @@ -105,6 +105,9 @@ test_data = [ (data_obj, 13, CharData, "null"), (data_obj, 14, CharData, "NULL"), (data_obj, 15, CharData, None), + # (We use something that will fit into a latin1 database encoding here, + # because that is still the default used on many system setups.) + (data_obj, 16, CharData, u'\xa5'), (data_obj, 20, DateData, datetime.date(2006,6,16)), (data_obj, 21, DateData, None), (data_obj, 30, DateTimeData, datetime.datetime(2006,6,16,10,42,37)), @@ -137,10 +140,10 @@ test_data = [ (data_obj, 131, PositiveSmallIntegerData, None), (data_obj, 140, SlugData, "this-is-a-slug"), (data_obj, 141, SlugData, None), - (data_obj, 150, SmallData, 12), - (data_obj, 151, SmallData, -12), - (data_obj, 152, SmallData, 0), - (data_obj, 153, SmallData, None), + (data_obj, 150, SmallData, 12), + (data_obj, 151, SmallData, -12), + (data_obj, 152, SmallData, 0), + (data_obj, 153, SmallData, None), (data_obj, 160, TextData, """This is a long piece of text. It contains line breaks. Several of them. @@ -188,7 +191,7 @@ The end."""), (fk_obj, 450, FKDataToField, "UAnchor 1"), (fk_obj, 451, FKDataToField, "UAnchor 2"), (fk_obj, 452, FKDataToField, None), - + (data_obj, 500, Anchor, "Anchor 3"), (data_obj, 501, Anchor, "Anchor 4"), (data_obj, 502, UniqueAnchor, "UAnchor 2"), @@ -215,9 +218,9 @@ The end."""), (pk_obj, 720, PositiveIntegerPKData, 123456789), (pk_obj, 730, PositiveSmallIntegerPKData, 12), (pk_obj, 740, SlugPKData, "this-is-a-slug"), - (pk_obj, 750, SmallPKData, 12), - (pk_obj, 751, SmallPKData, -12), - (pk_obj, 752, SmallPKData, 0), + (pk_obj, 750, SmallPKData, 12), + (pk_obj, 751, SmallPKData, -12), + (pk_obj, 752, SmallPKData, 0), # (pk_obj, 760, TextPKData, """This is a long piece of text. # It contains line breaks. # Several of them. @@ -226,7 +229,7 @@ The end."""), (pk_obj, 780, USStatePKData, "MA"), # (pk_obj, 790, XMLPKData, ""), ] - + # Dynamically create serializer tests to ensure that all # registered serializers are automatically tested. class SerializerTests(unittest.TestCase): @@ -234,7 +237,7 @@ class SerializerTests(unittest.TestCase): def serializerTest(format, self): # Clear the database first - management.flush(verbosity=0, interactive=False) + management.flush(verbosity=0, interactive=False) # Create all the objects defined in the test data objects = [] @@ -245,14 +248,14 @@ def serializerTest(format, self): transaction.commit() transaction.leave_transaction_management() - # Add the generic tagged objects to the object list + # Add the generic tagged objects to the object list objects.extend(Tag.objects.all()) - + # Serialize the test database serialized_data = serializers.serialize(format, objects, indent=2) # Flush the database and recreate from the serialized data - management.flush(verbosity=0, interactive=False) + management.flush(verbosity=0, interactive=False) transaction.enter_transaction_management() transaction.managed(True) for obj in serializers.deserialize(format, serialized_data): @@ -260,10 +263,10 @@ def serializerTest(format, self): transaction.commit() transaction.leave_transaction_management() - # Assert that the deserialized data is the same + # Assert that the deserialized data is the same # as the original source for (func, pk, klass, datum) in test_data: func[1](self, pk, klass, datum) - + for format in serializers.get_serializer_formats(): setattr(SerializerTests, 'test_'+format+'_serializer', curry(serializerTest, format))