mirror of
https://github.com/django/django.git
synced 2024-12-22 09:05:43 +00:00
Fixed #29522 -- Refactored the Deserializer functions to classes.
Co-authored-by: Emad Mokhtar <emad.mokhtar@veneficus.nl>
This commit is contained in:
parent
a060a22ee2
commit
ee5147cfd7
2
AUTHORS
2
AUTHORS
@ -68,7 +68,7 @@ answer newbie questions, and generally made Django that much better:
|
||||
Aljaž Košir <aljazkosir5@gmail.com>
|
||||
Aljosa Mohorovic <aljosa.mohorovic@gmail.com>
|
||||
Alokik Vijay <alokik.roe@gmail.com>
|
||||
Amir Karimi <amk9978@gmail.com>
|
||||
Amir Karimi <https://github.com/amk9978>
|
||||
Amit Chakradeo <https://amit.chakradeo.net/>
|
||||
Amit Ramon <amit.ramon@gmail.com>
|
||||
Amit Upadhyay <http://www.amitu.com/blog/>
|
||||
|
@ -59,19 +59,27 @@ class Serializer(PythonSerializer):
|
||||
return super(PythonSerializer, self).getvalue()
|
||||
|
||||
|
||||
def Deserializer(stream_or_string, **options):
|
||||
class Deserializer(PythonDeserializer):
|
||||
"""Deserialize a stream or string of JSON data."""
|
||||
if not isinstance(stream_or_string, (bytes, str)):
|
||||
stream_or_string = stream_or_string.read()
|
||||
if isinstance(stream_or_string, bytes):
|
||||
stream_or_string = stream_or_string.decode()
|
||||
try:
|
||||
objects = json.loads(stream_or_string)
|
||||
yield from PythonDeserializer(objects, **options)
|
||||
except (GeneratorExit, DeserializationError):
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise DeserializationError() from exc
|
||||
|
||||
def __init__(self, stream_or_string, **options):
|
||||
if not isinstance(stream_or_string, (bytes, str)):
|
||||
stream_or_string = stream_or_string.read()
|
||||
if isinstance(stream_or_string, bytes):
|
||||
stream_or_string = stream_or_string.decode()
|
||||
try:
|
||||
objects = json.loads(stream_or_string)
|
||||
except Exception as exc:
|
||||
raise DeserializationError() from exc
|
||||
super().__init__(objects, **options)
|
||||
|
||||
def _handle_object(self, obj):
|
||||
try:
|
||||
yield from super()._handle_object(obj)
|
||||
except (GeneratorExit, DeserializationError):
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise DeserializationError(f"Error deserializing object: {exc}") from exc
|
||||
|
||||
|
||||
class DjangoJSONEncoder(json.JSONEncoder):
|
||||
|
@ -39,19 +39,30 @@ class Serializer(PythonSerializer):
|
||||
return super(PythonSerializer, self).getvalue()
|
||||
|
||||
|
||||
def Deserializer(stream_or_string, **options):
|
||||
class Deserializer(PythonDeserializer):
|
||||
"""Deserialize a stream or string of JSON data."""
|
||||
if isinstance(stream_or_string, bytes):
|
||||
stream_or_string = stream_or_string.decode()
|
||||
if isinstance(stream_or_string, (bytes, str)):
|
||||
stream_or_string = stream_or_string.split("\n")
|
||||
|
||||
for line in stream_or_string:
|
||||
if not line.strip():
|
||||
continue
|
||||
def __init__(self, stream_or_string, **options):
|
||||
if isinstance(stream_or_string, bytes):
|
||||
stream_or_string = stream_or_string.decode()
|
||||
if isinstance(stream_or_string, str):
|
||||
stream_or_string = stream_or_string.splitlines()
|
||||
super().__init__(Deserializer._get_lines(stream_or_string), **options)
|
||||
|
||||
def _handle_object(self, obj):
|
||||
try:
|
||||
yield from PythonDeserializer([json.loads(line)], **options)
|
||||
yield from super()._handle_object(obj)
|
||||
except (GeneratorExit, DeserializationError):
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise DeserializationError() from exc
|
||||
raise DeserializationError(f"Error deserializing object: {exc}") from exc
|
||||
|
||||
@staticmethod
|
||||
def _get_lines(stream):
|
||||
for line in stream:
|
||||
if not line.strip():
|
||||
continue
|
||||
try:
|
||||
yield json.loads(line)
|
||||
except Exception as exc:
|
||||
raise DeserializationError() from exc
|
||||
|
@ -96,45 +96,60 @@ class Serializer(base.Serializer):
|
||||
return self.objects
|
||||
|
||||
|
||||
def Deserializer(
|
||||
object_list, *, using=DEFAULT_DB_ALIAS, ignorenonexistent=False, **options
|
||||
):
|
||||
class Deserializer(base.Deserializer):
|
||||
"""
|
||||
Deserialize simple Python objects back into Django ORM instances.
|
||||
|
||||
It's expected that you pass the Python objects themselves (instead of a
|
||||
stream or a string) to the constructor
|
||||
"""
|
||||
handle_forward_references = options.pop("handle_forward_references", False)
|
||||
field_names_cache = {} # Model: <list of field_names>
|
||||
|
||||
for d in object_list:
|
||||
# Look up the model and starting build a dict of data for it.
|
||||
try:
|
||||
Model = _get_model(d["model"])
|
||||
except base.DeserializationError:
|
||||
if ignorenonexistent:
|
||||
continue
|
||||
else:
|
||||
raise
|
||||
def __init__(
|
||||
self, object_list, *, using=DEFAULT_DB_ALIAS, ignorenonexistent=False, **options
|
||||
):
|
||||
super().__init__(object_list, **options)
|
||||
self.handle_forward_references = options.pop("handle_forward_references", False)
|
||||
self.using = using
|
||||
self.ignorenonexistent = ignorenonexistent
|
||||
self.field_names_cache = {} # Model: <list of field_names>
|
||||
self._iterator = None
|
||||
|
||||
def __iter__(self):
|
||||
for obj in self.stream:
|
||||
yield from self._handle_object(obj)
|
||||
|
||||
def __next__(self):
|
||||
if self._iterator is None:
|
||||
self._iterator = iter(self)
|
||||
return next(self._iterator)
|
||||
|
||||
def _handle_object(self, obj):
|
||||
data = {}
|
||||
if "pk" in d:
|
||||
try:
|
||||
data[Model._meta.pk.attname] = Model._meta.pk.to_python(d.get("pk"))
|
||||
except Exception as e:
|
||||
raise base.DeserializationError.WithData(
|
||||
e, d["model"], d.get("pk"), None
|
||||
)
|
||||
m2m_data = {}
|
||||
deferred_fields = {}
|
||||
|
||||
if Model not in field_names_cache:
|
||||
field_names_cache[Model] = {f.name for f in Model._meta.get_fields()}
|
||||
field_names = field_names_cache[Model]
|
||||
# Look up the model and starting build a dict of data for it.
|
||||
try:
|
||||
Model = self._get_model_from_node(obj["model"])
|
||||
except base.DeserializationError:
|
||||
if self.ignorenonexistent:
|
||||
return
|
||||
raise
|
||||
if "pk" in obj:
|
||||
try:
|
||||
data[Model._meta.pk.attname] = Model._meta.pk.to_python(obj.get("pk"))
|
||||
except Exception as e:
|
||||
raise base.DeserializationError.WithData(
|
||||
e, obj["model"], obj.get("pk"), None
|
||||
)
|
||||
|
||||
if Model not in self.field_names_cache:
|
||||
self.field_names_cache[Model] = {f.name for f in Model._meta.get_fields()}
|
||||
field_names = self.field_names_cache[Model]
|
||||
|
||||
# Handle each field
|
||||
for field_name, field_value in d["fields"].items():
|
||||
if ignorenonexistent and field_name not in field_names:
|
||||
for field_name, field_value in obj["fields"].items():
|
||||
if self.ignorenonexistent and field_name not in field_names:
|
||||
# skip fields no longer on model
|
||||
continue
|
||||
|
||||
@ -145,51 +160,59 @@ def Deserializer(
|
||||
field.remote_field, models.ManyToManyRel
|
||||
):
|
||||
try:
|
||||
values = base.deserialize_m2m_values(
|
||||
field, field_value, using, handle_forward_references
|
||||
)
|
||||
values = self._handle_m2m_field_node(field, field_value)
|
||||
if values == base.DEFER_FIELD:
|
||||
deferred_fields[field] = field_value
|
||||
else:
|
||||
m2m_data[field.name] = values
|
||||
except base.M2MDeserializationError as e:
|
||||
raise base.DeserializationError.WithData(
|
||||
e.original_exc, d["model"], d.get("pk"), e.pk
|
||||
e.original_exc, obj["model"], obj.get("pk"), e.pk
|
||||
)
|
||||
if values == base.DEFER_FIELD:
|
||||
deferred_fields[field] = field_value
|
||||
else:
|
||||
m2m_data[field.name] = values
|
||||
|
||||
# Handle FK fields
|
||||
elif field.remote_field and isinstance(
|
||||
field.remote_field, models.ManyToOneRel
|
||||
):
|
||||
try:
|
||||
value = base.deserialize_fk_value(
|
||||
field, field_value, using, handle_forward_references
|
||||
)
|
||||
value = self._handle_fk_field_node(field, field_value)
|
||||
if value == base.DEFER_FIELD:
|
||||
deferred_fields[field] = field_value
|
||||
else:
|
||||
data[field.attname] = value
|
||||
except Exception as e:
|
||||
raise base.DeserializationError.WithData(
|
||||
e, d["model"], d.get("pk"), field_value
|
||||
e, obj["model"], obj.get("pk"), field_value
|
||||
)
|
||||
if value == base.DEFER_FIELD:
|
||||
deferred_fields[field] = field_value
|
||||
else:
|
||||
data[field.attname] = value
|
||||
|
||||
# Handle all other fields
|
||||
else:
|
||||
try:
|
||||
data[field.name] = field.to_python(field_value)
|
||||
except Exception as e:
|
||||
raise base.DeserializationError.WithData(
|
||||
e, d["model"], d.get("pk"), field_value
|
||||
e, obj["model"], obj.get("pk"), field_value
|
||||
)
|
||||
|
||||
obj = base.build_instance(Model, data, using)
|
||||
yield base.DeserializedObject(obj, m2m_data, deferred_fields)
|
||||
model_instance = base.build_instance(Model, data, self.using)
|
||||
yield base.DeserializedObject(model_instance, m2m_data, deferred_fields)
|
||||
|
||||
|
||||
def _get_model(model_identifier):
|
||||
"""Look up a model from an "app_label.model_name" string."""
|
||||
try:
|
||||
return apps.get_model(model_identifier)
|
||||
except (LookupError, TypeError):
|
||||
raise base.DeserializationError(
|
||||
"Invalid model identifier: '%s'" % model_identifier
|
||||
def _handle_m2m_field_node(self, field, field_value):
|
||||
return base.deserialize_m2m_values(
|
||||
field, field_value, self.using, self.handle_forward_references
|
||||
)
|
||||
|
||||
def _handle_fk_field_node(self, field, field_value):
|
||||
return base.deserialize_fk_value(
|
||||
field, field_value, self.using, self.handle_forward_references
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _get_model_from_node(model_identifier):
|
||||
"""Look up a model from an "app_label.model_name" string."""
|
||||
try:
|
||||
return apps.get_model(model_identifier)
|
||||
except (LookupError, TypeError):
|
||||
raise base.DeserializationError(
|
||||
f"Invalid model identifier: {model_identifier}"
|
||||
)
|
||||
|
@ -6,7 +6,6 @@ Requires PyYaml (https://pyyaml.org/), but that's checked for in __init__.
|
||||
|
||||
import collections
|
||||
import decimal
|
||||
from io import StringIO
|
||||
|
||||
import yaml
|
||||
|
||||
@ -66,17 +65,23 @@ class Serializer(PythonSerializer):
|
||||
return super(PythonSerializer, self).getvalue()
|
||||
|
||||
|
||||
def Deserializer(stream_or_string, **options):
|
||||
class Deserializer(PythonDeserializer):
|
||||
"""Deserialize a stream or string of YAML data."""
|
||||
if isinstance(stream_or_string, bytes):
|
||||
stream_or_string = stream_or_string.decode()
|
||||
if isinstance(stream_or_string, str):
|
||||
stream = StringIO(stream_or_string)
|
||||
else:
|
||||
|
||||
def __init__(self, stream_or_string, **options):
|
||||
stream = stream_or_string
|
||||
try:
|
||||
yield from PythonDeserializer(yaml.load(stream, Loader=SafeLoader), **options)
|
||||
except (GeneratorExit, DeserializationError):
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise DeserializationError() from exc
|
||||
if isinstance(stream_or_string, bytes):
|
||||
stream = stream_or_string.decode()
|
||||
try:
|
||||
objects = yaml.load(stream, Loader=SafeLoader)
|
||||
except Exception as exc:
|
||||
raise DeserializationError() from exc
|
||||
super().__init__(objects, **options)
|
||||
|
||||
def _handle_object(self, obj):
|
||||
try:
|
||||
yield from super()._handle_object(obj)
|
||||
except (GeneratorExit, DeserializationError):
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise DeserializationError(f"Error deserializing object: {exc}") from exc
|
||||
|
@ -241,7 +241,9 @@ Security
|
||||
Serialization
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
* ...
|
||||
* Each serialization format now defines a ``Deserializer`` class, rather than a
|
||||
function, to improve extensibility when defining a
|
||||
:ref:`custom serialization format <custom-serialization-formats>`.
|
||||
|
||||
Signals
|
||||
~~~~~~~
|
||||
|
@ -347,6 +347,86 @@ again a mapping with the key being name of the field and the value the value:
|
||||
|
||||
Referential fields are again represented by the PK or sequence of PKs.
|
||||
|
||||
.. _custom-serialization-formats:
|
||||
|
||||
Custom serialization formats
|
||||
----------------------------
|
||||
|
||||
In addition to the default formats, you can create a custom serialization
|
||||
format.
|
||||
|
||||
For example, let’s consider a csv serializer and deserializer. First, define a
|
||||
``Serializer`` and a ``Deserializer`` class. These can override existing
|
||||
serialization format classes:
|
||||
|
||||
.. code-block:: python
|
||||
:caption: ``path/to/custom_csv_serializer.py``
|
||||
|
||||
import csv
|
||||
|
||||
from django.apps import apps
|
||||
from django.core import serializers
|
||||
from django.core.serializers.base import DeserializationError
|
||||
|
||||
|
||||
class Serializer(serializers.python.Serializer):
|
||||
def get_dump_object(self, obj):
|
||||
dumped_object = super().get_dump_object(obj)
|
||||
row = [dumped_object["model"], str(dumped_object["pk"])]
|
||||
row += [str(value) for value in dumped_object["fields"].values()]
|
||||
return ",".join(row), dumped_object["model"]
|
||||
|
||||
def end_object(self, obj):
|
||||
dumped_object_str, model = self.get_dump_object(obj)
|
||||
if self.first:
|
||||
fields = [field.name for field in apps.get_model(model)._meta.fields]
|
||||
header = ",".join(fields)
|
||||
self.stream.write(f"model,{header}\n")
|
||||
self.stream.write(f"{dumped_object_str}\n")
|
||||
|
||||
def getvalue(self):
|
||||
return super(serializers.python.Serializer, self).getvalue()
|
||||
|
||||
|
||||
class Deserializer(serializers.python.Deserializer):
|
||||
def __init__(self, stream_or_string, **options):
|
||||
if isinstance(stream_or_string, bytes):
|
||||
stream_or_string = stream_or_string.decode()
|
||||
if isinstance(stream_or_string, str):
|
||||
stream_or_string = stream_or_string.splitlines()
|
||||
try:
|
||||
objects = csv.DictReader(stream_or_string)
|
||||
except Exception as exc:
|
||||
raise DeserializationError() from exc
|
||||
super().__init__(objects, **options)
|
||||
|
||||
def _handle_object(self, obj):
|
||||
try:
|
||||
model_fields = apps.get_model(obj["model"])._meta.fields
|
||||
obj["fields"] = {
|
||||
field.name: obj[field.name]
|
||||
for field in model_fields
|
||||
if field.name in obj
|
||||
}
|
||||
yield from super()._handle_object(obj)
|
||||
except (GeneratorExit, DeserializationError):
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise DeserializationError(f"Error deserializing object: {exc}") from exc
|
||||
|
||||
Then add the module containing the serializer definitions to your
|
||||
:setting:`SERIALIZATION_MODULES` setting::
|
||||
|
||||
SERIALIZATION_MODULES = {
|
||||
"csv": "path.to.custom_csv_serializer",
|
||||
"json": "django.core.serializers.json",
|
||||
}
|
||||
|
||||
.. versionchanged:: 5.2
|
||||
|
||||
A ``Deserializer`` class definition was added to each of the provided
|
||||
serialization formats.
|
||||
|
||||
.. _topics-serialization-natural-keys:
|
||||
|
||||
Natural keys
|
||||
|
125
tests/serializers/test_deserialization.py
Normal file
125
tests/serializers/test_deserialization.py
Normal file
@ -0,0 +1,125 @@
|
||||
import json
|
||||
|
||||
from django.core.serializers.base import DeserializationError, DeserializedObject
|
||||
from django.core.serializers.json import Deserializer as JsonDeserializer
|
||||
from django.core.serializers.jsonl import Deserializer as JsonlDeserializer
|
||||
from django.core.serializers.python import Deserializer
|
||||
from django.core.serializers.pyyaml import Deserializer as YamlDeserializer
|
||||
from django.test import SimpleTestCase
|
||||
|
||||
from .models import Author
|
||||
|
||||
|
||||
class TestDeserializer(SimpleTestCase):
|
||||
def setUp(self):
|
||||
self.object_list = [
|
||||
{"pk": 1, "model": "serializers.author", "fields": {"name": "Jane"}},
|
||||
{"pk": 2, "model": "serializers.author", "fields": {"name": "Joe"}},
|
||||
]
|
||||
self.deserializer = Deserializer(self.object_list)
|
||||
self.jane = Author(name="Jane", pk=1)
|
||||
self.joe = Author(name="Joe", pk=2)
|
||||
|
||||
def test_deserialized_object_repr(self):
|
||||
deserial_obj = DeserializedObject(obj=self.jane)
|
||||
self.assertEqual(
|
||||
repr(deserial_obj), "<DeserializedObject: serializers.Author(pk=1)>"
|
||||
)
|
||||
|
||||
def test_next_functionality(self):
|
||||
first_item = next(self.deserializer)
|
||||
|
||||
self.assertEqual(first_item.object, self.jane)
|
||||
|
||||
second_item = next(self.deserializer)
|
||||
self.assertEqual(second_item.object, self.joe)
|
||||
|
||||
with self.assertRaises(StopIteration):
|
||||
next(self.deserializer)
|
||||
|
||||
def test_invalid_model_identifier(self):
|
||||
invalid_object_list = [
|
||||
{"pk": 1, "model": "serializers.author2", "fields": {"name": "Jane"}}
|
||||
]
|
||||
self.deserializer = Deserializer(invalid_object_list)
|
||||
with self.assertRaises(DeserializationError):
|
||||
next(self.deserializer)
|
||||
|
||||
deserializer = Deserializer(object_list=[])
|
||||
with self.assertRaises(StopIteration):
|
||||
next(deserializer)
|
||||
|
||||
def test_custom_deserializer(self):
|
||||
class CustomDeserializer(Deserializer):
|
||||
@staticmethod
|
||||
def _get_model_from_node(model_identifier):
|
||||
return Author
|
||||
|
||||
deserializer = CustomDeserializer(self.object_list)
|
||||
result = next(iter(deserializer))
|
||||
deserialized_object = result.object
|
||||
self.assertEqual(
|
||||
self.jane,
|
||||
deserialized_object,
|
||||
)
|
||||
|
||||
def test_empty_object_list(self):
|
||||
deserializer = Deserializer(object_list=[])
|
||||
with self.assertRaises(StopIteration):
|
||||
next(deserializer)
|
||||
|
||||
def test_json_bytes_input(self):
|
||||
test_string = json.dumps(self.object_list)
|
||||
stream = test_string.encode("utf-8")
|
||||
deserializer = JsonDeserializer(stream_or_string=stream)
|
||||
|
||||
first_item = next(deserializer)
|
||||
second_item = next(deserializer)
|
||||
|
||||
self.assertEqual(first_item.object, self.jane)
|
||||
self.assertEqual(second_item.object, self.joe)
|
||||
|
||||
def test_jsonl_bytes_input(self):
|
||||
test_string = """
|
||||
{"pk": 1, "model": "serializers.author", "fields": {"name": "Jane"}}
|
||||
{"pk": 2, "model": "serializers.author", "fields": {"name": "Joe"}}
|
||||
{"pk": 3, "model": "serializers.author", "fields": {"name": "John"}}
|
||||
{"pk": 4, "model": "serializers.author", "fields": {"name": "Smith"}}"""
|
||||
stream = test_string.encode("utf-8")
|
||||
deserializer = JsonlDeserializer(stream_or_string=stream)
|
||||
|
||||
first_item = next(deserializer)
|
||||
second_item = next(deserializer)
|
||||
|
||||
self.assertEqual(first_item.object, self.jane)
|
||||
self.assertEqual(second_item.object, self.joe)
|
||||
|
||||
def test_yaml_bytes_input(self):
|
||||
test_string = """- pk: 1
|
||||
model: serializers.author
|
||||
fields:
|
||||
name: Jane
|
||||
|
||||
- pk: 2
|
||||
model: serializers.author
|
||||
fields:
|
||||
name: Joe
|
||||
|
||||
- pk: 3
|
||||
model: serializers.author
|
||||
fields:
|
||||
name: John
|
||||
|
||||
- pk: 4
|
||||
model: serializers.author
|
||||
fields:
|
||||
name: Smith
|
||||
"""
|
||||
stream = test_string.encode("utf-8")
|
||||
deserializer = YamlDeserializer(stream_or_string=stream)
|
||||
|
||||
first_item = next(deserializer)
|
||||
second_item = next(deserializer)
|
||||
|
||||
self.assertEqual(first_item.object, self.jane)
|
||||
self.assertEqual(second_item.object, self.joe)
|
@ -1,13 +0,0 @@
|
||||
from django.core.serializers.base import DeserializedObject
|
||||
from django.test import SimpleTestCase
|
||||
|
||||
from .models import Author
|
||||
|
||||
|
||||
class TestDeserializedObjectTests(SimpleTestCase):
|
||||
def test_repr(self):
|
||||
author = Author(name="John", pk=1)
|
||||
deserial_obj = DeserializedObject(obj=author)
|
||||
self.assertEqual(
|
||||
repr(deserial_obj), "<DeserializedObject: serializers.Author(pk=1)>"
|
||||
)
|
Loading…
Reference in New Issue
Block a user