1
0
mirror of https://github.com/django/django.git synced 2025-10-24 06:06:09 +00:00

Added initial cut at serialization framework, along with some basic tests and a stab at some docs. This is all a bit rough right now, so expect some bumps.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@3225 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jacob Kaplan-Moss
2006-06-28 16:00:37 +00:00
parent 414bc24e81
commit 4ea7a11659
6 changed files with 632 additions and 0 deletions

View File

@@ -0,0 +1,76 @@
"""
Interfaces for serializing Django objects.
Usage::
>>> from django.core import serializers
>>> json = serializers.serialize("json", some_query_set)
>>> objects = list(serializers.deserialize("json", json))
To add your own serializers, use the SERIALIZATION_MODULES setting::
SERIALIZATION_MODULES = {
"csv" : "path.to.csv.serializer",
"txt" : "path.to.txt.serializer",
}
"""
from django.conf import settings
# Built-in serializers
BUILTIN_SERIALIZERS = {
"xml" : "django.core.serializers.xml_serializer",
}
_serializers = {}
def register_serializer(format, serializer_module):
    """
    Import ``serializer_module`` (a dotted module path) and register the
    imported module as the serializer for ``format``.

    An existing registration for the same format is replaced.
    """
    # The non-empty fromlist makes __import__ return the leaf module instead
    # of the top-level package.
    _serializers[format] = __import__(serializer_module, '', '', [''])
def unregister_serializer(format):
    """
    Remove the serializer registered for ``format``.

    Raises ``KeyError`` if no serializer is registered under that name.
    """
    _serializers.pop(format)
def get_serializer(format):
    """
    Return the ``Serializer`` class of the module registered for ``format``.

    The registry is populated first if it is still empty. Raises ``KeyError``
    for a format that is not registered.
    """
    if not _serializers:
        _load_serializers()
    serializer_module = _serializers[format]
    return serializer_module.Serializer
def get_deserializer(format):
    """
    Return the ``Deserializer`` class of the module registered for ``format``.

    The registry is populated first if it is still empty. Raises ``KeyError``
    for a format that is not registered.
    """
    if not _serializers:
        _load_serializers()
    serializer_module = _serializers[format]
    return serializer_module.Deserializer
def serialize(format, queryset, **options):
    """
    Serialize a queryset (or any iterator that returns database objects)
    with the serializer registered for ``format``.

    Keyword options are handed to the serializer's ``serialize()`` method
    unchanged. Returns the serializer's ``getvalue()`` result.
    """
    serializer_class = get_serializer(format)
    serializer = serializer_class()
    serializer.serialize(queryset, **options)
    return serializer.getvalue()
def deserialize(format, stream_or_string, **options):
    """
    Deserialize a stream or a string using the deserializer registered for
    ``format``.

    Any keyword options are passed through to the deserializer's constructor
    (the XML deserializer, for example, reads an ``encoding`` option).

    Returns the deserializer instance, which is an iterator. The built-in
    XML deserializer yields ``DeserializedObject`` instances, each wrapping
    an instantiated -- but *unsaved* -- object together with a dictionary of
    ``{m2m_field_name : list_of_related_objects}``.

    Raises ``KeyError`` if ``format`` is not registered.
    """
    deserializer_class = get_deserializer(format)
    return deserializer_class(stream_or_string, **options)
def _load_serializers():
    """
    Register the built-in serializers, then any listed in the
    ``SERIALIZATION_MODULES`` setting.

    This runs lazily (on first lookup) so that user code has a chance to
    (e.g.) set up custom settings without needing to be careful of import
    order. Settings-defined entries are registered last, so they replace a
    built-in serializer of the same format name.
    """
    for fmt, module_path in BUILTIN_SERIALIZERS.items():
        register_serializer(fmt, module_path)
    if hasattr(settings, "SERIALIZATION_MODULES"):
        custom_modules = settings.SERIALIZATION_MODULES
        for fmt in custom_modules:
            register_serializer(fmt, custom_modules[fmt])

View File

@@ -0,0 +1,159 @@
"""
Module for abstract serializer/unserializer base classes.
"""
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from django.db import models
class SerializationError(Exception):
    """Raised when something goes wrong while serializing objects."""
class DeserializationError(Exception):
    """Raised when something goes wrong while deserializing data."""
class Serializer(object):
    """
    Abstract serializer base class.

    ``serialize()`` walks the queryset and calls the hook methods below for
    every object and field; concrete serializers override the hooks and
    write their output to ``self.stream``.
    """

    def serialize(self, queryset, **options):
        """
        Serialize a queryset (or any iterable of model instances).

        All keyword options are stored on ``self.options`` for subclasses to
        consult. The ``stream`` option names the object output is written
        to; when it is omitted a new in-memory ``StringIO`` is used.

        Returns the result of ``getvalue()``.
        """
        self.options = options
        self.stream = options.get("stream", StringIO())
        self.start_serialization()
        for obj in queryset:
            self.start_object(obj)
            # Plain fields and foreign keys both live in _meta.fields; the
            # presence of ``rel`` tells them apart.
            for field in obj._meta.fields:
                if field.rel is None:
                    self.handle_field(obj, field)
                else:
                    self.handle_fk_field(obj, field)
            for field in obj._meta.many_to_many:
                self.handle_m2m_field(obj, field)
            self.end_object(obj)
        self.end_serialization()
        return self.getvalue()

    def get_string_value(self, obj, field):
        """
        Convert a field's value to a string.

        Returns ``None`` (rather than the text ``"None"``) when the value is
        ``None``, so that callers can represent nulls explicitly.
        """
        if isinstance(field, models.DateTimeField):
            value = getattr(obj, field.name)
            # A null datetime has no strftime(); leave it as None.
            if value is not None:
                value = value.strftime("%Y-%m-%d %H:%M:%S")
        elif isinstance(field, models.FileField):
            # Falls back to None when the object has no get_<name>_url().
            value = getattr(obj, "get_%s_url" % field.name, lambda: None)()
        else:
            value = field.flatten_data(follow=None, obj=obj).get(field.name, "")
        if value is None:
            return None
        return str(value)

    def start_serialization(self):
        """
        Called when serializing of the queryset starts.
        """
        raise NotImplementedError

    def end_serialization(self):
        """
        Called when serializing of the queryset ends.
        """
        pass

    def start_object(self, obj):
        """
        Called when serializing of an object starts.
        """
        raise NotImplementedError

    def end_object(self, obj):
        """
        Called when serializing of an object ends.
        """
        pass

    def handle_field(self, obj, field):
        """
        Called to handle each individual (non-relational) field on an object.
        """
        raise NotImplementedError

    def handle_fk_field(self, obj, field):
        """
        Called to handle a ForeignKey field.
        """
        raise NotImplementedError

    def handle_m2m_field(self, obj, field):
        """
        Called to handle a ManyToManyField.
        """
        raise NotImplementedError

    def getvalue(self):
        """
        Return the fully serialized queryset.

        Returns ``None`` when the output stream has no ``getvalue()`` method
        (for example a caller-supplied file opened for writing); in that
        case the data has already been written to the stream.
        """
        getter = getattr(self.stream, "getvalue", None)
        if getter is None:
            return None
        return getter()
class Deserializer(object):
    """
    Abstract base deserializer class.

    Instances are their own iterators; subclasses implement ``next()`` to
    produce one deserialized item per call.
    """

    def __init__(self, stream_or_string, **options):
        """
        Set up the deserializer from a stream or a string.

        Keyword options are stored on ``self.options``. The input is exposed
        as ``self.stream``; a string is wrapped in a ``StringIO`` so that
        subclasses can always read from a stream.
        """
        self.options = options
        if not isinstance(stream_or_string, basestring):
            self.stream = stream_or_string
        else:
            self.stream = StringIO(stream_or_string)
        # hack to make sure that the models have all been loaded before
        # deserialization starts (otherwise subclass calls to get_model()
        # and friends might fail...)
        models.get_apps()

    def __iter__(self):
        """Return the deserializer itself as the iterator."""
        return self

    def next(self):
        """Iteration interface -- return the next item in the stream."""
        raise NotImplementedError
class DeserializedObject(object):
    """
    A deserialized model.

    A small container that keeps the not-yet-saved model instance
    (``self.object``) together with the many-to-many data that was stored
    alongside it (``self.m2m_data``).

    Call ``save()`` to save the object (with the many-to-many data) to the
    database; call ``save(save_m2m=False)`` to save just the object fields
    (and not touch the many-to-many stuff.)
    """

    def __init__(self, obj, m2m_data=None):
        self.object = obj
        self.m2m_data = m2m_data

    def __repr__(self):
        described = str(self.object)
        return "<DeserializedObject: %s>" % described

    def save(self, save_m2m=True):
        """
        Save the wrapped object and, unless ``save_m2m`` is false, assign
        each stored many-to-many list to its accessor on the object.
        """
        self.object.save()
        if save_m2m and self.m2m_data:
            for attr_name, related_objects in self.m2m_data.items():
                setattr(self.object, attr_name, related_objects)

        # Drop the m2m data so that a second (possibly accidental) call to
        # save() cannot apply it twice.
        self.m2m_data = None

View File

@@ -0,0 +1,218 @@
"""
XML serializer.
"""
from xml.dom import pulldom
from django.utils.xmlutils import SimplerXMLGenerator
from django.core.serializers import base
from django.db import models
class Serializer(base.Serializer):
    """
    Serializes a QuerySet to XML.

    The output is a ``<django-objects>`` root element containing one
    ``<object>`` element per model instance, each holding one ``<field>``
    element per field.
    """

    def start_serialization(self):
        """
        Start serialization -- open the XML document and the root element.

        The ``encoding`` option selects the output encoding (default
        ``utf-8``).
        """
        self.xml = SimplerXMLGenerator(self.stream, self.options.get("encoding", "utf-8"))
        self.xml.startDocument()
        self.xml.startElement("django-objects", {"version": "1.0"})

    def end_serialization(self):
        """
        End serialization -- end the document.
        """
        self.xml.endElement("django-objects")
        self.xml.endDocument()

    def start_object(self, obj):
        """
        Called as each object is handled; opens its ``<object>`` element.

        Raises ``base.SerializationError`` if ``obj`` has no ``_meta``
        attribute (i.e. does not look like a model instance).
        """
        if not hasattr(obj, "_meta"):
            raise base.SerializationError("Non-model object (%s) encountered during serialization" % type(obj))

        self.xml.startElement("object", {
            "pk": str(obj._get_pk_val()),
            "model": str(obj._meta),
        })

    def end_object(self, obj):
        """
        Called after handling all fields for an object.
        """
        self.xml.endElement("object")

    def handle_field(self, obj, field):
        """
        Called to handle each field on an object (except for ForeignKeys and
        ManyToManyFields).

        A null value is written as an empty ``<None/>`` child element; any
        other value is written as character data.
        """
        self.xml.startElement("field", {
            "name": field.name,
            "type": field.get_internal_type(),
        })

        # Look at the raw attribute first so that a null value is always
        # written as <None/>, however get_string_value() would render it.
        if getattr(obj, field.name) is None:
            value = None
        else:
            # Get a "string version" of the object's data (this is handled
            # by the serializer base class).
            value = self.get_string_value(obj, field)

        if value is None:
            self.xml.addQuickElement("None")
        else:
            self.xml.characters(str(value))

        self.xml.endElement("field")

    def handle_fk_field(self, obj, field):
        """
        Called to handle a ForeignKey (we need to treat them slightly
        differently from regular fields).

        The related object's primary key is written as character data, or
        ``<None/>`` when there is no related object.
        """
        self._start_relational_field(field)
        related = getattr(obj, field.name)
        if related is not None:
            self.xml.characters(str(related._get_pk_val()))
        else:
            self.xml.addQuickElement("None")
        self.xml.endElement("field")

    def handle_m2m_field(self, obj, field):
        """
        Called to handle a ManyToManyField. Related objects are only
        serialized as references to the object's PK (i.e. the related *data*
        is not dumped, just the relation).
        """
        self._start_relational_field(field)
        for relobj in getattr(obj, field.name).iterator():
            self.xml.addQuickElement("object", attrs={"pk": str(relobj._get_pk_val())})
        self.xml.endElement("field")

    def _start_relational_field(self, field):
        """
        Helper to output the <field> element for relational fields.

        The caller is responsible for closing the element.
        """
        self.xml.startElement("field", {
            "name": field.name,
            "rel": field.rel.__class__.__name__,
            "to": str(field.rel.to._meta),
        })
class Deserializer(base.Deserializer):
    """
    Deserialize XML.

    Iterating yields one ``base.DeserializedObject`` per ``<object>``
    element found in the input.
    """

    def __init__(self, stream_or_string, **options):
        """
        Set up the pull parser over the input.

        The ``encoding`` option (default ``utf-8``) is the encoding that
        text taken from the document is encoded to before use.
        """
        super(Deserializer, self).__init__(stream_or_string, **options)
        self.encoding = self.options.get("encoding", "utf-8")
        self.event_stream = pulldom.parse(self.stream)

    def next(self):
        """
        Return the next deserialized object, skipping every parser event
        that is not the start of an ``<object>`` element. Raises
        ``StopIteration`` when the input is exhausted.
        """
        for event, node in self.event_stream:
            if event == "START_ELEMENT" and node.nodeName == "object":
                # Pull the whole subtree into the node before reading it.
                self.event_stream.expandNode(node)
                return self._handle_object(node)
        raise StopIteration

    def _handle_object(self, node):
        """
        Convert an <object> node to a DeserializedObject.

        Raises ``base.DeserializationError`` if the node or one of its
        fields lacks a required attribute or names an unknown model.
        """
        # Look up the model using the model loading mechanism. If this fails, bail.
        Model = self._get_model_from_node(node, "model")

        # Start building a data dictionary from the object. If the node is
        # missing the pk attribute, bail.
        pk = node.getAttribute("pk")
        if not pk:
            raise base.DeserializationError("<object> node is missing the 'pk' attribute")
        data = {Model._meta.pk.name: pk}

        # Also start building a dict of m2m data (this is saved as
        # {m2m_accessor_attribute : [list_of_related_objects]})
        m2m_data = {}

        # Deserialize each field.
        for field_node in node.getElementsByTagName("field"):
            # If the field is missing the name attribute, bail (are you
            # sensing a pattern here?)
            field_name = field_node.getAttribute("name")
            if not field_name:
                raise base.DeserializationError("<field> node is missing the 'name' attribute")

            # Get the field from the Model. This will raise a
            # FieldDoesNotExist if, well, the field doesn't exist, which will
            # be propagated correctly.
            field = Model._meta.get_field(field_name)

            # As is usually the case, relation fields get the special treatment.
            if field.rel and isinstance(field.rel, models.ManyToManyRel):
                m2m_data[field.name] = self._handle_m2m_field_node(field_node)
            elif field.rel and isinstance(field.rel, models.ManyToOneRel):
                data[field.name] = self._handle_fk_field_node(field_node)
            elif self._is_none_node(field_node):
                # The serializer writes null values as <None/>; map the
                # marker back to None instead of parsing an empty string.
                data[field.name] = None
            else:
                value = field.to_python(getInnerText(field_node).strip().encode(self.encoding))
                data[field.name] = value

        # Return a DeserializedObject so that the m2m data has a place to live.
        return base.DeserializedObject(Model(**data), m2m_data)

    def _handle_fk_field_node(self, node):
        """
        Handle a <field> node for a ForeignKey.

        Returns the related object, or None when the field holds the
        <None/> marker or the referenced object does not exist.
        """
        RelatedModel = self._get_model_from_node(node, "to")
        if self._is_none_node(node):
            return None
        # Try to set the foreign key by looking up the foreign related object.
        # If it doesn't exist, set the field to None (which might trigger
        # validation error, but that's expected).
        related_pk = getInnerText(node).strip().encode(self.encoding)
        try:
            return RelatedModel._default_manager.get(pk=related_pk)
        except RelatedModel.DoesNotExist:
            return None

    def _handle_m2m_field_node(self, node):
        """
        Handle a <field> node for a ManyToManyField.
        """
        # Load the related model
        RelatedModel = self._get_model_from_node(node, "to")

        # Look up all the related objects. Using the in_bulk() lookup ensures
        # that missing related objects don't cause an exception
        related_ids = [c.getAttribute("pk").encode(self.encoding) for c in node.getElementsByTagName("object")]
        return RelatedModel._default_manager.in_bulk(related_ids).values()

    def _get_model_from_node(self, node, attr):
        """
        Helper to look up a model from a <object model=...> or a <field
        rel=... to=...> node.

        Raises ``base.DeserializationError`` if the attribute is missing or
        does not identify a model.
        """
        model_identifier = node.getAttribute(attr)
        if not model_identifier:
            raise base.DeserializationError(
                "<%s> node is missing the required '%s' attribute" \
                    % (node.nodeName, attr))
        try:
            Model = models.get_model(*model_identifier.split("."))
        except TypeError:
            # The identifier did not split into the arguments get_model()
            # accepts.
            Model = None
        if Model is None:
            raise base.DeserializationError(
                "<%s> node has invalid model identifier: '%s'" % \
                    (node.nodeName, model_identifier))
        return Model

    def _is_none_node(self, node):
        """
        Return True if ``node`` has a direct <None/> child element, the
        marker the serializer writes for null values.
        """
        for child in node.childNodes:
            if child.nodeType == child.ELEMENT_NODE and child.nodeName == "None":
                return True
        return False
def getInnerText(node):
    """
    Get all the inner text of a DOM node (recursively).

    Text and CDATA children are concatenated in document order, descending
    into child elements; every other node type (comments, processing
    instructions, ...) is ignored. Returns an empty string for a node with
    no text content.
    """
    # inspired by http://mail.python.org/pipermail/xml-sig/2005-March/011022.html
    inner_text = []
    for child in node.childNodes:
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE):
            inner_text.append(child.data)
        elif child.nodeType == child.ELEMENT_NODE:
            # append, not extend: the recursive call returns a string, and
            # extending with a string would add it one character at a time.
            inner_text.append(getInnerText(child))
    return "".join(inner_text)