1
0
mirror of https://github.com/django/django.git synced 2025-03-31 19:46:42 +00:00

Fixed #20197 -- Made XML serializer fail loudly when outputting unserializable chars

Thanks Tim Graham for the review.
This commit is contained in:
Claude Paroz 2015-06-19 08:42:48 +02:00
parent b769bbd4f6
commit 9368f51e12
5 changed files with 49 additions and 2 deletions

View File

@ -14,7 +14,9 @@ from django.conf import settings
from django.core.serializers import base from django.core.serializers import base
from django.db import DEFAULT_DB_ALIAS, models from django.db import DEFAULT_DB_ALIAS, models
from django.utils.encoding import smart_text from django.utils.encoding import smart_text
from django.utils.xmlutils import SimplerXMLGenerator from django.utils.xmlutils import (
SimplerXMLGenerator, UnserializableContentError,
)
class Serializer(base.Serializer): class Serializer(base.Serializer):
@ -78,7 +80,11 @@ class Serializer(base.Serializer):
# Get a "string version" of the object's data. # Get a "string version" of the object's data.
if getattr(obj, field.name) is not None: if getattr(obj, field.name) is not None:
self.xml.characters(field.value_to_string(obj)) try:
self.xml.characters(field.value_to_string(obj))
except UnserializableContentError:
raise ValueError("%s.%s (pk:%s) contains unserializable characters" % (
obj.__class__.__name__, field.name, obj._get_pk_val()))
else: else:
self.xml.addQuickElement("None") self.xml.addQuickElement("None")

View File

@ -2,9 +2,14 @@
Utilities for XML generation/parsing. Utilities for XML generation/parsing.
""" """
import re
from xml.sax.saxutils import XMLGenerator from xml.sax.saxutils import XMLGenerator
class UnserializableContentError(ValueError):
    """Raised when content holds characters that cannot be written as XML 1.0."""
class SimplerXMLGenerator(XMLGenerator): class SimplerXMLGenerator(XMLGenerator):
def addQuickElement(self, name, contents=None, attrs=None): def addQuickElement(self, name, contents=None, attrs=None):
"Convenience method for adding an element with no children" "Convenience method for adding an element with no children"
@ -14,3 +19,10 @@ class SimplerXMLGenerator(XMLGenerator):
if contents is not None: if contents is not None:
self.characters(contents) self.characters(contents)
self.endElement(name) self.endElement(name)
def characters(self, content):
    """
    Write ``content`` as character data, rejecting unsupported control chars.

    Raises UnserializableContentError if ``content`` contains a character in
    the ranges U+0000-U+0008, U+000B-U+000C or U+000E-U+001F. HT (U+0009),
    LF (U+000A) and CR (U+000D) are outside those ranges and pass through.
    Falsy content (empty string, None) skips the check and is handed
    unchanged to XMLGenerator.characters().
    """
    if content and re.search(r'[\x00-\x08\x0B-\x0C\x0E-\x1F]', content):
        # Fail loudly when content has control chars (unsupported in XML 1.0)
        # See http://www.w3.org/International/questions/qa-controls
        raise UnserializableContentError("Control characters are not supported in XML 1.0")
    XMLGenerator.characters(self, content)

View File

@ -720,6 +720,10 @@ Miscellaneous
* Private function ``django.utils.functional.total_ordering()`` has been * Private function ``django.utils.functional.total_ordering()`` has been
removed. It contained a workaround for a ``functools.total_ordering()`` bug removed. It contained a workaround for a ``functools.total_ordering()`` bug
in Python versions older than 2.7.3. in Python versions older than 2.7.3.
* XML serialization (either through :djadmin:`dumpdata` or the syndication
framework) used to output any characters it received. Now if the content to
be serialized contains any control characters not allowed in the XML 1.0
standard, the serialization will fail with a :exc:`ValueError`.
.. _deprecated-features-1.9: .. _deprecated-features-1.9:

View File

@ -213,6 +213,16 @@ the auth.User model has such a relation to the auth.Permission model::
This example links the given user with the permission models with PKs 46 and 47. This example links the given user with the permission models with PKs 46 and 47.
.. admonition:: Control characters
.. versionchanged:: 1.9
If the content to be serialized contains control characters that are not
accepted in the XML 1.0 standard, the serialization will fail with a
:exc:`ValueError` exception. See also the W3C's explanation of `HTML,
XHTML, XML and Control Codes
<http://www.w3.org/International/questions/qa-controls>`_.
.. _serialization-formats-json: .. _serialization-formats-json:
JSON JSON

View File

@ -371,6 +371,21 @@ class XmlSerializerTestCase(SerializersTestBase, TestCase):
ret_list.append("".join(temp)) ret_list.append("".join(temp))
return ret_list return ret_list
def test_control_char_failure(self):
    """
    Serializing control characters with XML should fail as those characters
    are not supported in the XML 1.0 standard (except HT, LF, CR).
    """
    # U+0001 and U+0011 are control characters: serialization must raise a
    # ValueError whose message names the model, field and primary key.
    self.a1.headline = "This contains \u0001 control \u0011 chars"
    msg = "Article.headline (pk:%s) contains unserializable characters" % self.a1.pk
    with self.assertRaisesMessage(ValueError, msg):
        serializers.serialize(self.serializer_name, [self.a1])
    # HT, LF and CR must be serialized and appear verbatim in the output.
    self.a1.headline = "HT \u0009, LF \u000A, and CR \u000D are allowed"
    self.assertIn(
        "HT \t, LF \n, and CR \r are allowed",
        serializers.serialize(self.serializer_name, [self.a1])
    )
class XmlSerializerTransactionTestCase(SerializersTransactionTestBase, TransactionTestCase): class XmlSerializerTransactionTestCase(SerializersTransactionTestBase, TransactionTestCase):
serializer_name = "xml" serializer_name = "xml"