diff --git a/django/core/serializers/xml_serializer.py b/django/core/serializers/xml_serializer.py index ea333a22bd..9dd4eee282 100644 --- a/django/core/serializers/xml_serializer.py +++ b/django/core/serializers/xml_serializer.py @@ -10,6 +10,8 @@ from django.db import models, DEFAULT_DB_ALIAS from django.utils.xmlutils import SimplerXMLGenerator from django.utils.encoding import smart_text from xml.dom import pulldom +from xml.sax import handler +from xml.sax.expatreader import ExpatParser as _ExpatParser class Serializer(base.Serializer): """ @@ -151,9 +153,13 @@ class Deserializer(base.Deserializer): def __init__(self, stream_or_string, **options): super(Deserializer, self).__init__(stream_or_string, **options) - self.event_stream = pulldom.parse(self.stream) + self.event_stream = pulldom.parse(self.stream, self._make_parser()) self.db = options.pop('using', DEFAULT_DB_ALIAS) + def _make_parser(self): + """Create a hardened XML parser (no custom/external entities).""" + return DefusedExpatParser() + def __next__(self): for event, node in self.event_stream: if event == "START_ELEMENT" and node.nodeName == "object": @@ -292,3 +298,90 @@ def getInnerText(node): else: pass return "".join(inner_text) + + +# Below code based on Christian Heimes' defusedxml + + +class DefusedExpatParser(_ExpatParser): + """ + An expat parser hardened against XML bomb attacks. + + Forbids DTDs, external entity references + + """ + def __init__(self, *args, **kwargs): + _ExpatParser.__init__(self, *args, **kwargs) + self.setFeature(handler.feature_external_ges, False) + self.setFeature(handler.feature_external_pes, False) + + def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): + raise DTDForbidden(name, sysid, pubid) + + def entity_decl(self, name, is_parameter_entity, value, base, + sysid, pubid, notation_name): + raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name) + + def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): + # expat 1.2 + raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name) + + def external_entity_ref_handler(self, context, base, sysid, pubid): + raise ExternalReferenceForbidden(context, base, sysid, pubid) + + def reset(self): + _ExpatParser.reset(self) + parser = self._parser + parser.StartDoctypeDeclHandler = self.start_doctype_decl + parser.EntityDeclHandler = self.entity_decl + parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl + parser.ExternalEntityRefHandler = self.external_entity_ref_handler + + +class DefusedXmlException(ValueError): + """Base exception.""" + def __repr__(self): + return str(self) + + +class DTDForbidden(DefusedXmlException): + """Document type definition is forbidden.""" + def __init__(self, name, sysid, pubid): + super(DTDForbidden, self).__init__() + self.name = name + self.sysid = sysid + self.pubid = pubid + + def __str__(self): + tpl = "DTDForbidden(name='{}', system_id={!r}, public_id={!r})" + return tpl.format(self.name, self.sysid, self.pubid) + + +class EntitiesForbidden(DefusedXmlException): + """Entity definition is forbidden.""" + def __init__(self, name, value, base, sysid, pubid, notation_name): + super(EntitiesForbidden, self).__init__() + self.name = name + self.value = value + self.base = base + self.sysid = sysid + self.pubid = pubid + self.notation_name = notation_name + + def __str__(self): + tpl = "EntitiesForbidden(name='{}', system_id={!r}, public_id={!r})" + return tpl.format(self.name, self.sysid, self.pubid) + + +class ExternalReferenceForbidden(DefusedXmlException): + """Resolving an external reference is forbidden.""" + def __init__(self, context, base, sysid, pubid): + super(ExternalReferenceForbidden, self).__init__() + self.context = context + self.base = base + self.sysid = sysid + self.pubid = pubid + + def __str__(self): + tpl = "ExternalReferenceForbidden(system_id='{}', public_id={})" + return tpl.format(self.sysid, self.pubid) diff --git a/tests/regressiontests/serializers_regress/tests.py b/tests/regressiontests/serializers_regress/tests.py index 67aec08af2..e586d76e7f 100644 --- a/tests/regressiontests/serializers_regress/tests.py +++ b/tests/regressiontests/serializers_regress/tests.py @@ -10,6 +10,7 @@ from __future__ import absolute_import, unicode_literals import datetime import decimal +from django.core.serializers.xml_serializer import DTDForbidden try: import yaml @@ -514,3 +515,17 @@ for format in serializers.get_serializer_formats(): if format != 'python': setattr(SerializerTests, 'test_' + format + '_serializer_stream', curry(streamTest, format)) + +class XmlDeserializerSecurityTests(TestCase): + + def test_no_dtd(self): + """ + The XML deserializer shouldn't allow a DTD. + + This is the most straightforward way to prevent all entity definitions + and avoid both external entities and entity-expansion attacks. + + """ + xml = '' + with self.assertRaises(DTDForbidden): + next(serializers.deserialize('xml', xml))