From b1851cca3ee1494cc020f1676e2029ef138250da Mon Sep 17 00:00:00 2001
From: Luke Plant <L.Plant.98@cantab.net>
Date: Sat, 21 Jun 2008 20:57:05 +0000
Subject: [PATCH] Fixed bug with Model.delete() which did not always delete
 objects in the right order.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@7722 bcc190cf-cafb-0310-a4f2-bffc1f526a37
---
 django/db/models/base.py          |  19 +++--
 django/db/models/query.py         | 116 +++++++++++++++++++++++++++---
 tests/modeltests/delete/models.py | 103 ++++++++++++++++++++++----
 3 files changed, 205 insertions(+), 33 deletions(-)

diff --git a/django/db/models/base.py b/django/db/models/base.py
index 3b6a5fe1ea..14a48044df 100644
--- a/django/db/models/base.py
+++ b/django/db/models/base.py
@@ -10,7 +10,7 @@ from django.core import validators
 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError
 from django.db.models.fields import AutoField, ImageField, FieldDoesNotExist
 from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField
-from django.db.models.query import delete_objects, Q
+from django.db.models.query import delete_objects, Q, CollectedObjects
 from django.db.models.options import Options, AdminOptions
 from django.db import connection, transaction
 from django.db.models import signals
@@ -368,17 +368,16 @@ class Model(object):
                 error_dict[f.name] = errors
         return error_dict
 
-    def _collect_sub_objects(self, seen_objs):
+    def _collect_sub_objects(self, seen_objs, parent=None, nullable=False):
         """
         Recursively populates seen_objs with all objects related to this object.
-        When done, seen_objs will be in the format:
-            {model_class: {pk_val: obj, pk_val: obj, ...},
-             model_class: {pk_val: obj, pk_val: obj, ...}, ...}
+        When done, seen_objs.items() will be in the format:
+            [(model_class, {pk_val: obj, pk_val: obj, ...}),
+             (model_class, {pk_val: obj, pk_val: obj, ...}),...]
         """
         pk_val = self._get_pk_val()
-        if pk_val in seen_objs.setdefault(self.__class__, {}):
+        if seen_objs.add(self.__class__, pk_val, self, parent, nullable):
             return
-        seen_objs[self.__class__][pk_val] = self
 
         for related in self._meta.get_all_related_objects():
             rel_opts_name = related.get_accessor_name()
@@ -388,16 +387,16 @@ class Model(object):
                 except ObjectDoesNotExist:
                     pass
                 else:
-                    sub_obj._collect_sub_objects(seen_objs)
+                    sub_obj._collect_sub_objects(seen_objs, self.__class__, related.field.null)
             else:
                 for sub_obj in getattr(self, rel_opts_name).all():
-                    sub_obj._collect_sub_objects(seen_objs)
+                    sub_obj._collect_sub_objects(seen_objs, self.__class__, related.field.null)
 
     def delete(self):
         assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname)
 
         # Find all the objects than need to be deleted
-        seen_objs = SortedDict()
+        seen_objs = CollectedObjects()
         self._collect_sub_objects(seen_objs)
 
         # Actually delete the objects
diff --git a/django/db/models/query.py b/django/db/models/query.py
index fb6d116a6e..8714cffb7f 100644
--- a/django/db/models/query.py
+++ b/django/db/models/query.py
@@ -16,6 +16,92 @@ ITER_CHUNK_SIZE = CHUNK_SIZE
 # Pull into this namespace for backwards compatibility
 EmptyResultSet = sql.EmptyResultSet
 
+class CyclicDependency(Exception):
+    pass
+
+class CollectedObjects(object):
+    """
+    A container that stores keys and lists of values along with
+    remembering the parent objects for all the keys.
+
+    This is used for the database object deletion routines so that we
+    can calculate the 'leaf' objects which should be deleted first.
+    """
+
+    def __init__(self):
+        self.data = {}
+        self.children = {}
+
+    def add(self, model, pk, obj, parent_model, nullable=False):
+        """
+        Adds an item.
+        model is the class of the object being added,
+        pk is the primary key, obj is the object itself, 
+        parent_model is the model of the parent object
+        that this object was reached through, nullable should
+        be True if this relation is nullable.
+
+        If the item already existed in the structure,
+        returns true, otherwise false.
+        """
+        d = self.data.setdefault(model, SortedDict())
+        retval = pk in d
+        d[pk] = obj
+        # Nullable relationships can be ignored -- they
+        # are nulled out before deleting, and therefore
+        # do not affect the order in which objects have
+        # to be deleted.
+        if parent_model is not None and not nullable:
+            self.children.setdefault(parent_model, []).append(model)
+
+        return retval
+
+    def __contains__(self, key):
+        return self.data.__contains__(key)
+
+    def __getitem__(self, key):
+        return self.data[key]
+
+    def __nonzero__(self):
+        return bool(self.data)
+
+    def iteritems(self):
+        for k in self.ordered_keys():
+            yield k, self[k]
+
+    def items(self):
+        return list(self.iteritems())
+
+    def keys(self):
+        return self.ordered_keys()
+
+    def ordered_keys(self):
+        """
+        Returns the models in the order that they should be 
+        dealth with i.e. models with no dependencies first.
+        """
+        dealt_with = SortedDict()
+        # Start with items that have no children
+        models = self.data.keys()
+        while len(dealt_with) < len(models):
+            found = False
+            for model in models:
+                children = self.children.setdefault(model, [])
+                if len([c for c in children if c not in dealt_with]) == 0:
+                    dealt_with[model] = None
+                    found = True
+            if not found:
+                raise CyclicDependency("There is a cyclic dependency of items to be processed.")
+            
+        return dealt_with.keys()
+
+    def unordered_keys(self):
+        """
+        Fallback for the case where is a cyclic dependency but we
+        don't care.
+        """
+        return self.data.keys()
+
 class QuerySet(object):
     "Represents a lazy database lookup for a set of objects"
     def __init__(self, model=None, query=None):
@@ -275,7 +361,7 @@ class QuerySet(object):
         while 1:
             # Collect all the objects to be deleted in this chunk, and all the
             # objects that are related to the objects that are to be deleted.
-            seen_objs = SortedDict()
+            seen_objs = CollectedObjects()
             for object in del_query[:CHUNK_SIZE]:
                 object._collect_sub_objects(seen_objs)
 
@@ -682,19 +768,27 @@ def delete_objects(seen_objs):
     Iterate through a list of seen classes, and remove any instances that are
     referred to.
     """
-    ordered_classes = seen_objs.keys()
-    ordered_classes.reverse()
+    try:
+        ordered_classes = seen_objs.keys()
+    except CyclicDependency:
+        # if there is a cyclic dependency, we cannot in general delete
+        # the objects.  However, if an appropriate transaction is set
+        # up, or if the database is lax enough, it will succeed. 
+        # So for now, we go ahead and try anway.
+        ordered_classes = seen_objs.unordered_keys()
 
+    obj_pairs = {}
     for cls in ordered_classes:
-        seen_objs[cls] = seen_objs[cls].items()
-        seen_objs[cls].sort()
+        items = seen_objs[cls].items()
+        items.sort()
+        obj_pairs[cls] = items
 
         # Pre notify all instances to be deleted
-        for pk_val, instance in seen_objs[cls]:
+        for pk_val, instance in items:
             dispatcher.send(signal=signals.pre_delete, sender=cls,
                     instance=instance)
 
-        pk_list = [pk for pk,instance in seen_objs[cls]]
+        pk_list = [pk for pk,instance in items]
         del_query = sql.DeleteQuery(cls, connection)
         del_query.delete_batch_related(pk_list)
 
@@ -705,15 +799,17 @@ def delete_objects(seen_objs):
 
     # Now delete the actual data
     for cls in ordered_classes:
-        seen_objs[cls].reverse()
-        pk_list = [pk for pk,instance in seen_objs[cls]]
+        items = obj_pairs[cls]
+        items.reverse()
+
+        pk_list = [pk for pk,instance in items]
         del_query = sql.DeleteQuery(cls, connection)
         del_query.delete_batch(pk_list)
 
         # Last cleanup; set NULLs where there once was a reference to the
         # object, NULL the primary key of the found objects, and perform
         # post-notification.
-        for pk_val, instance in seen_objs[cls]:
+        for pk_val, instance in items:
             for field in cls._meta.fields:
                 if field.rel and field.null and field.rel.to in seen_objs:
                     setattr(instance, field.attname, None)
diff --git a/tests/modeltests/delete/models.py b/tests/modeltests/delete/models.py
index d4f7dd1d1f..f5b423e9ff 100644
--- a/tests/modeltests/delete/models.py
+++ b/tests/modeltests/delete/models.py
@@ -33,8 +33,46 @@ class D(DefaultRepr, models.Model):
 # However, if we start at As, we might find Bs first (in which 
 # case things will be nice), or find Ds first.
 
+# Some mutually dependent models, but nullable
+class E(DefaultRepr, models.Model):
+    f = models.ForeignKey('F', null=True, related_name='e_rel')
+
+class F(DefaultRepr, models.Model):
+    e = models.ForeignKey(E, related_name='f_rel')
+
 
 __test__ = {'API_TESTS': """
+# First, some tests for the datastructure we use
+
+>>> from django.db.models.query import CollectedObjects
+
+>>> g = CollectedObjects()
+>>> g.add("key1", 1, "item1", None)
+False
+>>> g["key1"]
+{1: 'item1'}
+>>> g.add("key2", 1, "item1", "key1")
+False
+>>> g.add("key2", 2, "item2", "key1")
+False
+>>> g["key2"]
+{1: 'item1', 2: 'item2'}
+>>> g.add("key3", 1, "item1", "key1")
+False
+>>> g.add("key3", 1, "item1", "key2")
+True
+>>> g.ordered_keys()
+['key3', 'key2', 'key1']
+
+>>> g.add("key2", 1, "item1", "key3")
+True
+>>> g.ordered_keys()
+Traceback (most recent call last):
+    ...
+CyclicDependency: There is a cyclic dependency of items to be processed.
+
+
+
 # Due to the way that transactions work in the test harness,
 # doing m.delete() here can work but fail in a real situation,
 # since it may delete all objects, but not in the right order.
@@ -42,11 +80,10 @@ __test__ = {'API_TESTS': """
 
 # Also, it is possible that the order is correct 'accidentally', due
 # solely to order of imports etc.  To check this, we set the order
-# that 'get_models()' will retrieve to a known 'tricky' order, and
-# then try again with the reverse and try again.  Slightly naughty
-# access to internals here.
+# that 'get_models()' will retrieve to a known 'nice' order, and
+# then try again with a known 'tricky' order.  Slightly naughty
+# access to internals here :-)
 
->>> from django.utils.datastructures import SortedDict
 >>> from django.db.models.loading import cache
 
 # Nice order
@@ -56,8 +93,6 @@ __test__ = {'API_TESTS': """
 >>> del C._meta._related_objects_cache
 >>> del D._meta._related_objects_cache
 
-
-
 >>> a1 = A()
 >>> a1.save()
 >>> b1 = B(a=a1)
@@ -67,9 +102,9 @@ __test__ = {'API_TESTS': """
 >>> d1 = D(c=c1, a=a1)
 >>> d1.save()
 
->>> sd = SortedDict()
->>> a1._collect_sub_objects(sd)
->>> list(reversed(sd.keys()))
+>>> o = CollectedObjects()
+>>> a1._collect_sub_objects(o)
+>>> o.keys()
 [<class 'modeltests.delete.models.D'>, <class 'modeltests.delete.models.C'>, <class 'modeltests.delete.models.B'>, <class 'modeltests.delete.models.A'>]
 >>> a1.delete()
 
@@ -80,7 +115,6 @@ __test__ = {'API_TESTS': """
 >>> del C._meta._related_objects_cache
 >>> del D._meta._related_objects_cache
 
-
 >>> a2 = A()
 >>> a2.save()
 >>> b2 = B(a=a2)
@@ -90,13 +124,56 @@ __test__ = {'API_TESTS': """
 >>> d2 = D(c=c2, a=a2)
 >>> d2.save()
 
->>> sd2 = SortedDict()
->>> a2._collect_sub_objects(sd2)
->>> list(reversed(sd2.keys()))
+>>> o = CollectedObjects()
+>>> a2._collect_sub_objects(o)
+>>> o.keys()
 [<class 'modeltests.delete.models.D'>, <class 'modeltests.delete.models.C'>, <class 'modeltests.delete.models.B'>, <class 'modeltests.delete.models.A'>]
 >>> a2.delete()
 
+# Tests for nullable related fields
 
+>>> g = CollectedObjects()
+>>> g.add("key1", 1, "item1", None)
+False
+>>> g.add("key2", 1, "item1", "key1", nullable=True)
+False
+>>> g.add("key1", 1, "item1", "key2")
+True
+>>> g.ordered_keys()
+['key1', 'key2']
+
+>>> e1 = E()
+>>> e1.save()
+>>> f1 = F(e=e1)
+>>> f1.save()
+>>> e1.f = f1
+>>> e1.save()
+
+# Since E.f is nullable, we should delete F first (after nulling out
+# the E.f field), then E.
+
+>>> o = CollectedObjects()
+>>> e1._collect_sub_objects(o)
+>>> o.keys()
+[<class 'modeltests.delete.models.F'>, <class 'modeltests.delete.models.E'>]
+
+>>> e1.delete()
+
+>>> e2 = E()
+>>> e2.save()
+>>> f2 = F(e=e2)
+>>> f2.save()
+>>> e2.f = f2
+>>> e2.save()
+
+# Same deal as before, though we are starting from the other object.
+
+>>> o = CollectedObjects()
+>>> f2._collect_sub_objects(o)
+>>> o.keys()
+[<class 'modeltests.delete.models.F'>, <class 'modeltests.delete.models.E'>]
+
+>>> f2.delete()
 
 """
 }