1
0
mirror of https://github.com/django/django.git synced 2025-11-07 07:15:35 +00:00

Fixed #5423 -- Made dumpdata output one row at a time.

This should prevent storing all rows in memory when big sets of
data are dumped.
See ticket for heroic contributors.
This commit is contained in:
Claude Paroz
2012-05-26 11:43:37 +02:00
parent c2139bbcef
commit 3b5083bee5
6 changed files with 75 additions and 40 deletions

View File

@@ -49,23 +49,23 @@ class OutputWrapper(object):
"""
Wrapper around stdout/stderr
"""
# Constructor for the stream wrapper. Two renderings of the signature follow;
# the second one adds the `ending` keyword (default '\n'), which is stored on
# the instance at the bottom of this block.
# NOTE(review): this looks like a flattened diff (old line followed by new
# line, indentation stripped) -- confirm before treating both as live code.
def __init__(self, out, style_func=None):
def __init__(self, out, style_func=None, ending='\n'):
# Keep a handle on the wrapped stream.
self._out = out
# Styling stays disabled unless the stream reports itself as a terminal.
self.style_func = None
if hasattr(out, 'isatty') and out.isatty():
self.style_func = style_func
# Instance-level terminator that write() consults when no ending is passed.
self.ending = ending
# Forward any attribute that is not found on the wrapper itself to the
# wrapped stream.
# NOTE(review): if `_out` were ever missing from the instance, the `self._out`
# lookup below would re-enter this method -- confirm that cannot happen.
def __getattr__(self, name):
return getattr(self._out, name)
# Write `msg` to the wrapped stream, appending a terminator and applying a
# style function. Two renderings of the signature follow: the first defaults
# `ending` to '\n', the second to None so the instance-level value can apply.
# NOTE(review): this looks like a flattened diff (old and new lines
# interleaved, indentation stripped) -- confirm which lines are live.
def write(self, msg, style_func=None, ending='\n'):
def write(self, msg, style_func=None, ending=None):
# Fall back to self.ending only when the caller passed ending=None.
# NOTE(review): the `cond and a or b` form yields `ending` (i.e. None) when
# self.ending is falsy; the truthiness test below treats that the same as an
# empty ending, but a conditional expression would state the intent directly.
ending = ending is None and self.ending or ending
# Append the terminator only if msg does not already end with it.
if ending and not msg.endswith(ending):
msg += ending
# First styling variant: the per-call style_func wins over the instance one,
# and the message is written unstyled when neither is set.
if style_func is not None:
msg = style_func(msg)
elif self.style_func is not None:
msg = self.style_func(msg)
self._out.write(smart_str(msg))
# Second styling variant: pick the first non-None candidate, ending with an
# identity function, then write the styled message.
style_func = [f for f in (style_func, self.style_func, lambda x:x)
if f is not None][0]
self._out.write(smart_str(style_func(msg)))
class BaseCommand(object):

View File

@@ -4,6 +4,7 @@ from django.core import serializers
from django.db import router, DEFAULT_DB_ALIAS
from django.utils.datastructures import SortedDict
import sys
from optparse import make_option
class Command(BaseCommand):
@@ -97,21 +98,24 @@ class Command(BaseCommand):
except KeyError:
raise CommandError("Unknown serialization format: %s" % format)
# Now collate the objects to be serialized.
objects = []
for model in sort_dependencies(app_list.items()):
if model in excluded_models:
continue
if not model._meta.proxy and router.allow_syncdb(using, model):
if use_base_manager:
objects.extend(model._base_manager.using(using).all())
else:
objects.extend(model._default_manager.using(using).all())
def get_objects():
    """Yield, one at a time, every object that should be serialized.

    Models are visited in the order returned by ``sort_dependencies``.
    Excluded models, proxy models and models the router does not allow on
    the ``using`` database are skipped. Each remaining model is read through
    its base or default manager (depending on ``use_base_manager``), ordered
    by primary key and consumed through ``iterator()``.

    The names ``app_list``, ``excluded_models``, ``using`` and
    ``use_base_manager`` are taken from the enclosing scope.
    """
    # Collate the objects to be serialized.
    for model in sort_dependencies(app_list.items()):
        if model in excluded_models:
            continue
        if not model._meta.proxy and router.allow_syncdb(using, model):
            if use_base_manager:
                objects = model._base_manager
            else:
                objects = model._default_manager
            # Order by primary key and iterate lazily.
            queryset = (objects.using(using)
                        .order_by(model._meta.pk.name))
            for obj in queryset.iterator():
                yield obj
try:
self.stdout.write(serializers.serialize(format, objects,
indent=indent, use_natural_keys=use_natural_keys),
ending='')
self.stdout.ending = None
serializers.serialize(format, get_objects(), indent=indent,
use_natural_keys=use_natural_keys, stream=self.stdout)
except Exception as e:
if show_traceback:
raise

View File

@@ -39,6 +39,7 @@ class Serializer(object):
self.use_natural_keys = options.pop("use_natural_keys", False)
self.start_serialization()
self.first = True
for obj in queryset:
self.start_object(obj)
# Use the concrete parent class' _meta instead of the object's _meta
@@ -57,6 +58,8 @@ class Serializer(object):
if self.selected_fields is None or field.attname in self.selected_fields:
self.handle_m2m_field(obj, field)
self.end_object(obj)
if self.first:
self.first = False
self.end_serialization()
return self.getvalue()

View File

@@ -21,13 +21,38 @@ class Serializer(PythonSerializer):
"""
internal_use_only = False
# Two method headers follow: `end_serialization` and `start_serialization`.
# NOTE(review): this looks like a flattened diff in which the version check
# moved from the former to the latter -- confirm which header is live.
def end_serialization(self):
def start_serialization(self):
# NOTE(review): the version components are compared as lists of strings, so
# the ordering is lexicographic per component rather than numeric -- confirm
# that this is acceptable for the json module versions in use.
if json.__version__.split('.') >= ['2', '1', '3']:
# Use JS strings to represent Python Decimal instances (ticket #16850)
self.options.update({'use_decimal': False})
# One-shot variant: dump the whole accumulated list of objects at once.
json.dump(self.objects, self.stream, cls=DjangoJSONEncoder, **self.options)
# Streaming variant: reset the per-object state, derive the keyword arguments
# for json.dump from the options minus 'stream' and 'fields', and open the
# JSON array.
self._current = None
self.json_kwargs = self.options.copy()
self.json_kwargs.pop('stream', None)
self.json_kwargs.pop('fields', None)
self.stream.write("[")
def end_serialization(self):
    """Close the JSON array on the output stream.

    Writes ``]``. When the ``indent`` option is truthy the bracket is
    placed on its own line, i.e. a newline is written before and after it.
    """
    # Read the option once; both newline decisions depend on the same value.
    indent = self.options.get("indent")
    if indent:
        self.stream.write("\n")
    self.stream.write("]")
    if indent:
        self.stream.write("\n")
def end_object(self, obj):
    """Write one serialized object to the stream, with its separator.

    Unless ``self.first`` is true, a comma is written before the object,
    followed by a space when no ``indent`` option is set. With a truthy
    ``indent`` option every object starts on a new line. The object itself
    is whatever ``get_dump_object(obj)`` returns, dumped with
    ``DjangoJSONEncoder`` and the keyword arguments in ``self.json_kwargs``.
    """
    # self._current has the field data
    indent = self.options.get("indent")
    if not self.first:
        # Separate this object from the one written before it.
        self.stream.write(",")
        if not indent:
            self.stream.write(" ")
    if indent:
        self.stream.write("\n")
    json.dump(self.get_dump_object(obj), self.stream,
              cls=DjangoJSONEncoder, **self.json_kwargs)
    # The field data has been written out; drop the reference.
    self._current = None
def getvalue(self):
    """Return the stream's accumulated value, if the stream offers one.

    Returns ``self.stream.getvalue()`` when the stream has a callable
    ``getvalue`` attribute, and ``None`` otherwise.
    """
    # overwrite PythonSerializer.getvalue() with base Serializer.getvalue()
    stream_getvalue = getattr(self.stream, 'getvalue', None)
    if callable(stream_getvalue):
        return stream_getvalue()
    return None

View File

@@ -27,13 +27,16 @@ class Serializer(base.Serializer):
self._current = {}
# Record the finished object in self.objects and reset the per-object state.
# Two ways of building the appended entry follow: an inline dict literal and
# a call to self.get_dump_object(obj).
# NOTE(review): this looks like a flattened diff (old literal, then the new
# delegating call) -- confirm that only one append is live.
def end_object(self, obj):
# Inline variant: build the model/pk/fields mapping in place.
self.objects.append({
"model" : smart_unicode(obj._meta),
"pk" : smart_unicode(obj._get_pk_val(), strings_only=True),
"fields" : self._current
})
# Delegating variant: let get_dump_object() build the mapping.
self.objects.append(self.get_dump_object(obj))
# Clear the field data collected for this object.
self._current = None
def get_dump_object(self, obj):
    """Return the dictionary that represents ``obj`` in the output.

    The mapping has three keys: ``"pk"`` (the object's primary key value
    passed through ``smart_unicode`` with ``strings_only=True``),
    ``"model"`` (``smart_unicode`` of the object's ``_meta``) and
    ``"fields"`` (the field data currently held in ``self._current``).
    """
    return {
        "pk": smart_unicode(obj._get_pk_val(), strings_only=True),
        "model": smart_unicode(obj._meta),
        "fields": self._current,
    }
def handle_field(self, obj, field):
value = field._get_val_from_obj(obj)
# Protected types (i.e., primitives like None, numbers, dates,