From 51aa000378c00a442273f01142acdebc94dec68d Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sun, 2 Jun 2013 10:40:54 +0200 Subject: [PATCH] Fixed #20485 -- Refactored loaddata for speed. Thanks Anssi for reporting this performance bottleneck. --- django/core/management/commands/loaddata.py | 302 +++++++++++--------- tests/fixtures_regress/tests.py | 2 +- 2 files changed, 165 insertions(+), 139 deletions(-) diff --git a/django/core/management/commands/loaddata.py b/django/core/management/commands/loaddata.py index ab9f7468c4..bfcea35d5c 100644 --- a/django/core/management/commands/loaddata.py +++ b/django/core/management/commands/loaddata.py @@ -1,7 +1,8 @@ from __future__ import unicode_literals -import os +import glob import gzip +import os import zipfile from optparse import make_option import warnings @@ -12,9 +13,9 @@ from django.core.management.base import BaseCommand, CommandError from django.core.management.color import no_style from django.db import (connections, router, transaction, DEFAULT_DB_ALIAS, IntegrityError, DatabaseError) -from django.db.models import get_apps +from django.db.models import get_app_paths from django.utils.encoding import force_text -from django.utils._os import upath +from django.utils.functional import cached_property, memoize from itertools import product try: @@ -44,9 +45,8 @@ class Command(BaseCommand): if not len(fixture_labels): raise CommandError( - "No database fixture specified. Please provide the path of at " - "least one fixture in the command line." - ) + "No database fixture specified. Please provide the path " + "of at least one fixture in the command line.") self.verbosity = int(options.get('verbosity')) @@ -69,37 +69,18 @@ class Command(BaseCommand): self.fixture_object_count = 0 self.models = set() - class SingleZipReader(zipfile.ZipFile): - def __init__(self, *args, **kwargs): - zipfile.ZipFile.__init__(self, *args, **kwargs) - if settings.DEBUG: - assert len(self.namelist()) == 1, "Zip-compressed fixtures must contain only one file." - def read(self): - return zipfile.ZipFile.read(self, self.namelist()[0]) - - self.compression_types = { + self.serialization_formats = serializers.get_public_serializer_formats() + self.compression_formats = { None: open, 'gz': gzip.GzipFile, 'zip': SingleZipReader } if has_bz2: - self.compression_types['bz2'] = bz2.BZ2File - - app_module_paths = [] - for app in get_apps(): - if hasattr(app, '__path__'): - # It's a 'models/' subpackage - for path in app.__path__: - app_module_paths.append(upath(path)) - else: - # It's a models.py module - app_module_paths.append(upath(app.__file__)) - - app_fixtures = [os.path.join(os.path.dirname(path), 'fixtures') for path in app_module_paths] + self.compression_formats['bz2'] = bz2.BZ2File with connection.constraint_checks_disabled(): for fixture_label in fixture_labels: - self.load_label(fixture_label, app_fixtures) + self.load_label(fixture_label) # Since we disabled constraint checks, we must manually check for # any invalid keys that might have been added @@ -124,129 +105,174 @@ class Command(BaseCommand): if self.verbosity >= 1: if self.fixture_object_count == self.loaded_object_count: - self.stdout.write("Installed %d object(s) from %d fixture(s)" % ( - self.loaded_object_count, self.fixture_count)) + self.stdout.write("Installed %d object(s) from %d fixture(s)" % + (self.loaded_object_count, self.fixture_count)) else: - self.stdout.write("Installed %d object(s) (of %d) from %d fixture(s)" % ( - self.loaded_object_count, self.fixture_object_count, self.fixture_count)) + self.stdout.write("Installed %d object(s) (of %d) from %d fixture(s)" % + (self.loaded_object_count, self.fixture_object_count, self.fixture_count)) - def load_label(self, fixture_label, app_fixtures): + def load_label(self, fixture_label): + """ + Loads fixtures files for a given label. + """ + for fixture_file, fixture_dir, fixture_name in self.find_fixtures(fixture_label): + _, ser_fmt, cmp_fmt = self.parse_name(os.path.basename(fixture_file)) + open_method = self.compression_formats[cmp_fmt] + fixture = open_method(fixture_file, 'r') + try: + self.fixture_count += 1 + objects_in_fixture = 0 + loaded_objects_in_fixture = 0 + if self.verbosity >= 2: + self.stdout.write("Installing %s fixture '%s' from %s." % + (ser_fmt, fixture_name, humanize(fixture_dir))) - parts = fixture_label.split('.') + objects = serializers.deserialize(ser_fmt, fixture, + using=self.using, ignorenonexistent=self.ignore) - if len(parts) > 1 and parts[-1] in self.compression_types: - compression_formats = [parts[-1]] - parts = parts[:-1] - else: - compression_formats = self.compression_types.keys() + for obj in objects: + objects_in_fixture += 1 + if router.allow_syncdb(self.using, obj.object.__class__): + loaded_objects_in_fixture += 1 + self.models.add(obj.object.__class__) + try: + obj.save(using=self.using) + except (DatabaseError, IntegrityError) as e: + e.args = ("Could not load %(app_label)s.%(object_name)s(pk=%(pk)s): %(error_msg)s" % { + 'app_label': obj.object._meta.app_label, + 'object_name': obj.object._meta.object_name, + 'pk': obj.object.pk, + 'error_msg': force_text(e) + },) + raise - if len(parts) == 1: - fixture_name = parts[0] - formats = serializers.get_public_serializer_formats() - else: - fixture_name, format = '.'.join(parts[:-1]), parts[-1] - if format in serializers.get_public_serializer_formats(): - formats = [format] - else: - formats = [] + self.loaded_object_count += loaded_objects_in_fixture + self.fixture_object_count += objects_in_fixture + except Exception as e: + if not isinstance(e, CommandError): + e.args = ("Problem installing fixture '%s': %s" % (fixture_file, e),) + raise + finally: + fixture.close() - if formats: - if self.verbosity >= 2: - self.stdout.write("Loading '%s' fixtures..." % fixture_name) - else: + # If the fixture we loaded contains 0 objects, assume that an + # error was encountered during fixture loading. + if objects_in_fixture == 0: + raise CommandError( + "No fixture data found for '%s'. " + "(File format may be invalid.)" % fixture_name) + + def _find_fixtures(self, fixture_label): + """ + Finds fixture files for a given label. + """ + fixture_name, ser_fmt, cmp_fmt = self.parse_name(fixture_label) + databases = [self.using, None] + cmp_fmts = list(self.compression_formats.keys()) if cmp_fmt is None else [cmp_fmt] + ser_fmts = serializers.get_public_serializer_formats() if ser_fmt is None else [ser_fmt] + + # Check kept for backwards-compatibility; it doesn't look very useful. + if '.' in fixture_name: raise CommandError( - "Problem installing fixture '%s': %s is not a known serialization format." % - (fixture_name, format)) - - if os.path.isabs(fixture_name): - fixture_dirs = [fixture_name] - else: - fixture_dirs = app_fixtures + list(settings.FIXTURE_DIRS) + [''] - - label_found = False - for fixture_dir in fixture_dirs: - found = self.process_dir(fixture_dir, fixture_name, - compression_formats, formats) - label_found = label_found or found - - if fixture_name != 'initial_data' and not label_found: - warnings.warn("No fixture named '%s' found." % fixture_name) - - def process_dir(self, fixture_dir, fixture_name, compression_formats, - serialization_formats): - - humanize = lambda dirname: "'%s'" % dirname if dirname else 'absolute path' + "Problem installing fixture '%s': %s is not a known " + "serialization format." % tuple(fixture_name.rsplit('.'))) if self.verbosity >= 2: - self.stdout.write("Checking %s for fixtures..." % humanize(fixture_dir)) + self.stdout.write("Loading '%s' fixtures..." % fixture_name) - label_found = False - for combo in product([self.using, None], serialization_formats, compression_formats): - database, format, compression_format = combo - file_name = '.'.join( - p for p in [ - fixture_name, database, format, compression_format - ] - if p - ) + if os.path.isabs(fixture_name): + fixture_dirs = [os.path.dirname(fixture_name)] + fixture_name = os.path.basename(fixture_name) + else: + fixture_dirs = self.fixture_dirs - if self.verbosity >= 3: - self.stdout.write("Trying %s for %s fixture '%s'..." % \ - (humanize(fixture_dir), file_name, fixture_name)) - full_path = os.path.join(fixture_dir, file_name) - open_method = self.compression_types[compression_format] - try: - fixture = open_method(full_path, 'r') - except IOError: - if self.verbosity >= 2: - self.stdout.write("No %s fixture '%s' in %s." % \ - (format, fixture_name, humanize(fixture_dir))) - else: - try: - if label_found: - raise CommandError("Multiple fixtures named '%s' in %s. Aborting." % - (fixture_name, humanize(fixture_dir))) + suffixes = ('.'.join(ext for ext in combo if ext) + for combo in product(databases, ser_fmts, cmp_fmts)) + targets = set('.'.join((fixture_name, suffix)) for suffix in suffixes) - self.fixture_count += 1 - objects_in_fixture = 0 - loaded_objects_in_fixture = 0 - if self.verbosity >= 2: - self.stdout.write("Installing %s fixture '%s' from %s." % \ - (format, fixture_name, humanize(fixture_dir))) + fixture_files = [] + for fixture_dir in fixture_dirs: + if self.verbosity >= 2: + self.stdout.write("Checking %s for fixtures..." % humanize(fixture_dir)) + fixture_files_in_dir = [] + for candidate in glob.iglob(os.path.join(fixture_dir, fixture_name + '*')): + if os.path.basename(candidate) in targets: + # Save the fixture_dir and fixture_name for future error messages. + fixture_files_in_dir.append((candidate, fixture_dir, fixture_name)) - objects = serializers.deserialize(format, fixture, using=self.using, ignorenonexistent=self.ignore) + if self.verbosity >= 2 and not fixture_files_in_dir: + self.stdout.write("No fixture '%s' in %s." % + (fixture_name, humanize(fixture_dir))) - for obj in objects: - objects_in_fixture += 1 - if router.allow_syncdb(self.using, obj.object.__class__): - loaded_objects_in_fixture += 1 - self.models.add(obj.object.__class__) - try: - obj.save(using=self.using) - except (DatabaseError, IntegrityError) as e: - e.args = ("Could not load %(app_label)s.%(object_name)s(pk=%(pk)s): %(error_msg)s" % { - 'app_label': obj.object._meta.app_label, - 'object_name': obj.object._meta.object_name, - 'pk': obj.object.pk, - 'error_msg': force_text(e) - },) - raise + # Check kept for backwards-compatibility; it isn't clear why + # duplicates are only allowed in different directories. + if len(fixture_files_in_dir) > 1: + raise CommandError( + "Multiple fixtures named '%s' in %s. Aborting." % + (fixture_name, humanize(fixture_dir))) + fixture_files.extend(fixture_files_in_dir) - self.loaded_object_count += loaded_objects_in_fixture - self.fixture_object_count += objects_in_fixture - label_found = True - except Exception as e: - if not isinstance(e, CommandError): - e.args = ("Problem installing fixture '%s': %s" % (full_path, e),) - raise - finally: - fixture.close() + if fixture_name != 'initial_data' and not fixture_files: + # Warning kept for backwards-compatibility; why not an exception? + warnings.warn("No fixture named '%s' found." % fixture_name) - # If the fixture we loaded contains 0 objects, assume that an - # error was encountered during fixture loading. - if objects_in_fixture == 0: - raise CommandError( - "No fixture data found for '%s'. (File format may be invalid.)" % - (fixture_name)) + return fixture_files - return label_found + _label_to_fixtures_cache = {} + find_fixtures = memoize(_find_fixtures, _label_to_fixtures_cache, 2) + + @cached_property + def fixture_dirs(self): + """ + Return a list of fixture directories. + + The list contains the 'fixtures' subdirectory of each installed + application, if it exists, the directories in FIXTURE_DIRS, and the + current directory. + """ + dirs = [] + for path in get_app_paths(): + d = os.path.join(os.path.dirname(path), 'fixtures') + if os.path.isdir(d): + dirs.append(d) + dirs.extend(list(settings.FIXTURE_DIRS)) + dirs.append('') + dirs = [os.path.abspath(os.path.realpath(d)) for d in dirs] + return dirs + + def parse_name(self, fixture_name): + """ + Splits fixture name in name, serialization format, compression format. + """ + parts = fixture_name.rsplit('.', 2) + + if len(parts) > 1 and parts[-1] in self.compression_formats: + cmp_fmt = parts[-1] + parts = parts[:-1] + else: + cmp_fmt = None + + if len(parts) > 1 and parts[-1] in self.serialization_formats: + ser_fmt = parts[-1] + parts = parts[:-1] + else: + ser_fmt = None + + name = '.'.join(parts) + + return name, ser_fmt, cmp_fmt + + +class SingleZipReader(zipfile.ZipFile): + + def __init__(self, *args, **kwargs): + zipfile.ZipFile.__init__(self, *args, **kwargs) + if len(self.namelist()) != 1: + raise ValueError("Zip-compressed fixtures must contain one file.") + + def read(self): + return zipfile.ZipFile.read(self, self.namelist()[0]) + + +def humanize(dirname): + return "'%s'" % dirname if dirname else 'absolute path' diff --git a/tests/fixtures_regress/tests.py b/tests/fixtures_regress/tests.py index 5114302267..52526ec338 100644 --- a/tests/fixtures_regress/tests.py +++ b/tests/fixtures_regress/tests.py @@ -450,7 +450,7 @@ class TestFixtures(TestCase): commit=False, stdout=stdout_output, ) - self.assertTrue("No xml fixture 'this_fixture_doesnt_exist' in" in + self.assertTrue("No fixture 'this_fixture_doesnt_exist' in" in force_text(stdout_output.getvalue()))