From f998c7bcc6d6b42b98bc3ca8e65b70b023bd3bea Mon Sep 17 00:00:00 2001 From: Adrian Holovaty Date: Sun, 8 Jan 2006 06:16:05 +0000 Subject: [PATCH] magic-removal: Fixed #1186 -- Fixed problem resolving primary key in some 'pk' database queries. Also lightly refactored query-parsing code. Thanks, Russ git-svn-id: http://code.djangoproject.com/svn/django/branches/magic-removal@1856 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/db/models/query.py | 360 ++++++++++++---------- django/utils/datastructures.py | 35 +++ tests/modeltests/custom_columns/models.py | 2 +- tests/modeltests/custom_pk/models.py | 23 ++ tests/modeltests/many_to_many/models.py | 10 + tests/modeltests/many_to_one/models.py | 47 ++- tests/modeltests/one_to_one/models.py | 28 +- 7 files changed, 333 insertions(+), 172 deletions(-) diff --git a/django/db/models/query.py b/django/db/models/query.py index 07389388be..8d52be3232 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -1,5 +1,6 @@ from django.db import backend, connection from django.db.models.exceptions import * +from django.utils.datastructures import SortedDict LOOKUP_SEPARATOR = '__' @@ -170,15 +171,14 @@ def fill_table_cache(opts, select, tables, where, old_prefix, cache_tables_seen) select.extend(['%s.%s' % (backend.quote_name(db_table), backend.quote_name(f2.column)) for f2 in f.rel.to._meta.fields]) fill_table_cache(f.rel.to._meta, select, tables, where, db_table, cache_tables_seen) -def throw_bad_kwarg_error(kwarg): - # Helper function to remove redundancy. - raise TypeError, "got unexpected keyword argument '%s'" % kwarg - def parse_lookup(kwarg_items, opts): - # Helper function that handles converting API kwargs (e.g. - # "name__exact": "tom") to SQL. + # Helper function that handles converting API kwargs + # (e.g. "name__exact": "tom") to SQL. - # 'joins' is a dictionary describing the tables that must be joined to complete the query. + # 'joins' is a sorted dictionary describing the tables that must be joined + # to complete the query. The dictionary is sorted because creation order + # is significant; it is a dictionary to ensure uniqueness of alias names. + # # Each key-value pair follows the form # alias: (table, join_type, condition) # where @@ -186,177 +186,203 @@ def parse_lookup(kwarg_items, opts): # table is the actual table name to be joined # join_type is the type of join (INNER JOIN, LEFT OUTER JOIN, etc) # condition is the where-like statement over which narrows the join. + # alias will be derived from the lookup list name. # - # alias will be derived from the lookup list name. - # At present, this method only every returns INNER JOINs; the option is there for others - # to implement custom Q()s, etc that return other join types. - tables, joins, where, params = [], {}, [], [] - for kwarg, kwarg_value in kwarg_items: + # At present, this method only every returns INNER JOINs; the option is + # there for others to implement custom Q()s, etc that return other join + # types. + tables, joins, where, params = [], SortedDict(), [], [] + for kwarg, value in kwarg_items: if kwarg in ('order_by', 'limit', 'offset', 'select_related', 'distinct', 'select', 'tables', 'where', 'params'): - continue - if kwarg_value is None: - continue - if kwarg == 'complex': - tables2, joins2, where2, params2 = kwarg_value.get_sql(opts) + pass + elif value is None: + pass + elif kwarg == 'complex': + tables2, joins2, where2, params2 = value.get_sql(opts) tables.extend(tables2) joins.update(joins2) where.extend(where2) params.extend(params2) - continue - if kwarg == '_or': - for val in kwarg_value: + elif kwarg == '_or': + for val in value: tables2, joins2, where2, params2 = parse_lookup(val, opts) tables.extend(tables2) joins.update(joins2) where.append('(%s)' % ' OR '.join(where2)) params.extend(params2) - continue - lookup_list = kwarg.split(LOOKUP_SEPARATOR) - # pk="value" is shorthand for (primary key)__exact="value" - if lookup_list[-1] == 'pk': - if opts.pk.rel: - lookup_list = lookup_list[:-1] + [opts.pk.name, opts.pk.rel.field_name, 'exact'] - else: - lookup_list = lookup_list[:-1] + [opts.pk.name, 'exact'] - if len(lookup_list) == 1: - throw_bad_kwarg_error(kwarg) - lookup_type = lookup_list.pop() - current_opts = opts # We'll be overwriting this, so keep a reference to the original opts. - current_table_alias = current_opts.db_table - param_required = False - while lookup_list or param_required: - try: - # "current" is a piece of the lookup list. For example, in - # choices.get_list(poll__sites__id__exact=5), lookup_list is - # ["poll", "sites", "id"], and the first current is "poll". - try: - current = lookup_list.pop(0) - except IndexError: - # If we're here, lookup_list is empty but param_required - # is set to True, which means the kwarg was bad. - # Example: choices.get_list(poll__exact='foo') - throw_bad_kwarg_error(kwarg) - # Try many-to-many relationships in the direction in which they are - # originally defined (i.e., the class that defines the ManyToManyField) - for f in current_opts.many_to_many: - if f.name == current: - rel_table_alias = backend.quote_name("m2m_" + current_table_alias + LOOKUP_SEPARATOR + current) + else: # Must be a search parameter. + path = kwarg.split(LOOKUP_SEPARATOR) - joins[rel_table_alias] = ( - backend.quote_name(f.get_m2m_db_table(current_opts)), - "INNER JOIN", - '%s.%s = %s.%s' % - (backend.quote_name(current_table_alias), - backend.quote_name(current_opts.pk.column), - rel_table_alias, - backend.quote_name(current_opts.object_name.lower() + '_id')) - ) + # Extract the last elements of the kwarg. + # The very-last is the clause (equals, like, etc). + # The second-last is the table column on which the clause is + # to be performed. + # The only exception to this is "pk", which is an implicit + # id__exact; if we find "pk", make the clause "exact', and + # insert a dummy name of None, which we will replace when + # we know which table column to grab as the primary key. + clause = path.pop() + if clause == 'pk': + clause = 'exact' + path.append(None) + if len(path) < 1: + raise TypeError, "Cannot parse keyword query %r" % kwarg + + tables2, joins2, where2, params2 = lookup_inner(path, clause, value, opts, opts.db_table, None) + tables.extend(tables2) + joins.update(joins2) + where.extend(where2) + params.extend(params2) + return tables, joins, where, params + +class FieldFound(Exception): + "Exception used to short circuit field-finding operations." + pass + +def find_field(name, field_list): + """ + Finds a field with a specific name in a list of field instances. + Returns None if there are no matches, or several matches. + """ + matches = [f for f in field_list if f.name == name] + if len(matches) != 1: + return None + return matches[0] + +def lookup_inner(path, clause, value, opts, table, column): + tables, joins, where, params = [], SortedDict(), [], [] + current_opts = opts + current_table = table + current_column = column + intermediate_table = None + join_required = False + + name = path.pop(0) + # Has the primary key been requested? If so, expand it out + # to be the name of the current class' primary key + if name is None: + name = current_opts.pk.name + + # Try to find the name in the fields associated with the current class + try: + # Does the name belong to a defined many-to-many field? + field = find_field(name, current_opts.many_to_many) + if field: + new_table = current_table + LOOKUP_SEPARATOR + name + new_opts = field.rel.to._meta + new_column = new_opts.pk.column + + # Need to create an intermediate table join over the m2m table + # This process hijacks current_table/column to point to the + # intermediate table. + current_table = "m2m_" + new_table + join_column = new_opts.object_name.lower() + '_id' + intermediate_table = field.get_m2m_db_table(current_opts) + + raise FieldFound() + + # Does the name belong to a reverse defined many-to-many field? + field = find_field(name, current_opts.get_all_related_many_to_many_objects()) + if field: + new_table = current_table + LOOKUP_SEPARATOR + name + new_opts = field.opts + new_column = new_opts.pk.column + + # Need to create an intermediate table join over the m2m table. + # This process hijacks current_table/column to point to the + # intermediate table. + current_table = "m2m_" + new_table + join_column = new_opts.object_name.lower() + '_id' + intermediate_table = field.field.get_m2m_db_table(new_opts) + + raise FieldFound() + + # Does the name belong to a one-to-many field? + field = find_field(name, opts.get_all_related_objects()) + if field: + new_table = table + LOOKUP_SEPARATOR + name + new_opts = field.opts + new_column = field.field.column + join_column = opts.pk.column + + # 1-N fields MUST be joined, regardless of any other conditions. + join_required = True + + raise FieldFound() + + # Does the name belong to a one-to-one, many-to-one, or regular field? + field = find_field(name, current_opts.fields) + if field: + if field.rel: # One-to-One/Many-to-one field + new_table = current_table + LOOKUP_SEPARATOR + name + new_opts = field.rel.to._meta + new_column = new_opts.pk.column + join_column = field.column + + raise FieldFound() + + except FieldFound: # Match found, loop has been shortcut. + pass + except: # Any other exception; rethrow + raise + else: # No match found. + raise TypeError, "Cannot resolve keyword '%s' into field" % name + + # Check to see if an intermediate join is required between current_table + # and new_table. + if intermediate_table: + joins[backend.quote_name(current_table)] = ( + backend.quote_name(intermediate_table), + "INNER JOIN", + "%s.%s = %s.%s" % \ + (backend.quote_name(table), + backend.quote_name(current_opts.pk.column), + backend.quote_name(current_table), + backend.quote_name(current_opts.object_name.lower() + '_id')) + ) + + if path: + if len(path) == 1 and path[0] in (new_opts.pk.name, None) \ + and clause in ('exact', 'isnull') and not join_required: + # If the last name query is for a key, and the search is for + # isnull/exact, then the current (for N-1) or intermediate + # (for N-N) table can be used for the search - no need to join an + # extra table just to check the primary key. + new_table = current_table + else: + # There are 1 or more name queries pending, and we have ruled out + # any shortcuts; therefore, a join is required. + joins[backend.quote_name(new_table)] = ( + backend.quote_name(new_opts.db_table), + "INNER JOIN", + "%s.%s = %s.%s" % + (backend.quote_name(current_table), + backend.quote_name(join_column), + backend.quote_name(new_table), + backend.quote_name(new_column)) + ) + # If we have made the join, we don't need to tell subsequent + # recursive calls about the column name we joined on. + join_column = None + + # There are name queries remaining. Recurse deeper. + tables2, joins2, where2, params2 = lookup_inner(path, clause, value, new_opts, new_table, join_column) + + tables.extend(tables2) + joins.update(joins2) + where.extend(where2) + params.extend(params2) + else: + # Evaluate clause on current table. + if name in (current_opts.pk.name, None) and clause in ('exact', 'isnull') and current_column: + # If this is an exact/isnull key search, and the last pass + # found/introduced a current/intermediate table that we can use to + # optimize the query, then use that column name. + column = current_column + else: + column = field.column + + where.append(get_where_clause(clause, current_table + '.', column, value)) + params.extend(field.get_db_prep_lookup(clause, value)) - # Optimization: In the case of primary-key lookups, we - # don't have to do an extra join. - if lookup_list and lookup_list[0] == f.rel.to._meta.pk.name and lookup_type == 'exact': - where.append(get_where_clause(lookup_type, rel_table_alias+'.', - f.rel.to._meta.object_name.lower()+'_id', kwarg_value)) - params.extend(f.get_db_prep_lookup(lookup_type, kwarg_value)) - lookup_list.pop() - param_required = False - else: - new_table_alias = current_table_alias + LOOKUP_SEPARATOR + current - - joins[backend.quote_name(new_table_alias)] = ( - backend.quote_name(f.rel.to._meta.db_table), - "INNER JOIN", - '%s.%s = %s.%s' % - (rel_table_alias, - backend.quote_name(f.rel.to._meta.object_name.lower() + '_id'), - backend.quote_name(new_table_alias), - backend.quote_name(f.rel.to._meta.pk.column)) - ) - current_table_alias = new_table_alias - param_required = True - current_opts = f.rel.to._meta - raise StopIteration - # Try many-to-many relationships first in the reverse direction - # (i.e., from the class does not have the ManyToManyField) - for f in current_opts.get_all_related_many_to_many_objects(): - if f.name == current: - rel_table_alias = backend.quote_name("m2m_" + current_table_alias + LOOKUP_SEPARATOR + current) - - joins[rel_table_alias] = ( - backend.quote_name(f.field.get_m2m_db_table(f.opts)), - "INNER JOIN", - '%s.%s = %s.%s' % - (backend.quote_name(current_table_alias), - backend.quote_name(current_opts.pk.column), - rel_table_alias, - backend.quote_name(current_opts.object_name.lower() + '_id')) - ) - - # Optimization: In the case of primary-key lookups, we - # don't have to do an extra join. - if lookup_list and lookup_list[0] == f.opts.pk.name and lookup_type == 'exact': - where.append(get_where_clause(lookup_type, rel_table_alias+'.', - f.opts.object_name.lower()+'_id', kwarg_value)) - params.extend(f.field.get_db_prep_lookup(lookup_type, kwarg_value)) - lookup_list.pop() - param_required = False - else: - new_table_alias = current_table_alias + LOOKUP_SEPARATOR + current - - joins[backend.quote_name(new_table_alias)] = ( - backend.quote_name(f.opts.db_table), - "INNER JOIN", - '%s.%s = %s.%s' % - (rel_table_alias, - backend.quote_name(f.opts.object_name.lower() + '_id'), - backend.quote_name(new_table_alias), - backend.quote_name(f.opts.pk.column)) - ) - current_table_alias = new_table_alias - param_required = True - current_opts = f.opts - raise StopIteration - for f in current_opts.fields: - # Try many-to-one relationships... - if f.rel and f.name == current: - # Optimization: In the case of primary-key lookups, we - # don't have to do an extra join. - if lookup_list and lookup_list[0] == f.rel.to._meta.pk.name and lookup_type == 'exact': - where.append(get_where_clause(lookup_type, current_table_alias+'.', f.column, kwarg_value)) - params.extend(f.get_db_prep_lookup(lookup_type, kwarg_value)) - lookup_list.pop() - param_required = False - # 'isnull' lookups in many-to-one relationships are a special case, - # because we don't want to do a join. We just want to find out - # whether the foreign key field is NULL. - elif lookup_type == 'isnull' and not lookup_list: - where.append(get_where_clause(lookup_type, current_table_alias+'.', f.column, kwarg_value)) - params.extend(f.get_db_prep_lookup(lookup_type, kwarg_value)) - else: - new_table_alias = current_table_alias + LOOKUP_SEPARATOR + current - - joins[backend.quote_name(new_table_alias)] = ( - backend.quote_name(f.rel.to._meta.db_table), - "INNER JOIN", - '%s.%s = %s.%s' % - (backend.quote_name(current_table_alias), - backend.quote_name(f.column), - backend.quote_name(new_table_alias), - backend.quote_name(f.rel.to._meta.pk.column)) - ) - current_table_alias = new_table_alias - param_required = True - current_opts = f.rel.to._meta - raise StopIteration - # Try direct field-name lookups... - if f.name == current: - where.append(get_where_clause(lookup_type, current_table_alias+'.', f.column, kwarg_value)) - params.extend(f.get_db_prep_lookup(lookup_type, kwarg_value)) - param_required = False - raise StopIteration - # If we haven't hit StopIteration at this point, "current" must be - # an invalid lookup, so raise an exception. - throw_bad_kwarg_error(kwarg) - except StopIteration: - continue return tables, joins, where, params diff --git a/django/utils/datastructures.py b/django/utils/datastructures.py index 7ffeee72e5..818e5b90f2 100644 --- a/django/utils/datastructures.py +++ b/django/utils/datastructures.py @@ -40,6 +40,41 @@ class MergeDict: return True return False +class SortedDict(dict): + "A dictionary that keeps its keys in the order in which they're inserted." + def __init__(self, data={}): + dict.__init__(self, data) + self.keyOrder = data.keys() + + def __setitem__(self, key, value): + dict.__setitem__(self, key, value) + if key not in self.keyOrder: + self.keyOrder.append(key) + + def __delitem__(self, key, value): + dict.__delitem__(self, key, value) + self.keyOrder.remove(key) + + def __iter__(self): + for k in self.keyOrder: + yield k + + def items(self): + for k in self.keyOrder: + yield k, dict.__getitem__(self, k) + + def keys(self): + for k in self.keyOrder: + yield k + + def values(self): + for k in self.keyOrder: + yield dict.__getitem__(self, k) + + def update(self, dict): + for k, v in dict.items(): + self.__setitem__(k, v) + class MultiValueDictKeyError(KeyError): pass diff --git a/tests/modeltests/custom_columns/models.py b/tests/modeltests/custom_columns/models.py index 9c96401215..f21bc721d0 100644 --- a/tests/modeltests/custom_columns/models.py +++ b/tests/modeltests/custom_columns/models.py @@ -35,7 +35,7 @@ John Smith >>> Person.objects.get_list(firstname__exact='John') Traceback (most recent call last): ... -TypeError: got unexpected keyword argument 'firstname__exact' +TypeError: Cannot resolve keyword 'firstname' into field >>> p = Person.objects.get_object(last_name__exact='Smith') >>> p.first_name diff --git a/tests/modeltests/custom_pk/models.py b/tests/modeltests/custom_pk/models.py index aec17ff055..001552dbfe 100644 --- a/tests/modeltests/custom_pk/models.py +++ b/tests/modeltests/custom_pk/models.py @@ -47,6 +47,10 @@ Traceback (most recent call last): ... DoesNotExist: Employee does not exist for {'pk': 'foo'} +# Use the name of the primary key, rather than pk. +>>> Employee.objects.get_object(employee_code__exact='ABC123') +Dan Jones + # Fran got married and changed her last name. >>> fran = Employee.objects.get_object(pk='XYZ456') >>> fran.last_name = 'Jones' @@ -66,4 +70,23 @@ True [Sears] >>> Business.objects.get_in_bulk(['Sears']) {'Sears': Sears} + +>>> Business.objects.get_list(name__exact='Sears') +[Sears] +>>> Business.objects.get_list(pk='Sears') +[Sears] + +# Queries across tables, involving primary key +>>> Employee.objects.get_list(businesses__name__exact='Sears') +[Dan Jones, Fran Jones] +>>> Employee.objects.get_list(businesses__pk='Sears') +[Dan Jones, Fran Jones] + +>>> Business.objects.get_list(employees__employee_code__exact='ABC123') +[Sears] +>>> Business.objects.get_list(employees__pk='ABC123') +[Sears] +>>> Business.objects.get_list(employees__first_name__startswith='Fran') +[Sears] + """ diff --git a/tests/modeltests/many_to_many/models.py b/tests/modeltests/many_to_many/models.py index 825743a03c..5b1c6435db 100644 --- a/tests/modeltests/many_to_many/models.py +++ b/tests/modeltests/many_to_many/models.py @@ -68,6 +68,8 @@ True [Django lets you build Web apps easily, NASA uses Python] # We can perform kwarg queries across m2m relationships +>>> Article.objects.get_list(publications__id__exact=1) +[Django lets you build Web apps easily, NASA uses Python] >>> Article.objects.get_list(publications__pk=1) [Django lets you build Web apps easily, NASA uses Python] @@ -78,9 +80,17 @@ True [NASA uses Python] # Reverse m2m queries (i.e., start at the table that doesn't have a ManyToManyField) +>>> Publication.objects.get_list(id__exact=1) +[The Python Journal] +>>> Publication.objects.get_list(pk=1) +[The Python Journal] + >>> Publication.objects.get_list(articles__headline__startswith="NASA") [The Python Journal, Science News, Science Weekly] +>>> Publication.objects.get_list(articles__id__exact=1) +[The Python Journal] + >>> Publication.objects.get_list(articles__pk=1) [The Python Journal] diff --git a/tests/modeltests/many_to_one/models.py b/tests/modeltests/many_to_one/models.py index 5acaae7af0..d847b94346 100644 --- a/tests/modeltests/many_to_one/models.py +++ b/tests/modeltests/many_to_one/models.py @@ -22,6 +22,7 @@ class Article(models.Model): def __repr__(self): return self.headline + API_TESTS = """ # Create a Reporter. >>> r = Reporter(first_name='John', last_name='Smith', email='john@example.com') @@ -60,6 +61,16 @@ This is a test >>> r.get_article_count() 2 +# Get articles by id +>>> Article.objects.get_list(id__exact=1) +[This is a test] +>>> Article.objects.get_list(pk=1) +[This is a test] + +# Query on an article property +>>> Article.objects.get_list(headline__startswith='This') +[This is a test] + # The API automatically follows relationships as far as you need. # Use double underscores to separate relationships. # This works as many levels deep as you want. There's no limit. @@ -83,12 +94,20 @@ This is a test # Find all Articles for the Reporter whose ID is 1. >>> Article.objects.get_list(reporter__id__exact=1, order_by=['pub_date']) [This is a test, John's second story] +>>> Article.objects.get_list(reporter__pk=1, order_by=['pub_date']) +[This is a test, John's second story] -# Note you need two underscores between "reporter" and "id" -- not one. +# You need two underscores between "reporter" and "id" -- not one. >>> Article.objects.get_list(reporter_id__exact=1) Traceback (most recent call last): ... -TypeError: got unexpected keyword argument 'reporter_id__exact' +TypeError: Cannot resolve keyword 'reporter_id' into field + +# You need to specify a comparison clause +>>> Article.objects.get_list(reporter_id=1) +Traceback (most recent call last): + ... +TypeError: Cannot parse keyword query 'reporter_id' # "pk" shortcut syntax works in a related context, too. >>> Article.objects.get_list(reporter__pk=1, order_by=['pub_date']) @@ -109,4 +128,28 @@ John Smith >>> a4.get_reporter() John Smith +# Reporters can be queried +>>> Reporter.objects.get_list(id__exact=1) +[John Smith] +>>> Reporter.objects.get_list(pk=1) +[John Smith] +>>> Reporter.objects.get_list(first_name__startswith='John') +[John Smith] + +# Reporters can query in opposite direction of ForeignKey definition +>>> Reporter.objects.get_list(articles__id__exact=1) +[John Smith] +>>> Reporter.objects.get_list(articles__pk=1) +[John Smith] +>>> Reporter.objects.get_list(articles__headline__startswith='This') +[John Smith, John Smith, John Smith] +>>> Reporter.objects.get_list(articles__headline__startswith='This', distinct=True) +[John Smith] + +# Queries can go round in circles. +>>> Reporter.objects.get_list(articles__reporter__first_name__startswith='John') +[John Smith, John Smith, John Smith, John Smith] +>>> Reporter.objects.get_list(articles__reporter__first_name__startswith='John', distinct=True) +[John Smith] + """ diff --git a/tests/modeltests/one_to_one/models.py b/tests/modeltests/one_to_one/models.py index a45d1a75f0..4c35604d2c 100644 --- a/tests/modeltests/one_to_one/models.py +++ b/tests/modeltests/one_to_one/models.py @@ -66,10 +66,23 @@ DoesNotExist: Restaurant does not exist for {'place__id__exact': ...} >>> Restaurant.objects.get_object(place__id__exact=1) Demon Dogs the restaurant ->>> Restaurant.objects.get_object(place__name__startswith="Demon") -Demon Dogs the restaurant >>> Restaurant.objects.get_object(pk=1) Demon Dogs the restaurant +>>> Restaurant.objects.get_object(place__exact=1) +Demon Dogs the restaurant +>>> Restaurant.objects.get_object(place__pk=1) +Demon Dogs the restaurant +>>> Restaurant.objects.get_object(place__name__startswith="Demon") +Demon Dogs the restaurant + +>>> Place.objects.get_object(id__exact=1) +Demon Dogs the place +>>> Place.objects.get_object(pk=1) +Demon Dogs the place +>>> Place.objects.get_object(restaurants__place__exact=1) +Demon Dogs the place +>>> Place.objects.get_object(restaurants__pk=1) +Demon Dogs the place # Add a Waiter to the Restaurant. >>> w = r.add_waiter(name='Joe') @@ -77,6 +90,17 @@ Demon Dogs the restaurant >>> w Joe the waiter at Demon Dogs the restaurant +# Query the waiters +>>> Waiter.objects.get_list(restaurant__place__exact=1) +[Joe the waiter at Demon Dogs the restaurant] +>>> Waiter.objects.get_list(restaurant__pk=1) +[Joe the waiter at Demon Dogs the restaurant] +>>> Waiter.objects.get_list(id__exact=1) +[Joe the waiter at Demon Dogs the restaurant] +>>> Waiter.objects.get_list(pk=1) +[Joe the waiter at Demon Dogs the restaurant] + +# Delete the restaurant; the waiter should also be removed >>> r = Restaurant.objects.get_object(pk=1) >>> r.delete() """