mirror of
				https://github.com/django/django.git
				synced 2025-10-25 14:46:09 +00:00 
			
		
		
		
	Fixed #28194 -- Added support for normalization and cover density to SearchRank.
This commit is contained in:
		
				
					committed by
					
						 Mariusz Felisiak
						Mariusz Felisiak
					
				
			
			
				
	
			
			
			
						parent
						
							4ed534758c
						
					
				
				
					commit
					0b51a4f894
				
			| @@ -208,7 +208,10 @@ class SearchRank(Func): | |||||||
|     function = 'ts_rank' |     function = 'ts_rank' | ||||||
|     output_field = FloatField() |     output_field = FloatField() | ||||||
|  |  | ||||||
|     def __init__(self, vector, query, weights=None): |     def __init__( | ||||||
|  |         self, vector, query, weights=None, normalization=None, | ||||||
|  |         cover_density=False, | ||||||
|  |     ): | ||||||
|         if not hasattr(vector, 'resolve_expression'): |         if not hasattr(vector, 'resolve_expression'): | ||||||
|             vector = SearchVector(vector) |             vector = SearchVector(vector) | ||||||
|         if not hasattr(query, 'resolve_expression'): |         if not hasattr(query, 'resolve_expression'): | ||||||
| @@ -218,6 +221,12 @@ class SearchRank(Func): | |||||||
|             if not hasattr(weights, 'resolve_expression'): |             if not hasattr(weights, 'resolve_expression'): | ||||||
|                 weights = Value(weights) |                 weights = Value(weights) | ||||||
|             expressions = (weights,) + expressions |             expressions = (weights,) + expressions | ||||||
|  |         if normalization is not None: | ||||||
|  |             if not hasattr(normalization, 'resolve_expression'): | ||||||
|  |                 normalization = Value(normalization) | ||||||
|  |             expressions += (normalization,) | ||||||
|  |         if cover_density: | ||||||
|  |             self.function = 'ts_rank_cd' | ||||||
|         super().__init__(*expressions) |         super().__init__(*expressions) | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -118,7 +118,7 @@ See :ref:`postgresql-fts-search-configuration` for an explanation of the | |||||||
| ``SearchRank`` | ``SearchRank`` | ||||||
| ============== | ============== | ||||||
|  |  | ||||||
| .. class:: SearchRank(vector, query, weights=None) | .. class:: SearchRank(vector, query, weights=None, normalization=None, cover_density=False) | ||||||
|  |  | ||||||
| So far, we've returned the results for which any match between the vector and | So far, we've returned the results for which any match between the vector and | ||||||
| the query is possible. It's likely you may wish to order the results by some | the query is possible. It's likely you may wish to order the results by some | ||||||
| @@ -137,6 +137,32 @@ order by relevancy:: | |||||||
| See :ref:`postgresql-fts-weighting-queries` for an explanation of the | See :ref:`postgresql-fts-weighting-queries` for an explanation of the | ||||||
| ``weights`` parameter. | ``weights`` parameter. | ||||||
|  |  | ||||||
|  | Set the ``cover_density`` parameter to ``True`` to enable the cover density | ||||||
|  | ranking, which means that the proximity of matching query terms is taken into | ||||||
|  | account. | ||||||
|  |  | ||||||
|  | Provide an integer to the ``normalization`` parameter to control rank | ||||||
|  | normalization. This integer is a bit mask, so you can combine multiple | ||||||
|  | behaviors:: | ||||||
|  |  | ||||||
|  |     >>> from django.db.models import Value | ||||||
|  |     >>> Entry.objects.annotate( | ||||||
|  |     ...     rank=SearchRank( | ||||||
|  |     ...         vector, | ||||||
|  |     ...         query, | ||||||
|  |     ...         normalization=Value(2).bitor(Value(4)), | ||||||
|  |     ...     ) | ||||||
|  |     ... ) | ||||||
|  |  | ||||||
|  | The PostgreSQL documentation has more details about `different rank | ||||||
|  | normalization options`_. | ||||||
|  |  | ||||||
|  | .. _different rank normalization options: https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-RANKING | ||||||
|  |  | ||||||
|  | .. versionadded:: 3.1 | ||||||
|  |  | ||||||
|  |     The ``normalization`` and ``cover_density`` parameters were added. | ||||||
|  |  | ||||||
| ``SearchHeadline`` | ``SearchHeadline`` | ||||||
| ================== | ================== | ||||||
|  |  | ||||||
|   | |||||||
| @@ -160,6 +160,14 @@ Minor features | |||||||
|  |  | ||||||
| * :lookup:`search` lookup now supports query expressions. | * :lookup:`search` lookup now supports query expressions. | ||||||
|  |  | ||||||
|  | * The new ``cover_density`` parameter of | ||||||
|  |   :class:`~django.contrib.postgres.search.SearchRank` allows ranking by cover | ||||||
|  |   density. | ||||||
|  |  | ||||||
|  | * The new ``normalization`` parameter of | ||||||
|  |   :class:`~django.contrib.postgres.search.SearchRank` allows rank | ||||||
|  |   normalization. | ||||||
|  |  | ||||||
| :mod:`django.contrib.redirects` | :mod:`django.contrib.redirects` | ||||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||||
|  |  | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ All text copyright Python (Monty) Pictures. Thanks to sacred-texts.com for the | |||||||
| transcript. | transcript. | ||||||
| """ | """ | ||||||
| from django.db import connection | from django.db import connection | ||||||
| from django.db.models import F | from django.db.models import F, Value | ||||||
| from django.test import modify_settings, skipUnlessDBFeature | from django.test import modify_settings, skipUnlessDBFeature | ||||||
|  |  | ||||||
| from . import PostgreSQLSimpleTestCase, PostgreSQLTestCase | from . import PostgreSQLSimpleTestCase, PostgreSQLTestCase | ||||||
| @@ -449,6 +449,66 @@ class TestRankingAndWeights(GrailTestData, PostgreSQLTestCase): | |||||||
|         ).filter(rank__gt=0.3) |         ).filter(rank__gt=0.3) | ||||||
|         self.assertSequenceEqual(searched, [self.verse0]) |         self.assertSequenceEqual(searched, [self.verse0]) | ||||||
|  |  | ||||||
|  |     def test_cover_density_ranking(self): | ||||||
|  |         not_dense_verse = Line.objects.create( | ||||||
|  |             scene=self.robin, | ||||||
|  |             character=self.minstrel, | ||||||
|  |             dialogue=( | ||||||
|  |                 'Bravely taking to his feet, he beat a very brave retreat. ' | ||||||
|  |                 'A brave retreat brave Sir Robin.' | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  |         searched = Line.objects.filter(character=self.minstrel).annotate( | ||||||
|  |             rank=SearchRank( | ||||||
|  |                 SearchVector('dialogue'), | ||||||
|  |                 SearchQuery('brave robin'), | ||||||
|  |                 cover_density=True, | ||||||
|  |             ), | ||||||
|  |         ).order_by('rank', '-pk') | ||||||
|  |         self.assertSequenceEqual( | ||||||
|  |             searched, | ||||||
|  |             [self.verse2, not_dense_verse, self.verse1, self.verse0], | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     def test_ranking_with_normalization(self): | ||||||
|  |         short_verse = Line.objects.create( | ||||||
|  |             scene=self.robin, | ||||||
|  |             character=self.minstrel, | ||||||
|  |             dialogue='A brave retreat brave Sir Robin.', | ||||||
|  |         ) | ||||||
|  |         searched = Line.objects.filter(character=self.minstrel).annotate( | ||||||
|  |             rank=SearchRank( | ||||||
|  |                 SearchVector('dialogue'), | ||||||
|  |                 SearchQuery('brave sir robin'), | ||||||
|  |                 # Divide the rank by the document length. | ||||||
|  |                 normalization=2, | ||||||
|  |             ), | ||||||
|  |         ).order_by('rank') | ||||||
|  |         self.assertSequenceEqual( | ||||||
|  |             searched, | ||||||
|  |             [self.verse2, self.verse1, self.verse0, short_verse], | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     def test_ranking_with_masked_normalization(self): | ||||||
|  |         short_verse = Line.objects.create( | ||||||
|  |             scene=self.robin, | ||||||
|  |             character=self.minstrel, | ||||||
|  |             dialogue='A brave retreat brave Sir Robin.', | ||||||
|  |         ) | ||||||
|  |         searched = Line.objects.filter(character=self.minstrel).annotate( | ||||||
|  |             rank=SearchRank( | ||||||
|  |                 SearchVector('dialogue'), | ||||||
|  |                 SearchQuery('brave sir robin'), | ||||||
|  |                 # Divide the rank by the document length and by the number of | ||||||
|  |                 # unique words in the document. | ||||||
|  |                 normalization=Value(2).bitor(Value(8)), | ||||||
|  |             ), | ||||||
|  |         ).order_by('rank') | ||||||
|  |         self.assertSequenceEqual( | ||||||
|  |             searched, | ||||||
|  |             [self.verse2, self.verse1, self.verse0, short_verse], | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class SearchVectorIndexTests(PostgreSQLTestCase): | class SearchVectorIndexTests(PostgreSQLTestCase): | ||||||
|     def test_search_vector_index(self): |     def test_search_vector_index(self): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user