mirror of
https://github.com/django/django.git
synced 2025-09-17 14:39:17 +00:00
Fixed #28041 -- Added Lexeme expression to contrib.postgres.search.
This expression automatically escapes its input and allows fine-grained control over prefix matching and term weighting via logical combinations. Thanks Mariusz Felisiak, Adam Zapletal, Paolo Melchiorre, Jacob Walls, Adam Johnson, and Simon Charette for reviews. Co-authored-by: joetsoi <joetsoi@users.noreply.github.com> Co-authored-by: Karl Hobley <karl@kaed.uk> Co-authored-by: Alexandr Tatarinov <tatarinov1997@gmail.com>
This commit is contained in:
parent
e08fa42fa6
commit
218f69f05e
@ -1,3 +1,4 @@
|
|||||||
|
from django.db.backends.postgresql.psycopg_any import is_psycopg3
|
||||||
from django.db.models import (
|
from django.db.models import (
|
||||||
CharField,
|
CharField,
|
||||||
Expression,
|
Expression,
|
||||||
@ -10,9 +11,45 @@ from django.db.models import (
|
|||||||
)
|
)
|
||||||
from django.db.models.expressions import CombinedExpression, register_combinable_fields
|
from django.db.models.expressions import CombinedExpression, register_combinable_fields
|
||||||
from django.db.models.functions import Cast, Coalesce
|
from django.db.models.functions import Cast, Coalesce
|
||||||
|
from django.utils.regex_helper import _lazy_re_compile
|
||||||
|
|
||||||
from .utils import CheckPostgresInstalledMixin
|
from .utils import CheckPostgresInstalledMixin
|
||||||
|
|
||||||
|
if is_psycopg3:
    from psycopg.adapt import Dumper

    class UTF8Dumper(Dumper):
        # Minimal psycopg 3 Dumper that encodes Python str as UTF-8 bytes so
        # that quote() can be applied to arbitrary text.
        def dump(self, obj):
            return bytes(obj, "utf-8")

    def quote_lexeme(value):
        """Escape special characters and SQL-quote a lexeme string."""
        return UTF8Dumper(str).quote(psql_escape(value)).decode()

else:
    from psycopg2.extensions import adapt

    def quote_lexeme(value):
        """Escape special characters and SQL-quote a lexeme string."""
        adapter = adapt(psql_escape(value))
        # Force a known encoding so getquoted() round-trips non-ASCII input.
        adapter.encoding = "utf-8"
        return adapter.getquoted().decode()
|
||||||
|
|
||||||
|
|
||||||
|
# Characters that carry special meaning in tsquery syntax (quotes, brackets,
# operators, NUL, backslash) and must not reach the query string verbatim.
spec_chars_re = _lazy_re_compile(r"['\0\[\]()|&:*!@<>\\]")
multiple_spaces_re = _lazy_re_compile(r"\s{2,}")


def normalize_spaces(val):
    """Convert multiple spaces to single and strip from both sides."""
    if not (val := val.strip()):
        # An all-whitespace value normalizes to None, not "".
        return None
    return multiple_spaces_re.sub(" ", val)


def psql_escape(query):
    """Replace chars not fit for use in search queries with a single space."""
    query = spec_chars_re.sub(" ", query)
    return normalize_spaces(query)
|
||||||
|
|
||||||
|
|
||||||
class SearchVectorExact(Lookup):
|
class SearchVectorExact(Lookup):
|
||||||
lookup_name = "exact"
|
lookup_name = "exact"
|
||||||
@ -205,6 +242,9 @@ class SearchQuery(SearchQueryCombinable, Func):
|
|||||||
invert=False,
|
invert=False,
|
||||||
search_type="plain",
|
search_type="plain",
|
||||||
):
|
):
|
||||||
|
if isinstance(value, LexemeCombinable):
|
||||||
|
search_type = "raw"
|
||||||
|
|
||||||
self.function = self.SEARCH_TYPES.get(search_type)
|
self.function = self.SEARCH_TYPES.get(search_type)
|
||||||
if self.function is None:
|
if self.function is None:
|
||||||
raise ValueError("Unknown search_type argument '%s'." % search_type)
|
raise ValueError("Unknown search_type argument '%s'." % search_type)
|
||||||
@ -383,3 +423,104 @@ class TrigramWordSimilarity(TrigramWordBase):
|
|||||||
|
|
||||||
class TrigramStrictWordSimilarity(TrigramWordBase):
|
class TrigramStrictWordSimilarity(TrigramWordBase):
|
||||||
function = "STRICT_WORD_SIMILARITY"
|
function = "STRICT_WORD_SIMILARITY"
|
||||||
|
|
||||||
|
|
||||||
|
class LexemeCombinable:
    """
    Mixin providing logical combination (&, |) of lexemes.

    Combining two lexemes produces a CombinedLexeme; combining with anything
    else raises TypeError so operator misuse fails loudly.
    """

    BITAND = "&"
    BITOR = "|"

    def _combine(self, other, connector, reversed):
        if not isinstance(other, LexemeCombinable):
            raise TypeError(
                "A Lexeme can only be combined with another Lexeme, "
                f"got {other.__class__.__name__}."
            )
        if reversed:
            return CombinedLexeme(other, connector, self)
        return CombinedLexeme(self, connector, other)

    # On Combinable, these are not implemented to reduce confusion with Q. In
    # this case we are actually (ab)using them to do logical combination so
    # it's consistent with other usage in Django.
    def __or__(self, other):
        return self._combine(other, self.BITOR, False)

    def __ror__(self, other):
        return self._combine(other, self.BITOR, True)

    def __and__(self, other):
        return self._combine(other, self.BITAND, False)

    def __rand__(self, other):
        return self._combine(other, self.BITAND, True)
|
||||||
|
|
||||||
|
|
||||||
|
class Lexeme(LexemeCombinable, Value):
    """
    A single search term whose content is escaped before being embedded in a
    raw tsquery, with optional inversion (!), prefix matching (:*), and term
    weighting (:A-:D).
    """

    _output_field = SearchQueryField()

    def __init__(
        self, value, output_field=None, *, invert=False, prefix=False, weight=None
    ):
        if value == "":
            raise ValueError("Lexeme value cannot be empty.")

        if not isinstance(value, str):
            raise TypeError(
                f"Lexeme value must be a string, got {value.__class__.__name__}."
            )

        # Weight must be a single letter A-D (case-insensitive); anything
        # else (wrong type, wrong letter, extra characters) is rejected.
        if weight is not None and (
            not isinstance(weight, str) or weight.lower() not in {"a", "b", "c", "d"}
        ):
            raise ValueError(
                f"Weight must be one of 'A', 'B', 'C', and 'D', got {weight!r}."
            )

        self.prefix = prefix
        self.invert = invert
        self.weight = weight
        super().__init__(value, output_field=output_field)

    def as_sql(self, compiler, connection):
        # Escape + quote the raw value, then append the :*<weight> label and
        # leading ! per tsquery syntax.
        param = quote_lexeme(self.value)
        label = ""
        if self.prefix:
            label += "*"
        if self.weight:
            label += self.weight

        if label:
            param = f"{param}:{label}"
        if self.invert:
            param = f"!{param}"

        return "%s", (param,)

    def __invert__(self):
        cloned = self.copy()
        cloned.invert = not self.invert
        return cloned
|
||||||
|
|
||||||
|
|
||||||
|
class CombinedLexeme(LexemeCombinable, CombinedExpression):
    """
    Two lexeme expressions joined by & or |, rendered as a single
    parenthesized tsquery fragment passed as one parameter.
    """

    _output_field = SearchQueryField()

    def as_sql(self, compiler, connection):
        value_params = []
        lsql, params = compiler.compile(self.lhs)
        value_params.extend(params)

        rsql, params = compiler.compile(self.rhs)
        value_params.extend(params)

        # Each side compiles to "%s" with its (already quoted) value in
        # params; interpolate them to build one combined literal.
        combined_sql = f"({lsql} {self.connector} {rsql})"
        combined_value = combined_sql % tuple(value_params)
        return "%s", (combined_value,)

    def __invert__(self):
        # Apply De Morgan's theorem.
        cloned = self.copy()
        cloned.connector = self.BITAND if self.connector == self.BITOR else self.BITOR
        cloned.lhs = ~self.lhs
        cloned.rhs = ~self.rhs
        return cloned
|
||||||
|
@ -96,7 +96,7 @@ Examples:
|
|||||||
|
|
||||||
.. code-block:: pycon
|
.. code-block:: pycon
|
||||||
|
|
||||||
>>> from django.contrib.postgres.search import SearchQuery
|
>>> from django.contrib.postgres.search import SearchQuery, Lexeme
|
||||||
>>> SearchQuery("red tomato") # two keywords
|
>>> SearchQuery("red tomato") # two keywords
|
||||||
>>> SearchQuery("tomato red") # same results as above
|
>>> SearchQuery("tomato red") # same results as above
|
||||||
>>> SearchQuery("red tomato", search_type="phrase") # a phrase
|
>>> SearchQuery("red tomato", search_type="phrase") # a phrase
|
||||||
@ -105,6 +105,7 @@ Examples:
|
|||||||
>>> SearchQuery(
|
>>> SearchQuery(
|
||||||
... "'tomato' ('red' OR 'green')", search_type="websearch"
|
... "'tomato' ('red' OR 'green')", search_type="websearch"
|
||||||
... ) # websearch operators
|
... ) # websearch operators
|
||||||
|
>>> SearchQuery(Lexeme("tomato") & (Lexeme("red") | Lexeme("green"))) # Lexeme objects
|
||||||
|
|
||||||
``SearchQuery`` terms can be combined logically to provide more flexibility:
|
``SearchQuery`` terms can be combined logically to provide more flexibility:
|
||||||
|
|
||||||
@ -118,6 +119,10 @@ Examples:
|
|||||||
See :ref:`postgresql-fts-search-configuration` for an explanation of the
|
See :ref:`postgresql-fts-search-configuration` for an explanation of the
|
||||||
``config`` parameter.
|
``config`` parameter.
|
||||||
|
|
||||||
|
.. versionchanged:: 6.0
|
||||||
|
|
||||||
|
:class:`Lexeme` objects were added.
|
||||||
|
|
||||||
``SearchRank``
|
``SearchRank``
|
||||||
==============
|
==============
|
||||||
|
|
||||||
@ -276,6 +281,53 @@ floats to :class:`SearchRank` as ``weights`` in the same order above:
|
|||||||
>>> rank = SearchRank(vector, query, weights=[0.2, 0.4, 0.6, 0.8])
|
>>> rank = SearchRank(vector, query, weights=[0.2, 0.4, 0.6, 0.8])
|
||||||
>>> Entry.objects.annotate(rank=rank).filter(rank__gte=0.3).order_by("-rank")
|
>>> Entry.objects.annotate(rank=rank).filter(rank__gte=0.3).order_by("-rank")
|
||||||
|
|
||||||
|
``Lexeme``
|
||||||
|
==========
|
||||||
|
|
||||||
|
.. versionadded:: 6.0
|
||||||
|
|
||||||
|
.. class:: Lexeme(value, output_field=None, *, invert=False, prefix=False, weight=None)
|
||||||
|
|
||||||
|
``Lexeme`` objects allow search operators to be safely used with strings from
|
||||||
|
an untrusted source. The content of each lexeme is escaped so that any
|
||||||
|
operators that may exist in the string itself will not be interpreted.
|
||||||
|
|
||||||
|
You can combine lexemes with other lexemes using the ``&`` and ``|`` operators
|
||||||
|
and also negate them with the ``~`` operator. For example:
|
||||||
|
|
||||||
|
.. code-block:: pycon
|
||||||
|
|
||||||
|
>>> from django.contrib.postgres.search import SearchQuery, SearchVector, Lexeme
|
||||||
|
>>> vector = SearchVector("body_text", "blog__tagline")
|
||||||
|
>>> Entry.objects.annotate(search=vector).filter(
|
||||||
|
... search=SearchQuery(Lexeme("fruit") & Lexeme("dessert"))
|
||||||
|
... )
|
||||||
|
<QuerySet [<Entry: Apple Crumble Recipes>, <Entry: Banana Split Recipes>]>
|
||||||
|
|
||||||
|
.. code-block:: pycon
|
||||||
|
|
||||||
|
>>> Entry.objects.annotate(search=vector).filter(
|
||||||
|
... search=SearchQuery(Lexeme("fruit") & Lexeme("dessert") & ~Lexeme("banana"))
|
||||||
|
... )
|
||||||
|
<QuerySet [<Entry: Apple Crumble Recipes>]>
|
||||||
|
|
||||||
|
Lexeme objects also support term weighting and prefixes:
|
||||||
|
|
||||||
|
.. code-block:: pycon
|
||||||
|
|
||||||
|
>>> Entry.objects.annotate(search=vector).filter(
|
||||||
|
... search=SearchQuery(Lexeme("Pizza") | Lexeme("Cheese"))
|
||||||
|
... )
|
||||||
|
<QuerySet [<Entry: Cheese on Toast recipes>, <Entry: Pizza recipes>]>
|
||||||
|
>>> Entry.objects.annotate(search=vector).filter(
|
||||||
|
... search=SearchQuery(Lexeme("Pizza") | Lexeme("Cheese", weight="A"))
|
||||||
|
... )
|
||||||
|
<QuerySet [<Entry: Pizza recipes>]>
|
||||||
|
>>> Entry.objects.annotate(search=vector).filter(
|
||||||
|
... search=SearchQuery(Lexeme("za", prefix=True))
|
||||||
|
... )
|
||||||
|
<QuerySet []>
|
||||||
|
|
||||||
Performance
|
Performance
|
||||||
===========
|
===========
|
||||||
|
|
||||||
|
@ -171,6 +171,12 @@ Minor features
|
|||||||
:mod:`django.contrib.postgres`
|
:mod:`django.contrib.postgres`
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
* The new :class:`Lexeme <django.contrib.postgres.search.Lexeme>` expression
|
||||||
|
for full text search provides fine-grained control over search terms.
|
||||||
|
``Lexeme`` objects automatically escape their input and support logical
|
||||||
|
combination operators (``&``, ``|``, ``~``), prefix matching, and term
|
||||||
|
weighting.
|
||||||
|
|
||||||
* Model fields, indexes, and constraints from :mod:`django.contrib.postgres`
|
* Model fields, indexes, and constraints from :mod:`django.contrib.postgres`
|
||||||
now include system checks to verify that ``django.contrib.postgres`` is an
|
now include system checks to verify that ``django.contrib.postgres`` is an
|
||||||
installed app.
|
installed app.
|
||||||
|
@ -6,6 +6,7 @@ All text copyright Python (Monty) Pictures. Thanks to sacred-texts.com for the
|
|||||||
transcript.
|
transcript.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from django.db import connection
|
||||||
from django.db.models import F, Value
|
from django.db.models import F, Value
|
||||||
|
|
||||||
from . import PostgreSQLSimpleTestCase, PostgreSQLTestCase
|
from . import PostgreSQLSimpleTestCase, PostgreSQLTestCase
|
||||||
@ -13,11 +14,13 @@ from .models import Character, Line, LineSavedSearch, Scene
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
from django.contrib.postgres.search import (
|
from django.contrib.postgres.search import (
|
||||||
|
Lexeme,
|
||||||
SearchConfig,
|
SearchConfig,
|
||||||
SearchHeadline,
|
SearchHeadline,
|
||||||
SearchQuery,
|
SearchQuery,
|
||||||
SearchRank,
|
SearchRank,
|
||||||
SearchVector,
|
SearchVector,
|
||||||
|
quote_lexeme,
|
||||||
)
|
)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
@ -769,3 +772,223 @@ class SearchHeadlineTests(GrailTestData, PostgreSQLTestCase):
|
|||||||
"<b>Brave</b>, <b>brave</b>, <b>brave</b>...<br>"
|
"<b>Brave</b>, <b>brave</b>, <b>brave</b>...<br>"
|
||||||
"<b>brave</b> <b>Sir</b> <b>Robin</b>",
|
"<b>brave</b> <b>Sir</b> <b>Robin</b>",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestLexemes(GrailTestData, PostgreSQLTestCase):
    """Integration and SQL-generation tests for the Lexeme expression."""

    def test_and(self):
        searched = Line.objects.annotate(
            search=SearchVector("scene__setting", "dialogue"),
        ).filter(search=SearchQuery(Lexeme("bedemir") & Lexeme("scales")))
        self.assertSequenceEqual(searched, [self.bedemir0])

    def test_multiple_and(self):
        searched = Line.objects.annotate(
            search=SearchVector("scene__setting", "dialogue"),
        ).filter(
            search=SearchQuery(
                Lexeme("bedemir") & Lexeme("scales") & Lexeme("nostrils")
            )
        )
        self.assertSequenceEqual(searched, [])

        searched = Line.objects.annotate(
            search=SearchVector("scene__setting", "dialogue"),
        ).filter(search=SearchQuery(Lexeme("shall") & Lexeme("use") & Lexeme("larger")))
        self.assertSequenceEqual(searched, [self.bedemir0])

    def test_or(self):
        searched = Line.objects.annotate(search=SearchVector("dialogue")).filter(
            search=SearchQuery(Lexeme("kneecaps") | Lexeme("nostrils"))
        )
        self.assertCountEqual(searched, [self.verse1, self.verse2])

    def test_multiple_or(self):
        searched = Line.objects.annotate(search=SearchVector("dialogue")).filter(
            search=SearchQuery(
                Lexeme("kneecaps") | Lexeme("nostrils") | Lexeme("Sir Robin")
            )
        )
        self.assertCountEqual(searched, [self.verse1, self.verse2, self.verse0])

    def test_advanced(self):
        """
        Combination of & and |
        This is mainly helpful for checking the test_advanced_invert below
        """
        searched = Line.objects.annotate(search=SearchVector("dialogue")).filter(
            search=SearchQuery(
                Lexeme("shall") & Lexeme("use") & Lexeme("larger") | Lexeme("nostrils")
            )
        )
        self.assertCountEqual(searched, [self.bedemir0, self.verse2])

    def test_invert(self):
        searched = Line.objects.annotate(search=SearchVector("dialogue")).filter(
            character=self.minstrel, search=SearchQuery(~Lexeme("kneecaps"))
        )
        self.assertCountEqual(searched, [self.verse0, self.verse2])

    def test_advanced_invert(self):
        """
        Inverting a query that uses a combination of & and |
        should return the opposite of test_advanced.
        """
        searched = Line.objects.annotate(search=SearchVector("dialogue")).filter(
            search=SearchQuery(
                ~(
                    Lexeme("shall") & Lexeme("use") & Lexeme("larger")
                    | Lexeme("nostrils")
                )
            )
        )
        expected_result = Line.objects.exclude(
            id__in=[self.bedemir0.id, self.verse2.id]
        )
        self.assertCountEqual(searched, expected_result)

    def test_as_sql(self):
        query = Line.objects.all().query
        compiler = query.get_compiler(connection.alias)

        tests = (
            (Lexeme("a"), ("'a'",)),
            (Lexeme("a", invert=True), ("!'a'",)),
            (~Lexeme("a"), ("!'a'",)),
            (Lexeme("a", prefix=True), ("'a':*",)),
            (Lexeme("a", weight="D"), ("'a':D",)),
            (Lexeme("a", invert=True, prefix=True, weight="D"), ("!'a':*D",)),
            (Lexeme("a") | Lexeme("b") & ~Lexeme("c"), ("('a' | ('b' & !'c'))",)),
            (
                ~(Lexeme("a") | Lexeme("b") & ~Lexeme("c")),
                ("(!'a' & (!'b' | 'c'))",),
            ),
        )

        for expression, expected_params in tests:
            with self.subTest(expression=expression, expected_params=expected_params):
                _, params = expression.as_sql(compiler, connection)
                self.assertEqual(params, expected_params)

    def test_quote_lexeme(self):
        tests = (
            ("L'amour piqué par une abeille", "'L amour piqué par une abeille'"),
            ("'starting quote", "'starting quote'"),
            ("ending quote'", "'ending quote'"),
            ("double quo''te", "'double quo te'"),
            ("triple quo'''te", "'triple quo te'"),
            ("backslash\\", "'backslash'"),
            ("exclamation!", "'exclamation'"),
            ("ampers&nd", "'ampers nd'"),
        )
        for lexeme, quoted in tests:
            with self.subTest(lexeme=lexeme):
                self.assertEqual(quote_lexeme(lexeme), quoted)

    def test_prefix_searching(self):
        searched = Line.objects.annotate(
            search=SearchVector("scene__setting", "dialogue"),
        ).filter(search=SearchQuery(Lexeme("hear", prefix=True)))

        self.assertSequenceEqual(searched, [self.verse2])

    def test_inverse_prefix_searching(self):
        searched = Line.objects.annotate(
            search=SearchVector("scene__setting", "dialogue"),
        ).filter(search=SearchQuery(Lexeme("Robi", prefix=True, invert=True)))
        self.assertEqual(
            set(searched),
            {
                self.verse2,
                self.bedemir0,
                self.bedemir1,
                self.french,
                self.crowd,
                self.witch,
                self.duck,
            },
        )

    def test_lexemes_multiple_and(self):
        searched = Line.objects.annotate(
            search=SearchVector("scene__setting", "dialogue"),
        ).filter(
            search=SearchQuery(
                Lexeme("Robi", prefix=True) & Lexeme("Camel", prefix=True)
            )
        )

        self.assertSequenceEqual(searched, [self.verse0])

    def test_lexemes_multiple_or(self):
        searched = Line.objects.annotate(
            search=SearchVector("scene__setting", "dialogue"),
        ).filter(
            search=SearchQuery(
                Lexeme("kneecap", prefix=True) | Lexeme("afrai", prefix=True)
            )
        )

        self.assertSequenceEqual(searched, [self.verse0, self.verse1])

    def test_config_query_explicit(self):
        searched = Line.objects.annotate(
            search=SearchVector("scene__setting", "dialogue", config="french"),
        ).filter(search=SearchQuery(Lexeme("cadeaux"), config="french"))

        self.assertSequenceEqual(searched, [self.french])

    def test_config_query_implicit(self):
        searched = Line.objects.annotate(
            search=SearchVector("scene__setting", "dialogue", config="french"),
        ).filter(search=Lexeme("cadeaux"))

        self.assertSequenceEqual(searched, [self.french])

    def test_config_from_field_explicit(self):
        searched = Line.objects.annotate(
            search=SearchVector(
                "scene__setting", "dialogue", config=F("dialogue_config")
            ),
        ).filter(search=SearchQuery(Lexeme("cadeaux"), config=F("dialogue_config")))
        self.assertSequenceEqual(searched, [self.french])

    def test_config_from_field_implicit(self):
        searched = Line.objects.annotate(
            search=SearchVector(
                "scene__setting", "dialogue", config=F("dialogue_config")
            ),
        ).filter(search=Lexeme("cadeaux"))
        self.assertSequenceEqual(searched, [self.french])

    def test_invalid_combinations(self):
        msg = "A Lexeme can only be combined with another Lexeme, got NoneType."
        with self.assertRaisesMessage(TypeError, msg):
            Line.objects.filter(dialogue__search=None | Lexeme("kneecaps"))

        with self.assertRaisesMessage(TypeError, msg):
            Line.objects.filter(dialogue__search=None & Lexeme("kneecaps"))

    def test_invalid_weights(self):
        invalid_weights = ["E", "Drandom", "AB", "C ", 0, "", " ", [1, 2, 3]]
        for weight in invalid_weights:
            with self.subTest(weight=weight):
                with self.assertRaisesMessage(
                    ValueError,
                    f"Weight must be one of 'A', 'B', 'C', and 'D', got {weight!r}.",
                ):
                    Line.objects.filter(
                        dialogue__search=Lexeme("kneecaps", weight=weight)
                    )

    def test_empty(self):
        with self.assertRaisesMessage(ValueError, "Lexeme value cannot be empty."):
            Line.objects.annotate(
                search=SearchVector("scene__setting", "dialogue")
            ).filter(search=SearchQuery(Lexeme("")))

    def test_non_string_values(self):
        msg = "Lexeme value must be a string, got NoneType."
        with self.assertRaisesMessage(TypeError, msg):
            Line.objects.annotate(
                search=SearchVector("scene__setting", "dialogue")
            ).filter(search=SearchQuery(Lexeme(None)))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user