summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nikita Marchant <nikita.marchant@gmail.com> 2021-09-15 12:57:49 +0200
committer Mariusz Felisiak <felisiak.mariusz@gmail.com> 2021-09-17 13:05:15 +0200
commit 4e4082f9396e21de0bd88dbfc651da9ad01c7c0c (patch)
tree 34d93d1ef520a097f95cf46cd854ea47700214ef
parent 4ca508a68916dd43da45fd6e8b9004824a62d9c8 (diff)
download django-4e4082f9396e21de0bd88dbfc651da9ad01c7c0c.tar.gz
Fixed #32492 -- Added TrigramWordSimilarity() and TrigramWordDistance() on PostgreSQL.
-rw-r--r-- AUTHORS 1
-rw-r--r-- django/contrib/postgres/apps.py 6
-rw-r--r-- django/contrib/postgres/lookups.py 5
-rw-r--r-- django/contrib/postgres/search.py 18
-rw-r--r-- docs/ref/contrib/postgres/lookups.txt 30
-rw-r--r-- docs/ref/contrib/postgres/search.txt 47
-rw-r--r-- docs/releases/4.0.txt 7
-rw-r--r-- tests/postgres_tests/migrations/0002_create_test_models.py 2
-rw-r--r-- tests/postgres_tests/models.py 2
-rw-r--r-- tests/postgres_tests/test_trigram.py 39
10 files changed, 148 insertions, 9 deletions
diff --git a/AUTHORS b/AUTHORS
index 265163357e..214aca2480 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -710,6 +710,7 @@ answer newbie questions, and generally made Django that much better:
Nicola Larosa <nico@teknico.net>
Nicolas Lara <nicolaslara@gmail.com>
Nicolas Noé <nicolas@niconoe.eu>
+ Nikita Marchant <nikita.marchant@gmail.com>
Niran Babalola <niran@niran.org>
Nis Jørgensen <nis@superlativ.dk>
Nowell Strite <https://nowell.strite.org/>
diff --git a/django/contrib/postgres/apps.py b/django/contrib/postgres/apps.py
index 781c8728f2..91fc29ac05 100644
--- a/django/contrib/postgres/apps.py
+++ b/django/contrib/postgres/apps.py
@@ -13,7 +13,7 @@ from django.test.signals import setting_changed
from django.utils.translation import gettext_lazy as _
from .indexes import OpClass
-from .lookups import SearchLookup, TrigramSimilar, Unaccent
+from .lookups import SearchLookup, TrigramSimilar, TrigramWordSimilar, Unaccent
from .serializers import RangeSerializer
from .signals import register_type_handlers
@@ -33,6 +33,8 @@ def uninstall_if_needed(setting, value, enter, **kwargs):
TextField._unregister_lookup(SearchLookup)
CharField._unregister_lookup(TrigramSimilar)
TextField._unregister_lookup(TrigramSimilar)
+ CharField._unregister_lookup(TrigramWordSimilar)
+ TextField._unregister_lookup(TrigramWordSimilar)
# Disconnect this receiver until the next time this app is installed
# and ready() connects it again to prevent unnecessary processing on
# each setting change.
@@ -65,5 +67,7 @@ class PostgresConfig(AppConfig):
TextField.register_lookup(SearchLookup)
CharField.register_lookup(TrigramSimilar)
TextField.register_lookup(TrigramSimilar)
+ CharField.register_lookup(TrigramWordSimilar)
+ TextField.register_lookup(TrigramWordSimilar)
MigrationWriter.register_serializer(RANGE_TYPES, RangeSerializer)
IndexExpression.register_wrappers(OrderBy, OpClass, Collate)
diff --git a/django/contrib/postgres/lookups.py b/django/contrib/postgres/lookups.py
index 28d8590e1d..f7c6fc4b0c 100644
--- a/django/contrib/postgres/lookups.py
+++ b/django/contrib/postgres/lookups.py
@@ -58,3 +58,8 @@ class SearchLookup(SearchVectorExact):
class TrigramSimilar(PostgresOperatorLookup):
lookup_name = 'trigram_similar'
postgres_operator = '%%'
+
+
+class TrigramWordSimilar(PostgresOperatorLookup):
+ lookup_name = 'trigram_word_similar'
+ postgres_operator = '%%>'
diff --git a/django/contrib/postgres/search.py b/django/contrib/postgres/search.py
index f1640d85ba..164d359b91 100644
--- a/django/contrib/postgres/search.py
+++ b/django/contrib/postgres/search.py
@@ -293,6 +293,15 @@ class TrigramBase(Func):
super().__init__(expression, string, **extra)
+class TrigramWordBase(Func):
+ output_field = FloatField()
+
+ def __init__(self, string, expression, **extra):
+ if not hasattr(string, 'resolve_expression'):
+ string = Value(string)
+ super().__init__(string, expression, **extra)
+
+
class TrigramSimilarity(TrigramBase):
function = 'SIMILARITY'
@@ -300,3 +309,12 @@ class TrigramSimilarity(TrigramBase):
class TrigramDistance(TrigramBase):
function = ''
arg_joiner = ' <-> '
+
+
+class TrigramWordDistance(TrigramWordBase):
+ function = ''
+ arg_joiner = ' <<-> '
+
+
+class TrigramWordSimilarity(TrigramWordBase):
+ function = 'WORD_SIMILARITY'
diff --git a/docs/ref/contrib/postgres/lookups.txt b/docs/ref/contrib/postgres/lookups.txt
index ab7a954bf2..d9f76318cc 100644
--- a/docs/ref/contrib/postgres/lookups.txt
+++ b/docs/ref/contrib/postgres/lookups.txt
@@ -14,9 +14,8 @@ returns results that have a similarity measurement greater than the current
similarity threshold.
To use it, add ``'django.contrib.postgres'`` in your :setting:`INSTALLED_APPS`
-and activate the `pg_trgm extension
-<https://www.postgresql.org/docs/current/pgtrgm.html>`_ on PostgreSQL. You can
-install the extension using the
+and activate the `pg_trgm extension`_ on PostgreSQL. You can install the
+extension using the
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
operation.
@@ -26,6 +25,31 @@ The ``trigram_similar`` lookup can be used on
>>> City.objects.filter(name__trigram_similar="Middlesborough")
['<City: Middlesbrough>']
+.. fieldlookup:: trigram_word_similar
+
+.. versionadded:: 4.0
+
+The ``trigram_word_similar`` lookup allows you to perform trigram word
+similarity lookups using a dedicated PostgreSQL extension. It can be
+approximately understood as measuring the greatest number of trigrams shared
+between the parameter and any substring of the field. A trigram word lookup is
+given an expression and returns results that have a word similarity measurement
+greater than the current similarity threshold.
+
+To use it, add ``'django.contrib.postgres'`` in your :setting:`INSTALLED_APPS`
+and activate the `pg_trgm extension`_ on PostgreSQL. You can install the
+extension using the
+:class:`~django.contrib.postgres.operations.TrigramExtension` migration
+operation.
+
+The ``trigram_word_similar`` lookup can be used on
+:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`::
+
+ >>> Sentence.objects.filter(name__trigram_word_similar='Middlesborough')
+ ['<Sentence: Gumby rides on the path of Middlesbrough>']
+
+.. _`pg_trgm extension`: https://www.postgresql.org/docs/current/pgtrgm.html
+
``Unaccent``
============
diff --git a/docs/ref/contrib/postgres/search.txt b/docs/ref/contrib/postgres/search.txt
index fe4e86f05e..cfed877d9c 100644
--- a/docs/ref/contrib/postgres/search.txt
+++ b/docs/ref/contrib/postgres/search.txt
@@ -280,8 +280,9 @@ Trigram similarity
==================
Another approach to searching is trigram similarity. A trigram is a group of
-three consecutive characters. In addition to the :lookup:`trigram_similar`
-lookup, you can use a couple of other expressions.
+three consecutive characters. In addition to the :lookup:`trigram_similar` and
+:lookup:`trigram_word_similar` lookups, you can use a couple of other
+expressions.
To use them, you need to activate the `pg_trgm extension
<https://www.postgresql.org/docs/current/pgtrgm.html>`_ on PostgreSQL. You can
@@ -308,6 +309,27 @@ Usage example::
... ).filter(similarity__gt=0.3).order_by('-similarity')
[<Author: Katy Stevens>, <Author: Stephen Keats>]
+``TrigramWordSimilarity``
+-------------------------
+
+.. versionadded:: 4.0
+
+.. class:: TrigramWordSimilarity(string, expression, **extra)
+
+Accepts a string or expression, and a field name or expression. Returns the
+trigram word similarity between the two arguments.
+
+Usage example::
+
+ >>> from django.contrib.postgres.search import TrigramWordSimilarity
+ >>> Author.objects.create(name='Katy Stevens')
+ >>> Author.objects.create(name='Stephen Keats')
+ >>> test = 'Kat'
+ >>> Author.objects.annotate(
+ ... similarity=TrigramWordSimilarity(test, 'name'),
+ ... ).filter(similarity__gt=0.3).order_by('-similarity')
+ [<Author: Katy Stevens>]
+
``TrigramDistance``
-------------------
@@ -326,3 +348,24 @@ Usage example::
... distance=TrigramDistance('name', test),
... ).filter(distance__lte=0.7).order_by('distance')
[<Author: Katy Stevens>, <Author: Stephen Keats>]
+
+``TrigramWordDistance``
+-----------------------
+
+.. versionadded:: 4.0
+
+.. class:: TrigramWordDistance(string, expression, **extra)
+
+Accepts a string or expression, and a field name or expression. Returns the
+trigram word distance between the two arguments.
+
+Usage example::
+
+ >>> from django.contrib.postgres.search import TrigramWordDistance
+ >>> Author.objects.create(name='Katy Stevens')
+ >>> Author.objects.create(name='Stephen Keats')
+ >>> test = 'Kat'
+ >>> Author.objects.annotate(
+ ... distance=TrigramWordDistance(test, 'name'),
+ ... ).filter(distance__lte=0.7).order_by('distance')
+ [<Author: Katy Stevens>]
diff --git a/docs/releases/4.0.txt b/docs/releases/4.0.txt
index 6ec04824f2..cf8fb3dff4 100644
--- a/docs/releases/4.0.txt
+++ b/docs/releases/4.0.txt
@@ -200,6 +200,13 @@ Minor features
expression allows using subqueries to construct lists of values on
PostgreSQL.
+* The new :lookup:`trigram_word_similar` lookup, and the
+ :class:`TrigramWordDistance()
+ <django.contrib.postgres.search.TrigramWordDistance>` and
+ :class:`TrigramWordSimilarity()
+ <django.contrib.postgres.search.TrigramWordSimilarity>` expressions allow
+ using trigram word similarity.
+
:mod:`django.contrib.redirects`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/tests/postgres_tests/migrations/0002_create_test_models.py b/tests/postgres_tests/migrations/0002_create_test_models.py
index c3ab5efed7..1ce875441b 100644
--- a/tests/postgres_tests/migrations/0002_create_test_models.py
+++ b/tests/postgres_tests/migrations/0002_create_test_models.py
@@ -110,7 +110,7 @@ class Migration(migrations.Migration):
name='CharFieldModel',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
- ('field', models.CharField(max_length=16)),
+ ('field', models.CharField(max_length=64)),
],
options=None,
bases=None,
diff --git a/tests/postgres_tests/models.py b/tests/postgres_tests/models.py
index c7f55a2661..ddae9d1edb 100644
--- a/tests/postgres_tests/models.py
+++ b/tests/postgres_tests/models.py
@@ -83,7 +83,7 @@ class ArrayEnumModel(PostgreSQLModel):
class CharFieldModel(models.Model):
- field = models.CharField(max_length=16)
+ field = models.CharField(max_length=64)
class TextFieldModel(models.Model):
diff --git a/tests/postgres_tests/test_trigram.py b/tests/postgres_tests/test_trigram.py
index a5d7d868be..079a32a19b 100644
--- a/tests/postgres_tests/test_trigram.py
+++ b/tests/postgres_tests/test_trigram.py
@@ -5,7 +5,8 @@ from .models import CharFieldModel, TextFieldModel
try:
from django.contrib.postgres.search import (
- TrigramDistance, TrigramSimilarity,
+ TrigramDistance, TrigramSimilarity, TrigramWordDistance,
+ TrigramWordSimilarity,
)
except ImportError:
pass
@@ -30,6 +31,15 @@ class TrigramTest(PostgreSQLTestCase):
transform=lambda instance: instance.field,
)
+ def test_trigram_word_search(self):
+ obj = self.Model.objects.create(
+ field='Gumby rides on the path of Middlesbrough',
+ )
+ self.assertSequenceEqual(
+ self.Model.objects.filter(field__trigram_word_similar='Middlesborough'),
+ [obj],
+ )
+
def test_trigram_similarity(self):
search = 'Bat sat on cat.'
# Round result of similarity because PostgreSQL 12+ uses greater
@@ -43,6 +53,20 @@ class TrigramTest(PostgreSQLTestCase):
ordered=True,
)
+ def test_trigram_word_similarity(self):
+ search = 'mat'
+ self.assertSequenceEqual(
+ self.Model.objects.filter(
+ field__trigram_word_similar=search,
+ ).annotate(
+ word_similarity=TrigramWordSimilarity(search, 'field'),
+ ).values('field', 'word_similarity').order_by('-word_similarity'),
+ [
+ {'field': 'Cat sat on mat.', 'word_similarity': 1.0},
+ {'field': 'Matthew', 'word_similarity': 0.75},
+ ],
+ )
+
def test_trigram_similarity_alternate(self):
# Round result of distance because PostgreSQL 12+ uses greater
# precision.
@@ -55,6 +79,19 @@ class TrigramTest(PostgreSQLTestCase):
ordered=True,
)
+ def test_trigram_word_similarity_alternate(self):
+ self.assertSequenceEqual(
+ self.Model.objects.annotate(
+ word_distance=TrigramWordDistance('mat', 'field'),
+ ).filter(
+ word_distance__lte=0.7,
+ ).values('field', 'word_distance').order_by('word_distance'),
+ [
+ {'field': 'Cat sat on mat.', 'word_distance': 0},
+ {'field': 'Matthew', 'word_distance': 0.25},
+ ],
+ )
+
class TrigramTextFieldTest(TrigramTest):
"""