summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Somerville <matthew-github@dracos.co.uk>2015-06-05 17:37:48 +0100
committerTim Graham <timograham@gmail.com>2016-05-13 12:38:21 -0400
commit1962a96a30e02de78a674a2e02979c00cc55655b (patch)
tree2abaf90658eaf619563b96aa7c8c810e5b83f365
parentd7334b405fb0e677e79cb064074df68188f0ddb5 (diff)
downloaddjango-1962a96a30e02de78a674a2e02979c00cc55655b.tar.gz
Fixed #24938 -- Added PostgreSQL trigram support.
-rw-r--r--django/contrib/postgres/apps.py4
-rw-r--r--django/contrib/postgres/lookups.py5
-rw-r--r--django/contrib/postgres/operations.py6
-rw-r--r--django/contrib/postgres/search.py16
-rw-r--r--docs/ref/contrib/postgres/lookups.txt26
-rw-r--r--docs/ref/contrib/postgres/operations.txt10
-rw-r--r--docs/ref/contrib/postgres/search.txt55
-rw-r--r--docs/releases/1.10.txt4
-rw-r--r--docs/topics/db/search.txt5
-rw-r--r--tests/postgres_tests/migrations/0001_setup_extensions.py4
-rw-r--r--tests/postgres_tests/test_trigram.py53
11 files changed, 184 insertions, 4 deletions
diff --git a/django/contrib/postgres/apps.py b/django/contrib/postgres/apps.py
index e7cdfd3866..e7f76b2017 100644
--- a/django/contrib/postgres/apps.py
+++ b/django/contrib/postgres/apps.py
@@ -3,7 +3,7 @@ from django.db.backends.signals import connection_created
from django.db.models import CharField, TextField
from django.utils.translation import ugettext_lazy as _
-from .lookups import SearchLookup, Unaccent
+from .lookups import SearchLookup, TrigramSimilar, Unaccent
from .signals import register_hstore_handler
@@ -17,3 +17,5 @@ class PostgresConfig(AppConfig):
TextField.register_lookup(Unaccent)
CharField.register_lookup(SearchLookup)
TextField.register_lookup(SearchLookup)
+ CharField.register_lookup(TrigramSimilar)
+ TextField.register_lookup(TrigramSimilar)
diff --git a/django/contrib/postgres/lookups.py b/django/contrib/postgres/lookups.py
index a39c9679e7..53a62eacd1 100644
--- a/django/contrib/postgres/lookups.py
+++ b/django/contrib/postgres/lookups.py
@@ -60,3 +60,8 @@ class SearchLookup(SearchVectorExact):
self.lhs = SearchVector(self.lhs)
lhs, lhs_params = super(SearchLookup, self).process_lhs(qn, connection)
return lhs, lhs_params
+
+
+class TrigramSimilar(PostgresSimpleLookup):
+ lookup_name = 'trigram_similar'
+ operator = '%%'
diff --git a/django/contrib/postgres/operations.py b/django/contrib/postgres/operations.py
index 2a0a8402ed..5992f00725 100644
--- a/django/contrib/postgres/operations.py
+++ b/django/contrib/postgres/operations.py
@@ -40,3 +40,9 @@ class UnaccentExtension(CreateExtension):
def __init__(self):
self.name = 'unaccent'
+
+
+class TrigramExtension(CreateExtension):
+
+ def __init__(self):
+ self.name = 'pg_trgm'
diff --git a/django/contrib/postgres/search.py b/django/contrib/postgres/search.py
index 91358c62aa..4628f4cf19 100644
--- a/django/contrib/postgres/search.py
+++ b/django/contrib/postgres/search.py
@@ -185,3 +185,19 @@ class SearchRank(Func):
SearchVectorField.register_lookup(SearchVectorExact)
+
+
+class TrigramBase(Func):
+ def __init__(self, expression, string, **extra):
+ if not hasattr(string, 'resolve_expression'):
+ string = Value(string)
+ super(TrigramBase, self).__init__(expression, string, output_field=FloatField(), **extra)
+
+
+class TrigramSimilarity(TrigramBase):
+ function = 'SIMILARITY'
+
+
+class TrigramDistance(TrigramBase):
+ function = ''
+ arg_joiner = ' <-> '
diff --git a/docs/ref/contrib/postgres/lookups.txt b/docs/ref/contrib/postgres/lookups.txt
index 1f0af07b0d..daf784e221 100644
--- a/docs/ref/contrib/postgres/lookups.txt
+++ b/docs/ref/contrib/postgres/lookups.txt
@@ -2,6 +2,32 @@
PostgreSQL specific lookups
===========================
+Trigram similarity
+==================
+
+.. fieldlookup:: trigram_similar
+
+.. versionadded:: 1.10
+
+The ``trigram_similar`` lookup allows you to perform trigram lookups,
+measuring the number of trigrams (three consecutive characters) shared, using a
+dedicated PostgreSQL extension. A trigram lookup is given an expression and
+returns results that have a similarity measurement greater than the current
+similarity threshold.
+
+To use it, add ``'django.contrib.postgres'`` to your :setting:`INSTALLED_APPS`
+and activate the `pg_trgm extension
+<http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on
+PostgreSQL. You can install the extension using the
+:class:`~django.contrib.postgres.operations.TrigramExtension` migration
+operation.
+
+The ``trigram_similar`` lookup can be used on
+:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`::
+
+ >>> City.objects.filter(name__trigram_similar="Middlesborough")
+ ['<City: Middlesbrough>']
+
``Unaccent``
============
diff --git a/docs/ref/contrib/postgres/operations.txt b/docs/ref/contrib/postgres/operations.txt
index b889105283..f4dd44ebbd 100644
--- a/docs/ref/contrib/postgres/operations.txt
+++ b/docs/ref/contrib/postgres/operations.txt
@@ -27,6 +27,16 @@ the ``django.contrib.postgres.operations`` module.
which will install the ``hstore`` extension and also immediately set up the
connection to interpret hstore data.
+``TrigramExtension``
+====================
+
+.. class:: TrigramExtension()
+
+ .. versionadded:: 1.10
+
+ A subclass of :class:`~django.contrib.postgres.operations.CreateExtension`
+ that installs the ``pg_trgm`` extension.
+
``UnaccentExtension``
=====================
diff --git a/docs/ref/contrib/postgres/search.txt b/docs/ref/contrib/postgres/search.txt
index 592e730e8e..24000d341b 100644
--- a/docs/ref/contrib/postgres/search.txt
+++ b/docs/ref/contrib/postgres/search.txt
@@ -189,3 +189,58 @@ if it were an annotated ``SearchVector``::
[<Entry: Cheese on Toast recipes>, <Entry: Pizza recipes>]
.. _PostgreSQL documentation: http://www.postgresql.org/docs/current/static/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS
+
+Trigram similarity
+==================
+
+Another approach to searching is trigram similarity. A trigram is a group of
+three consecutive characters. In addition to the :lookup:`trigram_similar`
+lookup, you can use a couple of other expressions.
+
+To use them, you need to activate the `pg_trgm extension
+<http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on
+PostgreSQL. You can install it using the
+:class:`~django.contrib.postgres.operations.TrigramExtension` migration
+operation.
+
+``TrigramSimilarity``
+---------------------
+
+.. class:: TrigramSimilarity(expression, string, **extra)
+
+.. versionadded:: 1.10
+
+Accepts a field name or expression, and a string or expression. Returns the
+trigram similarity between the two arguments.
+
+Usage example::
+
+ >>> from django.contrib.postgres.search import TrigramSimilarity
+ >>> Author.objects.create(name='Katy Stevens')
+ >>> Author.objects.create(name='Stephen Keats')
+ >>> test = 'Katie Stephens'
+ >>> Author.objects.annotate(
+ ... similarity=TrigramSimilarity('name', test),
+ ... ).filter(similarity__gt=0.3).order_by('-similarity')
+ [<Author: Katy Stevens>, <Author: Stephen Keats>]
+
+``TrigramDistance``
+-------------------
+
+.. class:: TrigramDistance(expression, string, **extra)
+
+.. versionadded:: 1.10
+
+Accepts a field name or expression, and a string or expression. Returns the
+trigram distance between the two arguments.
+
+Usage example::
+
+ >>> from django.contrib.postgres.search import TrigramDistance
+ >>> Author.objects.create(name='Katy Stevens')
+ >>> Author.objects.create(name='Stephen Keats')
+ >>> test = 'Katie Stephens'
+ >>> Author.objects.annotate(
+ ... distance=TrigramDistance('name', test),
+ ... ).filter(distance__lte=0.7).order_by('distance')
+ [<Author: Katy Stevens>, <Author: Stephen Keats>]
diff --git a/docs/releases/1.10.txt b/docs/releases/1.10.txt
index 29af587eec..0463a284b3 100644
--- a/docs/releases/1.10.txt
+++ b/docs/releases/1.10.txt
@@ -33,6 +33,10 @@ search engine. You can search across multiple fields in your relational
database, combine the searches with other lookups, use different language
configurations and weightings, and rank the results by relevance.
+It also now includes trigram support, using the :lookup:`trigram_similar`
+lookup, and the :class:`~django.contrib.postgres.search.TrigramSimilarity` and
+:class:`~django.contrib.postgres.search.TrigramDistance` expressions.
+
Minor features
--------------
diff --git a/docs/topics/db/search.txt b/docs/topics/db/search.txt
index fd62c6909c..04d84552df 100644
--- a/docs/topics/db/search.txt
+++ b/docs/topics/db/search.txt
@@ -55,11 +55,12 @@ use :lookup:`unaccented comparison <unaccent>`::
This shows another issue, where we are matching against a different spelling of
the name. In this case we have an asymmetry though - a search for ``Helen``
will pick up ``Helena`` or ``Hélène``, but not the reverse. Another option
-would be to use a trigram comparison, which compares sequences of letters.
+would be to use a :lookup:`trigram_similar` comparison, which compares
+sequences of letters.
For example::
- >>> Author.objects.filter(name__unaccent__lower__trigram='Hélène')
+ >>> Author.objects.filter(name__unaccent__lower__trigram_similar='Hélène')
[<Author: Helen Mirren>, <Actor: Hélène Joy>]
Now we have a different problem - the longer name of "Helena Bonham Carter"
diff --git a/tests/postgres_tests/migrations/0001_setup_extensions.py b/tests/postgres_tests/migrations/0001_setup_extensions.py
index 07d5bfc7e7..400dd091f4 100644
--- a/tests/postgres_tests/migrations/0001_setup_extensions.py
+++ b/tests/postgres_tests/migrations/0001_setup_extensions.py
@@ -5,12 +5,13 @@ from django.db import migrations
try:
from django.contrib.postgres.operations import (
- CreateExtension, HStoreExtension, UnaccentExtension,
+ CreateExtension, HStoreExtension, TrigramExtension, UnaccentExtension,
)
except ImportError:
from django.test import mock
CreateExtension = mock.Mock()
HStoreExtension = mock.Mock()
+ TrigramExtension = mock.Mock()
UnaccentExtension = mock.Mock()
@@ -21,5 +22,6 @@ class Migration(migrations.Migration):
# dash in its name.
CreateExtension('uuid-ossp'),
HStoreExtension(),
+ TrigramExtension(),
UnaccentExtension(),
]
diff --git a/tests/postgres_tests/test_trigram.py b/tests/postgres_tests/test_trigram.py
new file mode 100644
index 0000000000..b340b41869
--- /dev/null
+++ b/tests/postgres_tests/test_trigram.py
@@ -0,0 +1,53 @@
+from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity
+from django.test import modify_settings
+
+from . import PostgreSQLTestCase
+from .models import CharFieldModel, TextFieldModel
+
+
+@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
+class TrigramTest(PostgreSQLTestCase):
+ Model = CharFieldModel
+
+ @classmethod
+ def setUpTestData(cls):
+ cls.Model.objects.bulk_create([
+ cls.Model(field='Matthew'),
+ cls.Model(field='Cat sat on mat.'),
+ cls.Model(field='Dog sat on rug.'),
+ ])
+
+ def test_trigram_search(self):
+ self.assertQuerysetEqual(
+ self.Model.objects.filter(field__trigram_similar='Mathew'),
+ ['Matthew'],
+ transform=lambda instance: instance.field,
+ )
+
+ def test_trigram_similarity(self):
+ search = 'Bat sat on cat.'
+ self.assertQuerysetEqual(
+ self.Model.objects.filter(
+ field__trigram_similar=search,
+ ).annotate(similarity=TrigramSimilarity('field', search)).order_by('-similarity'),
+ [('Cat sat on mat.', 0.625), ('Dog sat on rug.', 0.333333)],
+ transform=lambda instance: (instance.field, instance.similarity),
+ ordered=True,
+ )
+
+ def test_trigram_similarity_alternate(self):
+ self.assertQuerysetEqual(
+ self.Model.objects.annotate(
+ distance=TrigramDistance('field', 'Bat sat on cat.'),
+ ).filter(distance__lte=0.7).order_by('distance'),
+ [('Cat sat on mat.', 0.375), ('Dog sat on rug.', 0.666667)],
+ transform=lambda instance: (instance.field, instance.distance),
+ ordered=True,
+ )
+
+
+class TrigramTextFieldTest(TrigramTest):
+ """
+ TextField has the same behavior as CharField regarding trigram lookups.
+ """
+ Model = TextFieldModel