From d549b8805053d4b064bf492ba90e90db5d7e2a6b Mon Sep 17 00:00:00 2001 From: Mads Jensen Date: Mon, 18 Sep 2017 15:42:29 +0200 Subject: Fixed #26608 -- Added support for window expressions (OVER clause). Thanks Josh Smeaton, Mariusz Felisiak, Sergey Fedoseev, Simon Charettes, Adam Chainz/Johnson and Tim Graham for comments and reviews and Jamie Cockburn for initial patch. --- tests/expressions_window/__init__.py | 0 tests/expressions_window/models.py | 11 + tests/expressions_window/tests.py | 783 +++++++++++++++++++++++++++++++++++ 3 files changed, 794 insertions(+) create mode 100644 tests/expressions_window/__init__.py create mode 100644 tests/expressions_window/models.py create mode 100644 tests/expressions_window/tests.py (limited to 'tests/expressions_window') diff --git a/tests/expressions_window/__init__.py b/tests/expressions_window/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/expressions_window/models.py b/tests/expressions_window/models.py new file mode 100644 index 0000000000..94cade7ed7 --- /dev/null +++ b/tests/expressions_window/models.py @@ -0,0 +1,11 @@ +from django.db import models + + +class Employee(models.Model): + name = models.CharField(max_length=40, blank=False, null=False) + salary = models.PositiveIntegerField() + department = models.CharField(max_length=40, blank=False, null=False) + hire_date = models.DateField(blank=False, null=False) + + def __str__(self): + return '{}, {}, {}, {}'.format(self.name, self.department, self.salary, self.hire_date) diff --git a/tests/expressions_window/tests.py b/tests/expressions_window/tests.py new file mode 100644 index 0000000000..e61516b4b6 --- /dev/null +++ b/tests/expressions_window/tests.py @@ -0,0 +1,783 @@ +import datetime +from unittest import skipIf, skipUnless + +from django.core.exceptions import FieldError +from django.db import NotSupportedError, connection +from django.db.models import ( + F, RowRange, Value, ValueRange, Window, WindowFrame, +) +from django.db.models.aggregates import Avg, Max, Min, Sum +from django.db.models.functions import ( + CumeDist, DenseRank, ExtractYear, FirstValue, Lag, LastValue, Lead, + NthValue, Ntile, PercentRank, Rank, RowNumber, Upper, +) +from django.test import SimpleTestCase, TestCase, skipUnlessDBFeature + +from .models import Employee + + +@skipUnlessDBFeature('supports_over_clause') +class WindowFunctionTests(TestCase): + @classmethod + def setUpTestData(cls): + Employee.objects.bulk_create([ + Employee(name=e[0], salary=e[1], department=e[2], hire_date=e[3]) + for e in [ + ('Jones', 45000, 'Accounting', datetime.datetime(2005, 11, 1)), + ('Williams', 37000, 'Accounting', datetime.datetime(2009, 6, 1)), + ('Jenson', 45000, 'Accounting', datetime.datetime(2008, 4, 1)), + ('Adams', 50000, 'Accounting', datetime.datetime(2013, 7, 1)), + ('Smith', 55000, 'Sales', datetime.datetime(2007, 6, 1)), + ('Brown', 53000, 'Sales', datetime.datetime(2009, 9, 1)), + ('Johnson', 40000, 'Marketing', datetime.datetime(2012, 3, 1)), + ('Smith', 38000, 'Marketing', datetime.datetime(2009, 10, 1)), + ('Wilkinson', 60000, 'IT', datetime.datetime(2011, 3, 1)), + ('Moore', 34000, 'IT', datetime.datetime(2013, 8, 1)), + ('Miller', 100000, 'Management', datetime.datetime(2005, 6, 1)), + ('Johnson', 80000, 'Management', datetime.datetime(2005, 7, 1)), + ] + ]) + + def test_dense_rank(self): + qs = Employee.objects.annotate(rank=Window( + expression=DenseRank(), + order_by=ExtractYear(F('hire_date')).asc(), + )) + self.assertQuerysetEqual(qs, [ + ('Jones', 45000, 'Accounting', datetime.date(2005, 11, 1), 1), + ('Miller', 100000, 'Management', datetime.date(2005, 6, 1), 1), + ('Johnson', 80000, 'Management', datetime.date(2005, 7, 1), 1), + ('Smith', 55000, 'Sales', datetime.date(2007, 6, 1), 2), + ('Jenson', 45000, 'Accounting', datetime.date(2008, 4, 1), 3), + ('Smith', 38000, 'Marketing', datetime.date(2009, 10, 1), 4), + ('Brown', 53000, 'Sales', datetime.date(2009, 9, 1), 4), + ('Williams', 37000, 'Accounting', datetime.date(2009, 6, 1), 4), + ('Wilkinson', 60000, 'IT', datetime.date(2011, 3, 1), 5), + ('Johnson', 40000, 'Marketing', datetime.date(2012, 3, 1), 6), + ('Moore', 34000, 'IT', datetime.date(2013, 8, 1), 7), + ('Adams', 50000, 'Accounting', datetime.date(2013, 7, 1), 7), + ], lambda entry: (entry.name, entry.salary, entry.department, entry.hire_date, entry.rank), ordered=False) + + def test_department_salary(self): + qs = Employee.objects.annotate(department_sum=Window( + expression=Sum('salary'), + partition_by=F('department'), + order_by=[F('hire_date').asc()], + )).order_by('department', 'department_sum') + self.assertQuerysetEqual(qs, [ + ('Jones', 'Accounting', 45000, 45000), + ('Jenson', 'Accounting', 45000, 90000), + ('Williams', 'Accounting', 37000, 127000), + ('Adams', 'Accounting', 50000, 177000), + ('Wilkinson', 'IT', 60000, 60000), + ('Moore', 'IT', 34000, 94000), + ('Miller', 'Management', 100000, 100000), + ('Johnson', 'Management', 80000, 180000), + ('Smith', 'Marketing', 38000, 38000), + ('Johnson', 'Marketing', 40000, 78000), + ('Smith', 'Sales', 55000, 55000), + ('Brown', 'Sales', 53000, 108000), + ], lambda entry: (entry.name, entry.department, entry.salary, entry.department_sum)) + + def test_rank(self): + """ + Rank the employees based on the year they're were hired. Since there + are multiple employees hired in different years, this will contain + gaps. + """ + qs = Employee.objects.annotate(rank=Window( + expression=Rank(), + order_by=ExtractYear(F('hire_date')).asc(), + )) + self.assertQuerysetEqual(qs, [ + ('Jones', 45000, 'Accounting', datetime.date(2005, 11, 1), 1), + ('Miller', 100000, 'Management', datetime.date(2005, 6, 1), 1), + ('Johnson', 80000, 'Management', datetime.date(2005, 7, 1), 1), + ('Smith', 55000, 'Sales', datetime.date(2007, 6, 1), 4), + ('Jenson', 45000, 'Accounting', datetime.date(2008, 4, 1), 5), + ('Smith', 38000, 'Marketing', datetime.date(2009, 10, 1), 6), + ('Brown', 53000, 'Sales', datetime.date(2009, 9, 1), 6), + ('Williams', 37000, 'Accounting', datetime.date(2009, 6, 1), 6), + ('Wilkinson', 60000, 'IT', datetime.date(2011, 3, 1), 9), + ('Johnson', 40000, 'Marketing', datetime.date(2012, 3, 1), 10), + ('Moore', 34000, 'IT', datetime.date(2013, 8, 1), 11), + ('Adams', 50000, 'Accounting', datetime.date(2013, 7, 1), 11), + ], lambda entry: (entry.name, entry.salary, entry.department, entry.hire_date, entry.rank), ordered=False) + + def test_row_number(self): + """ + The row number window function computes the number based on the order + in which the tuples were inserted. Depending on the backend, + + Oracle requires an ordering-clause in the Window expression. + """ + qs = Employee.objects.annotate(row_number=Window( + expression=RowNumber(), + order_by=F('pk').asc(), + )).order_by('pk') + self.assertQuerysetEqual(qs, [ + ('Jones', 'Accounting', 1), + ('Williams', 'Accounting', 2), + ('Jenson', 'Accounting', 3), + ('Adams', 'Accounting', 4), + ('Smith', 'Sales', 5), + ('Brown', 'Sales', 6), + ('Johnson', 'Marketing', 7), + ('Smith', 'Marketing', 8), + ('Wilkinson', 'IT', 9), + ('Moore', 'IT', 10), + ('Miller', 'Management', 11), + ('Johnson', 'Management', 12), + ], lambda entry: (entry.name, entry.department, entry.row_number)) + + @skipIf(connection.vendor == 'oracle', "Oracle requires ORDER BY in row_number, ANSI:SQL doesn't") + def test_row_number_no_ordering(self): + """ + The row number window function computes the number based on the order + in which the tuples were inserted. + """ + # Add a default ordering for consistent results across databases. + qs = Employee.objects.annotate(row_number=Window( + expression=RowNumber(), + )).order_by('pk') + self.assertQuerysetEqual(qs, [ + ('Jones', 'Accounting', 1), + ('Williams', 'Accounting', 2), + ('Jenson', 'Accounting', 3), + ('Adams', 'Accounting', 4), + ('Smith', 'Sales', 5), + ('Brown', 'Sales', 6), + ('Johnson', 'Marketing', 7), + ('Smith', 'Marketing', 8), + ('Wilkinson', 'IT', 9), + ('Moore', 'IT', 10), + ('Miller', 'Management', 11), + ('Johnson', 'Management', 12), + ], lambda entry: (entry.name, entry.department, entry.row_number)) + + def test_avg_salary_department(self): + qs = Employee.objects.annotate(avg_salary=Window( + expression=Avg('salary'), + order_by=F('department').asc(), + partition_by='department', + )).order_by('department', '-salary', 'name') + self.assertQuerysetEqual(qs, [ + ('Adams', 50000, 'Accounting', 44250.00), + ('Jenson', 45000, 'Accounting', 44250.00), + ('Jones', 45000, 'Accounting', 44250.00), + ('Williams', 37000, 'Accounting', 44250.00), + ('Wilkinson', 60000, 'IT', 47000.00), + ('Moore', 34000, 'IT', 47000.00), + ('Miller', 100000, 'Management', 90000.00), + ('Johnson', 80000, 'Management', 90000.00), + ('Johnson', 40000, 'Marketing', 39000.00), + ('Smith', 38000, 'Marketing', 39000.00), + ('Smith', 55000, 'Sales', 54000.00), + ('Brown', 53000, 'Sales', 54000.00), + ], transform=lambda row: (row.name, row.salary, row.department, row.avg_salary)) + + def test_lag(self): + """ + Compute the difference between an employee's salary and the next + highest salary in the employee's department. Return None if the + employee has the lowest salary. + """ + qs = Employee.objects.annotate(lag=Window( + expression=Lag(expression='salary', offset=1), + partition_by=F('department'), + order_by=[F('salary').asc(), F('name').asc()], + )).order_by('department') + self.assertQuerysetEqual(qs, [ + ('Williams', 37000, 'Accounting', None), + ('Jenson', 45000, 'Accounting', 37000), + ('Jones', 45000, 'Accounting', 45000), + ('Adams', 50000, 'Accounting', 45000), + ('Moore', 34000, 'IT', None), + ('Wilkinson', 60000, 'IT', 34000), + ('Johnson', 80000, 'Management', None), + ('Miller', 100000, 'Management', 80000), + ('Smith', 38000, 'Marketing', None), + ('Johnson', 40000, 'Marketing', 38000), + ('Brown', 53000, 'Sales', None), + ('Smith', 55000, 'Sales', 53000), + ], transform=lambda row: (row.name, row.salary, row.department, row.lag)) + + def test_first_value(self): + qs = Employee.objects.annotate(first_value=Window( + expression=FirstValue('salary'), + partition_by=F('department'), + order_by=F('hire_date').asc(), + )).order_by('department', 'hire_date') + self.assertQuerysetEqual(qs, [ + ('Jones', 45000, 'Accounting', datetime.date(2005, 11, 1), 45000), + ('Jenson', 45000, 'Accounting', datetime.date(2008, 4, 1), 45000), + ('Williams', 37000, 'Accounting', datetime.date(2009, 6, 1), 45000), + ('Adams', 50000, 'Accounting', datetime.date(2013, 7, 1), 45000), + ('Wilkinson', 60000, 'IT', datetime.date(2011, 3, 1), 60000), + ('Moore', 34000, 'IT', datetime.date(2013, 8, 1), 60000), + ('Miller', 100000, 'Management', datetime.date(2005, 6, 1), 100000), + ('Johnson', 80000, 'Management', datetime.date(2005, 7, 1), 100000), + ('Smith', 38000, 'Marketing', datetime.date(2009, 10, 1), 38000), + ('Johnson', 40000, 'Marketing', datetime.date(2012, 3, 1), 38000), + ('Smith', 55000, 'Sales', datetime.date(2007, 6, 1), 55000), + ('Brown', 53000, 'Sales', datetime.date(2009, 9, 1), 55000), + ], lambda row: (row.name, row.salary, row.department, row.hire_date, row.first_value)) + + def test_last_value(self): + qs = Employee.objects.annotate(last_value=Window( + expression=LastValue('hire_date'), + partition_by=F('department'), + order_by=F('hire_date').asc(), + )) + self.assertQuerysetEqual(qs, [ + ('Adams', 'Accounting', datetime.date(2013, 7, 1), 50000, datetime.date(2013, 7, 1)), + ('Jenson', 'Accounting', datetime.date(2008, 4, 1), 45000, datetime.date(2008, 4, 1)), + ('Jones', 'Accounting', datetime.date(2005, 11, 1), 45000, datetime.date(2005, 11, 1)), + ('Williams', 'Accounting', datetime.date(2009, 6, 1), 37000, datetime.date(2009, 6, 1)), + ('Moore', 'IT', datetime.date(2013, 8, 1), 34000, datetime.date(2013, 8, 1)), + ('Wilkinson', 'IT', datetime.date(2011, 3, 1), 60000, datetime.date(2011, 3, 1)), + ('Miller', 'Management', datetime.date(2005, 6, 1), 100000, datetime.date(2005, 6, 1)), + ('Johnson', 'Management', datetime.date(2005, 7, 1), 80000, datetime.date(2005, 7, 1)), + ('Johnson', 'Marketing', datetime.date(2012, 3, 1), 40000, datetime.date(2012, 3, 1)), + ('Smith', 'Marketing', datetime.date(2009, 10, 1), 38000, datetime.date(2009, 10, 1)), + ('Brown', 'Sales', datetime.date(2009, 9, 1), 53000, datetime.date(2009, 9, 1)), + ('Smith', 'Sales', datetime.date(2007, 6, 1), 55000, datetime.date(2007, 6, 1)), + ], transform=lambda row: (row.name, row.department, row.hire_date, row.salary, row.last_value), ordered=False) + + def test_function_list_of_values(self): + qs = Employee.objects.annotate(lead=Window( + expression=Lead(expression='salary'), + order_by=[F('hire_date').asc(), F('name').desc()], + partition_by='department', + )).values_list('name', 'salary', 'department', 'hire_date', 'lead') + self.assertNotIn('GROUP BY', str(qs.query)) + self.assertSequenceEqual(qs, [ + ('Jones', 45000, 'Accounting', datetime.date(2005, 11, 1), 45000), + ('Jenson', 45000, 'Accounting', datetime.date(2008, 4, 1), 37000), + ('Williams', 37000, 'Accounting', datetime.date(2009, 6, 1), 50000), + ('Adams', 50000, 'Accounting', datetime.date(2013, 7, 1), None), + ('Wilkinson', 60000, 'IT', datetime.date(2011, 3, 1), 34000), + ('Moore', 34000, 'IT', datetime.date(2013, 8, 1), None), + ('Miller', 100000, 'Management', datetime.date(2005, 6, 1), 80000), + ('Johnson', 80000, 'Management', datetime.date(2005, 7, 1), None), + ('Smith', 38000, 'Marketing', datetime.date(2009, 10, 1), 40000), + ('Johnson', 40000, 'Marketing', datetime.date(2012, 3, 1), None), + ('Smith', 55000, 'Sales', datetime.date(2007, 6, 1), 53000), + ('Brown', 53000, 'Sales', datetime.date(2009, 9, 1), None), + ]) + + def test_min_department(self): + """An alternative way to specify a query for FirstValue.""" + qs = Employee.objects.annotate(min_salary=Window( + expression=Min('salary'), + partition_by=F('department'), + order_by=[F('salary').asc(), F('name').asc()] + )).order_by('department', 'salary', 'name') + self.assertQuerysetEqual(qs, [ + ('Williams', 'Accounting', 37000, 37000), + ('Jenson', 'Accounting', 45000, 37000), + ('Jones', 'Accounting', 45000, 37000), + ('Adams', 'Accounting', 50000, 37000), + ('Moore', 'IT', 34000, 34000), + ('Wilkinson', 'IT', 60000, 34000), + ('Johnson', 'Management', 80000, 80000), + ('Miller', 'Management', 100000, 80000), + ('Smith', 'Marketing', 38000, 38000), + ('Johnson', 'Marketing', 40000, 38000), + ('Brown', 'Sales', 53000, 53000), + ('Smith', 'Sales', 55000, 53000), + ], lambda row: (row.name, row.department, row.salary, row.min_salary)) + + def test_max_per_year(self): + """ + Find the maximum salary awarded in the same year as the + employee was hired, regardless of the department. + """ + qs = Employee.objects.annotate(max_salary_year=Window( + expression=Max('salary'), + order_by=ExtractYear('hire_date').asc(), + partition_by=ExtractYear('hire_date') + )).order_by(ExtractYear('hire_date'), 'salary') + self.assertQuerysetEqual(qs, [ + ('Jones', 'Accounting', 45000, 2005, 100000), + ('Johnson', 'Management', 80000, 2005, 100000), + ('Miller', 'Management', 100000, 2005, 100000), + ('Smith', 'Sales', 55000, 2007, 55000), + ('Jenson', 'Accounting', 45000, 2008, 45000), + ('Williams', 'Accounting', 37000, 2009, 53000), + ('Smith', 'Marketing', 38000, 2009, 53000), + ('Brown', 'Sales', 53000, 2009, 53000), + ('Wilkinson', 'IT', 60000, 2011, 60000), + ('Johnson', 'Marketing', 40000, 2012, 40000), + ('Moore', 'IT', 34000, 2013, 50000), + ('Adams', 'Accounting', 50000, 2013, 50000), + ], lambda row: (row.name, row.department, row.salary, row.hire_date.year, row.max_salary_year)) + + def test_cume_dist(self): + """ + Compute the cumulative distribution for the employees based on the + salary in increasing order. Equal to rank/total number of rows (12). + """ + qs = Employee.objects.annotate(cume_dist=Window( + expression=CumeDist(), + order_by=F('salary').asc(), + )).order_by('salary', 'name') + # Round result of cume_dist because Oracle uses greater precision. + self.assertQuerysetEqual(qs, [ + ('Moore', 'IT', 34000, 0.0833333333), + ('Williams', 'Accounting', 37000, 0.1666666667), + ('Smith', 'Marketing', 38000, 0.25), + ('Johnson', 'Marketing', 40000, 0.3333333333), + ('Jenson', 'Accounting', 45000, 0.5), + ('Jones', 'Accounting', 45000, 0.5), + ('Adams', 'Accounting', 50000, 0.5833333333), + ('Brown', 'Sales', 53000, 0.6666666667), + ('Smith', 'Sales', 55000, 0.75), + ('Wilkinson', 'IT', 60000, 0.8333333333), + ('Johnson', 'Management', 80000, 0.9166666667), + ('Miller', 'Management', 100000, 1), + ], lambda row: (row.name, row.department, row.salary, round(row.cume_dist, 10))) + + def test_nthvalue(self): + qs = Employee.objects.annotate( + nth_value=Window(expression=NthValue( + expression='salary', nth=2), + order_by=[F('hire_date').asc(), F('name').desc()], + partition_by=F('department'), + ) + ).order_by('department', 'hire_date', 'name') + self.assertQuerysetEqual(qs, [ + ('Jones', 'Accounting', datetime.date(2005, 11, 1), 45000, None), + ('Jenson', 'Accounting', datetime.date(2008, 4, 1), 45000, 45000), + ('Williams', 'Accounting', datetime.date(2009, 6, 1), 37000, 45000), + ('Adams', 'Accounting', datetime.date(2013, 7, 1), 50000, 45000), + ('Wilkinson', 'IT', datetime.date(2011, 3, 1), 60000, None), + ('Moore', 'IT', datetime.date(2013, 8, 1), 34000, 34000), + ('Miller', 'Management', datetime.date(2005, 6, 1), 100000, None), + ('Johnson', 'Management', datetime.date(2005, 7, 1), 80000, 80000), + ('Smith', 'Marketing', datetime.date(2009, 10, 1), 38000, None), + ('Johnson', 'Marketing', datetime.date(2012, 3, 1), 40000, 40000), + ('Smith', 'Sales', datetime.date(2007, 6, 1), 55000, None), + ('Brown', 'Sales', datetime.date(2009, 9, 1), 53000, 53000), + ], lambda row: (row.name, row.department, row.hire_date, row.salary, row.nth_value)) + + def test_lead(self): + """ + Determine what the next person hired in the same department makes. + Because the dataset is ambiguous, the name is also part of the + ordering clause. No default is provided, so None/NULL should be + returned. + """ + qs = Employee.objects.annotate(lead=Window( + expression=Lead(expression='salary'), + order_by=[F('hire_date').asc(), F('name').desc()], + partition_by='department', + )).order_by('department') + self.assertQuerysetEqual(qs, [ + ('Jones', 45000, 'Accounting', datetime.date(2005, 11, 1), 45000), + ('Jenson', 45000, 'Accounting', datetime.date(2008, 4, 1), 37000), + ('Williams', 37000, 'Accounting', datetime.date(2009, 6, 1), 50000), + ('Adams', 50000, 'Accounting', datetime.date(2013, 7, 1), None), + ('Wilkinson', 60000, 'IT', datetime.date(2011, 3, 1), 34000), + ('Moore', 34000, 'IT', datetime.date(2013, 8, 1), None), + ('Miller', 100000, 'Management', datetime.date(2005, 6, 1), 80000), + ('Johnson', 80000, 'Management', datetime.date(2005, 7, 1), None), + ('Smith', 38000, 'Marketing', datetime.date(2009, 10, 1), 40000), + ('Johnson', 40000, 'Marketing', datetime.date(2012, 3, 1), None), + ('Smith', 55000, 'Sales', datetime.date(2007, 6, 1), 53000), + ('Brown', 53000, 'Sales', datetime.date(2009, 9, 1), None), + ], transform=lambda row: (row.name, row.salary, row.department, row.hire_date, row.lead)) + + def test_lead_offset(self): + """ + Determine what the person hired after someone makes. Due to + ambiguity, the name is also included in the ordering. + """ + qs = Employee.objects.annotate(lead=Window( + expression=Lead('salary', offset=2), + partition_by='department', + order_by=F('hire_date').asc(), + )) + self.assertQuerysetEqual(qs, [ + ('Jones', 45000, 'Accounting', datetime.date(2005, 11, 1), 37000), + ('Jenson', 45000, 'Accounting', datetime.date(2008, 4, 1), 50000), + ('Williams', 37000, 'Accounting', datetime.date(2009, 6, 1), None), + ('Adams', 50000, 'Accounting', datetime.date(2013, 7, 1), None), + ('Wilkinson', 60000, 'IT', datetime.date(2011, 3, 1), None), + ('Moore', 34000, 'IT', datetime.date(2013, 8, 1), None), + ('Johnson', 80000, 'Management', datetime.date(2005, 7, 1), None), + ('Miller', 100000, 'Management', datetime.date(2005, 6, 1), None), + ('Smith', 38000, 'Marketing', datetime.date(2009, 10, 1), None), + ('Johnson', 40000, 'Marketing', datetime.date(2012, 3, 1), None), + ('Smith', 55000, 'Sales', datetime.date(2007, 6, 1), None), + ('Brown', 53000, 'Sales', datetime.date(2009, 9, 1), None), + ], transform=lambda row: (row.name, row.salary, row.department, row.hire_date, row.lead), + ordered=False + ) + + def test_lead_default(self): + qs = Employee.objects.annotate(lead_default=Window( + expression=Lead(expression='salary', offset=5, default=60000), + partition_by=F('department'), + order_by=F('department').asc(), + )) + self.assertEqual(list(qs.values_list('lead_default', flat=True).distinct()), [60000]) + + def test_ntile(self): + """ + Compute the group for each of the employees across the entire company, + based on how high the salary is for them. There are twelve employees + so it divides evenly into four groups. + """ + qs = Employee.objects.annotate(ntile=Window( + expression=Ntile(num_buckets=4), + order_by=F('salary').desc(), + )).order_by('ntile', '-salary', 'name') + self.assertQuerysetEqual(qs, [ + ('Miller', 'Management', 100000, 1), + ('Johnson', 'Management', 80000, 1), + ('Wilkinson', 'IT', 60000, 1), + ('Smith', 'Sales', 55000, 2), + ('Brown', 'Sales', 53000, 2), + ('Adams', 'Accounting', 50000, 2), + ('Jenson', 'Accounting', 45000, 3), + ('Jones', 'Accounting', 45000, 3), + ('Johnson', 'Marketing', 40000, 3), + ('Smith', 'Marketing', 38000, 4), + ('Williams', 'Accounting', 37000, 4), + ('Moore', 'IT', 34000, 4), + ], lambda x: (x.name, x.department, x.salary, x.ntile)) + + def test_percent_rank(self): + """ + Calculate the percentage rank of the employees across the entire + company based on salary and name (in case of ambiguity). + """ + qs = Employee.objects.annotate(percent_rank=Window( + expression=PercentRank(), + order_by=[F('salary').asc(), F('name').asc()], + )).order_by('percent_rank') + # Round to account for precision differences among databases. + self.assertQuerysetEqual(qs, [ + ('Moore', 'IT', 34000, 0.0), + ('Williams', 'Accounting', 37000, 0.0909090909), + ('Smith', 'Marketing', 38000, 0.1818181818), + ('Johnson', 'Marketing', 40000, 0.2727272727), + ('Jenson', 'Accounting', 45000, 0.3636363636), + ('Jones', 'Accounting', 45000, 0.4545454545), + ('Adams', 'Accounting', 50000, 0.5454545455), + ('Brown', 'Sales', 53000, 0.6363636364), + ('Smith', 'Sales', 55000, 0.7272727273), + ('Wilkinson', 'IT', 60000, 0.8181818182), + ('Johnson', 'Management', 80000, 0.9090909091), + ('Miller', 'Management', 100000, 1.0), + ], transform=lambda row: (row.name, row.department, row.salary, round(row.percent_rank, 10))) + + def test_nth_returns_null(self): + """ + Find the nth row of the data set. None is returned since there are + fewer than 20 rows in the test data. + """ + qs = Employee.objects.annotate(nth_value=Window( + expression=NthValue('salary', nth=20), + order_by=F('salary').asc() + )) + self.assertEqual(list(qs.values_list('nth_value', flat=True).distinct()), [None]) + + def test_multiple_partitioning(self): + """ + Find the maximum salary for each department for people hired in the + same year. + """ + qs = Employee.objects.annotate(max=Window( + expression=Max('salary'), + partition_by=[F('department'), ExtractYear(F('hire_date'))], + )).order_by('department', 'hire_date', 'name') + self.assertQuerysetEqual(qs, [ + ('Jones', 45000, 'Accounting', datetime.date(2005, 11, 1), 45000), + ('Jenson', 45000, 'Accounting', datetime.date(2008, 4, 1), 45000), + ('Williams', 37000, 'Accounting', datetime.date(2009, 6, 1), 37000), + ('Adams', 50000, 'Accounting', datetime.date(2013, 7, 1), 50000), + ('Wilkinson', 60000, 'IT', datetime.date(2011, 3, 1), 60000), + ('Moore', 34000, 'IT', datetime.date(2013, 8, 1), 34000), + ('Miller', 100000, 'Management', datetime.date(2005, 6, 1), 100000), + ('Johnson', 80000, 'Management', datetime.date(2005, 7, 1), 100000), + ('Smith', 38000, 'Marketing', datetime.date(2009, 10, 1), 38000), + ('Johnson', 40000, 'Marketing', datetime.date(2012, 3, 1), 40000), + ('Smith', 55000, 'Sales', datetime.date(2007, 6, 1), 55000), + ('Brown', 53000, 'Sales', datetime.date(2009, 9, 1), 53000), + ], transform=lambda row: (row.name, row.salary, row.department, row.hire_date, row.max)) + + def test_multiple_ordering(self): + """ + Accumulate the salaries over the departments based on hire_date. + If two people were hired on the same date in the same department, the + ordering clause will render a different result for those people. + """ + qs = Employee.objects.annotate(sum=Window( + expression=Sum('salary'), + partition_by='department', + order_by=[F('hire_date').asc(), F('name').asc()], + )).order_by('department', 'sum') + self.assertQuerysetEqual(qs, [ + ('Jones', 45000, 'Accounting', datetime.date(2005, 11, 1), 45000), + ('Jenson', 45000, 'Accounting', datetime.date(2008, 4, 1), 90000), + ('Williams', 37000, 'Accounting', datetime.date(2009, 6, 1), 127000), + ('Adams', 50000, 'Accounting', datetime.date(2013, 7, 1), 177000), + ('Wilkinson', 60000, 'IT', datetime.date(2011, 3, 1), 60000), + ('Moore', 34000, 'IT', datetime.date(2013, 8, 1), 94000), + ('Miller', 100000, 'Management', datetime.date(2005, 6, 1), 100000), + ('Johnson', 80000, 'Management', datetime.date(2005, 7, 1), 180000), + ('Smith', 38000, 'Marketing', datetime.date(2009, 10, 1), 38000), + ('Johnson', 40000, 'Marketing', datetime.date(2012, 3, 1), 78000), + ('Smith', 55000, 'Sales', datetime.date(2007, 6, 1), 55000), + ('Brown', 53000, 'Sales', datetime.date(2009, 9, 1), 108000), + ], transform=lambda row: (row.name, row.salary, row.department, row.hire_date, row.sum)) + + @skipIf(connection.vendor == 'postgresql', 'n following/preceding not supported by PostgreSQL') + def test_range_n_preceding_and_following(self): + qs = Employee.objects.annotate(sum=Window( + expression=Sum('salary'), + order_by=F('salary').asc(), + partition_by='department', + frame=ValueRange(start=-2, end=2), + )) + self.assertIn('RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING', str(qs.query)) + self.assertQuerysetEqual(qs, [ + ('Williams', 37000, 'Accounting', datetime.date(2009, 6, 1), 37000), + ('Jones', 45000, 'Accounting', datetime.date(2005, 11, 1), 90000), + ('Jenson', 45000, 'Accounting', datetime.date(2008, 4, 1), 90000), + ('Adams', 50000, 'Accounting', datetime.date(2013, 7, 1), 50000), + ('Brown', 53000, 'Sales', datetime.date(2009, 9, 1), 53000), + ('Smith', 55000, 'Sales', datetime.date(2007, 6, 1), 55000), + ('Johnson', 40000, 'Marketing', datetime.date(2012, 3, 1), 40000), + ('Smith', 38000, 'Marketing', datetime.date(2009, 10, 1), 38000), + ('Wilkinson', 60000, 'IT', datetime.date(2011, 3, 1), 60000), + ('Moore', 34000, 'IT', datetime.date(2013, 8, 1), 34000), + ('Miller', 100000, 'Management', datetime.date(2005, 6, 1), 100000), + ('Johnson', 80000, 'Management', datetime.date(2005, 7, 1), 80000), + ], transform=lambda row: (row.name, row.salary, row.department, row.hire_date, row.sum), ordered=False) + + def test_range_unbound(self): + """A query with RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING.""" + qs = Employee.objects.annotate(sum=Window( + expression=Sum('salary'), + partition_by='department', + order_by=[F('hire_date').asc(), F('name').asc()], + frame=ValueRange(start=None, end=None), + )).order_by('department', 'hire_date', 'name') + self.assertIn('RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING', str(qs.query)) + self.assertQuerysetEqual(qs, [ + ('Jones', 'Accounting', 45000, datetime.date(2005, 11, 1), 177000), + ('Jenson', 'Accounting', 45000, datetime.date(2008, 4, 1), 177000), + ('Williams', 'Accounting', 37000, datetime.date(2009, 6, 1), 177000), + ('Adams', 'Accounting', 50000, datetime.date(2013, 7, 1), 177000), + ('Wilkinson', 'IT', 60000, datetime.date(2011, 3, 1), 94000), + ('Moore', 'IT', 34000, datetime.date(2013, 8, 1), 94000), + ('Miller', 'Management', 100000, datetime.date(2005, 6, 1), 180000), + ('Johnson', 'Management', 80000, datetime.date(2005, 7, 1), 180000), + ('Smith', 'Marketing', 38000, datetime.date(2009, 10, 1), 78000), + ('Johnson', 'Marketing', 40000, datetime.date(2012, 3, 1), 78000), + ('Smith', 'Sales', 55000, datetime.date(2007, 6, 1), 108000), + ('Brown', 'Sales', 53000, datetime.date(2009, 9, 1), 108000), + ], transform=lambda row: (row.name, row.department, row.salary, row.hire_date, row.sum)) + + def test_row_range_rank(self): + """ + A query with ROWS BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING. + The resulting sum is the sum of the three next (if they exist) and all + previous rows according to the ordering clause. + """ + qs = Employee.objects.annotate(sum=Window( + expression=Sum('salary'), + order_by=[F('hire_date').asc(), F('name').desc()], + frame=RowRange(start=None, end=3), + )).order_by('sum', 'hire_date') + self.assertIn('ROWS BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING', str(qs.query)) + self.assertQuerysetEqual(qs, [ + ('Miller', 100000, 'Management', datetime.date(2005, 6, 1), 280000), + ('Johnson', 80000, 'Management', datetime.date(2005, 7, 1), 325000), + ('Jones', 45000, 'Accounting', datetime.date(2005, 11, 1), 362000), + ('Smith', 55000, 'Sales', datetime.date(2007, 6, 1), 415000), + ('Jenson', 45000, 'Accounting', datetime.date(2008, 4, 1), 453000), + ('Williams', 37000, 'Accounting', datetime.date(2009, 6, 1), 513000), + ('Brown', 53000, 'Sales', datetime.date(2009, 9, 1), 553000), + ('Smith', 38000, 'Marketing', datetime.date(2009, 10, 1), 603000), + ('Wilkinson', 60000, 'IT', datetime.date(2011, 3, 1), 637000), + ('Johnson', 40000, 'Marketing', datetime.date(2012, 3, 1), 637000), + ('Adams', 50000, 'Accounting', datetime.date(2013, 7, 1), 637000), + ('Moore', 34000, 'IT', datetime.date(2013, 8, 1), 637000), + ], transform=lambda row: (row.name, row.salary, row.department, row.hire_date, row.sum)) + + @skipUnlessDBFeature('can_distinct_on_fields') + def test_distinct_window_function(self): + """ + Window functions are not aggregates, and hence a query to filter out + duplicates may be useful. + """ + qs = Employee.objects.annotate( + sum=Window( + expression=Sum('salary'), + partition_by=ExtractYear('hire_date'), + order_by=ExtractYear('hire_date') + ), + year=ExtractYear('hire_date'), + ).values('year', 'sum').distinct('year').order_by('year') + results = [ + {'year': 2005, 'sum': 225000}, {'year': 2007, 'sum': 55000}, + {'year': 2008, 'sum': 45000}, {'year': 2009, 'sum': 128000}, + {'year': 2011, 'sum': 60000}, {'year': 2012, 'sum': 40000}, + {'year': 2013, 'sum': 84000}, + ] + for idx, val in zip(range(len(results)), results): + with self.subTest(result=val): + self.assertEqual(qs[idx], val) + + def test_fail_update(self): + """Window expressions can't be used in an UPDATE statement.""" + msg = 'Window expressions are not allowed in this query' + with self.assertRaisesMessage(FieldError, msg): + Employee.objects.filter(department='Management').update( + salary=Window(expression=Max('salary'), partition_by='department'), + ) + + def test_fail_insert(self): + """Window expressions can't be used in an INSERT statement.""" + msg = 'Window expressions are not allowed in this query' + with self.assertRaisesMessage(FieldError, msg): + Employee.objects.create( + name='Jameson', department='Management', hire_date=datetime.date(2007, 7, 1), + salary=Window(expression=Sum(Value(10000), order_by=F('pk').asc())), + ) + + def test_invalid_start_value_range(self): + msg = "start argument must be a negative integer, zero, or None, but got '3'." + with self.assertRaisesMessage(ValueError, msg): + list(Employee.objects.annotate(test=Window( + expression=Sum('salary'), + order_by=F('hire_date').asc(), + frame=ValueRange(start=3), + ))) + + def test_invalid_end_value_range(self): + msg = "end argument must be a positive integer, zero, or None, but got '-3'." + with self.assertRaisesMessage(ValueError, msg): + list(Employee.objects.annotate(test=Window( + expression=Sum('salary'), + order_by=F('hire_date').asc(), + frame=ValueRange(end=-3), + ))) + + def test_invalid_type_end_value_range(self): + msg = "end argument must be a positive integer, zero, or None, but got 'a'." + with self.assertRaisesMessage(ValueError, msg): + list(Employee.objects.annotate(test=Window( + expression=Sum('salary'), + order_by=F('hire_date').asc(), + frame=ValueRange(end='a'), + ))) + + def test_invalid_type_start_value_range(self): + msg = "start argument must be a negative integer, zero, or None, but got 'a'." + with self.assertRaisesMessage(ValueError, msg): + list(Employee.objects.annotate(test=Window( + expression=Sum('salary'), + frame=ValueRange(start='a'), + ))) + + def test_invalid_type_end_row_range(self): + msg = "end argument must be a positive integer, zero, or None, but got 'a'." + with self.assertRaisesMessage(ValueError, msg): + list(Employee.objects.annotate(test=Window( + expression=Sum('salary'), + frame=RowRange(end='a'), + ))) + + @skipUnless(connection.vendor == 'postgresql', 'Frame construction not allowed on PostgreSQL') + def test_postgresql_illegal_range_frame_start(self): + msg = 'PostgreSQL only supports UNBOUNDED together with PRECEDING and FOLLOWING.' + with self.assertRaisesMessage(NotSupportedError, msg): + list(Employee.objects.annotate(test=Window( + expression=Sum('salary'), + order_by=F('hire_date').asc(), + frame=ValueRange(start=-1), + ))) + + @skipUnless(connection.vendor == 'postgresql', 'Frame construction not allowed on PostgreSQL') + def test_postgresql_illegal_range_frame_end(self): + msg = 'PostgreSQL only supports UNBOUNDED together with PRECEDING and FOLLOWING.' + with self.assertRaisesMessage(NotSupportedError, msg): + list(Employee.objects.annotate(test=Window( + expression=Sum('salary'), + order_by=F('hire_date').asc(), + frame=ValueRange(end=1), + ))) + + def test_invalid_type_start_row_range(self): + msg = "start argument must be a negative integer, zero, or None, but got 'a'." + with self.assertRaisesMessage(ValueError, msg): + list(Employee.objects.annotate(test=Window( + expression=Sum('salary'), + order_by=F('hire_date').asc(), + frame=RowRange(start='a'), + ))) + + +class NonQueryWindowTests(SimpleTestCase): + def test_window_repr(self): + self.assertEqual( + repr(Window(expression=Sum('salary'), partition_by='department')), + '' + ) + self.assertEqual( + repr(Window(expression=Avg('salary'), order_by=F('department').asc())), + '' + ) + + def test_window_frame_repr(self): + self.assertEqual( + repr(RowRange(start=-1)), + '' + ) + self.assertEqual( + repr(ValueRange(start=None, end=1)), + '' + ) + self.assertEqual( + repr(ValueRange(start=0, end=0)), + '' + ) + self.assertEqual( + repr(RowRange(start=0, end=0)), + '' + ) + + def test_empty_group_by_cols(self): + window = Window(expression=Sum('pk')) + self.assertEqual(window.get_group_by_cols(), []) + self.assertFalse(window.contains_aggregate) + + def test_frame_empty_group_by_cols(self): + frame = WindowFrame() + self.assertEqual(frame.get_group_by_cols(), []) + + def test_frame_window_frame_notimplemented(self): + frame = WindowFrame() + msg = 'Subclasses must implement window_frame_start_end().' + with self.assertRaisesMessage(NotImplementedError, msg): + frame.window_frame_start_end(None, None, None) + + def test_invalid_filter(self): + msg = 'Window is disallowed in the filter clause' + with self.assertRaisesMessage(NotSupportedError, msg): + Employee.objects.annotate(dense_rank=Window(expression=DenseRank())).filter(dense_rank__gte=1) + + def test_invalid_order_by(self): + msg = 'order_by must be either an Expression or a sequence of expressions' + with self.assertRaisesMessage(ValueError, msg): + Window(expression=Sum('power'), order_by='-horse') + + def test_invalid_source_expression(self): + msg = "Expression 'Upper' isn't compatible with OVER clauses." + with self.assertRaisesMessage(ValueError, msg): + Window(expression=Upper('name')) -- cgit v1.2.1