From 684e4a92d5f8a9b8744ea7f994d636e7483e530f Mon Sep 17 00:00:00 2001 From: Warren Weckesser Date: Thu, 30 Apr 2020 22:29:18 -0400 Subject: BUG: lib: Fix a problem with vectorize with default parameters. When `otypes` is given to `vectorize` and then the instance is called, it creates a ufunc by calling numpy.core.umath.frompyfunc. The number of arguments given to this ufunc is set to the number of arguments in the call of the vectorize instance. This ufunc is cached, so frompyfunc does not have to be called on the next call. The problem is that, if the function being wrapped has parameters with default values, the number of arguments passed to the vectorize instance can change, and when that happens, a new ufunc must be created by calling frompyfunc with the correct number of arguments. This commit changes the cache of the ufunc from a simple attribute that holds the most recent ufunc to a dictionary whose keys are the number of arguments in the call. The cache is only used when the vectorized function is called with only positional arguments and there are no excluded arguments. If keywords are used, the number of arguments is no longer sufficient to uniquely identify a previously created ufunc. Closes gh-16120. --- numpy/lib/function_base.py | 22 ++++++---- numpy/lib/tests/test_function_base.py | 77 +++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 7 deletions(-) diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 7eeed7825..d2859d94d 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -2026,7 +2026,7 @@ class vectorize: self.pyfunc = pyfunc self.cache = cache self.signature = signature - self._ufunc = None # Caching to improve default performance + self._ufunc = {} # Caching to improve default performance if doc is None: self.__doc__ = pyfunc.__doc__ @@ -2091,14 +2091,22 @@ class vectorize: if self.otypes is not None: otypes = self.otypes - nout = len(otypes) - # Note logic here: We only *use* self._ufunc if func is self.pyfunc - # even though we set self._ufunc regardless. - if func is self.pyfunc and self._ufunc is not None: - ufunc = self._ufunc + # self._ufunc is a dictionary whose keys are the number of + # arguments (i.e. len(args)) and whose values are ufuncs created + # by frompyfunc. len(args) can be different for different calls if + # self.pyfunc has parameters with default values. We only use the + # cache when func is self.pyfunc, which occurs when the call uses + # only positional arguments and no arguments are excluded. + + nin = len(args) + nout = len(self.otypes) + if func is not self.pyfunc or nin not in self._ufunc: + ufunc = frompyfunc(func, nin, nout) else: - ufunc = self._ufunc = frompyfunc(func, len(args), nout) + ufunc = None # We'll get it from self._ufunc + if func is self.pyfunc: + ufunc = self._ufunc.setdefault(nin, ufunc) else: # Get number of outputs and output types by calling the function on # the first entries of args. We also cache the result to prevent diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 23bf3296d..9470a355d 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1273,6 +1273,83 @@ class TestVectorize: r2 = np.array([3, 4, 5]) assert_array_equal(r1, r2) + def test_keywords_with_otypes_order1(self): + # gh-1620: The second call of f would crash with + # `ValueError: invalid number of arguments`. + + import math + + def foo(x, y=1.0): + return y*math.floor(x) + + f = vectorize(foo, otypes=[float]) + r1 = f(np.arange(3.0), 1.0) + r2 = f(np.arange(3.0)) + assert_array_equal(r1, r2) + + def test_keywords_with_otypes_order2(self): + # gh-1620: The second call of f would crash with + # `ValueError: non-broadcastable output operand with shape () + # doesn't match the broadcast shape (3,)`. + + import math + + def foo(x, y=1.0): + return y*math.floor(x) + + f = vectorize(foo, otypes=[float]) + r1 = f(np.arange(3.0)) + r2 = f(np.arange(3.0), 1.0) + assert_array_equal(r1, r2) + + def test_keywords_with_otypes_order3(self): + # gh-1620: The third call of f would crash with + # `ValueError: invalid number of arguments`. + + import math + + def foo(x, y=1.0): + return y*math.floor(x) + + f = vectorize(foo, otypes=[float]) + r1 = f(np.arange(3.0)) + r2 = f(np.arange(3.0), y=1.0) + r3 = f(np.arange(3.0)) + assert_array_equal(r1, r2) + assert_array_equal(r1, r3) + + def test_keywords_with_otypes_several_kwd_args1(self): + # gh-1620 Make sure different uses of keyword arguments + # don't break the vectorized function. + + import math + + def foo(x, y=1.0, z=0.0): + return y*math.floor(x) + z + + f = vectorize(foo, otypes=[float]) + r1 = f(10.4, z=100) + r2 = f(10.4, y=-1) + r3 = f(10.4) + assert_equal(r1, foo(10.4, z=100)) + assert_equal(r2, foo(10.4, y=-1)) + assert_equal(r3, foo(10.4)) + + def test_keywords_with_otypes_several_kwd_args2(self): + # gh-1620 Make sure different uses of keyword arguments + # don't break the vectorized function. + + import math + + def foo(x, y=1.0, z=0.0): + return y*math.floor(x) + z + + f = vectorize(foo, otypes=[float]) + r1 = f(z=100, x=10.4, y=-1) + r2 = f(1, 2, 3) + assert_equal(r1, foo(z=100, x=10.4, y=-1)) + assert_equal(r2, foo(1, 2, 3)) + def test_keywords_no_func_code(self): # This needs to test a function that has keywords but # no func_code attribute, since otherwise vectorize will -- cgit v1.2.1 From c888073415d60e47a36cb87d72488e12107b34be Mon Sep 17 00:00:00 2001 From: Warren Weckesser Date: Sat, 2 May 2020 10:55:21 -0400 Subject: MAINT: lib: A few changes in some vectorize tests. Added a note to some of the tests that the order of the calls is an important part of the test, because the code is testing the caching of the ufuncs. Also reuse a couple functions in the vectorize tests by defining them at the module level. --- numpy/lib/tests/test_function_base.py | 72 +++++++++++++++-------------------- 1 file changed, 31 insertions(+), 41 deletions(-) diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 9470a355d..b4e928273 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -3,6 +3,7 @@ import warnings import sys import decimal from fractions import Fraction +import math import pytest import numpy as np @@ -1221,6 +1222,16 @@ class TestExtins: assert_array_equal(a, ac) +# _foo1 and _foo2 are used in some tests in TestVectorize. + +def _foo1(x, y=1.0): + return y*math.floor(x) + + +def _foo2(x, y=1.0, z=0.0): + return y*math.floor(x) + z + + class TestVectorize: def test_simple(self): @@ -1252,7 +1263,6 @@ class TestVectorize: assert_array_equal(y, x) def test_ufunc(self): - import math f = vectorize(math.cos) args = np.array([0, 0.5 * np.pi, np.pi, 1.5 * np.pi, 2 * np.pi]) r1 = f(args) @@ -1276,13 +1286,9 @@ class TestVectorize: def test_keywords_with_otypes_order1(self): # gh-1620: The second call of f would crash with # `ValueError: invalid number of arguments`. - - import math - - def foo(x, y=1.0): - return y*math.floor(x) - - f = vectorize(foo, otypes=[float]) + f = vectorize(_foo1, otypes=[float]) + # We're testing the caching of ufuncs by vectorize, so the order + # of these function calls is an important part of the test. r1 = f(np.arange(3.0), 1.0) r2 = f(np.arange(3.0)) assert_array_equal(r1, r2) @@ -1291,13 +1297,9 @@ class TestVectorize: # gh-1620: The second call of f would crash with # `ValueError: non-broadcastable output operand with shape () # doesn't match the broadcast shape (3,)`. - - import math - - def foo(x, y=1.0): - return y*math.floor(x) - - f = vectorize(foo, otypes=[float]) + f = vectorize(_foo1, otypes=[float]) + # We're testing the caching of ufuncs by vectorize, so the order + # of these function calls is an important part of the test. r1 = f(np.arange(3.0)) r2 = f(np.arange(3.0), 1.0) assert_array_equal(r1, r2) @@ -1305,13 +1307,9 @@ class TestVectorize: def test_keywords_with_otypes_order3(self): # gh-1620: The third call of f would crash with # `ValueError: invalid number of arguments`. - - import math - - def foo(x, y=1.0): - return y*math.floor(x) - - f = vectorize(foo, otypes=[float]) + f = vectorize(_foo1, otypes=[float]) + # We're testing the caching of ufuncs by vectorize, so the order + # of these function calls is an important part of the test. r1 = f(np.arange(3.0)) r2 = f(np.arange(3.0), y=1.0) r3 = f(np.arange(3.0)) @@ -1321,34 +1319,26 @@ class TestVectorize: def test_keywords_with_otypes_several_kwd_args1(self): # gh-1620 Make sure different uses of keyword arguments # don't break the vectorized function. - - import math - - def foo(x, y=1.0, z=0.0): - return y*math.floor(x) + z - - f = vectorize(foo, otypes=[float]) + f = vectorize(_foo2, otypes=[float]) + # We're testing the caching of ufuncs by vectorize, so the order + # of these function calls is an important part of the test. r1 = f(10.4, z=100) r2 = f(10.4, y=-1) r3 = f(10.4) - assert_equal(r1, foo(10.4, z=100)) - assert_equal(r2, foo(10.4, y=-1)) - assert_equal(r3, foo(10.4)) + assert_equal(r1, _foo2(10.4, z=100)) + assert_equal(r2, _foo2(10.4, y=-1)) + assert_equal(r3, _foo2(10.4)) def test_keywords_with_otypes_several_kwd_args2(self): # gh-1620 Make sure different uses of keyword arguments # don't break the vectorized function. - - import math - - def foo(x, y=1.0, z=0.0): - return y*math.floor(x) + z - - f = vectorize(foo, otypes=[float]) + f = vectorize(_foo2, otypes=[float]) + # We're testing the caching of ufuncs by vectorize, so the order + # of these function calls is an important part of the test. r1 = f(z=100, x=10.4, y=-1) r2 = f(1, 2, 3) - assert_equal(r1, foo(z=100, x=10.4, y=-1)) - assert_equal(r2, foo(1, 2, 3)) + assert_equal(r1, _foo2(z=100, x=10.4, y=-1)) + assert_equal(r2, _foo2(1, 2, 3)) def test_keywords_no_func_code(self): # This needs to test a function that has keywords but -- cgit v1.2.1