diff options
author | Jason Madden <jamadden@gmail.com> | 2020-03-06 18:38:50 -0600 |
---|---|---|
committer | Jason Madden <jamadden@gmail.com> | 2020-03-18 12:26:35 -0500 |
commit | 8ac3bd088d9b924cfb3170b77b41effd2de39d23 (patch) | |
tree | 42c4c9f92b09dab763c0ecd0a9a916a7f24caad7 | |
parent | 13de77d4edfc64c89533656220030230bcf2d895 (diff) | |
download | zope-interface-8ac3bd088d9b924cfb3170b77b41effd2de39d23.tar.gz |
Move Interface hashing and comparison to C; 2.5 to 15x speedup in micro benchmarks
Included benchmark numbers:
Current master, Python 3.8:
.....................
contains (empty dict): Mean +- std dev: 198 ns +- 5 ns
.....................
contains (populated dict): Mean +- std dev: 197 ns +- 6 ns
.....................
contains (populated list): Mean +- std dev: 53.1 us +- 1.2 us
This code:
.....................
contains (empty dict): Mean +- std dev: 77.9 ns +- 2.3 ns
.....................
contains (populated dict): Mean +- std dev: 78.4 ns +- 3.1 ns
.....................
contains (populated list): Mean +- std dev: 3.69 us +- 0.08 us
So anywhere from 2.5 to 15x faster. Not sure how that will translate to
larger benchmarks, but I'm hopeful.
It turns out that messing with ``__module__`` is nasty, tricky
business, especially when you do it from C. Everytime you define a new
subclass, the descriptors that you set get overridden by the type
machinery (PyType_Ready). I'm using a data descriptor and a meta class
right now to avoid that but I'm not super happy with that and would
like to find a better way. (At least, maybe the data part of the
descriptor isn't necessary?) It may be needed to move more code into
C, I don't want a slowdown accessing ``__module__`` either; copying
around the standard PyGetSet or PyMember descriptors isn't enough
because they don't work on the class object (so
``classImplements(InterfaceClass, IInterface)`` fails).
-rw-r--r-- | MANIFEST.in | 1 | ||||
-rw-r--r-- | benchmarks/micro.py | 42 | ||||
-rw-r--r-- | src/zope/interface/_zope_interface_coptimizations.c | 212 | ||||
-rw-r--r-- | src/zope/interface/interface.py | 161 | ||||
-rw-r--r-- | src/zope/interface/tests/test_interface.py | 2 | ||||
-rw-r--r-- | src/zope/interface/tests/test_sorting.py | 17 |
6 files changed, 361 insertions, 74 deletions
diff --git a/MANIFEST.in b/MANIFEST.in index 1dcbd31..83a0eb8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -18,3 +18,4 @@ global-exclude coverage.xml global-exclude appveyor.yml prune docs/_build +prune benchmarks diff --git a/benchmarks/micro.py b/benchmarks/micro.py new file mode 100644 index 0000000..0494b56 --- /dev/null +++ b/benchmarks/micro.py @@ -0,0 +1,42 @@ +import pyperf + +from zope.interface import Interface +from zope.interface.interface import InterfaceClass + +ifaces = [ + InterfaceClass('I' + str(i), (Interface,), {}) + for i in range(100) +] + +INNER = 1000 + +def bench_in(loops, o): + t0 = pyperf.perf_counter() + for _ in range(loops): + for _ in range(INNER): + o.__contains__(Interface) + + return pyperf.perf_counter() - t0 + +runner = pyperf.Runner() + +runner.bench_time_func( + 'contains (empty dict)', + bench_in, + {}, + inner_loops=INNER +) + +runner.bench_time_func( + 'contains (populated dict)', + bench_in, + {k: k for k in ifaces}, + inner_loops=INNER +) + +runner.bench_time_func( + 'contains (populated list)', + bench_in, + ifaces, + inner_loops=INNER +) diff --git a/src/zope/interface/_zope_interface_coptimizations.c b/src/zope/interface/_zope_interface_coptimizations.c index 180cdef..7341474 100644 --- a/src/zope/interface/_zope_interface_coptimizations.c +++ b/src/zope/interface/_zope_interface_coptimizations.c @@ -33,6 +33,9 @@ #if PY_MAJOR_VERSION >= 3 #define PY3K +#define PyNative_FromString PyUnicode_FromString +#else +#define PyNative_FromString PyString_FromString #endif static PyObject *str__dict__, *str__implemented__, *strextends; @@ -45,6 +48,7 @@ static PyObject *str_uncached_subscriptions; static PyObject *str_registry, *strro, *str_generation, *strchanged; static PyObject *str__self__; + static PyTypeObject *Implements; static int imported_declarations = 0; @@ -709,6 +713,17 @@ __adapt__(PyObject *self, PyObject *obj) return Py_None; } +#ifndef PY3K +typedef long Py_hash_t; +#endif + +typedef struct { + Spec spec; + PyObject* __name__; + PyObject* __module__; + Py_hash_t _v_cached_hash; +} IB; + static struct PyMethodDef ib_methods[] = { {"__adapt__", (PyCFunction)__adapt__, METH_O, "Adapt an object to the reciever"}, @@ -776,13 +791,188 @@ ib_call(PyObject *self, PyObject *args, PyObject *kwargs) return NULL; } + +static int +IB_traverse(IB* self, visitproc visit, void* arg) +{ + Py_VISIT(self->__name__); + Py_VISIT(self->__module__); + return 0; +} + +static int +IB_clear(IB* self) +{ + Py_CLEAR(self->__name__); + Py_CLEAR(self->__module__); + return 0; +} + +static void +IB_dealloc(IB* self) +{ + IB_clear(self); + Py_TYPE(self)->tp_free(OBJECT(self)); +} + +static PyMemberDef IB_members[] = { + {"__name__", T_OBJECT_EX, offsetof(IB, __name__), 0, ""}, + {"__module__", T_OBJECT_EX, offsetof(IB, __module__), 0, ""}, + {"__ibmodule__", T_OBJECT_EX, offsetof(IB, __module__), 0, ""}, + {NULL} +}; + +static Py_hash_t +IB_hash(IB* self) +{ + PyObject* tuple; + if (!self->__module__) { + PyErr_SetString(PyExc_AttributeError, "__module__"); + return -1; + } + if (!self->__name__) { + PyErr_SetString(PyExc_AttributeError, "__name__"); + return -1; + } + + if (self->_v_cached_hash) { + return self->_v_cached_hash; + } + + tuple = PyTuple_Pack(2, self->__name__, self->__module__); + if (!tuple) { + return -1; + } + self->_v_cached_hash = PyObject_Hash(tuple); + Py_CLEAR(tuple); + return self->_v_cached_hash; +} + +static PyTypeObject InterfaceBaseType; + +static PyObject* +IB_richcompare(IB* self, PyObject* other, int op) +{ + PyObject* othername; + PyObject* othermod; + PyObject* oresult; + IB* otherib; + int result; + + otherib = NULL; + oresult = othername = othermod = NULL; + + if (OBJECT(self) == other) { + switch(op) { + case Py_EQ: + case Py_LE: + case Py_GE: + Py_RETURN_TRUE; + break; + case Py_NE: + Py_RETURN_FALSE; + } + } + + if (other == Py_None) { + switch(op) { + case Py_LT: + case Py_LE: + case Py_NE: + Py_RETURN_TRUE; + default: + Py_RETURN_FALSE; + } + } + + if (PyObject_TypeCheck(other, &InterfaceBaseType)) { + otherib = (IB*)other; + othername = otherib->__name__; + othermod = otherib->__module__; + } + else { + othername = PyObject_GetAttrString(other, "__name__"); + // TODO: Optimize this case. + if (othername == NULL) { + PyErr_Clear(); + othername = PyNative_FromString(""); + } + othermod = PyObject_GetAttrString(other, "__module__"); + if (othermod == NULL) { + PyErr_Clear(); + othermod = PyNative_FromString(""); + } + } +#if 0 +// This is the simple, straightforward version of what Python does. + PyObject* pt1 = PyTuple_Pack(2, self->__name__, self->__module__); + PyObject* pt2 = PyTuple_Pack(2, othername, othermod); + oresult = PyObject_RichCompare(pt1, pt2, op); +#endif + + // tuple comparison is decided by the first non-equal element. + result = PyObject_RichCompareBool(self->__name__, othername, Py_EQ); + if (result == 0) { + result = PyObject_RichCompareBool(self->__name__, othername, op); + } + else if (result == 1) { + result = PyObject_RichCompareBool(self->__module__, othermod, op); + } + if (result == -1) { + goto cleanup; + } + + oresult = result ? Py_True : Py_False; + Py_INCREF(oresult); + + +cleanup: + if (!otherib) { + Py_XDECREF(othername); + Py_XDECREF(othermod); + } + return oresult; + +} + +static PyObject* +IB_module_get(IB* self, void* context) +{ + return self->__module__; +} + +static int +IB_module_set(IB* self, PyObject* value, void* context) +{ + Py_XINCREF(value); + Py_XDECREF(self->__module__); + self->__module__ = value; + return 0; +} + +static int +IB_init(IB* self, PyObject* args, PyObject* kwargs) +{ + IB_clear(self); + self->__module__ = Py_None; + Py_INCREF(self->__module__); + self->__name__ = Py_None; + Py_INCREF(self->__name__); + return 0; +} + +static PyGetSetDef IB_getsets[] = { + {"__module__", (getter)IB_module_get, (setter)IB_module_set, 0, NULL}, + {NULL} +}; + static PyTypeObject InterfaceBaseType = { PyVarObject_HEAD_INIT(NULL, 0) /* tp_name */ "_zope_interface_coptimizations." "InterfaceBase", - /* tp_basicsize */ 0, + /* tp_basicsize */ sizeof(IB), /* tp_itemsize */ 0, - /* tp_dealloc */ (destructor)0, + /* tp_dealloc */ (destructor)IB_dealloc, /* tp_print */ (printfunc)0, /* tp_getattr */ (getattrfunc)0, /* tp_setattr */ (setattrfunc)0, @@ -791,22 +981,30 @@ static PyTypeObject InterfaceBaseType = { /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, - /* tp_hash */ (hashfunc)0, + /* tp_hash */ (hashfunc)IB_hash, /* tp_call */ (ternaryfunc)ib_call, /* tp_str */ (reprfunc)0, /* tp_getattro */ (getattrofunc)0, /* tp_setattro */ (setattrofunc)0, /* tp_as_buffer */ 0, /* tp_flags */ Py_TPFLAGS_DEFAULT - | Py_TPFLAGS_BASETYPE , + | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_doc */ "Interface base type providing __call__ and __adapt__", - /* tp_traverse */ (traverseproc)0, - /* tp_clear */ (inquiry)0, - /* tp_richcompare */ (richcmpfunc)0, + /* tp_traverse */ (traverseproc)IB_traverse, + /* tp_clear */ (inquiry)IB_clear, + /* tp_richcompare */ (richcmpfunc)IB_richcompare, /* tp_weaklistoffset */ (long)0, /* tp_iter */ (getiterfunc)0, /* tp_iternext */ (iternextfunc)0, /* tp_methods */ ib_methods, + /* tp_members */ IB_members, + /* tp_getset */ IB_getsets, + /* tp_base */ &SpecType, + /* tp_dict */ 0, + /* tp_descr_get */ 0, + /* tp_descr_set */ 0, + /* tp_dictoffset */ 0, + /* tp_init */ (initproc)IB_init, }; /* =================== End: __call__ and __adapt__ ==================== */ diff --git a/src/zope/interface/interface.py b/src/zope/interface/interface.py index a8eac01..8707b9e 100644 --- a/src/zope/interface/interface.py +++ b/src/zope/interface/interface.py @@ -174,6 +174,10 @@ class InterfaceBase(object): __slots__ = () + def __init__(self): + self.__name__ = None + self.__module__ = None + def _call_conform(self, conform): raise NotImplementedError @@ -209,6 +213,67 @@ class InterfaceBase(object): if adapter is not None: return adapter + def __cmp(self, other): + # Yes, I did mean to name this __cmp, rather than __cmp__. + # It is a private method used by __lt__ and __gt__. + # I don't want to override __eq__ because I want the default + # __eq__, which is really fast. + """Make interfaces sortable + + TODO: It would ne nice if: + + More specific interfaces should sort before less specific ones. + Otherwise, sort on name and module. + + But this is too complicated, and we're going to punt on it + for now. + + For now, sort on interface and module name. + + None is treated as a pseudo interface that implies the loosest + contact possible, no contract. For that reason, all interfaces + sort before None. + + """ + if other is None: + return -1 + + n1 = (self.__name__, self.__module__) + n2 = (getattr(other, '__name__', ''), getattr(other, '__module__', '')) + + # This spelling works under Python3, which doesn't have cmp(). + return (n1 > n2) - (n1 < n2) + + def __hash__(self): + try: + return self._v_cached_hash + except AttributeError: + self._v_cached_hash = hash((self.__name__, self.__module__)) + return self._v_cached_hash + + def __eq__(self, other): + c = self.__cmp(other) + return c == 0 + + def __ne__(self, other): + c = self.__cmp(other) + return c != 0 + + def __lt__(self, other): + c = self.__cmp(other) + return c < 0 + + def __le__(self, other): + c = self.__cmp(other) + return c <= 0 + + def __gt__(self, other): + c = self.__cmp(other) + return c > 0 + + def __ge__(self, other): + c = self.__cmp(other) + return c >= 0 adapter_hooks = _use_c_impl([], 'adapter_hooks') @@ -420,8 +485,33 @@ class Specification(SpecificationBase): return default if attr is None else attr +class _ModuleDescriptor(object): + def __init__(self, saved): + self._saved = saved + + def __get__(self, inst, kind): + if inst is None: + return self._saved + return inst.__ibmodule__ + + def __set__(self, inst, val): + inst.__ibmodule__ = val + +# The simple act of having *any* metaclass besides type +# makes our __module__ shenanigans work. Doing this at the class level, +# and manually copying it around doesn't work. +class _MC(type): + def __new__(cls, name, bases, attrs): + attrs['__module__'] = _ModuleDescriptor(attrs['__module__']) + return type.__new__(cls, name, bases, attrs) + +_InterfaceClassBase = _MC( + 'InterfaceClass', + (Element, InterfaceBase, Specification), + {'__module__': __name__}) + -class InterfaceClass(Element, InterfaceBase, Specification): +class InterfaceClass(_InterfaceClassBase): """Prototype (scarecrow) Interfaces Implementation.""" # We can't say this yet because we don't have enough @@ -449,6 +539,7 @@ class InterfaceClass(Element, InterfaceBase, Specification): pass self.__module__ = __module__ + assert '__module__' not in self.__dict__ d = attrs.get('__doc__') if d is not None: @@ -471,6 +562,7 @@ class InterfaceClass(Element, InterfaceBase, Specification): if not isinstance(base, InterfaceClass): raise TypeError('Expected base interfaces') + Specification.__init__(self, bases) # Make sure that all recorded attributes (and methods) are of type @@ -495,7 +587,7 @@ class InterfaceClass(Element, InterfaceBase, Specification): self.__attrs = attrs - self.__identifier__ = "%s.%s" % (self.__module__, self.__name__) + self.__identifier__ = "%s.%s" % (__module__, name) def interfaces(self): """Return an iterator for the interfaces in the specification. @@ -607,7 +699,7 @@ class InterfaceClass(Element, InterfaceBase, Specification): return self._v_repr except AttributeError: name = self.__name__ - m = self.__module__ + m = self.__ibmodule__ if m: name = '%s.%s' % (m, name) r = "<%s %s>" % (self.__class__.__name__, name) @@ -636,69 +728,6 @@ class InterfaceClass(Element, InterfaceBase, Specification): def __reduce__(self): return self.__name__ - def __cmp(self, other): - # Yes, I did mean to name this __cmp, rather than __cmp__. - # It is a private method used by __lt__ and __gt__. - # I don't want to override __eq__ because I want the default - # __eq__, which is really fast. - """Make interfaces sortable - - TODO: It would ne nice if: - - More specific interfaces should sort before less specific ones. - Otherwise, sort on name and module. - - But this is too complicated, and we're going to punt on it - for now. - - For now, sort on interface and module name. - - None is treated as a pseudo interface that implies the loosest - contact possible, no contract. For that reason, all interfaces - sort before None. - - """ - if other is None: - return -1 - - n1 = (self.__name__, self.__module__) - n2 = (getattr(other, '__name__', ''), getattr(other, '__module__', '')) - - # This spelling works under Python3, which doesn't have cmp(). - return (n1 > n2) - (n1 < n2) - - def __hash__(self): - try: - return self._v_cached_hash - except AttributeError: - self._v_cached_hash = hash((self.__name__, self.__module__)) - return self._v_cached_hash - - def __eq__(self, other): - c = self.__cmp(other) - return c == 0 - - def __ne__(self, other): - c = self.__cmp(other) - return c != 0 - - def __lt__(self, other): - c = self.__cmp(other) - return c < 0 - - def __le__(self, other): - c = self.__cmp(other) - return c <= 0 - - def __gt__(self, other): - c = self.__cmp(other) - return c > 0 - - def __ge__(self, other): - c = self.__cmp(other) - return c >= 0 - - Interface = InterfaceClass("Interface", __module__='zope.interface') # Interface is the only member of its own SRO. Interface._calculate_sro = lambda: (Interface,) diff --git a/src/zope/interface/tests/test_interface.py b/src/zope/interface/tests/test_interface.py index da7eb8c..737b229 100644 --- a/src/zope/interface/tests/test_interface.py +++ b/src/zope/interface/tests/test_interface.py @@ -284,7 +284,6 @@ class GenericInterfaceBaseTests(unittest.TestCase): self.assertRaises(TypeError, ib, adapted) - class InterfaceBaseTests(GenericInterfaceBaseTests, OptimizationTestMixin): # Tests that work with the C implementation @@ -1049,6 +1048,7 @@ class InterfaceTests(unittest.TestCase): self.assertTrue(ICurrent.implementedBy(Current)) self.assertFalse(IOther.implementedBy(Current)) + self.assertEqual(ICurrent, ICurrent) self.assertTrue(ICurrent in implementedBy(Current)) self.assertFalse(IOther in implementedBy(Current)) self.assertTrue(ICurrent in providedBy(current)) diff --git a/src/zope/interface/tests/test_sorting.py b/src/zope/interface/tests/test_sorting.py index 73613d0..0e33f47 100644 --- a/src/zope/interface/tests/test_sorting.py +++ b/src/zope/interface/tests/test_sorting.py @@ -45,3 +45,20 @@ class Test(unittest.TestCase): l = [I1, m1_I1] l.sort() self.assertEqual(l, [m1_I1, I1]) + + def test_I1_I2(self): + self.assertLess(I1.__name__, I2.__name__) + self.assertEqual(I1.__module__, I2.__module__) + self.assertEqual(I1.__module__, __name__) + self.assertLess(I1, I2) + + def _makeI1(self): + class I1(Interface): + pass + return I1 + + def test_nested(self): + nested_I1 = self._makeI1() + self.assertEqual(I1, nested_I1) + self.assertEqual(nested_I1, I1) + self.assertEqual(hash(I1), hash(nested_I1)) |