From 75df68b5ca238eabc2de144dce08d481c59ffcaf Mon Sep 17 00:00:00 2001 From: Julian Taylor Date: Mon, 10 Jun 2013 23:02:36 +0200 Subject: ENH: tell gcc to unroll strided copy loops The strided copy loops profit a lot from unrolling, as the number of operations executed in each iteration is very small. GCC needs to be told explicitly to unroll, even at -O3. Unrolling is only profitable if the move can be done in a single instruction; otherwise the increased code size makes it slower. The flag is therefore only used for operations on element sizes less than or equal to the native pointer size. Tested to improve performance by 20-50% on Intel Core 2 Duo, Xeon 5xxx/7xxx and AMD Phenom X4. --- numpy/core/setup_common.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'numpy/core/setup_common.py') diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 284acfe21..cb30c83c9 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -113,6 +113,13 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'), ("__builtin_bswap64", '5u'), ] +# gcc function attributes +# (attribute as understood by gcc, function name), +# function name will be converted to HAVE_ preprocessor macro +OPTIONAL_GCC_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', + 'attribute_optimize_unroll_loops'), + ] + # Subset of OPTIONAL_STDFUNCS which may alreay have HAVE_* defined by Python.h OPTIONAL_STDFUNCS_MAYBE = ["expm1", "log1p", "acosh", "atanh", "asinh", "hypot", "copysign"] -- cgit v1.2.1