diff options
Diffstat (limited to 'libgfortran/m4/matmul.m4')
-rw-r--r-- | libgfortran/m4/matmul.m4 | 28 |
1 file changed, 17 insertions, 11 deletions
diff --git a/libgfortran/m4/matmul.m4 b/libgfortran/m4/matmul.m4 index 812a7e7e571..7976fda8bb4 100644 --- a/libgfortran/m4/matmul.m4 +++ b/libgfortran/m4/matmul.m4 @@ -121,19 +121,24 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray, { static void (*matmul_p) ('rtype` * const restrict retarray, 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); + + void (*matmul_fn) ('rtype` * const restrict retarray, + 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_'rtype_code`_vanilla; + matmul_fn = matmul_'rtype_code`_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_'rtype_code`_avx512f; - goto tailcall; + matmul_fn = matmul_'rtype_code`_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -142,8 +147,8 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_'rtype_code`_avx2; - goto tailcall; + matmul_fn = matmul_'rtype_code`_avx2; + goto store; } #endif @@ -151,15 +156,16 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_'rtype_code`_avx; - goto tailcall; + matmul_fn = matmul_'rtype_code`_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla 
function. */ |