/* PR81833: This used to fail due to improper implementation of vec_msum. */ /* Test case relies on -mcpu=power7 or later. Currently we don't have machinery to express that, so we have two separate tests for -mcpu=power7 and -mcpu=power8 to catch 32-bit BE on P7 and 64-bit BE/LE on P8. */ /* { dg-do run } */ /* { dg-require-effective-target p8vector_hw } */ /* { dg-options "-mdejagnu-cpu=power8 -O2" } */ #include #define vec_u8 vector unsigned char #define vec_s8 vector signed char #define vec_u16 vector unsigned short #define vec_s16 vector signed short #define vec_u32 vector unsigned int #define vec_s32 vector signed int #define vec_f vector float #define LOAD_ZERO const vec_u8 zerov = vec_splat_u8 (0) #define zero_u8v (vec_u8) zerov #define zero_s8v (vec_s8) zerov #define zero_u16v (vec_u16) zerov #define zero_s16v (vec_s16) zerov #define zero_u32v (vec_u32) zerov #define zero_s32v (vec_s32) zerov signed int __attribute__((noinline)) scalarproduct_int16_vsx (const signed short *v1, const signed short *v2, int order) { int i; LOAD_ZERO; register vec_s16 vec1; register vec_s32 res = vec_splat_s32 (0), t; signed int ires; for (i = 0; i < order; i += 8) { vec1 = vec_vsx_ld (0, v1); t = vec_msum (vec1, vec_vsx_ld (0, v2), zero_s32v); res = vec_sums (t, res); v1 += 8; v2 += 8; } res = vec_splat (res, 3); vec_ste (res, 0, &ires); return ires; } int main(void) { const signed short test_vec[] = { 1, 1, 1, 1, 1, 1, 1, 1 }; if (scalarproduct_int16_vsx (test_vec, test_vec, 8) != 8) __builtin_abort (); return 0; }