diff options
-rw-r--r-- | gcc/ChangeLog | 549 | ||||
-rw-r--r-- | gcc/config.gcc | 14 | ||||
-rw-r--r-- | gcc/config/i386/avx512cdintrin.h | 219 | ||||
-rw-r--r-- | gcc/config/i386/avx512erintrin.h | 333 | ||||
-rw-r--r-- | gcc/config/i386/avx512fintrin.h | 12147 | ||||
-rw-r--r-- | gcc/config/i386/avx512pfintrin.h | 130 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin-types.def | 259 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 1745 | ||||
-rw-r--r-- | gcc/config/i386/immintrin.h | 8 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 30 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-1.c | 175 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-22.c | 424 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-23.c | 175 |
13 files changed, 16150 insertions, 58 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1bb86a88fc9..be9476cae0d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -8,6 +8,555 @@ Kirill Yukhin <kirill.yukhin@intel.com> Michael Zolotukhin <michael.v.zolotukhin@intel.com> + * config.gcc (extra_headers): Add avx512fintrin.h, avx512cdintrin.h, + avx512erintrin.h, avx512pfintrin.h. + * config/i386/avx512cdintrin.h: New file. + * config/i386/avx512erintrin.h: New file. + * config/i386/avx512fintrin.h: New file. + * config/i386/avx512pfintrin.h: New file. + * config/i386/i386-builtin-types.def: Add V16UHI, V32SF, V16SF, V8DF, + V8DI, V16SI, V64QI, PV8DF, PV8DI, PV16SI, PV16SF, PCV8DF, PCV16SF, + PCV8DI, PCV16SI, V16QI_FTYPE_V16SI, V8DF_FTYPE_V8SI, V8DF_FTYPE_V8DF, + V8HI_FTYPE_V8DI, V16SF_FTYPE_V16SF, V8SI_FTYPE_V8DI, V8SF_FTYPE_V8DF, + V8SF_FTYPE_V8DF_V8SF_QI, V16HI_FTYPE_V16SI, V16SF_FTYPE_FLOAT, + V16SI_FTYPE_INT, V8DF_FTYPE_DOUBLE, V8DI_FTYPE_INT64, + V16SF_FTYPE_V4SF, V8DF_FTYPE_V4DF, V8DI_FTYPE_V4DI, V16QI_FTYPE_V8DI, + UINT_FTYPE_V4SF, UINT64_FTYPE_V4SF, UINT_FTYPE_V2DF, + UINT64_FTYPE_V2DF, V16SI_FTYPE_V16SI, V16SI_FTYPE_V16SI_V16SI_HI, + V8DI_FTYPE_V8DI, V8DI_FTYPE_V8DI_V8DI_QI, V16SI_FTYPE_PV4SI, + V16SF_FTYPE_PV4SF, V8DI_FTYPE_PV4DI, V8DF_FTYPE_PV4DF, + V8UHI_FTYPE_V8UHI, V8USI_FTYPE_V8USI, V2DF_FTYPE_V2DF_UINT, + V2DF_FTYPE_V2DF_UINT64, V4DF_FTYPE_V8DF_INT, + V4DF_FTYPE_V8DF_INT_V4DF_QI, V8DF_FTYPE_V8DF_V8DI, + V4SF_FTYPE_V4SF_UINT, V4SF_FTYPE_V4SF_UINT64, + INT_FTYPE_V4SF_V4SF_INT_INT, INT_FTYPE_V2DF_V2DF_INT_INT, + V16SF_FTYPE_V16SF_INT, V4SF_FTYPE_V16SF_INT, + V4SF_FTYPE_V16SF_INT_V4SF_QI, V16SF_FTYPE_V16SF_V16SF, + V16SF_FTYPE_V16SF_V16SI, V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI, + V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI, V8DF_FTYPE_V8DF_INT_V8DF_QI, + V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT, V8DF_FTYPE_V8DF_V8DF, + V16SF_FTYPE_V16SF_V16SF_INT, V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI, + V16SF_FTYPE_V16SF_INT_V16SF_HI, V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI, + V16SF_FTYPE_V16SF_V16SF_V16SI_INT, + 
V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI, + V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT, + V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI, + V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT, + V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI, + V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT, V16SF_FTYPE_V16SF_V4SF_INT, + V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI, V16HI_FTYPE_V16SF_INT, + V16HI_FTYPE_V16SF_INT_V16HI_HI, V16HI_FTYPE_V16HI_V16HI_INT_V16HI_HI, + V16SI_FTYPE_V16SI_V4SI, V16SI_FTYPE_V16SI_V4SI_INT, + V4SI_FTYPE_V16SI_INT, V4SI_FTYPE_V16SI_INT_V4SI_QI, + V16SI_FTYPE_V16SI_V16SI, V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI, + V16SI_FTYPE_V16SI_SI, V16SI_FTYPE_V16SI_INT, + V16SI_FTYPE_V16SI_V4SI_V16SI_HI, V16SI_FTYPE_V16SI_INT_V16SI_HI, + V8DI_FTYPE_V8DI_V8DI, V16SI_FTYPE_V8DF_V8DF, + V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI, V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI, + V8DI_FTYPE_V8DI_V2DI, V4DI_FTYPE_V8DI_INT, + V4DI_FTYPE_V8DI_INT_V4DI_QI, V8DI_FTYPE_V8DI_V2DI_V8DI_QI, + V8DI_FTYPE_V8DI_INT_V8DI_QI, VOID_FTYPE_PDOUBLE_V8DF, + VOID_FTYPE_PFLOAT_V16SF, VOID_FTYPE_PV8DI_V8DI, HI_FTYPE_HI, + HI_FTYPE_HI_HI, HI_FTYPE_HI_INT, QI_FTYPE_V8DI_V8DI, + QI_FTYPE_V8DI_V8DI_QI, HI_FTYPE_V16SI_V16SI, HI_FTYPE_V16SI_V16SI_HI, + QI_FTYPE_V8DI_V8DI_INT, QI_FTYPE_V8DI_V8DI_INT_QI, + HI_FTYPE_V16SI_V16SI_INT, HI_FTYPE_V16SI_V16SI_INT_HI, + QI_FTYPE_V8DF_V8DF_INT, QI_FTYPE_V8DF_V8DF_INT_QI, + QI_FTYPE_V8DF_V8DF_INT_QI_INT, HI_FTYPE_V16SF_V16SF_INT, + HI_FTYPE_V16SF_V16SF_INT_HI, HI_FTYPE_V16SF_V16SF_INT_HI_INT, + QI_FTYPE_V2DF_V2DF_INT, QI_FTYPE_V2DF_V2DF_INT_QI, + QI_FTYPE_V2DF_V2DF_INT_QI_INT, QI_FTYPE_V4SF_V4SF_INT, + QI_FTYPE_V4SF_V4SF_INT_QI, QI_FTYPE_V4SF_V4SF_INT_QI_INT, + V16SI_FTYPE_HI, V8DI_FTYPE_QI, V8DF_FTYPE_V8DF_V8DF_V8DF, + V16SF_FTYPE_V16SF_V16SF_V16SF, V8DF_FTYPE_V8DF_V8DF_QI, + V8DF_FTYPE_V8SF_V8DF_QI, V8DF_FTYPE_V8SI_V8DF_QI, + V8DI_FTYPE_V8SI_V8DI_QI, V8DI_FTYPE_V8HI_V8DI_QI, + V8DI_FTYPE_V16QI_V8DI_QI, V8DI_FTYPE_V8DI_V8DI_V8DI_QI, + V8DF_FTYPE_V8DI_V8DF_V8DF, V8DF_FTYPE_V8DI_V8DF_V8DF_QI, + V8DF_FTYPE_V8DF_V8DI_V8DF_QI, 
V8DF_FTYPE_V8DF_V8DF_V8DF_QI, + V16SI_FTYPE_V16SI_V16SI_V16SI_HI, V2DF_FTYPE_V2DF_V2DF_V2DF_QI, + V2DF_FTYPE_V2DF_V4SF_V2DF_QI, V16SF_FTYPE_V16SF_V16SF_HI, + V16SF_FTYPE_V16SI_V16SF_HI, V16SF_FTYPE_V16SF_V16SF_V16SF_HI, + V16SF_FTYPE_V16SI_V16SF_V16SF, V16SF_FTYPE_V16SI_V16SF_V16SF_HI, + V16SF_FTYPE_V16SF_V16SI_V16SF_HI, V4SF_FTYPE_V4SF_V2DF_V4SF_QI, + V4SF_FTYPE_V4SF_V4SF_V4SF_QI, V16SF_FTYPE_V4SF_V16SF_HI, + V8DF_FTYPE_V4DF_V8DF_QI, V8DF_FTYPE_V2DF_V8DF_QI, + V16SI_FTYPE_V4SI_V16SI_HI, V16SI_FTYPE_SI_V16SI_HI, + V16SI_FTYPE_V16HI_V16SI_HI, V16SI_FTYPE_V16QI_V16SI_HI, + V8SI_FTYPE_V8DF_V8SI_QI, V8DI_FTYPE_V4DI_V8DI_QI, + V8DI_FTYPE_V2DI_V8DI_QI, V8DI_FTYPE_DI_V8DI_QI, + V16SF_FTYPE_PCV16SF_V16SF_HI, V8DF_FTYPE_PCV8DF_V8DF_QI, + V16SI_FTYPE_PCV16SI_V16SI_HI, V8DI_FTYPE_PCV8DI_V8DI_QI, + V2DF_FTYPE_PCDOUBLE_V2DF_QI, V4SF_FTYPE_PCFLOAT_V4SF_QI, + V16QI_FTYPE_V16SI_V16QI_HI, V16HI_FTYPE_V16SI_V16HI_HI, + V8SI_FTYPE_V8DI_V8SI_QI, V8HI_FTYPE_V8DI_V8HI_QI, + V16QI_FTYPE_V8DI_V16QI_QI, VOID_FTYPE_PV8DF_V8DF_QI, + VOID_FTYPE_PV16SF_V16SF_HI, VOID_FTYPE_PV8DI_V8DI_QI, + VOID_FTYPE_PV16SI_V16SI_HI, VOID_FTYPE_PDOUBLE_V2DF_QI, + VOID_FTYPE_PFLOAT_V4SF_QI, V16SI_FTYPE_V16SF_V16SI_HI, + V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI, + V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI, V8DI_FTYPE_V8DI_V8DI_V8DI, + V16SI_FTYPE_V16SI_V16SI_V16SI, V8DF_FTYPE_V8DF_V8DI_V8DF, + V16SF_FTYPE_V16SF_V16SI_V16SF, V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI, + V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI, V8DI_FTYPE_V16SI_V16SI_V8DI_QI, + UINT64_FTYPE_V2DF_INT, UINT64_FTYPE_V4SF_INT, UINT_FTYPE_V2DF_INT, + UINT_FTYPE_V4SF_INT, INT64_FTYPE_V2DF_INT, INT64_FTYPE_V4SF_INT, + INT_FTYPE_V2DF_INT, INT_FTYPE_V4SF_INT, V2DF_FTYPE_V2DF_UINT64_INT, + V4SF_FTYPE_V4SF_UINT64_INT, V4SF_FTYPE_V4SF_UINT_INT, + V2DF_FTYPE_V2DF_INT64_INT, V4SF_FTYPE_V4SF_INT64_INT, + V4SF_FTYPE_V4SF_INT_INT, V16SI_FTYPE_V16SF_V16SI_HI_INT, + V16SF_FTYPE_V16SI_V16SF_HI_INT, V16SF_FTYPE_V16SF_V16SF_HI_INT, + V16SF_FTYPE_V16HI_V16SF_HI_INT, V8SI_FTYPE_V8DF_V8SI_QI_INT, + 
V8SF_FTYPE_V8DF_V8SF_QI_INT, V8DF_FTYPE_V8DF_V8DF_QI_INT, + V8DF_FTYPE_V8SF_V8DF_QI_INT, V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT, + V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT, V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT, + V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT, V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT, + V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT, V2DF_FTYPE_V2DF_V2DF_V2DF_INT, + V16SF_FTYPE_V16SF_INT_V16SF_HI_INT, V8DF_FTYPE_V8DF_INT_V8DF_QI_INT, + V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT, + V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT, V8DI_FTYPE_V8DI_SI_V8DI_V8DI, + V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT, + V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT, + V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT, + V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT, + V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT, + V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT, + V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT, + V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT, + V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT, + V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT, + V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT, + V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT, + VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT, + VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT, + VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT, + VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT, + VOID_FTYPE_PINT_HI_V16SI_V16SI_INT, + VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT, + VOID_FTYPE_PINT_QI_V8DI_V8SI_INT, + VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT, + VOID_FTYPE_HI_V16SI_PCINT_INT_INT, VOID_FTYPE_QI_V8DI_PCINT_INT_INT. + (ALIAS): Add DEF_FUNCTION_TYPE_ALIAS (V16SI_FTYPE_V8DF_V8DF, ROUND). 
+ * config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_ADDPD512, + IX86_BUILTIN_ADDPS512, IX86_BUILTIN_ADDSD_MASK, + IX86_BUILTIN_ADDSS_MASK, IX86_BUILTIN_ALIGND512, + IX86_BUILTIN_ALIGNQ512, IX86_BUILTIN_BLENDMD512, + IX86_BUILTIN_BLENDMPD512, IX86_BUILTIN_BLENDMPS512, + IX86_BUILTIN_BLENDMQ512, IX86_BUILTIN_BROADCASTF32X4_512, + IX86_BUILTIN_BROADCASTF64X4_512, IX86_BUILTIN_BROADCASTI32X4_512, + IX86_BUILTIN_BROADCASTI64X4_512, IX86_BUILTIN_BROADCASTSD512, + IX86_BUILTIN_BROADCASTSS512, IX86_BUILTIN_CMPD512, + IX86_BUILTIN_CMPPD512, IX86_BUILTIN_CMPPS512, IX86_BUILTIN_CMPQ512, + IX86_BUILTIN_CMPSD_MASK, IX86_BUILTIN_CMPSS_MASK, IX86_BUILTIN_COMIDF, + IX86_BUILTIN_COMISF, IX86_BUILTIN_COMPRESSPD512, + IX86_BUILTIN_COMPRESSPDSTORE512, IX86_BUILTIN_COMPRESSPS512, + IX86_BUILTIN_COMPRESSPSSTORE512, IX86_BUILTIN_CVTDQ2PD512, + IX86_BUILTIN_CVTDQ2PS512, IX86_BUILTIN_CVTPD2DQ512, + IX86_BUILTIN_CVTPD2PS512, IX86_BUILTIN_CVTPD2UDQ512, + IX86_BUILTIN_CVTPH2PS512, IX86_BUILTIN_CVTPS2DQ512, + IX86_BUILTIN_CVTPS2PD512, IX86_BUILTIN_CVTPS2PH512, + IX86_BUILTIN_CVTPS2UDQ512, IX86_BUILTIN_CVTSD2SS_MASK, + IX86_BUILTIN_CVTSI2SD64, IX86_BUILTIN_CVTSI2SS32, + IX86_BUILTIN_CVTSI2SS64, IX86_BUILTIN_CVTSS2SD_MASK, + IX86_BUILTIN_CVTTPD2DQ512, IX86_BUILTIN_CVTTPD2UDQ512, + IX86_BUILTIN_CVTTPS2DQ512, IX86_BUILTIN_CVTTPS2UDQ512, + IX86_BUILTIN_CVTUDQ2PD512, IX86_BUILTIN_CVTUDQ2PS512, + IX86_BUILTIN_CVTUSI2SD32, IX86_BUILTIN_CVTUSI2SD64, + IX86_BUILTIN_CVTUSI2SS32, IX86_BUILTIN_CVTUSI2SS64, + IX86_BUILTIN_DIVPD512, IX86_BUILTIN_DIVPS512, IX86_BUILTIN_DIVSD_MASK, + IX86_BUILTIN_DIVSS_MASK, IX86_BUILTIN_EXPANDPD512, + IX86_BUILTIN_EXPANDPD512Z, IX86_BUILTIN_EXPANDPDLOAD512, + IX86_BUILTIN_EXPANDPDLOAD512Z, IX86_BUILTIN_EXPANDPS512, + IX86_BUILTIN_EXPANDPS512Z, IX86_BUILTIN_EXPANDPSLOAD512, + IX86_BUILTIN_EXPANDPSLOAD512Z, IX86_BUILTIN_EXTRACTF32X4, + IX86_BUILTIN_EXTRACTF64X4, IX86_BUILTIN_EXTRACTI32X4, + IX86_BUILTIN_EXTRACTI64X4, IX86_BUILTIN_FIXUPIMMPD512_MASK, + 
IX86_BUILTIN_FIXUPIMMPD512_MASKZ, IX86_BUILTIN_FIXUPIMMPS512_MASK, + IX86_BUILTIN_FIXUPIMMPS512_MASKZ, IX86_BUILTIN_FIXUPIMMSD128_MASK, + IX86_BUILTIN_FIXUPIMMSD128_MASKZ, IX86_BUILTIN_FIXUPIMMSS128_MASK, + IX86_BUILTIN_FIXUPIMMSS128_MASKZ, IX86_BUILTIN_GETEXPPD512, + IX86_BUILTIN_GETEXPPS512, IX86_BUILTIN_GETEXPSD128, + IX86_BUILTIN_GETEXPSS128, IX86_BUILTIN_GETMANTPD512, + IX86_BUILTIN_GETMANTPS512, IX86_BUILTIN_GETMANTSD128, + IX86_BUILTIN_GETMANTSS128, IX86_BUILTIN_INSERTF32X4, + IX86_BUILTIN_INSERTF64X4, IX86_BUILTIN_INSERTI32X4, + IX86_BUILTIN_INSERTI64X4, IX86_BUILTIN_LOADAPD512, + IX86_BUILTIN_LOADAPS512, IX86_BUILTIN_LOADDQUDI512, + IX86_BUILTIN_LOADDQUSI512, IX86_BUILTIN_LOADSD, IX86_BUILTIN_LOADSS, + IX86_BUILTIN_LOADUPD512, IX86_BUILTIN_LOADUPS512, + IX86_BUILTIN_MAXPD512, IX86_BUILTIN_MAXPS512, IX86_BUILTIN_MAXSD_MASK, + IX86_BUILTIN_MAXSS_MASK, IX86_BUILTIN_MINPD512, IX86_BUILTIN_MINPS512, + IX86_BUILTIN_MINSD_MASK, IX86_BUILTIN_MINSS_MASK, + IX86_BUILTIN_MOVAPD512, IX86_BUILTIN_MOVAPS512, + IX86_BUILTIN_MOVDDUP512, IX86_BUILTIN_MOVDQA32LOAD512, + IX86_BUILTIN_MOVDQA32STORE512, IX86_BUILTIN_MOVDQA32_512, + IX86_BUILTIN_MOVDQA64LOAD512, IX86_BUILTIN_MOVDQA64STORE512, + IX86_BUILTIN_MOVDQA64_512, IX86_BUILTIN_MOVESD, IX86_BUILTIN_MOVESS, + IX86_BUILTIN_MOVNTDQ512, IX86_BUILTIN_MOVNTPD512, + IX86_BUILTIN_MOVNTPS512, IX86_BUILTIN_MOVSHDUP512, + IX86_BUILTIN_MOVSLDUP512, IX86_BUILTIN_MULPD512, + IX86_BUILTIN_MULPS512, IX86_BUILTIN_MULSD_MASK, + IX86_BUILTIN_MULSS_MASK, IX86_BUILTIN_PABSD512, IX86_BUILTIN_PABSQ512, + IX86_BUILTIN_PADDD512, IX86_BUILTIN_PADDQ512, IX86_BUILTIN_PANDD512, + IX86_BUILTIN_PANDND512, IX86_BUILTIN_PANDNQ512, IX86_BUILTIN_PANDQ512, + IX86_BUILTIN_PBROADCASTD512, IX86_BUILTIN_PBROADCASTD512_GPR, + IX86_BUILTIN_PBROADCASTMB512, IX86_BUILTIN_PBROADCASTMW512, + IX86_BUILTIN_PBROADCASTQ512, IX86_BUILTIN_PBROADCASTQ512_GPR, + IX86_BUILTIN_PBROADCASTQ512_MEM, IX86_BUILTIN_PCMPEQD512_MASK, + IX86_BUILTIN_PCMPEQQ512_MASK, 
IX86_BUILTIN_PCMPGTD512_MASK, + IX86_BUILTIN_PCMPGTQ512_MASK, IX86_BUILTIN_PCOMPRESSD512, + IX86_BUILTIN_PCOMPRESSDSTORE512, IX86_BUILTIN_PCOMPRESSQ512, + IX86_BUILTIN_PCOMPRESSQSTORE512, IX86_BUILTIN_PEXPANDD512, + IX86_BUILTIN_PEXPANDD512Z, IX86_BUILTIN_PEXPANDDLOAD512, + IX86_BUILTIN_PEXPANDDLOAD512Z, IX86_BUILTIN_PEXPANDQ512, + IX86_BUILTIN_PEXPANDQ512Z, IX86_BUILTIN_PEXPANDQLOAD512, + IX86_BUILTIN_PEXPANDQLOAD512Z, IX86_BUILTIN_PMAXSD512, + IX86_BUILTIN_PMAXSQ512, IX86_BUILTIN_PMAXUD512, + IX86_BUILTIN_PMAXUQ512, IX86_BUILTIN_PMINSD512, + IX86_BUILTIN_PMINSQ512, IX86_BUILTIN_PMINUD512, + IX86_BUILTIN_PMINUQ512, IX86_BUILTIN_PMOVDB512, + IX86_BUILTIN_PMOVDW512, IX86_BUILTIN_PMOVQB512, + IX86_BUILTIN_PMOVQD512, IX86_BUILTIN_PMOVQW512, + IX86_BUILTIN_PMOVSDB512, IX86_BUILTIN_PMOVSDW512, + IX86_BUILTIN_PMOVSQB512, IX86_BUILTIN_PMOVSQD512, + IX86_BUILTIN_PMOVSQW512, IX86_BUILTIN_PMOVSXBD512, + IX86_BUILTIN_PMOVSXBQ512, IX86_BUILTIN_PMOVSXDQ512, + IX86_BUILTIN_PMOVSXWD512, IX86_BUILTIN_PMOVSXWQ512, + IX86_BUILTIN_PMOVUSDB512, IX86_BUILTIN_PMOVUSDW512, + IX86_BUILTIN_PMOVUSQB512, IX86_BUILTIN_PMOVUSQD512, + IX86_BUILTIN_PMOVUSQW512, IX86_BUILTIN_PMOVZXBD512, + IX86_BUILTIN_PMOVZXBQ512, IX86_BUILTIN_PMOVZXDQ512, + IX86_BUILTIN_PMOVZXWD512, IX86_BUILTIN_PMOVZXWQ512, + IX86_BUILTIN_PMULDQ512, IX86_BUILTIN_PMULLD512, + IX86_BUILTIN_PMULUDQ512, IX86_BUILTIN_PORD512, IX86_BUILTIN_PORQ512, + IX86_BUILTIN_PROLD512, IX86_BUILTIN_PROLQ512, IX86_BUILTIN_PROLVD512, + IX86_BUILTIN_PROLVQ512, IX86_BUILTIN_PRORD512, IX86_BUILTIN_PRORQ512, + IX86_BUILTIN_PRORVD512, IX86_BUILTIN_PRORVQ512, + IX86_BUILTIN_PSHUFD512, IX86_BUILTIN_PSLLD512, IX86_BUILTIN_PSLLDI512, + IX86_BUILTIN_PSLLQ512, IX86_BUILTIN_PSLLQI512, + IX86_BUILTIN_PSLLVV16SI, IX86_BUILTIN_PSLLVV8DI, + IX86_BUILTIN_PSRAD512, IX86_BUILTIN_PSRADI512, IX86_BUILTIN_PSRAQ512, + IX86_BUILTIN_PSRAQI512, IX86_BUILTIN_PSRAVV16SI, + IX86_BUILTIN_PSRAVV8DI, IX86_BUILTIN_PSRLD512, IX86_BUILTIN_PSRLDI512, + IX86_BUILTIN_PSRLQ512, 
IX86_BUILTIN_PSRLQI512, + IX86_BUILTIN_PSRLVV16SI, IX86_BUILTIN_PSRLVV8DI, + IX86_BUILTIN_PSUBD512, IX86_BUILTIN_PSUBQ512, IX86_BUILTIN_PTESTMD512, + IX86_BUILTIN_PTESTMQ512, IX86_BUILTIN_PTESTNMD512, + IX86_BUILTIN_PTESTNMQ512, IX86_BUILTIN_PUNPCKHDQ512, + IX86_BUILTIN_PUNPCKHQDQ512, IX86_BUILTIN_PUNPCKLDQ512, + IX86_BUILTIN_PUNPCKLQDQ512, IX86_BUILTIN_PXORD512, + IX86_BUILTIN_PXORQ512, IX86_BUILTIN_RCP14PD512, + IX86_BUILTIN_RCP14PS512, IX86_BUILTIN_RCP14SD, IX86_BUILTIN_RCP14SS, + IX86_BUILTIN_RNDSCALEPD, IX86_BUILTIN_RNDSCALEPS, + IX86_BUILTIN_RNDSCALESD, IX86_BUILTIN_RNDSCALESS, + IX86_BUILTIN_RSQRT14PD512, IX86_BUILTIN_RSQRT14PS512, + IX86_BUILTIN_RSQRT14SD, IX86_BUILTIN_RSQRT14SS, + IX86_BUILTIN_SCALEFPD512, IX86_BUILTIN_SCALEFPS512, + IX86_BUILTIN_SCALEFSD, IX86_BUILTIN_SCALEFSS, IX86_BUILTIN_SHUFPD512, + IX86_BUILTIN_SHUFPS512, IX86_BUILTIN_SHUF_F32x4, + IX86_BUILTIN_SHUF_F64x2, IX86_BUILTIN_SHUF_I32x4, + IX86_BUILTIN_SHUF_I64x2, + IX86_BUILTIN_SQRTPD512_MASK, IX86_BUILTIN_SQRTPS512_MASK, + IX86_BUILTIN_SQRTSD_MASK, + IX86_BUILTIN_SQRTSS_MASK, IX86_BUILTIN_STOREAPD512, + IX86_BUILTIN_STOREAPS512, IX86_BUILTIN_STOREDQUDI512, + IX86_BUILTIN_STOREDQUSI512, IX86_BUILTIN_STORESD, + IX86_BUILTIN_STORESS, IX86_BUILTIN_STOREUPD512, + IX86_BUILTIN_STOREUPS512, IX86_BUILTIN_SUBPD512, + IX86_BUILTIN_SUBPS512, IX86_BUILTIN_SUBSD_MASK, + IX86_BUILTIN_SUBSS_MASK, IX86_BUILTIN_UCMPD512, IX86_BUILTIN_UCMPQ512, + IX86_BUILTIN_UNPCKHPD512, IX86_BUILTIN_UNPCKHPS512, + IX86_BUILTIN_UNPCKLPD512, IX86_BUILTIN_UNPCKLPS512, + IX86_BUILTIN_VCVTSD2SI32, IX86_BUILTIN_VCVTSD2SI64, + IX86_BUILTIN_VCVTSD2USI32, IX86_BUILTIN_VCVTSD2USI64, + IX86_BUILTIN_VCVTSS2SI32, IX86_BUILTIN_VCVTSS2SI64, + IX86_BUILTIN_VCVTSS2USI32, IX86_BUILTIN_VCVTSS2USI64, + IX86_BUILTIN_VCVTTSD2SI32, IX86_BUILTIN_VCVTTSD2SI64, + IX86_BUILTIN_VCVTTSD2USI32, IX86_BUILTIN_VCVTTSD2USI64, + IX86_BUILTIN_VCVTTSS2SI32, IX86_BUILTIN_VCVTTSS2SI64, + IX86_BUILTIN_VCVTTSS2USI32, IX86_BUILTIN_VCVTTSS2USI64, + 
IX86_BUILTIN_VFMADDPD512_MASK, IX86_BUILTIN_VFMADDPD512_MASK3, + IX86_BUILTIN_VFMADDPD512_MASKZ, IX86_BUILTIN_VFMADDPS512_MASK, + IX86_BUILTIN_VFMADDPS512_MASK3, IX86_BUILTIN_VFMADDPS512_MASKZ, + IX86_BUILTIN_VFMADDSD3_MASK, IX86_BUILTIN_VFMADDSD3_MASK3, + IX86_BUILTIN_VFMADDSD3_MASKZ, IX86_BUILTIN_VFMADDSS3_MASK, + IX86_BUILTIN_VFMADDSS3_MASK3, IX86_BUILTIN_VFMADDSS3_MASKZ, + IX86_BUILTIN_VFMADDSUBPD512_MASK, IX86_BUILTIN_VFMADDSUBPD512_MASK3, + IX86_BUILTIN_VFMADDSUBPD512_MASKZ, IX86_BUILTIN_VFMADDSUBPS512_MASK, + IX86_BUILTIN_VFMADDSUBPS512_MASK3, IX86_BUILTIN_VFMADDSUBPS512_MASKZ, + IX86_BUILTIN_VFMSUBADDPD512_MASK3, IX86_BUILTIN_VFMSUBADDPS512_MASK3, + IX86_BUILTIN_VFMSUBPD512_MASK3, IX86_BUILTIN_VFMSUBPS512_MASK3, + IX86_BUILTIN_VFMSUBSD3_MASK3, IX86_BUILTIN_VFMSUBSS3_MASK3, + IX86_BUILTIN_VFNMADDPD512_MASK, IX86_BUILTIN_VFNMADDPS512_MASK, + IX86_BUILTIN_VFNMSUBPD512_MASK, IX86_BUILTIN_VFNMSUBPD512_MASK3, + IX86_BUILTIN_VFNMSUBPS512_MASK, IX86_BUILTIN_VFNMSUBPS512_MASK3, + IX86_BUILTIN_VPCLZCNTD512, IX86_BUILTIN_VPCLZCNTQ512, + IX86_BUILTIN_VPCONFLICTD512, IX86_BUILTIN_VPCONFLICTQ512, + IX86_BUILTIN_VPERMDF512, IX86_BUILTIN_VPERMDI512, + IX86_BUILTIN_VPERMI2VARD512, IX86_BUILTIN_VPERMI2VARPD512, + IX86_BUILTIN_VPERMI2VARPS512, IX86_BUILTIN_VPERMI2VARQ512, + IX86_BUILTIN_VPERMILPD512, IX86_BUILTIN_VPERMILPS512, + IX86_BUILTIN_VPERMILVARPD512, IX86_BUILTIN_VPERMILVARPS512, + IX86_BUILTIN_VPERMT2VARD512, IX86_BUILTIN_VPERMT2VARD512_MASKZ, + IX86_BUILTIN_VPERMT2VARPD512, IX86_BUILTIN_VPERMT2VARPD512_MASKZ, + IX86_BUILTIN_VPERMT2VARPS512, IX86_BUILTIN_VPERMT2VARPS512_MASKZ, + IX86_BUILTIN_VPERMT2VARQ512, IX86_BUILTIN_VPERMT2VARQ512_MASKZ, + IX86_BUILTIN_VPERMVARDF512, IX86_BUILTIN_VPERMVARDI512, + IX86_BUILTIN_VPERMVARSF512, IX86_BUILTIN_VPERMVARSI512, + IX86_BUILTIN_VTERNLOGD512_MASK, IX86_BUILTIN_VTERNLOGD512_MASKZ, + IX86_BUILTIN_VTERNLOGQ512_MASK, IX86_BUILTIN_VTERNLOGQ512_MASKZ, + IX86_BUILTIN_KAND16, IX86_BUILTIN_KANDN16, IX86_BUILTIN_KNOT16, + 
IX86_BUILTIN_KOR16, IX86_BUILTIN_KORTESTC16, IX86_BUILTIN_KORTESTZ16, + IX86_BUILTIN_KUNPCKBW, IX86_BUILTIN_KXNOR16, IX86_BUILTIN_KXOR16, + IX86_BUILTIN_GATHER3SIV8DI, + IX86_BUILTIN_SCATTERDIV16SF, IX86_BUILTIN_SCATTERDIV16SI, + IX86_BUILTIN_SCATTERDIV8DF, IX86_BUILTIN_SCATTERDIV8DI, + IX86_BUILTIN_SCATTERSIV16SF, IX86_BUILTIN_SCATTERSIV16SI, + IX86_BUILTIN_SCATTERSIV8DF, IX86_BUILTIN_SCATTERSIV8DI, + IX86_BUILTIN_GATHERPFDPS, IX86_BUILTIN_GATHERPFQPS, + IX86_BUILTIN_SCATTERPFDPS, IX86_BUILTIN_SCATTERPFQPS, + IX86_BUILTIN_EXP2PD_MASK, IX86_BUILTIN_EXP2PS_MASK, + IX86_BUILTIN_RCP28PD, IX86_BUILTIN_RCP28PS, + IX86_BUILTIN_RSQRT28PD, IX86_BUILTIN_RSQRT28PS. + (bdesc_special_args): Add __builtin_ia32_compressstoresf512_mask, + __builtin_ia32_compressstoresi512_mask, + __builtin_ia32_compressstoredf512_mask, + __builtin_ia32_compressstoredi512_mask, + __builtin_ia32_expandloadsf512_mask, + __builtin_ia32_expandloadsf512_maskz, + __builtin_ia32_expandloadsi512_mask, + __builtin_ia32_expandloadsi512_maskz, + __builtin_ia32_expandloaddf512_mask, + __builtin_ia32_expandloaddf512_maskz, + __builtin_ia32_expandloaddi512_mask, + __builtin_ia32_expandloaddi512_maskz, + __builtin_ia32_loaddqusi512_mask, __builtin_ia32_loaddqudi512_mask, + __builtin_ia32_loadsd_mask, __builtin_ia32_loadss_mask, + __builtin_ia32_loadupd512_mask, __builtin_ia32_loadups512_mask, + __builtin_ia32_loadaps512_mask, __builtin_ia32_movdqa32load512_mask, + __builtin_ia32_loadapd512_mask, __builtin_ia32_movdqa64load512_mask, + __builtin_ia32_movntps512, __builtin_ia32_movntpd512, + __builtin_ia32_movntdq512, __builtin_ia32_storedqusi512_mask, + __builtin_ia32_storedqudi512_mask, __builtin_ia32_storesd_mask, + __builtin_ia32_storess_mask, __builtin_ia32_storeupd512_mask, + __builtin_ia32_storeups512_mask, __builtin_ia32_storeaps512_mask, + __builtin_ia32_movdqa32store512_mask, __builtin_ia32_storeapd512_mask, + __builtin_ia32_movdqa64store512_mask, __builtin_ia32_alignd512_mask, + 
__builtin_ia32_alignq512_mask, __builtin_ia32_blendmd_512_mask, + __builtin_ia32_blendmpd_512_mask, __builtin_ia32_blendmps_512_mask, + __builtin_ia32_blendmq_512_mask, __builtin_ia32_broadcastf32x4_512, + __builtin_ia32_broadcastf64x4_512, __builtin_ia32_broadcasti32x4_512, + __builtin_ia32_broadcasti64x4_512, __builtin_ia32_broadcastsd512, + __builtin_ia32_broadcastss512, __builtin_ia32_cmpd512_mask, + __builtin_ia32_cmpq512_mask, __builtin_ia32_compressdf512_mask, + __builtin_ia32_compresssf512_mask, __builtin_ia32_cvtdq2pd512_mask, + __builtin_ia32_vcvtps2ph512_mask, __builtin_ia32_cvtudq2pd512_mask, + __builtin_ia32_cvtusi2sd32, __builtin_ia32_expanddf512_mask, + __builtin_ia32_expanddf512_maskz, __builtin_ia32_expandsf512_mask, + __builtin_ia32_expandsf512_maskz, __builtin_ia32_extractf32x4_mask, + __builtin_ia32_extractf64x4_mask, __builtin_ia32_extracti32x4_mask, + __builtin_ia32_extracti64x4_mask, __builtin_ia32_insertf32x4_mask, + __builtin_ia32_insertf64x4_mask, __builtin_ia32_inserti32x4_mask, + __builtin_ia32_inserti64x4_mask, __builtin_ia32_movapd512_mask, + __builtin_ia32_movaps512_mask, __builtin_ia32_movddup512_mask, + __builtin_ia32_movdqa32_512_mask, __builtin_ia32_movdqa64_512_mask, + __builtin_ia32_movesd_mask, __builtin_ia32_movess_mask, + __builtin_ia32_movshdup512_mask, __builtin_ia32_movsldup512_mask, + __builtin_ia32_pabsd512_mask, __builtin_ia32_pabsq512_mask, + __builtin_ia32_paddd512_mask, __builtin_ia32_paddq512_mask, + __builtin_ia32_pandd512_mask, __builtin_ia32_pandnd512_mask, + __builtin_ia32_pandnq512_mask, __builtin_ia32_pandq512_mask, + __builtin_ia32_pbroadcastd512, __builtin_ia32_pbroadcastd512_gpr_mask, + __builtin_ia32_broadcastmb512, __builtin_ia32_broadcastmw512, + __builtin_ia32_pbroadcastq512, __builtin_ia32_pbroadcastq512_gpr_mask, + __builtin_ia32_pbroadcastq512_mem_mask, + __builtin_ia32_pcmpeqd512_mask, __builtin_ia32_pcmpeqq512_mask, + __builtin_ia32_pcmpgtd512_mask, __builtin_ia32_pcmpgtq512_mask, + 
__builtin_ia32_compresssi512_mask, __builtin_ia32_compressdi512_mask, + __builtin_ia32_expandsi512_mask, __builtin_ia32_expandsi512_maskz, + __builtin_ia32_expanddi512_mask, __builtin_ia32_expanddi512_maskz, + __builtin_ia32_pmaxsd512_mask, __builtin_ia32_pmaxsq512_mask, + __builtin_ia32_pmaxud512_mask, __builtin_ia32_pmaxuq512_mask, + __builtin_ia32_pminsd512_mask, __builtin_ia32_pminsq512_mask, + __builtin_ia32_pminud512_mask, __builtin_ia32_pminuq512_mask, + __builtin_ia32_pmovdb512_mask, __builtin_ia32_pmovdw512_mask, + __builtin_ia32_pmovqb512_mask, __builtin_ia32_pmovqd512_mask, + __builtin_ia32_pmovqw512_mask, __builtin_ia32_pmovsdb512_mask, + __builtin_ia32_pmovsdw512_mask, __builtin_ia32_pmovsqb512_mask, + __builtin_ia32_pmovsqd512_mask, __builtin_ia32_pmovsqw512_mask, + __builtin_ia32_pmovsxbd512_mask, __builtin_ia32_pmovsxbq512_mask, + __builtin_ia32_pmovsxdq512_mask, __builtin_ia32_pmovsxwd512_mask, + __builtin_ia32_pmovsxwq512_mask, __builtin_ia32_pmovusdb512_mask, + __builtin_ia32_pmovusdw512_mask, __builtin_ia32_pmovusqb512_mask, + __builtin_ia32_pmovusqd512_mask, __builtin_ia32_pmovusqw512_mask, + __builtin_ia32_pmovzxbd512_mask, __builtin_ia32_pmovzxbq512_mask, + __builtin_ia32_pmovzxdq512_mask, __builtin_ia32_pmovzxwd512_mask, + __builtin_ia32_pmovzxwq512_mask, __builtin_ia32_pmuldq512_mask, + __builtin_ia32_pmulld512_mask, __builtin_ia32_pmuludq512_mask, + __builtin_ia32_pord512_mask, __builtin_ia32_porq512_mask, + __builtin_ia32_prold512_mask, __builtin_ia32_prolq512_mask, + __builtin_ia32_prolvd512_mask, __builtin_ia32_prolvq512_mask, + __builtin_ia32_prord512_mask, __builtin_ia32_prorq512_mask, + __builtin_ia32_prorvd512_mask, __builtin_ia32_prorvq512_mask, + __builtin_ia32_pshufd512_mask, __builtin_ia32_pslld512_mask, + __builtin_ia32_pslldi512_mask, __builtin_ia32_psllq512_mask, + __builtin_ia32_psllqi512_mask, __builtin_ia32_psllv16si_mask, + __builtin_ia32_psllv8di_mask, __builtin_ia32_psrad512_mask, + __builtin_ia32_psradi512_mask, 
__builtin_ia32_psraq512_mask, + __builtin_ia32_psraqi512_mask, __builtin_ia32_psrav16si_mask, + __builtin_ia32_psrav8di_mask, __builtin_ia32_psrld512_mask, + __builtin_ia32_psrldi512_mask, __builtin_ia32_psrlq512_mask, + __builtin_ia32_psrlqi512_mask, __builtin_ia32_psrlv16si_mask, + __builtin_ia32_psrlv8di_mask, __builtin_ia32_psubd512_mask, + __builtin_ia32_psubq512_mask, __builtin_ia32_ptestmd512, + __builtin_ia32_ptestmq512, __builtin_ia32_ptestnmd512, + __builtin_ia32_ptestnmq512, __builtin_ia32_punpckhdq512_mask, + __builtin_ia32_punpckhqdq512_mask, __builtin_ia32_punpckldq512_mask, + __builtin_ia32_punpcklqdq512_mask, __builtin_ia32_pxord512_mask, + __builtin_ia32_pxorq512_mask, __builtin_ia32_rcp14pd512_mask, + __builtin_ia32_rcp14ps512_mask, __builtin_ia32_rcp14sd_mask, + __builtin_ia32_rcp14ss_mask, __builtin_ia32_rsqrt14pd512_mask, + __builtin_ia32_rsqrt14ps512_mask, __builtin_ia32_rsqrt14sd_mask, + __builtin_ia32_rsqrt14ss_mask, __builtin_ia32_shufpd512_mask, + __builtin_ia32_shufps512_mask, __builtin_ia32_shuf_f32x4_mask, + __builtin_ia32_shuf_f64x2_mask, __builtin_ia32_shuf_i32x4_mask, + __builtin_ia32_shuf_i64x2_mask, __builtin_ia32_ucmpd512_mask, + __builtin_ia32_ucmpq512_mask, __builtin_ia32_unpckhpd512_mask, + __builtin_ia32_unpckhps512_mask, __builtin_ia32_unpcklpd512_mask, + __builtin_ia32_unpcklps512_mask, __builtin_ia32_vplzcntd_512_mask, + __builtin_ia32_vplzcntq_512_mask, + __builtin_ia32_vpconflictsi_512_mask, + __builtin_ia32_vpconflictdi_512_mask, __builtin_ia32_permdf512_mask, + __builtin_ia32_permdi512_mask, __builtin_ia32_vpermi2vard512_mask, + __builtin_ia32_vpermi2varpd512_mask, + __builtin_ia32_vpermi2varps512_mask, + __builtin_ia32_vpermi2varq512_mask, __builtin_ia32_vpermilpd512_mask, + __builtin_ia32_vpermilps512_mask, __builtin_ia32_vpermilvarpd512_mask, + __builtin_ia32_vpermilvarps512_mask, + __builtin_ia32_vpermt2vard512_mask, + __builtin_ia32_vpermt2vard512_maskz, + __builtin_ia32_vpermt2varpd512_mask, + 
__builtin_ia32_vpermt2varpd512_maskz, + __builtin_ia32_vpermt2varps512_mask, + __builtin_ia32_vpermt2varps512_maskz, + __builtin_ia32_vpermt2varq512_mask, + __builtin_ia32_vpermt2varq512_maskz, __builtin_ia32_permvardf512_mask, + __builtin_ia32_permvardi512_mask, __builtin_ia32_permvarsf512_mask, + __builtin_ia32_permvarsi512_mask, __builtin_ia32_pternlogd512_mask, + __builtin_ia32_pternlogd512_maskz, __builtin_ia32_pternlogq512_mask, + __builtin_ia32_pternlogq512_maskz, __builtin_ia32_copysignps512, + __builtin_ia32_copysignpd512, __builtin_ia32_sqrtpd512, + __builtin_ia32_sqrtps512, __builtin_ia32_exp2ps, + __builtin_ia32_roundpd_az_vec_pack_sfix512, + __builtin_ia32_floorpd_vec_pack_sfix512, + __builtin_ia32_ceilpd_vec_pack_sfix512, __builtin_ia32_kandhi, + __builtin_ia32_kandnhi, __builtin_ia32_knothi, __builtin_ia32_korhi, + __builtin_ia32_kortestchi, __builtin_ia32_kortestzhi, + __builtin_ia32_kunpckhi, __builtin_ia32_kxnorhi, + __builtin_ia32_kxorhi, __builtin_ia32_addpd512_mask, + __builtin_ia32_addps512_mask, __builtin_ia32_addsd_mask, + __builtin_ia32_addss_mask, __builtin_ia32_cmppd512_mask, + __builtin_ia32_cmpps512_mask, __builtin_ia32_cmpsd_mask, + __builtin_ia32_cmpss_mask, __builtin_ia32_vcomisd, + __builtin_ia32_vcomiss, __builtin_ia32_cvtdq2ps512_mask, + __builtin_ia32_cvtpd2dq512_mask, __builtin_ia32_cvtpd2ps512_mask, + __builtin_ia32_cvtpd2udq512_mask, __builtin_ia32_vcvtph2ps512_mask, + __builtin_ia32_cvtps2dq512_mask, __builtin_ia32_cvtps2pd512_mask, + __builtin_ia32_cvtps2udq512_mask, __builtin_ia32_cvtsd2ss_mask, + __builtin_ia32_cvtsi2sd64, __builtin_ia32_cvtsi2ss32, + __builtin_ia32_cvtsi2ss64, __builtin_ia32_cvtss2sd_mask, + __builtin_ia32_cvttpd2dq512_mask, __builtin_ia32_cvttpd2udq512_mask, + __builtin_ia32_cvttps2dq512_mask, __builtin_ia32_cvttps2udq512_mask, + __builtin_ia32_cvtudq2ps512_mask, __builtin_ia32_cvtusi2sd64, + __builtin_ia32_cvtusi2ss32, __builtin_ia32_cvtusi2ss64, + __builtin_ia32_divpd512_mask, 
__builtin_ia32_divps512_mask, + __builtin_ia32_divsd_mask, __builtin_ia32_divss_mask, + __builtin_ia32_fixupimmpd512_mask, __builtin_ia32_fixupimmpd512_maskz, + __builtin_ia32_fixupimmps512_mask, __builtin_ia32_fixupimmps512_maskz, + __builtin_ia32_fixupimmsd_mask, __builtin_ia32_fixupimmsd_maskz, + __builtin_ia32_fixupimmss_mask, __builtin_ia32_fixupimmss_maskz, + __builtin_ia32_getexppd512_mask, __builtin_ia32_getexpps512_mask, + __builtin_ia32_getexpsd128_mask, __builtin_ia32_getexpss128_mask, + __builtin_ia32_getmantpd512_mask, __builtin_ia32_getmantps512_mask, + __builtin_ia32_getmantsd_mask, __builtin_ia32_getmantss_mask, + __builtin_ia32_maxpd512_mask, __builtin_ia32_maxps512_mask, + __builtin_ia32_maxsd_mask, __builtin_ia32_maxss_mask, + __builtin_ia32_minpd512_mask, __builtin_ia32_minps512_mask, + __builtin_ia32_minsd_mask, __builtin_ia32_minss_mask, + __builtin_ia32_mulpd512_mask, __builtin_ia32_mulps512_mask, + __builtin_ia32_mulsd_mask, __builtin_ia32_mulss_mask, + __builtin_ia32_rndscalepd_mask, __builtin_ia32_rndscaleps_mask, + __builtin_ia32_rndscalesd_mask, __builtin_ia32_rndscaless_mask, + __builtin_ia32_scalefpd512_mask, __builtin_ia32_scalefps512_mask, + __builtin_ia32_scalefsd_mask, __builtin_ia32_scalefss_mask, + __builtin_ia32_sqrtpd512_mask, __builtin_ia32_sqrtps512_mask, + __builtin_ia32_sqrtsd_mask, __builtin_ia32_sqrtss_mask, + __builtin_ia32_subpd512_mask, __builtin_ia32_subps512_mask, + __builtin_ia32_subsd_mask, __builtin_ia32_subss_mask, + __builtin_ia32_vcvtsd2si32, __builtin_ia32_vcvtsd2si64, + __builtin_ia32_vcvtsd2usi32, __builtin_ia32_vcvtsd2usi64, + __builtin_ia32_vcvtss2si32, __builtin_ia32_vcvtss2si64, + __builtin_ia32_vcvtss2usi32, __builtin_ia32_vcvtss2usi64, + __builtin_ia32_vcvttsd2si32, __builtin_ia32_vcvttsd2si64, + __builtin_ia32_vcvttsd2usi32, __builtin_ia32_vcvttsd2usi64, + __builtin_ia32_vcvttss2si32, __builtin_ia32_vcvttss2si64, + __builtin_ia32_vcvttss2usi32, __builtin_ia32_vcvttss2usi64, + 
__builtin_ia32_vfmaddpd512_mask, __builtin_ia32_vfmaddpd512_mask3, + __builtin_ia32_vfmaddpd512_maskz, __builtin_ia32_vfmaddps512_mask, + __builtin_ia32_vfmaddps512_mask3, __builtin_ia32_vfmaddps512_maskz, + __builtin_ia32_vfmaddsd3_mask, __builtin_ia32_vfmaddsd3_mask3, + __builtin_ia32_vfmaddsd3_maskz, __builtin_ia32_vfmaddss3_mask, + __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, + __builtin_ia32_vfmaddsubpd512_mask, + __builtin_ia32_vfmaddsubpd512_mask3, + __builtin_ia32_vfmaddsubpd512_maskz, + __builtin_ia32_vfmaddsubps512_mask, + __builtin_ia32_vfmaddsubps512_mask3, + __builtin_ia32_vfmaddsubps512_maskz, + __builtin_ia32_vfmsubaddpd512_mask3, + __builtin_ia32_vfmsubaddps512_mask3, __builtin_ia32_vfmsubpd512_mask3, + __builtin_ia32_vfmsubps512_mask3, __builtin_ia32_vfmsubsd3_mask3, + __builtin_ia32_vfmsubss3_mask3, __builtin_ia32_vfnmaddpd512_mask, + __builtin_ia32_vfnmaddps512_mask, __builtin_ia32_vfnmsubpd512_mask, + __builtin_ia32_vfnmsubpd512_mask3, __builtin_ia32_vfnmsubps512_mask, + __builtin_ia32_vfnmsubps512_mask3, __builtin_ia32_exp2pd_mask, + __builtin_ia32_exp2ps_mask, __builtin_ia32_rcp28pd_mask, + __builtin_ia32_rcp28ps_mask, __builtin_ia32_rsqrt28pd_mask, + __builtin_ia32_rsqrt28ps_mask, __builtin_ia32_gathersiv16sf, + __builtin_ia32_gathersiv8df, __builtin_ia32_gatherdiv16sf, + __builtin_ia32_gatherdiv8df, __builtin_ia32_gathersiv16si, + __builtin_ia32_gathersiv8di, __builtin_ia32_gatherdiv16si, + __builtin_ia32_gatherdiv8di, __builtin_ia32_gatheraltsiv8df , + __builtin_ia32_gatheraltdiv8sf , __builtin_ia32_gatheraltsiv8di , + __builtin_ia32_gatheraltdiv8si , __builtin_ia32_scattersiv16sf, + __builtin_ia32_scattersiv8df, __builtin_ia32_scatterdiv16sf, + __builtin_ia32_scatterdiv8df, __builtin_ia32_scattersiv16si, + __builtin_ia32_scattersiv8di, __builtin_ia32_scatterdiv16si, + __builtin_ia32_scatterdiv8di, __builtin_ia32_gatherpfdps, + __builtin_ia32_gatherpfqps, __builtin_ia32_scatterpfdps, + __builtin_ia32_scatterpfqps. 
+ (ix86_init_mmx_sse_builtins): Handle builtins with AVX512 embedded + rounding, builtins for AVX512 gathers/scatters. + (ix86_expand_args_builtin): Handle new function types, add warnings + for masked builtins. + (ix86_erase_embedded_rounding): Handle patterns with embedded rounding. + (ix86_expand_sse_comi_round): Ditto. + (ix86_expand_round_builtin): Ditto. + (ix86_expand_builtin): Handle AVX512's gathers/scatters and kortest{z}. + Call ix86_expand_round_builtin. + * config/i386/immintrin.h: Add avx512fintrin.h, avx512erintrin.h, + avx512pfintrin.h, avx512cdintrin.h. + +2013-12-31 Alexander Ivchenko <alexander.ivchenko@intel.com> + Maxim Kuznetsov <maxim.kuznetsov@intel.com> + Sergey Lega <sergey.s.lega@intel.com> + Anna Tikhonova <anna.tikhonova@intel.com> + Ilya Tocar <ilya.tocar@intel.com> + Andrey Turetskiy <andrey.turetskiy@intel.com> + Ilya Verbin <ilya.verbin@intel.com> + Kirill Yukhin <kirill.yukhin@intel.com> + Michael Zolotukhin <michael.v.zolotukhin@intel.com> + * config/i386/i386.c (MAX_CLASSES): Increase number of classes. (classify_argument): Extend for 512 bit vectors. (construct_container): Ditto. 
diff --git a/gcc/config.gcc b/gcc/config.gcc index 99f11e5561a..9ce29901f6e 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -372,9 +372,10 @@ i[34567]86-*-*) immintrin.h x86intrin.h avxintrin.h xopintrin.h ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h - avx2intrin.h fmaintrin.h f16cintrin.h rtmintrin.h - xtestintrin.h rdseedintrin.h prfchwintrin.h adxintrin.h - fxsrintrin.h xsaveintrin.h xsaveoptintrin.h" + avx2intrin.h avx512fintrin.h fmaintrin.h f16cintrin.h + rtmintrin.h xtestintrin.h rdseedintrin.h prfchwintrin.h + adxintrin.h fxsrintrin.h xsaveintrin.h xsaveoptintrin.h + avx512cdintrin.h avx512erintrin.h avx512pfintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -387,9 +388,10 @@ x86_64-*-*) immintrin.h x86intrin.h avxintrin.h xopintrin.h ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h - avx2intrin.h fmaintrin.h f16cintrin.h rtmintrin.h - xtestintrin.h rdseedintrin.h prfchwintrin.h adxintrin.h - fxsrintrin.h xsaveintrin.h xsaveoptintrin.h" + avx2intrin.h avx512fintrin.h fmaintrin.h f16cintrin.h + rtmintrin.h xtestintrin.h rdseedintrin.h prfchwintrin.h + adxintrin.h fxsrintrin.h xsaveintrin.h xsaveoptintrin.h + avx512cdintrin.h avx512erintrin.h avx512pfintrin.h" need_64bit_hwint=yes ;; ia64-*-*) diff --git a/gcc/config/i386/avx512cdintrin.h b/gcc/config/i386/avx512cdintrin.h new file mode 100644 index 00000000000..6186e06bdbe --- /dev/null +++ b/gcc/config/i386/avx512cdintrin.h @@ -0,0 +1,219 @@ +/* Copyright (C) 2013 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _AVX512CDINTRIN_H_INCLUDED +#define _AVX512CDINTRIN_H_INCLUDED + +#ifndef __AVX512CD__ +#pragma GCC push_options +#pragma GCC target("avx512cd") +#define __DISABLE_AVX512CD__ +#endif /* __AVX512CD__ */ + +/* Internal data types for implementing the intrinsics. */ +typedef long long __v8di __attribute__ ((__vector_size__ (64))); +typedef int __v16si __attribute__ ((__vector_size__ (64))); + +/* The Intel API is flexible enough that we must allow aliasing with other + vector types, and their scalar components. 
*/ +typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); + +typedef unsigned char __mmask8; +typedef unsigned short __mmask16; + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_conflict_epi32 (__m512i __A) +{ + return (__m512i) + __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A) +{ + return (__m512i) + __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_conflict_epi64 (__m512i __A) +{ + return (__m512i) + __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, + (__v8di) _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A) +{ + return (__m512i) + __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, + (__v8di) _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_lzcnt_epi64 (__m512i __A) +{ + return (__m512i) + __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, + (__v8di) _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A) +{ + return (__m512i) + __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, + (__v8di) _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_lzcnt_epi32 (__m512i __A) +{ + return (__m512i) + __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A) +{ + return (__m512i) + __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_broadcastmb_epi64 (__mmask8 __A) +{ + return (__m512i) __builtin_ia32_broadcastmb512 (__A); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_broadcastmw_epi32 (__mmask16 __A) +{ + return (__m512i) __builtin_ia32_broadcastmw512 (__A); +} + +extern __inline __mmask16 +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_testn_epi32_mask (__m512i __A, __m512i __B) +{ + return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, + (__v16si) __B, __U); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_testn_epi64_mask (__m512i __A, __m512i __B) +{ + return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, + (__v8di) __B, __U); +} + +#ifdef __DISABLE_AVX512CD__ +#undef __DISABLE_AVX512CD__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512CD__ */ + +#endif /* _AVX512CDINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512erintrin.h b/gcc/config/i386/avx512erintrin.h new file mode 100644 index 00000000000..4583d690378 --- /dev/null +++ b/gcc/config/i386/avx512erintrin.h @@ -0,0 +1,333 @@ +/* Copyright (C) 2013 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _AVX512ERINTRIN_H_INCLUDED +#define _AVX512ERINTRIN_H_INCLUDED + +#ifndef __AVX512ER__ +#pragma GCC push_options +#pragma GCC target("avx512er") +#define __DISABLE_AVX512ER__ +#endif /* __AVX512ER__ */ + +/* Internal data types for implementing the intrinsics. */ +typedef double __v8df __attribute__ ((__vector_size__ (64))); +typedef float __v16sf __attribute__ ((__vector_size__ (64))); + +/* The Intel API is flexible enough that we must allow aliasing with other + vector types, and their scalar components. 
*/ +typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); + +typedef unsigned char __mmask8; +typedef unsigned short __mmask16; + +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_exp2a23_round_pd (__m512d __A, int __R) +{ + __m512d __W; + return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R) +{ + return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R) +{ + return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, + (__v8df) _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_exp2a23_round_ps (__m512 __A, int __R) +{ + __m512 __W; + return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R) +{ + return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R) +{ + return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, + (__v16sf) _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_rcp28_round_pd (__m512d __A, int __R) +{ + __m512d __W; + return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R) +{ + return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R) +{ + return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, + (__v8df) _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rcp28_round_ps (__m512 __A, int __R) +{ + __m512 __W; + return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R) +{ + return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R) +{ + return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, + (__v16sf) _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rsqrt28_round_pd (__m512d __A, int __R) +{ + __m512d __W; + return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rsqrt28_round_pd 
(__m512d __W, __mmask8 __U, __m512d __A, int __R) +{ + return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R) +{ + return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, + (__v8df) _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rsqrt28_round_ps (__m512 __A, int __R) +{ + __m512 __W; + return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R) +{ + return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R) +{ + return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, + (__v16sf) _mm512_setzero_ps (), + (__mmask16) __U, __R); +} +#else +#define _mm512_exp2a23_round_pd(A, C) \ + __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C) + +#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \ + __builtin_ia32_exp2pd_mask(A, W, U, C) + +#define _mm512_maskz_exp2a23_round_pd(U, A, C) \ + __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C) + +#define _mm512_exp2a23_round_ps(A, C) \ + __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C) + +#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \ + __builtin_ia32_exp2ps_mask(A, W, U, C) + +#define _mm512_maskz_exp2a23_round_ps(U, A, C) \ + __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) + +#define _mm512_rcp28_round_pd(A, C) \ + 
__builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C) + +#define _mm512_mask_rcp28_round_pd(W, U, A, C) \ + __builtin_ia32_rcp28pd_mask(A, W, U, C) + +#define _mm512_maskz_rcp28_round_pd(U, A, C) \ + __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C) + +#define _mm512_rcp28_round_ps(A, C) \ + __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C) + +#define _mm512_mask_rcp28_round_ps(W, U, A, C) \ + __builtin_ia32_rcp28ps_mask(A, W, U, C) + +#define _mm512_maskz_rcp28_round_ps(U, A, C) \ + __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) + +#define _mm512_rsqrt28_round_pd(A, C) \ + __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C) + +#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \ + __builtin_ia32_rsqrt28pd_mask(A, W, U, C) + +#define _mm512_maskz_rsqrt28_round_pd(U, A, C) \ + __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C) + +#define _mm512_rsqrt28_round_ps(A, C) \ + __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C) + +#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \ + __builtin_ia32_rsqrt28ps_mask(A, W, U, C) + +#define _mm512_maskz_rsqrt28_round_ps(U, A, C) \ + __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) +#endif + +#define _mm512_exp2a23_pd(A) \ + _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_exp2a23_pd(W, U, A) \ + _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_exp2a23_pd(U, A) \ + _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_exp2a23_ps(A) \ + _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_exp2a23_ps(W, U, A) \ + _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_exp2a23_ps(U, A) \ + _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_rcp28_pd(A) \ + _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION) + +#define 
_mm512_mask_rcp28_pd(W, U, A) \ + _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_rcp28_pd(U, A) \ + _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_rcp28_ps(A) \ + _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_rcp28_ps(W, U, A) \ + _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_rcp28_ps(U, A) \ + _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_rsqrt28_pd(A) \ + _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_rsqrt28_pd(W, U, A) \ + _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_rsqrt28_pd(U, A) \ + _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_rsqrt28_ps(A) \ + _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_rsqrt28_ps(W, U, A) \ + _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_rsqrt28_ps(U, A) \ + _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION) + +#ifdef __DISABLE_AVX512ER__ +#undef __DISABLE_AVX512ER__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512ER__ */ + +#endif /* _AVX512ERINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h new file mode 100644 index 00000000000..f717d4601fc --- /dev/null +++ b/gcc/config/i386/avx512fintrin.h @@ -0,0 +1,12147 @@ +/* Copyright (C) 2013 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _AVX512FINTRIN_H_INCLUDED +#define _AVX512FINTRIN_H_INCLUDED + +#ifndef __AVX512F__ +#pragma GCC push_options +#pragma GCC target("avx512f") +#define __DISABLE_AVX512F__ +#endif /* __AVX512F__ */ + +/* Internal data types for implementing the intrinsics. */ +typedef double __v8df __attribute__ ((__vector_size__ (64))); +typedef float __v16sf __attribute__ ((__vector_size__ (64))); +typedef long long __v8di __attribute__ ((__vector_size__ (64))); +typedef int __v16si __attribute__ ((__vector_size__ (64))); + +/* The Intel API is flexible enough that we must allow aliasing with other + vector types, and their scalar components. */ +typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); +typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); + +typedef unsigned char __mmask8; +typedef unsigned short __mmask16; + +/* Rounding mode macros. 
*/ +#define _MM_FROUND_TO_NEAREST_INT 0x00 +#define _MM_FROUND_TO_NEG_INF 0x01 +#define _MM_FROUND_TO_POS_INF 0x02 +#define _MM_FROUND_TO_ZERO 0x03 +#define _MM_FROUND_CUR_DIRECTION 0x04 +#define _MM_FROUND_NO_EXC 0x05 + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set_epi64 (long long __A, long long __B, long long __C, + long long __D, long long __E, long long __F, + long long __G, long long __H) +{ + return __extension__ (__m512i) (__v8di) + { __H, __G, __F, __E, __D, __C, __B, __A }; +} + +/* Create the vector [A B C D E F G H I J K L M N O P]. */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set_epi32 (int __A, int __B, int __C, int __D, + int __E, int __F, int __G, int __H, + int __I, int __J, int __K, int __L, + int __M, int __N, int __O, int __P) +{ + return __extension__ (__m512i)(__v16si) + { __P, __O, __N, __M, __L, __K, __J, __I, + __H, __G, __F, __E, __D, __C, __B, __A }; +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set_pd (double __A, double __B, double __C, double __D, + double __E, double __F, double __G, double __H) +{ + return __extension__ (__m512d) + { __H, __G, __F, __E, __D, __C, __B, __A }; +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set_ps (float __A, float __B, float __C, float __D, + float __E, float __F, float __G, float __H, + float __I, float __J, float __K, float __L, + float __M, float __N, float __O, float __P) +{ + return __extension__ (__m512) + { __P, __O, __N, __M, __L, __K, __J, __I, + __H, __G, __F, __E, __D, __C, __B, __A }; +} + +#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ + _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0) + +#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ + e8,e9,e10,e11,e12,e13,e14,e15) \ + _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) + 
+#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ + _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0) + +#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ + _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_setzero_ps (void) +{ + return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_setzero_pd (void) +{ + return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_setzero_si512 (void) +{ + return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +extern __inline __m512d +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_load_pd (void const *__P) +{ + return *(__m512d *) __P; +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) +{ + return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_load_pd (__mmask8 __U, void const *__P) +{ + return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_store_pd (void *__P, __m512d __A) +{ + *(__m512d *) __P = __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A) +{ + __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A, + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_load_ps (void const *__P) +{ + return *(__m512 *) __P; +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) +{ + return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_load_ps (__mmask16 __U, void const *__P) +{ + return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_store_ps (void *__P, __m512 __A) +{ + *(__m512 *) __P = __A; +} + +extern __inline 
void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A) +{ + __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_load_epi64 (void const *__P) +{ + return *(__m512i *) __P; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_store_epi64 (void *__P, __m512i __A) +{ + *(__m512i *) __P = __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) +{ + __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_load_si512 (void const *__P) +{ + return *(__m512i *) __P; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_load_epi32 (void const *__P) +{ + return *(__m512i *) __P; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_store_si512 (void *__P, __m512i __A) +{ + *(__m512i *) __P = __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_store_epi32 (void *__P, __m512i __A) +{ + *(__m512i *) __P = __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) +{ + __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, + (__mmask16) 
__U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mullo_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sllv_epi32 (__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, + (__v16si) __Y, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, + (__v16si) __Y, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, + (__v16si) __Y, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_srav_epi32 (__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, + 
(__v16si) __Y, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, + (__v16si) __Y, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, + (__v16si) __Y, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_srlv_epi32 (__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, + (__v16si) __Y, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, + (__v16si) __Y, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, + (__v16si) __Y, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_add_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_add_epi64 (__m512i __W, 
__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sub_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sllv_epi64 (__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_srav_epi64 (__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_srlv_epi64 (__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern 
__inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_add_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mul_epi32 (__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) __W, __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sub_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + 
_mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mul_epu32 (__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) __W, __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) + _mm512_setzero_si512 (), + __M); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_slli_epi64 (__m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A, + unsigned int __B) 
+{ + return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} +#else +#define _mm512_slli_epi64(X, C) \ + ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ + (__v8di)(__m512i)_mm512_setzero_si512 (),\ + (__mmask8)-1)) + +#define _mm512_mask_slli_epi64(W, U, X, C) \ + ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ + (__v8di)(__m512i)(W),\ + (__mmask8)(U))) + +#define _mm512_maskz_slli_epi64(U, X, C) \ + ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ + (__v8di)(__m512i)_mm512_setzero_si512 (),\ + (__mmask8)(U))) +#endif + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sll_epi64 (__m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, + (__v2di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, + (__v2di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, + (__v2di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_srli_epi64 (__m512i __A, unsigned int __B) 
+{ + return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U, + __m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} +#else +#define _mm512_srli_epi64(X, C) \ + ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ + (__v8di)(__m512i)_mm512_setzero_si512 (),\ + (__mmask8)-1)) + +#define _mm512_mask_srli_epi64(W, U, X, C) \ + ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ + (__v8di)(__m512i)(W),\ + (__mmask8)(U))) + +#define _mm512_maskz_srli_epi64(U, X, C) \ + ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ + (__v8di)(__m512i)_mm512_setzero_si512 (),\ + (__mmask8)(U))) +#endif + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_srl_epi64 (__m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, + (__v2di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, + (__v2di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 
+{ + return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, + (__v2di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_srai_epi64 (__m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A, + unsigned int __B) +{ + return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} +#else +#define _mm512_srai_epi64(X, C) \ + ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ + (__v8di)(__m512i)_mm512_setzero_si512 (),\ + (__mmask8)-1)) + +#define _mm512_mask_srai_epi64(W, U, X, C) \ + ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ + (__v8di)(__m512i)(W),\ + (__mmask8)(U))) + +#define _mm512_maskz_srai_epi64(U, X, C) \ + ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ + (__v8di)(__m512i)_mm512_setzero_si512 (),\ + (__mmask8)(U))) +#endif + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sra_epi64 (__m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, + (__v2di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sra_epi64 (__m512i __W, __mmask8 
__U, __m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, + (__v2di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, + (__v2di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_slli_epi32 (__m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A, + unsigned int __B) +{ + return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} +#else +#define _mm512_slli_epi32(X, C) \ + ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ + (__v16si)(__m512i)_mm512_setzero_si512 (),\ + (__mmask16)-1)) + +#define _mm512_mask_slli_epi32(W, U, X, C) \ + ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ + (__v16si)(__m512i)(W),\ + (__mmask16)(U))) + +#define _mm512_maskz_slli_epi32(U, X, C) \ + ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ + (__v16si)(__m512i)_mm512_setzero_si512 (),\ + (__mmask16)(U))) +#endif + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_sll_epi32 (__m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, + (__v4si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, + (__v4si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, + (__v4si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_srli_epi32 (__m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} +#else +#define _mm512_srli_epi32(X, C) \ + ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ + (__v16si)(__m512i)_mm512_setzero_si512 (),\ + (__mmask16)-1)) + +#define _mm512_mask_srli_epi32(W, U, X, C) \ + ((__m512i) __builtin_ia32_psrldi512_mask 
((__v16si)(__m512i)(X), (int)(C),\ + (__v16si)(__m512i)(W),\ + (__mmask16)(U))) + +#define _mm512_maskz_srli_epi32(U, X, C) \ + ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ + (__v16si)(__m512i)_mm512_setzero_si512 (),\ + (__mmask16)(U))) +#endif + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_srl_epi32 (__m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, + (__v4si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, + (__v4si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, + (__v4si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_srai_epi32 (__m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A, + unsigned int __B) +{ + return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) +{ + return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, 
__B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} +#else +#define _mm512_srai_epi32(X, C) \ + ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ + (__v16si)(__m512i)_mm512_setzero_si512 (),\ + (__mmask16)-1)) + +#define _mm512_mask_srai_epi32(W, U, X, C) \ + ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ + (__v16si)(__m512i)(W),\ + (__mmask16)(U))) + +#define _mm512_maskz_srai_epi32(U, X, C) \ + ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ + (__v16si)(__m512i)_mm512_setzero_si512 (),\ + (__mmask16)(U))) +#endif + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sra_epi32 (__m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, + (__v4si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, + (__v4si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B) +{ + return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, + (__v4si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm) +{ + return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __C, imm, + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i 
__B, + __m512i __C, const int imm) +{ + return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __C, imm, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B, + __m512i __C, const int imm) +{ + return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A, + (__v8di) __B, + (__v8di) __C, + imm, (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm) +{ + return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __C, + imm, (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B, + __m512i __C, const int imm) +{ + return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __C, + imm, (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B, + __m512i __C, const int imm) +{ + return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A, + (__v16si) __B, + (__v16si) __C, + imm, (__mmask16) __U); +} +#else +#define _mm512_ternarylogic_epi64(A, B, C, I) \ + ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1)) +#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \ + ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U))) +#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \ + ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \ 
+ (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U))) +#define _mm512_ternarylogic_epi32(A, B, C, I) \ + ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ + (__mmask16)-1)) +#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \ + ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ + (__mmask16)(U))) +#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \ + ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ + (__mmask16)(U))) +#endif + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rcp14_pd (__m512d __A) +{ + return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rcp14_ps (__m512 __A) +{ + return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); 
+} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rsqrt14_pd (__m512d __A) +{ + return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rsqrt14_ps (__m512 __A) +{ + return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_sqrt_round_pd (__m512d __A, const int __R) +{ + return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sqrt_round_ps (__m512 __A, const int __R) +{ + return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R) +{ + return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + +#else +#define _mm512_sqrt_round_pd(A, C) \ + (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), -1, C) + +#define _mm512_mask_sqrt_round_pd(W, U, A, C) \ + (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C) + +#define _mm512_maskz_sqrt_round_pd(U, A, C) \ + (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C) + +#define 
_mm512_sqrt_round_ps(A, C) \ + (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C) + +#define _mm512_mask_sqrt_round_ps(W, U, A, C) \ + (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C) + +#define _mm512_maskz_sqrt_round_ps(U, A, C) \ + (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) +#endif + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi8_epi32 (__m128i __A) +{ + return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) +{ + return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A) +{ + return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi8_epi64 (__m128i __A) +{ + return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) +{ + return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A) +{ + return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern 
__inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi16_epi32 (__m256i __A) +{ + return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) +{ + return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A) +{ + return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi16_epi64 (__m128i __A) +{ + return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) +{ + return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A) +{ + return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi32_epi64 (__m256i __X) +{ + return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) +{ + return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X) +{ + return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepu8_epi32 (__m128i __A) +{ + return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) +{ + return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A) +{ + return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepu8_epi64 (__m128i __A) +{ + return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) +{ + return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) +{ + return (__m512i) 
__builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepu16_epi32 (__m256i __A) +{ + return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) +{ + return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A) +{ + return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepu16_epi64 (__m128i __A) +{ + return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) +{ + return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A) +{ + return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepu32_epi64 (__m256i __X) +{ + return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, + (__v8di) + _mm512_setzero_si512 (), + 
(__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) +{ + return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X) +{ + return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R) +{ + return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __R) +{ + return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + const int __R) +{ + return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 
__A, + __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R) +{ + return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __R) +{ + return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + const int __R) +{ + return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, + 
(__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} +#else +#define _mm512_add_round_pd(A, B, C) \ + (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C) + +#define _mm512_mask_add_round_pd(W, U, A, B, C) \ + (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C) + +#define _mm512_maskz_add_round_pd(U, A, B, C) \ + (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) + +#define _mm512_add_round_ps(A, B, C) \ + (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C) + +#define _mm512_mask_add_round_ps(W, U, A, B, C) \ + (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C) + +#define _mm512_maskz_add_round_ps(U, A, B, C) \ + (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) + +#define _mm512_sub_round_pd(A, B, C) \ + (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C) + +#define _mm512_mask_sub_round_pd(W, U, A, B, C) \ + (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C) + +#define _mm512_maskz_sub_round_pd(U, A, B, C) \ + (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) + +#define _mm512_sub_round_ps(A, B, C) \ + (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C) + +#define _mm512_mask_sub_round_ps(W, U, A, B, C) \ + (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C) + +#define _mm512_maskz_sub_round_ps(U, A, B, C) \ + (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mul_round_pd (__m512d 
__A, __m512d __B, const int __R) +{ + return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __R) +{ + return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + const int __R) +{ + return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R) +{ + return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, + (__v8df) 
__V, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M, + __m512d __V, const int __R) +{ + return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, + (__v8df) __V, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V, + const int __R) +{ + return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, + (__v8df) __V, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + +#else +#define _mm512_mul_round_pd(A, B, C) \ + (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C) + +#define _mm512_mask_mul_round_pd(W, U, A, B, C) \ + (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C) + +#define _mm512_maskz_mul_round_pd(U, A, B, C) \ + (__m512d)__builtin_ia32_mulpd512_mask(A, B, 
(__v8df)_mm512_setzero_pd(), U, C) + +#define _mm512_mul_round_ps(A, B, C) \ + (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C) + +#define _mm512_mask_mul_round_ps(W, U, A, B, C) \ + (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C) + +#define _mm512_maskz_mul_round_ps(U, A, B, C) \ + (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) + +#define _mm512_div_round_pd(A, B, C) \ + (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C) + +#define _mm512_mask_div_round_pd(W, U, A, B, C) \ + (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C) + +#define _mm512_maskz_div_round_pd(U, A, B, C) \ + (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) + +#define _mm512_div_round_ps(A, B, C) \ + (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C) + +#define _mm512_mask_div_round_ps(W, U, A, B, C) \ + (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C) + +#define _mm512_maskz_div_round_ps(U, A, B, C) \ + (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R) +{ + return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __R) +{ + return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + const int __R) +{ + return (__m512d) 
__builtin_ia32_maxpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R) +{ + return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __R) +{ + return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + const int __R) +{ + return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) 
__U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} +#else +#define _mm512_max_round_pd(A, B, R) \ + (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, R) + +#define _mm512_mask_max_round_pd(W, U, A, B, R) \ + (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R) + +#define _mm512_maskz_max_round_pd(U, A, B, R) \ + (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R) + +#define _mm512_max_round_ps(A, B, R) \ + (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_pd(), -1, R) + +#define _mm512_mask_max_round_ps(W, U, A, B, R) \ + (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R) + +#define _mm512_maskz_max_round_ps(U, A, B, R) \ + (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R) + +#define _mm512_min_round_pd(A, B, R) \ + (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, R) + +#define _mm512_mask_min_round_pd(W, U, A, B, R) \ + (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R) + +#define _mm512_maskz_min_round_pd(U, A, B, R) \ + 
(__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R) + +#define _mm512_min_round_ps(A, B, R) \ + (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, R) + +#define _mm512_mask_min_round_ps(W, U, A, B, R) \ + (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R) + +#define _mm512_maskz_min_round_ps(U, A, B, R) \ + (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R) +{ + return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __R) +{ + return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + const int __R) +{ + return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 __B, const int __R) +{ + return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) 
__A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B, + const int __R) +{ + return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + +#else +#define _mm512_scalef_round_pd(A, B, C) \ + (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C) + +#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ + (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C) + +#define _mm512_maskz_scalef_round_pd(U, A, B, C) \ + (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) + +#define _mm512_scalef_round_ps(A, B, C) \ + (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C) + +#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ + (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C) + +#define _mm512_maskz_scalef_round_ps(U, A, B, C) \ + (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, + __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, + __mmask8 
__U, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, + __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, + __mmask16 __U, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, + __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + 
(__v8df) __B, + -(__v8df) __C, + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, + __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, + __mmask8 __U, const int __R) +{ + return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, + __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, + __mmask16 __U, const int __R) +{ + return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); +} + +extern 
__inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, + __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, + __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, + __mmask8 __U, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, + __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, + __mmask16 __U, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, + __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, + __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, + __mmask8 __U, const int __R) +{ + return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, + __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, + __mmask16 __U, const int __R) +{ + return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, + __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d 
__B, + __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, + __mmask8 __U, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, + __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, + __mmask16 __U, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, + __m512 __C, const int __R) +{ + return (__m512) 
__builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, + __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, + __mmask8 __U, const int __R) +{ + return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + __m512d __C, const int __R) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, + __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + 
(__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, + __mmask16 __U, const int __R) +{ + return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, + __m512 __C, const int __R) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, __R); +} +#else +#define _mm512_fmadd_round_pd(A, B, C, R) \ + (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R) + +#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ + (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R) + +#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ + (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R) + +#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ + (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R) + +#define _mm512_fmadd_round_ps(A, B, C, R) \ + (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R) + +#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \ + (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R) + +#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \ + (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R) + +#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \ + (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R) + +#define _mm512_fmsub_round_pd(A, B, C, R) \ + (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R) + +#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ + (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R) + +#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \ + (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R) + +#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ + 
(__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
+
+#define _mm512_fmsub_round_ps(A, B, C, R) \
+    (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
+
+#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
+    (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
+
+#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
+    (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
+
+#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
+    (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
+
+#define _mm512_fmaddsub_round_pd(A, B, C, R) \
+    (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
+
+/* Must use the fmaddSUB builtin so the -O0 macro expansion matches the
+   inline _mm512_mask_fmaddsub_round_pd, which alternates add/sub.  */
+#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
+    (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
+
+#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
+    (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
+
+#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
+    (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
+
+#define _mm512_fmaddsub_round_ps(A, B, C, R) \
+    (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
+
+#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
+    (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
+
+#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
+    (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
+
+#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
+    (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
+
+#define _mm512_fmsubadd_round_pd(A, B, C, R) \
+    (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
+
+#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
+    (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
+
+#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
+    (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
+
+#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
+    (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
+
+#define _mm512_fmsubadd_round_ps(A, B, C, R) \
(__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
+
+#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
+    (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
+
+#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
+    (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
+
+#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
+    (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
+
+#define _mm512_fnmadd_round_pd(A, B, C, R) \
+    (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
+
+/* The vfnmadd builtin negates A internally; do not negate it here as
+   well, or the macro computes fmadd.  Matches the inline version, which
+   passes A un-negated to __builtin_ia32_vfnmaddpd512_mask.  */
+#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
+    (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
+
+#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
+    (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
+
+#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
+    (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
+
+#define _mm512_fnmadd_round_ps(A, B, C, R) \
+    (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
+
+/* Likewise: vfnmadd already negates A.  */
+#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
+    (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
+
+#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
+    (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
+
+#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
+    (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
+
+#define _mm512_fnmsub_round_pd(A, B, C, R) \
+    (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
+
+#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
+    (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
+
+#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
+    (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
+
+#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
+    (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
+
+#define _mm512_fnmsub_round_ps(A, B, C, R) \
+    (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
+
+#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
(__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R) + +#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \ + (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R) + +#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ + (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R) +#endif + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_abs_epi64 (__m512i __A) +{ + return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_abs_epi32 (__m512i __A) +{ + return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_broadcastss_ps (__m128 __A) +{ + __v16sf __O; + return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, __O, + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) +{ + return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, + (__v16sf) __O, __M); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) +{ + return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, + (__v16sf) + _mm512_setzero_ps (), + __M); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_broadcastsd_pd (__m128d __A) +{ + __v8df __O; + return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, __O, + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) +{ + return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, + (__v8df) __O, __M); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) +{ + return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, + (__v8df) + _mm512_setzero_pd (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_broadcastd_epi32 (__m128i __A) +{ + __v16si __O; + return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, __O, + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, + (__v16si) __O, __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set1_epi32 (int __A) +{ + __v16si __O; + return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, __O, + (__mmask16)(-1)); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) +{ + return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O, + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_set1_epi32 (__mmask16 __M, int __A) +{ + return (__m512i) + __builtin_ia32_pbroadcastd512_gpr_mask (__A, + (__v16si) _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_broadcastq_epi64 (__m128i __A) +{ + __v8di __O; + return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, __O, + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, + (__v8di) __O, __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, + (__v8di) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set1_epi64 (long long __A) +{ + __v8di __O; +#ifdef TARGET_64BIT + return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, __O, + (__mmask8)(-1)); +#else + 
return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, __O, + (__mmask8)(-1)); +#endif +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) +{ +#ifdef TARGET_64BIT + return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O, + __M); +#else + return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O, + __M); +#endif +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A) +{ +#ifdef TARGET_64BIT + return (__m512i) + __builtin_ia32_pbroadcastq512_gpr_mask (__A, + (__v8di) _mm512_setzero_si512 (), + __M); +#else + return (__m512i) + __builtin_ia32_pbroadcastq512_mem_mask (__A, + (__v8di) _mm512_setzero_si512 (), + __M); +#endif +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_broadcast_f32x4 (__m128 __A) +{ + __v16sf __O; + return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, __O, + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A) +{ + return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, + (__v16sf) __O, + __M); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A) +{ + return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, + (__v16sf) + _mm512_setzero_ps (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_broadcast_i32x4 (__m128i __A) +{ + __v16si __O; + return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, + __O, + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, + (__v16si) __O, + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_broadcast_f64x4 (__m256d __A) +{ + __v8df __O; + return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, + __O, + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A) +{ + return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, + (__v8df) __O, + __M); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A) +{ + return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, + (__v8df) + _mm512_setzero_pd (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_broadcast_i64x4 (__m256i __A) +{ + __v8di __O; + return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, + __O, + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A) +{ + return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, + (__v8di) __O, + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A) +{ + return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, + (__v8di) + 
_mm512_setzero_si512 (), + __M); +} + +typedef enum +{ + _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, + _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, + _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08, + _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B, + _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, + _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, + _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14, + _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17, + _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, + _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, + _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20, + _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23, + _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, + _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, + _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C, + _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F, + _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, + _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, + _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38, + _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B, + _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, + _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, + _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44, + _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47, + _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, + _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, + _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50, + _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53, + _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 
0x55, _MM_PERM_BBBC = 0x56, + _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, + _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C, + _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F, + _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, + _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, + _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68, + _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B, + _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, + _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, + _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74, + _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77, + _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, + _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, + _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80, + _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83, + _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, + _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, + _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C, + _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F, + _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, + _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, + _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98, + _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B, + _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, + _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, + _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4, + _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7, + _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, + _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, 
+ _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0, + _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3, + _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, + _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, + _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC, + _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF, + _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, + _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, + _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8, + _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB, + _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, + _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, + _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4, + _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7, + _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, + _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, + _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0, + _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3, + _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, + _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, + _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC, + _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF, + _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, + _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, + _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8, + _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB, + _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, + _MM_PERM_DDDD = 0xFF +} _MM_PERM_ENUM; + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask) +{ + return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, + __mask, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A, + _MM_PERM_ENUM __mask) +{ + return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, + __mask, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask) +{ + return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, + __mask, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm) +{ + return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, + (__v8di) __B, __imm, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A, + __m512i __B, const int __imm) +{ + return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, + (__v8di) __B, __imm, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B, + const int __imm) +{ + return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, + (__v8di) __B, __imm, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm) +{ + return (__m512i) 
__builtin_ia32_shuf_i32x4_mask ((__v16si) __A, + (__v16si) __B, + __imm, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A, + __m512i __B, const int __imm) +{ + return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A, + (__v16si) __B, + __imm, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B, + const int __imm) +{ + return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A, + (__v16si) __B, + __imm, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm) +{ + return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A, + (__v8df) __B, __imm, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __imm) +{ + return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A, + (__v8df) __B, __imm, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B, + const int __imm) +{ + return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A, + (__v8df) __B, __imm, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm) +{ + return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) 
__A, + (__v16sf) __B, __imm, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A, + __m512 __B, const int __imm) +{ + return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A, + (__v16sf) __B, __imm, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B, + const int __imm) +{ + return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A, + (__v16sf) __B, __imm, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +#else +#define _mm512_shuffle_epi32(X, C) \ + ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\ + (__v16si)(__m512i)_mm512_setzero_si512 (),\ + (__mmask16)-1)) + +#define _mm512_mask_shuffle_epi32(W, U, X, C) \ + ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\ + (__v16si)(__m512i)(W),\ + (__mmask16)(U))) + +#define _mm512_maskz_shuffle_epi32(U, X, C) \ + ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\ + (__v16si)(__m512i)_mm512_setzero_si512 (),\ + (__mmask16)(U))) + +#define _mm512_shuffle_i64x2(X, Y, C) \ + ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \ + (__v8di)(__m512i)(Y), (int)(C),\ + (__v8di)(__m512i)_mm512_setzero_si512 (),\ + (__mmask8)-1)) + +#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \ + ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \ + (__v8di)(__m512i)(Y), (int)(C),\ + (__v8di)(__m512i)(W),\ + (__mmask8)(U))) + +#define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \ + ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \ + (__v8di)(__m512i)(Y), (int)(C),\ + (__v8di)(__m512i)_mm512_setzero_si512 (),\ + (__mmask8)(U))) + +#define _mm512_shuffle_i32x4(X, Y, C) \ + ((__m512i) 
__builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(C),\ + (__v16si)(__m512i)_mm512_setzero_si512 (),\ + (__mmask16)-1)) + +#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \ + ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(C),\ + (__v16si)(__m512i)(W),\ + (__mmask16)(U))) + +#define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \ + ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(C),\ + (__v16si)(__m512i)_mm512_setzero_si512 (),\ + (__mmask16)(U))) + +#define _mm512_shuffle_f64x2(X, Y, C) \ + ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (int)(C),\ + (__v8df)(__m512d)_mm512_setzero_pd(),\ + (__mmask8)-1)) + +#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \ + ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (int)(C),\ + (__v8df)(__m512d)(W),\ + (__mmask8)(U))) + +#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \ + ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (int)(C),\ + (__v8df)(__m512d)_mm512_setzero_pd(),\ + (__mmask8)(U))) + +#define _mm512_shuffle_f32x4(X, Y, C) \ + ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (int)(C),\ + (__v16sf)(__m512)_mm512_setzero_ps(),\ + (__mmask16)-1)) + +#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \ + ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (int)(C),\ + (__v16sf)(__m512)(W),\ + (__mmask16)(U))) + +#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \ + ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (int)(C),\ + (__v16sf)(__m512)_mm512_setzero_ps(),\ + (__mmask16)(U))) +#endif + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rolv_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) 
__builtin_ia32_prolvd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rorv_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rolv_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rorv_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1, __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) 
__U, __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1, __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, __R); +} +#else +#define _mm512_cvtt_roundpd_epi32(A, B) \ + ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B)) + +#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \ + ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B)) + +#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \ + ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B)) + +#define _mm512_cvtt_roundpd_epu32(A, B) \ + ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B)) + +#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \ + ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B)) + +#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \ + ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, 
(__v8si)_mm256_setzero_si256(), U, B)) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R) +{ + return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1, __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R) +{ + return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1, __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, __R); +} +#else +#define _mm512_cvt_roundpd_epi32(A, B) \ + ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B)) + +#define 
_mm512_mask_cvt_roundpd_epi32(W, U, A, B) \ + ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B)) + +#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \ + ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B)) + +#define _mm512_cvt_roundpd_epu32(A, B) \ + ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B)) + +#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \ + ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B)) + +#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \ + ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B)) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundps_epu32 (__m512i 
__W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, __R); +} +#else +#define _mm512_cvtt_roundps_epi32(A, B) \ + ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B)) + +#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \ + ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B)) + +#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \ + ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) + +#define _mm512_cvtt_roundps_epu32(A, B) \ + ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B)) + +#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \ + ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B)) + +#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \ + ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundps_epi32 (__m512 __A, const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundps_epu32 (__m512 __A, const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, __R); +} +#else +#define _mm512_cvt_roundps_epi32(A, B) \ + ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B)) + +#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \ + ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B)) + +#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \ + ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) + +#define _mm512_cvt_roundps_epu32(A, B) \ + ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B)) + +#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \ + ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B)) + +#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \ + ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) +#endif + +extern __inline __m128d +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_cvtu32_sd (__m128d __A, unsigned __B) +{ + return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B); +} + +#ifdef __x86_64__ +#ifdef __OPTIMIZE__ +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R) +{ + return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R) +{ + return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R) +{ + return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R); +} +#else +#define _mm_cvt_roundu64_sd(A, B, C) \ + (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C) + +#define _mm_cvt_roundi64_sd(A, B, C) \ + (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C) + +#define _mm_cvt_roundsi64_sd(A, B, C) \ + (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C) +#endif + +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R) +{ + return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R) +{ + return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R) +{ + return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R); +} +#else +#define _mm_cvt_roundu32_ss(A, B, C) \ + 
(__m128)__builtin_ia32_cvtusi2ss32(A, B, C) + +#define _mm_cvt_roundi32_ss(A, B, C) \ + (__m128)__builtin_ia32_cvtsi2ss32(A, B, C) + +#define _mm_cvt_roundsi32_ss(A, B, C) \ + (__m128)__builtin_ia32_cvtsi2ss32(A, B, C) +#endif + +#ifdef __x86_64__ +#ifdef __OPTIMIZE__ +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R) +{ + return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R) +{ + return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R) +{ + return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R); +} +#else +#define _mm_cvt_roundu64_ss(A, B, C) \ + (__m128)__builtin_ia32_cvtusi2ss64(A, B, C) + +#define _mm_cvt_roundi64_ss(A, B, C) \ + (__m128)__builtin_ia32_cvtsi2ss64(A, B, C) + +#define _mm_cvt_roundsi64_ss(A, B, C) \ + (__m128)__builtin_ia32_cvtsi2ss64(A, B, C) +#endif + +#endif + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi32_epi8 (__m512i __A) +{ + __v16qi __O; + return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, __O, + (__mmask16) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, + (__v16qi) __O, __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, + 
(__v16qi) + _mm_setzero_si128 (), + __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtsepi32_epi8 (__m512i __A) +{ + __v16qi __O; + return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, __O, + (__mmask16) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, + (__v16qi) __O, __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtusepi32_epi8 (__m512i __A) +{ + __v16qi __O; + return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, __O, + (__mmask16) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, + (__v16qi) __O, + __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi32_epi16 (__m512i __A) +{ + __v16hi __O; + return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, __O, + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, 
__m512i __A) +{ + return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, + (__v16hi) __O, __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, + (__v16hi) + _mm256_setzero_si256 (), + __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtsepi32_epi16 (__m512i __A) +{ + __v16hi __O; + return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, __O, + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, + (__v16hi) __O, __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, + (__v16hi) + _mm256_setzero_si256 (), + __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtusepi32_epi16 (__m512i __A) +{ + __v16hi __O; + return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, __O, + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, + (__v16hi) __O, + __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, + (__v16hi) + _mm256_setzero_si256 (), + __M); +} + +extern __inline __m256i +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi64_epi32 (__m512i __A) +{ + __v8si __O; + return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, __O, + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, + (__v8si) __O, __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, + (__v8si) + _mm256_setzero_si256 (), + __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtsepi64_epi32 (__m512i __A) +{ + __v8si __O; + return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, __O, + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, + (__v8si) __O, __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, + (__v8si) + _mm256_setzero_si256 (), + __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtusepi64_epi32 (__m512i __A) +{ + __v8si __O; + return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, __O, + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, + (__v8si) __O, __M); 
+} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, + (__v8si) + _mm256_setzero_si256 (), + __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi64_epi16 (__m512i __A) +{ + __v8hi __O; + return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, __O, + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, + (__v8hi) __O, __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtsepi64_epi16 (__m512i __A) +{ + __v8hi __O; + return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, __O, + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, + (__v8hi) __O, __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtusepi64_epi16 (__m512i __A) +{ + __v8hi __O; + return (__m128i) 
__builtin_ia32_pmovusqw512_mask ((__v8di) __A, __O, + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, + (__v8hi) __O, __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi64_epi8 (__m512i __A) +{ + __v16qi __O; + return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, __O, + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, + (__v16qi) __O, __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtsepi64_epi8 (__m512i __A) +{ + __v16qi __O; + return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, __O, + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, + (__v16qi) __O, __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtsepi64_epi8 
(__mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtusepi64_epi8 (__m512i __A) +{ + __v16qi __O; + return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, __O, + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, + (__v16qi) __O, + __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi32_pd (__m256i __A) +{ + return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) +{ + return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) +{ + return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepu32_pd (__m256i __A) +{ + return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline 
__m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) +{ + return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) +{ + return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepi32_ps (__m512i __A, const int __R) +{ + return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A, + const int __R) +{ + return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R) +{ + return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepu32_ps (__m512i __A, const int __R) +{ + return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A, + const int __R) +{ + return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, + (__v16sf) __W, + 
(__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R) +{ + return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + +#else +#define _mm512_cvt_roundepi32_ps(A, B) \ + (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), -1, B) + +#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \ + (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B) + +#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \ + (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B) + +#define _mm512_cvt_roundepu32_ps(A, B) \ + (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), -1, B) + +#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \ + (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B) + +#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \ + (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_extractf64x4_pd (__m512d __A, const int __imm) +{ + return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, + __imm, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A, + const int __imm) +{ + return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, + __imm, + (__v4df) __W, + (__mmask8) __U); +} + +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm) +{ + return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) 
__A, + __imm, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_extractf32x4_ps (__m512 __A, const int __imm) +{ + return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, + __imm, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A, + const int __imm) +{ + return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, + __imm, + (__v4sf) __W, + (__mmask8) __U); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm) +{ + return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, + __imm, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_extracti64x4_epi64 (__m512i __A, const int __imm) +{ + return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, + __imm, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A, + const int __imm) +{ + return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, + __imm, + (__v4di) __W, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm) +{ + return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, + __imm, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_extracti32x4_epi32 (__m512i __A, 
const int __imm) +{ + return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, + __imm, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A, + const int __imm) +{ + return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, + __imm, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm) +{ + return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, + __imm, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} +#else + +#define _mm512_extractf64x4_pd(X, C) \ + ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \ + (int) (C),\ + (__v4df)(__m256d)_mm256_setzero_pd(),\ + (__mmask8)-1)) + +#define _mm512_mask_extractf64x4_pd(W, U, X, C) \ + ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \ + (int) (C),\ + (__v4df)(__m256d)(W),\ + (__mmask8)(U))) + +#define _mm512_maskz_extractf64x4_pd(U, X, C) \ + ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \ + (int) (C),\ + (__v4df)(__m256d)_mm256_setzero_pd(),\ + (__mmask8)(U))) + +#define _mm512_extractf32x4_ps(X, C) \ + ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \ + (int) (C),\ + (__v4sf)(__m128)_mm_setzero_ps(),\ + (__mmask8)-1)) + +#define _mm512_mask_extractf32x4_ps(W, U, X, C) \ + ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \ + (int) (C),\ + (__v4sf)(__m128)(W),\ + (__mmask8)(U))) + +#define _mm512_maskz_extractf32x4_ps(U, X, C) \ + ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \ + (int) (C),\ + (__v4sf)(__m128)_mm_setzero_ps(),\ + (__mmask8)(U))) + +#define _mm512_extracti64x4_epi64(X, C) \ + ((__m256i) __builtin_ia32_extracti64x4_mask 
((__v8di)(__m512i) (X), \ + (int) (C),\ + (__v4di)(__m256i)_mm256_setzero_si256 (),\ + (__mmask8)-1)) + +#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \ + ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \ + (int) (C),\ + (__v4di)(__m256i)(W),\ + (__mmask8)(U))) + +#define _mm512_maskz_extracti64x4_epi64(U, X, C) \ + ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \ + (int) (C),\ + (__v4di)(__m256i)_mm256_setzero_si256 (),\ + (__mmask8)(U))) + +#define _mm512_extracti32x4_epi32(X, C) \ + ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \ + (int) (C),\ + (__v4si)(__m128i)_mm_setzero_si128 (),\ + (__mmask8)-1)) + +#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \ + ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \ + (int) (C),\ + (__v4si)(__m128i)(W),\ + (__mmask8)(U))) + +#define _mm512_maskz_extracti32x4_epi32(U, X, C) \ + ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \ + (int) (C),\ + (__v4si)(__m128i)_mm_setzero_si128 (),\ + (__mmask8)(U))) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm) +{ + return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A, + (__v4si) __B, + __imm, + (__v16si) __A, -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm) +{ + return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A, + (__v4sf) __B, + __imm, + (__v16sf) __A, -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm) +{ + return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, + (__v4di) __B, + __imm, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A, + __m256i __B, const int __imm) +{ + return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, + (__v4di) __B, + __imm, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B, + const int __imm) +{ + return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, + (__v4di) __B, + __imm, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm) +{ + return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, + (__v4df) __B, + __imm, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A, + __m256d __B, const int __imm) +{ + return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, + (__v4df) __B, + __imm, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B, + const int __imm) +{ + return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, + (__v4df) __B, + __imm, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} +#else +#define _mm512_insertf32x4(X, Y, C) \ + ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ + (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1))) + +#define _mm512_inserti32x4(X, Y, C) \ + ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ + (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1))) + +#define 
_mm512_insertf64x4(X, Y, C) \ + ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \ + (__v4df)(__m256d) (Y), (int) (C), \ + (__v8df)(__m512d)_mm512_setzero_pd(), \ + (__mmask8)-1)) + +#define _mm512_mask_insertf64x4(W, U, X, Y, C) \ + ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \ + (__v4df)(__m256d) (Y), (int) (C), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U))) + +#define _mm512_maskz_insertf64x4(U, X, Y, C) \ + ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \ + (__v4df)(__m256d) (Y), (int) (C), \ + (__v8df)(__m512d)_mm512_setzero_pd(), \ + (__mmask8)(U))) + +#define _mm512_inserti64x4(X, Y, C) \ + ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ + (__v4di)(__m256i) (Y), (int) (C), \ + (__v8di)(__m512i)_mm512_setzero_si512 (), \ + (__mmask8)-1)) + +#define _mm512_mask_inserti64x4(W, U, X, Y, C) \ + ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ + (__v4di)(__m256i) (Y), (int) (C),\ + (__v8di)(__m512i)(W),\ + (__mmask8)(U))) + +#define _mm512_maskz_inserti64x4(U, X, Y, C) \ + ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ + (__v4di)(__m256i) (Y), (int) (C), \ + (__v8di)(__m512i)_mm512_setzero_si512 (), \ + (__mmask8)(U))) +#endif + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_pd (void const *__P) +{ + return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) +{ + return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P) +{ + return (__m512d) 
__builtin_ia32_loadupd512_mask ((const __v8df *) __P, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_pd (void *__P, __m512d __A) +{ + __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A, + (__mmask8) -1); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A) +{ + __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A, + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_ps (void const *__P) +{ + return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) +{ + return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P) +{ + return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_ps (void *__P, __m512 __A) +{ + __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A, + (__mmask16) -1); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A) +{ + __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A) +{ + __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_si512 (void const *__P) +{ + return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_si512 (void *__P, __m512i __A) +{ + __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A, + (__mmask16) -1); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A) +{ + 
__builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A, + (__mmask16) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutevar_pd (__m512d __A, __m512i __C) +{ + return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, + (__v8di) __C, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) +{ + return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, + (__v8di) __C, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C) +{ + return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, + (__v8di) __C, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutevar_ps (__m512 __A, __m512i __C) +{ + return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, + (__v16si) __C, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) +{ + return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, + (__v16si) __C, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C) +{ + return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, + (__v16si) __C, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I + /* idx */ , + (__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I, + __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I + /* idx */ , + (__v8di) __A, + (__v8di) __B, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I, + __mmask8 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A, + (__v8di) __I + /* idx */ , + (__v8di) __B, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A, + __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I + /* idx */ , + (__v8di) __A, + (__v8di) __B, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I + /* idx */ , + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U, + __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I + /* idx */ , + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I, + __mmask16 __U, __m512i __B) +{ 
+ return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A, + (__v16si) __I + /* idx */ , + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A, + __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I + /* idx */ , + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B) +{ + return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I + /* idx */ , + (__v8df) __A, + (__v8df) __B, + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, + __m512d __B) +{ + return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I + /* idx */ , + (__v8df) __A, + (__v8df) __B, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U, + __m512d __B) +{ + return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A, + (__v8di) __I + /* idx */ , + (__v8df) __B, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I, + __m512d __B) +{ + return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I + /* idx */ , + (__v8df) __A, + (__v8df) __B, + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B) +{ + return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I + /* idx */ , + (__v16sf) __A, + 
(__v16sf) __B, + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) +{ + return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I + /* idx */ , + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U, + __m512 __B) +{ + return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A, + (__v16si) __I + /* idx */ , + (__v16sf) __B, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I, + __m512 __B) +{ + return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I + /* idx */ , + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permute_pd (__m512d __X, const int __C) +{ + return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C) +{ + return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C) +{ + return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permute_ps 
(__m512 __X, const int __C) +{ + return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C) +{ + return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C) +{ + return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} +#else +#define _mm512_permute_pd(X, C) \ + ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \ + (__v8df)(__m512d)(X), \ + (__mmask8)(-1))) + +#define _mm512_mask_permute_pd(W, U, X, C) \ + ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U))) + +#define _mm512_maskz_permute_pd(U, X, C) \ + ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \ + (__v8df)(__m512d)_mm512_setzero_pd(), \ + (__mmask8)(U))) + +#define _mm512_permute_ps(X, C) \ + ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \ + (__v16sf)(__m512)(X), \ + (__mmask16)(-1))) + +#define _mm512_mask_permute_ps(W, U, X, C) \ + ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \ + (__v16sf)(__m512)(W), \ + (__mmask16)(U))) + +#define _mm512_maskz_permute_ps(U, X, C) \ + ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \ + (__v16sf)(__m512)_mm512_setzero_ps(), \ + (__mmask16)(U))) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutex_epi64 (__m512i __X, const int __I) +{ + return (__m512i) 
__builtin_ia32_permdi512_mask ((__v8di) __X, __I, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) (-1)); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M, + __m512i __X, const int __I) +{ + return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I, + (__v8di) __W, + (__mmask8) __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I) +{ + return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __M); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutex_pd (__m512d __X, const int __M) +{ + return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M) +{ + return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M) +{ + return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} +#else +#define _mm512_permutex_pd(X, M) \ + ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \ + (__v8df)(__m512d)(X), (__mmask8)-1)) + +#define _mm512_mask_permutex_pd(W, U, X, M) \ + ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \ + (__v8df)(__m512d)(W), (__mmask8)(U))) + +#define _mm512_maskz_permutex_pd(U, X, M) \ + ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), 
(int)(M), \ + (__v8df)(__m512d)_mm512_setzero_pd(),\ + (__mmask8)(U))) + +#define _mm512_permutex_epi64(X, I) \ + ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \ + (int)(I), \ + (__v8di)(__m512i)(X), \ + (__mmask8)(-1))) + +#define _mm512_maskz_permutex_epi64(M, X, I) \ + ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \ + (int)(I), \ + (__v8di)(__m512i) \ + (_mm512_setzero_si512 ()),\ + (__mmask8)(M))) + +#define _mm512_mask_permutex_epi64(W, M, X, I) \ + ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \ + (int)(I), \ + (__v8di)(__m512i)(W), \ + (__mmask8)(M))) +#endif + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, + __m512i __Y) +{ + return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) __W, + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X, + (__v16si) __Y, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) +{ + return (__m512i) 
__builtin_ia32_permvarsi512_mask ((__v16si) __X, + (__v16si) __Y, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, + __m512i __Y) +{ + return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X, + (__v16si) __Y, + (__v16si) __W, + __M); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutexvar_pd (__m512i __X, __m512d __Y) +{ + return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, + (__v8di) __X, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) +{ + return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, + (__v8di) __X, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) +{ + return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, + (__v8di) __X, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutexvar_ps (__m512i __X, __m512 __Y) +{ + return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, + (__v16si) __X, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) +{ + return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, + (__v16si) __X, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) +{ + return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, + (__v16si) __X, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm) +{ + return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, + (__v16sf) __V, __imm, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M, + __m512 __V, const int __imm) +{ + return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, + (__v16sf) __V, __imm, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm) +{ + return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, + (__v16sf) __V, __imm, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm) +{ + return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, + (__v8df) __V, __imm, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M, + __m512d __V, const int __imm) +{ + return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, + (__v8df) __V, __imm, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V, + 
const int __imm) +{ + return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, + (__v8df) __V, __imm, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C, + const int __imm, const int __R) +{ + return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8di) __C, + __imm, + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B, + __m512i __C, const int __imm, const int __R) +{ + return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8di) __C, + __imm, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + __m512i __C, const int __imm, const int __R) +{ + return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8di) __C, + __imm, + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C, + const int __imm, const int __R) +{ + return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16si) __C, + __imm, + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B, + __m512i __C, const int __imm, const int __R) +{ + return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16si) __C, + __imm, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B, + __m512i __C, const int __imm, const int __R) +{ + return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16si) __C, + __imm, + (__mmask16) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C, + const int __imm, const int __R) +{ + return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, + (__v2df) __B, + (__v2di) __C, __imm, + (__mmask8) -1, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B, + __m128i __C, const int __imm, const int __R) +{ + return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, + (__v2df) __B, + (__v2di) __C, __imm, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B, + __m128i __C, const int __imm, const int __R) +{ + return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A, + (__v2df) __B, + (__v2di) __C, + __imm, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C, + const int __imm, const int __R) +{ + return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4si) __C, __imm, + (__mmask8) -1, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B, + __m128i __C, const int __imm, const int __R) +{ + return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4si) __C, __imm, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B, + __m128i __C, const int __imm, const int __R) +{ + return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A, + (__v4sf) __B, + (__v4si) __C, __imm, + (__mmask8) __U, __R); +} + +#else +#define _mm512_shuffle_pd(X, Y, C) \ + ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (int)(C),\ + (__v8df)(__m512d)_mm512_setzero_pd(),\ + (__mmask8)-1)) + +#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \ + ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (int)(C),\ + (__v8df)(__m512d)(W),\ + (__mmask8)(U))) + +#define _mm512_maskz_shuffle_pd(U, X, Y, C) \ + ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (int)(C),\ + (__v8df)(__m512d)_mm512_setzero_pd(),\ + (__mmask8)(U))) + +#define _mm512_shuffle_ps(X, Y, C) \ + ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (int)(C),\ + (__v16sf)(__m512)_mm512_setzero_ps(),\ + (__mmask16)-1)) + +#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \ + ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (int)(C),\ + (__v16sf)(__m512)(W),\ + (__mmask16)(U))) + +#define _mm512_maskz_shuffle_ps(U, X, Y, C) \ + ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (int)(C),\ + (__v16sf)(__m512)_mm512_setzero_ps(),\ + (__mmask16)(U))) + +#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \ + ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ + (__mmask8)(-1), (R))) + +#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \ + ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ + (__mmask8)(U), (R))) + +#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, 
R) \ + ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ + (__mmask8)(U), (R))) + +#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \ + ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ + (__mmask16)(-1), (R))) + +#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \ + ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ + (__mmask16)(U), (R))) + +#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \ + ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ + (__mmask16)(U), (R))) + +#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \ + ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ + (__mmask8)(-1), (R))) + +#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \ + ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ + (__mmask8)(U), (R))) + +#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \ + ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ + (__mmask8)(U), (R))) + +#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \ + ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ + (__mmask8)(-1), (R))) + +#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \ + ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ + (__mmask8)(U), (R))) + +#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \ + ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ + 
(__mmask8)(U), (R))) +#endif + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_movehdup_ps (__m512 __A) +{ + return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_moveldup_ps (__m512 __A) +{ + return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_or_si512 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_or_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_or_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_xor_si512 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_xor_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_xor_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rol_epi32 (__m512i __A, const int __B) +{ + return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, + (__v16si) + 
_mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B) +{ + return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B) +{ + return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ror_epi32 (__m512i __A, int __B) +{ + return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B) +{ + return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B) +{ + return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rol_epi64 (__m512i __A, const int __B) +{ + return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B) +{ + return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, + 
(__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B) +{ + return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ror_epi64 (__m512i __A, int __B) +{ + return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B) +{ + return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B) +{ + return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +#else +#define _mm512_rol_epi32(A, B) \ + ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ + (int)(B), \ + (__v16si)_mm512_setzero_si512 (), \ + (__mmask16)(-1))) +#define _mm512_mask_rol_epi32(W, U, A, B) \ + ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ + (int)(B), \ + (__v16si)(__m512i)(W), \ + (__mmask16)(U))) +#define _mm512_maskz_rol_epi32(U, A, B) \ + ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ + (int)(B), \ + (__v16si)_mm512_setzero_si512 (), \ + (__mmask16)(U))) +#define _mm512_ror_epi32(A, B) \ + ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ + (int)(B), \ + (__v16si)_mm512_setzero_si512 (), \ + (__mmask16)(-1))) +#define _mm512_mask_ror_epi32(W, U, A, B) \ + ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ + (int)(B), \ + 
(__v16si)(__m512i)(W), \ + (__mmask16)(U))) +#define _mm512_maskz_ror_epi32(U, A, B) \ + ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ + (int)(B), \ + (__v16si)_mm512_setzero_si512 (), \ + (__mmask16)(U))) +#define _mm512_rol_epi64(A, B) \ + ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ + (int)(B), \ + (__v8di)_mm512_setzero_si512 (), \ + (__mmask8)(-1))) +#define _mm512_mask_rol_epi64(W, U, A, B) \ + ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ + (int)(B), \ + (__v8di)(__m512i)(W), \ + (__mmask8)(U))) +#define _mm512_maskz_rol_epi64(U, A, B) \ + ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ + (int)(B), \ + (__v8di)_mm512_setzero_si512 (), \ + (__mmask8)(U))) + +#define _mm512_ror_epi64(A, B) \ + ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ + (int)(B), \ + (__v8di)_mm512_setzero_si512 (), \ + (__mmask8)(-1))) +#define _mm512_mask_ror_epi64(W, U, A, B) \ + ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ + (int)(B), \ + (__v8di)(__m512i)(W), \ + (__mmask8)(U))) +#define _mm512_maskz_ror_epi64(U, A, B) \ + ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ + (int)(B), \ + (__v8di)_mm512_setzero_si512 (), \ + (__mmask8)(U))) +#endif + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_and_si512 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_and_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i 
__A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_and_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __U);
+}
+
+/* Fix: the zero source must be the integer zero vector
+   _mm512_setzero_si512 (); casting the __m512d result of
+   _mm512_setzero_pd () to __v8di is inconsistent with every other
+   integer intrinsic in this header.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_si512 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __U);
+}
+
+/* Fix: use the integer zero vector _mm512_setzero_si512 () instead of
+   casting _mm512_setzero_pd () (a __m512d) to __v8di, matching the
+   other integer intrinsics in this header.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_test_epi32_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
+ (__v16si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_test_epi64_mask (__m512i __A, __m512i __B) +{ + return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_unpackhi_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_unpackhi_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_unpacklo_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_unpacklo_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_punpcklqdq512_mask 
((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +#ifdef __x86_64__ +#ifdef __OPTIMIZE__ +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundss_u64 (__m128 __A, const int __R) +{ + return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundss_si64 (__m128 __A, const int __R) +{ + return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundss_i64 (__m128 __A, const int __R) +{ + return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R); +} + +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundss_u64 (__m128 __A, const int __R) +{ + return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundss_i64 (__m128 __A, const int __R) +{ + return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundss_si64 (__m128 __A, const int __R) +{ + return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R); +} +#else +#define _mm_cvt_roundss_u64(A, B) \ + ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B)) + +#define _mm_cvt_roundss_si64(A, B) \ + ((long long)__builtin_ia32_vcvtss2si64(A, B)) + +#define _mm_cvt_roundss_i64(A, B) \ + ((long long)__builtin_ia32_vcvtss2si64(A, B)) + +#define _mm_cvtt_roundss_u64(A, B) \ + ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B)) + +#define _mm_cvtt_roundss_i64(A, B) \ + ((long long)__builtin_ia32_vcvttss2si64(A, B)) + +#define 
_mm_cvtt_roundss_si64(A, B) \ + ((long long)__builtin_ia32_vcvttss2si64(A, B)) +#endif +#endif + +#ifdef __OPTIMIZE__ +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundss_u32 (__m128 __A, const int __R) +{ + return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundss_si32 (__m128 __A, const int __R) +{ + return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundss_i32 (__m128 __A, const int __R) +{ + return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R); +} + +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundss_u32 (__m128 __A, const int __R) +{ + return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundss_i32 (__m128 __A, const int __R) +{ + return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundss_si32 (__m128 __A, const int __R) +{ + return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R); +} +#else +#define _mm_cvt_roundss_u32(A, B) \ + ((unsigned)__builtin_ia32_vcvtss2usi32(A, B)) + +#define _mm_cvt_roundss_si32(A, B) \ + ((int)__builtin_ia32_vcvtss2si32(A, B)) + +#define _mm_cvt_roundss_i32(A, B) \ + ((int)__builtin_ia32_vcvtss2si32(A, B)) + +#define _mm_cvtt_roundss_u32(A, B) \ + ((unsigned)__builtin_ia32_vcvttss2usi32(A, B)) + +#define _mm_cvtt_roundss_si32(A, B) \ + ((int)__builtin_ia32_vcvttss2si32(A, B)) + +#define _mm_cvtt_roundss_i32(A, B) \ + ((int)__builtin_ia32_vcvttss2si32(A, B)) +#endif + +#ifdef __x86_64__ +#ifdef __OPTIMIZE__ +extern __inline unsigned long 
long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsd_u64 (__m128d __A, const int __R) +{ + return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsd_si64 (__m128d __A, const int __R) +{ + return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsd_i64 (__m128d __A, const int __R) +{ + return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R); +} + +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsd_u64 (__m128d __A, const int __R) +{ + return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsd_si64 (__m128d __A, const int __R) +{ + return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsd_i64 (__m128d __A, const int __R) +{ + return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R); +} +#else +#define _mm_cvt_roundsd_u64(A, B) \ + ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B)) + +#define _mm_cvt_roundsd_si64(A, B) \ + ((long long)__builtin_ia32_vcvtsd2si64(A, B)) + +#define _mm_cvt_roundsd_i64(A, B) \ + ((long long)__builtin_ia32_vcvtsd2si64(A, B)) + +#define _mm_cvtt_roundsd_u64(A, B) \ + ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B)) + +#define _mm_cvtt_roundsd_si64(A, B) \ + ((long long)__builtin_ia32_vcvttsd2si64(A, B)) + +#define _mm_cvtt_roundsd_i64(A, B) \ + ((long long)__builtin_ia32_vcvttsd2si64(A, B)) +#endif +#endif + +#ifdef __OPTIMIZE__ +extern __inline unsigned +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsd_u32 (__m128d __A, const int __R) +{ + return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsd_si32 (__m128d __A, const int __R) +{ + return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsd_i32 (__m128d __A, const int __R) +{ + return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R); +} + +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsd_u32 (__m128d __A, const int __R) +{ + return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsd_i32 (__m128d __A, const int __R) +{ + return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsd_si32 (__m128d __A, const int __R) +{ + return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R); +} +#else +#define _mm_cvt_roundsd_u32(A, B) \ + ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B)) + +#define _mm_cvt_roundsd_si32(A, B) \ + ((int)__builtin_ia32_vcvtsd2si32(A, B)) + +#define _mm_cvt_roundsd_i32(A, B) \ + ((int)__builtin_ia32_vcvtsd2si32(A, B)) + +#define _mm_cvtt_roundsd_u32(A, B) \ + ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B)) + +#define _mm_cvtt_roundsd_si32(A, B) \ + ((int)__builtin_ia32_vcvttsd2si32(A, B)) + +#define _mm_cvtt_roundsd_i32(A, B) \ + ((int)__builtin_ia32_vcvttsd2si32(A, B)) +#endif + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_movedup_pd (__m512d __A) +{ + return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd 
(), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_unpacklo_pd (__m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_unpackhi_pd (__m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, + (__v8df) __B, + 
(__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_unpackhi_ps (__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundps_pd (__m256 __A, const int __R) +{ + return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R) +{ + return 
(__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_ps (__m256i __A, const int __R) +{ + return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A, + const int __R) +{ + return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R) +{ + return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundps_ph (__m512 __A, const int __I) +{ + return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, + __I, + (__v16hi) + _mm256_setzero_si256 (), + -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtps_ph (__m512 __A, const int __I) +{ + return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, + __I, + (__v16hi) + _mm256_setzero_si256 (), + -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A, + const int __I) +{ + return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, + __I, + (__v16hi) __U, + (__mmask16) __W); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, 
const int __I) +{ + return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, + __I, + (__v16hi) __U, + (__mmask16) __W); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I) +{ + return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, + __I, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __W); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I) +{ + return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, + __I, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __W); +} +#else +#define _mm512_cvt_roundps_pd(A, B) \ + (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), -1, B) + +#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \ + (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B) + +#define _mm512_maskz_cvt_roundps_pd(U, A, B) \ + (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B) + +#define _mm512_cvt_roundph_ps(A, B) \ + (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), -1, B) + +#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \ + (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B) + +#define _mm512_maskz_cvt_roundph_ps(U, A, B) \ + (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B) + +#define _mm512_cvt_roundps_ph(A, I) \ + ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ + (__v16hi)_mm256_setzero_si256 (), -1)) +#define _mm512_cvtps_ph(A, I) \ + ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ + (__v16hi)_mm256_setzero_si256 (), -1)) +#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \ + ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ + (__v16hi)(__m256i)(U), 
(__mmask16) (W))) +#define _mm512_mask_cvtps_ph(U, W, A, I) \ + ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ + (__v16hi)(__m256i)(U), (__mmask16) (W))) +#define _mm512_maskz_cvt_roundps_ph(W, A, I) \ + ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ + (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W))) +#define _mm512_maskz_cvtps_ph(W, A, I) \ + ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ + (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W))) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundpd_ps (__m512d __A, const int __R) +{ + return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) -1, __R); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, + (__v8sf) __W, + (__mmask8) __U, __R); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U, __R); +} + +#else +#define _mm512_cvt_roundpd_ps(A, B) \ + (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), -1, B) + +#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \ + (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B) + +#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \ + (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B) +#endif + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_stream_si512 (__m512i * __P, __m512i __A) +{ + __builtin_ia32_movntdq512 
((__v8di *) __P, (__v8di) __A); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_stream_ps (float *__P, __m512 __A) +{ + __builtin_ia32_movntps512 (__P, (__v16sf) __A); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_stream_pd (double *__P, __m512d __A) +{ + __builtin_ia32_movntpd512 (__P, (__v8df) __A); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getexp_round_ps (__m512 __A, const int __R) +{ + return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getexp_round_pd (__m512d __A, const int __R) +{ + return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + +/* Constants for mantissa extraction */ +typedef enum +{ + _MM_MANT_NORM_1_2, /* interval [1, 2) */ + _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ + _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ + _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */ +} _MM_MANTISSA_NORM_ENUM; + +typedef enum +{ + _MM_MANT_SIGN_src, /* sign = sign(SRC) */ + _MM_MANT_SIGN_zero, /* sign = 0 */ + _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ +} _MM_MANTISSA_SIGN_ENUM; + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C, const int __R) +{ + return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, + (__C << 2) | __B, + _mm512_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C, const int __R) +{ + return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, + (__C << 2) | __B, + (__v8df) __W, __U, + __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C, const int __R) +{ + return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, + (__C << 2) | __B, + (__v8df) + _mm512_setzero_pd (), + __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C, const int __R) +{ + return (__m512) 
__builtin_ia32_getmantps512_mask ((__v16sf) __A, + (__C << 2) | __B, + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C, const int __R) +{ + return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, + (__C << 2) | __B, + (__v16sf) __W, __U, + __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C, const int __R) +{ + return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, + (__C << 2) | __B, + (__v16sf) + _mm512_setzero_ps (), + __U, __R); +} + +#else +#define _mm512_getmant_round_pd(X, B, C, R) \ + ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ + (int)(((C)<<2) | (B)), \ + (__v8df)(__m512d)_mm512_setzero_pd(), \ + (__mmask8)-1,\ + (R))) + +#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \ + ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ + (int)(((C)<<2) | (B)), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U),\ + (R))) + +#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \ + ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ + (int)(((C)<<2) | (B)), \ + (__v8df)(__m512d)_mm512_setzero_pd(), \ + (__mmask8)(U),\ + (R))) +#define _mm512_getmant_round_ps(X, B, C, R) \ + ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ + (int)(((C)<<2) | (B)), \ + (__v16sf)(__m512)_mm512_setzero_ps(), \ + (__mmask16)-1,\ + (R))) + +#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \ + ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ + (int)(((C)<<2) | (B)), \ + (__v16sf)(__m512)(W), \ + (__mmask16)(U),\ + (R))) + +#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \ + 
((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ + (int)(((C)<<2) | (B)), \ + (__v16sf)(__m512)_mm512_setzero_ps(), \ + (__mmask16)(U),\ + (R))) +#define _mm512_getexp_round_ps(A, R) \ + ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, R)) + +#define _mm512_mask_getexp_round_ps(W, U, A, R) \ + ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(W), (__mmask16)(U), R)) + +#define _mm512_maskz_getexp_round_ps(U, A, R) \ + ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R)) + +#define _mm512_getexp_round_pd(A, R) \ + ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), (__mmask8)-1, R)) + +#define _mm512_mask_getexp_round_pd(W, U, A, R) \ + ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(W), (__mmask8)(U), R)) + +#define _mm512_maskz_getexp_round_pd(U, A, R) \ + ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R)) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, + (__v16sf) __A, -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C, + const int __imm, const int __R) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm, + (__v16sf) __A, + (__mmask16) __B, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B, + const int __imm, const int __R) +{ + return (__m512) 
__builtin_ia32_rndscaleps_mask ((__v16sf) __B, + __imm, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __A, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, + (__v8df) __A, -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B, + __m512d __C, const int __imm, const int __R) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm, + (__v8df) __A, + (__mmask8) __B, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B, + const int __imm, const int __R) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, + __imm, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __A, __R); +} +#else +#define _mm512_roundscale_round_ps(A, B, R) \ + ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\ + (__v16sf)(__m512)(A), (__mmask16)(-1), R)) +#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \ + ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \ + (int)(D), \ + (__v16sf)(__m512)(A), \ + (__mmask16)(B), R)) +#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \ + ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \ + (int)(C), \ + (__v16sf)_mm512_setzero_ps(),\ + (__mmask16)(A), R)) +#define _mm512_roundscale_round_pd(A, B, R) \ + ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\ + (__v8df)(__m512d)(A), (__mmask8)(-1), R)) +#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \ + ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \ + (int)(D), \ + (__v8df)(__m512d)(A), \ + (__mmask8)(B), R)) +#define 
_mm512_maskz_roundscale_round_pd(A, B, C, R) \ + ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \ + (int)(C), \ + (__v8df)_mm512_setzero_pd(),\ + (__mmask8)(A), R)) +#endif + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_floor_ps (__m512 __A) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_FLOOR, + (__v16sf) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_floor_pd (__m512d __A) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_FLOOR, + (__v8df) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ceil_ps (__m512 __A) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_CEIL, + (__v16sf) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ceil_pd (__m512d __A) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_CEIL, + (__v8df) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_FLOOR, + (__v16sf) __W, __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_FLOOR, + (__v8df) __W, __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, 
__m512 __A) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_CEIL, + (__v16sf) __W, __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_CEIL, + (__v8df) __W, __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_floor_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_FLOOR, + (__v16sf) + _mm512_setzero_ps (), + __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_floor_pd (__mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_FLOOR, + (__v8df) + _mm512_setzero_pd (), + __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ceil_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_CEIL, + (__v16sf) + _mm512_setzero_ps (), + __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ceil_pd (__mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_CEIL, + (__v8df) + _mm512_setzero_pd (), + __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_floor_round_ps (__m512 __A, const int __R) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_FLOOR, + (__v16sf) __A, -1, __R); +} + +extern __inline __m512d 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_floor_round_pd (__m512d __A, const int __R) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_FLOOR, + (__v8df) __A, -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ceil_round_ps (__m512 __A, const int __R) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_CEIL, + (__v16sf) __A, -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ceil_round_pd (__m512d __A, const int __R) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_CEIL, + (__v8df) __A, -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_floor_round_ps (__m512 __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_FLOOR, + (__v16sf) __W, __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_floor_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_FLOOR, + (__v8df) __W, __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ceil_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_CEIL, + (__v16sf) __W, __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ceil_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_CEIL, + (__v8df) __W, __U, __R); +} + +extern __inline __m512 +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_floor_round_ps (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_FLOOR, + (__v16sf) + _mm512_setzero_ps (), + __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_floor_round_pd (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_FLOOR, + (__v8df) + _mm512_setzero_pd (), + __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ceil_round_ps (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_CEIL, + (__v16sf) + _mm512_setzero_ps (), + __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ceil_round_pd (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_CEIL, + (__v8df) + _mm512_setzero_pd (), + __U, __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm) +{ + return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, + (__v16si) __B, __imm, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A, + __m512i __B, const int __imm) +{ + return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, + (__v16si) __B, __imm, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B, + const 
int __imm) +{ + return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, + (__v16si) __B, __imm, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm) +{ + return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, + (__v8di) __B, __imm, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A, + __m512i __B, const int __imm) +{ + return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, + (__v8di) __B, __imm, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B, + const int __imm) +{ + return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, + (__v8di) __B, __imm, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} +#else +#define _mm512_floor_round_ps(A, R) \ + ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), \ + _MM_FROUND_FLOOR, \ + (__v16sf)(__m512)(A), \ + (__mmask16)(-1), R)) +#define _mm512_mask_floor_round_ps(A, B, C, R) \ + ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \ + _MM_FROUND_FLOOR, \ + (__v16sf)(__m512)(A), \ + (__mmask16)(B), R)) +#define _mm512_maskz_floor_round_ps(A, B, R) \ + ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \ + _MM_FROUND_FLOOR, \ + (__v16sf)_mm512_setzero_ps(),\ + (__mmask16)(A), R)) +#define _mm512_floor_round_pd(A, R) \ + ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), \ + _MM_FROUND_FLOOR, \ + (__v8df)(__m512d)(A), \ + (__mmask8)(-1), R)) +#define _mm512_mask_floor_round_pd(A, B, C, R) \ + ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \ + 
_MM_FROUND_FLOOR, \ + (__v8df)(__m512d)(A), \ + (__mmask8)(B), R)) +#define _mm512_maskz_floor_round_pd(A, B, R) \ + ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \ + _MM_FROUND_FLOOR, \ + (__v8df)_mm512_setzero_pd(),\ + (__mmask8)(A), R)) +#define _mm512_ceil_round_ps(A, R) \ + ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), \ + _MM_FROUND_CEIL, \ + (__v16sf)(__m512)(A), \ + (__mmask16)(-1), R)) +#define _mm512_mask_ceil_round_ps(A, B, C, R) \ + ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \ + _MM_FROUND_CEIL, \ + (__v16sf)(__m512)(A), \ + (__mmask16)(B), R)) +#define _mm512_maskz_ceil_round_ps(A, B, R) \ + ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \ + _MM_FROUND_CEIL, \ + (__v16sf)_mm512_setzero_ps(),\ + (__mmask16)(A), R)) +#define _mm512_ceil_round_pd(A, R) \ + ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), \ + _MM_FROUND_CEIL, \ + (__v8df)(__m512d)(A), \ + (__mmask8)(-1), R)) +#define _mm512_mask_ceil_round_pd(A, B, C, R) \ + ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \ + _MM_FROUND_CEIL, \ + (__v8df)(__m512d)(A), \ + (__mmask8)(B), R)) +#define _mm512_maskz_ceil_round_pd(A, B, R) \ + ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \ + _MM_FROUND_CEIL, \ + (__v8df)_mm512_setzero_pd(),\ + (__mmask8)(A), R)) + +#define _mm512_alignr_epi32(X, Y, C) \ + ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(X), \ + (__mmask16)-1)) + +#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \ + ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \ + (__mmask16)(U))) + +#define _mm512_maskz_alignr_epi32(U, X, Y, C) \ + ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)_mm512_setzero_si512 (),\ + (__mmask16)(U))) + +#define 
_mm512_alignr_epi64(X, Y, C) \ + ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ + (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(X), (__mmask8)-1)) + +#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \ + ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ + (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U))) + +#define _mm512_maskz_alignr_epi64(U, X, Y, C) \ + ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ + (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)_mm512_setzero_si512 (),\ + (__mmask8)(U))) +#endif + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B) +{ + return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A, + (__v16si) __B, __U); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A, + (__v8di) __B, __U); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B) +{ + return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B) +{ + return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) +{ + return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A, + (__v16si) __B, __U); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) +{ + return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A, + (__v8di) __B, __U); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B) +{ + return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +#define _MM_CMPINT_EQ 0x0 +#define _MM_CMPINT_LT 0x1 +#define _MM_CMPINT_LE 0x2 +#define _MM_CMPINT_UNUSED 0x3 +#define _MM_CMPINT_NE 0x4 +#define _MM_CMPINT_NLT 0x5 +#define _MM_CMPINT_GE 0x5 +#define _MM_CMPINT_NLE 0x6 +#define _MM_CMPINT_GT 0x6 + +#ifdef __OPTIMIZE__ +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, + (__v8di) __Y, __P, + (__mmask8) -1); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P) +{ + return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, + (__v16si) __Y, __P, + (__mmask16) -1); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, + (__v8di) __Y, __P, + (__mmask8) -1); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P) +{ + return (__mmask16) __builtin_ia32_ucmpd512_mask 
((__v16si) __X, + (__v16si) __Y, __P, + (__mmask16) -1); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P, + const int __R) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, __P, + (__mmask8) -1, __R); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, __P, + (__mmask16) -1, __R); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y, + const int __P) +{ + return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, + (__v8di) __Y, __P, + (__mmask8) __U); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y, + const int __P) +{ + return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, + (__v16si) __Y, __P, + (__mmask16) __U); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y, + const int __P) +{ + return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, + (__v8di) __Y, __P, + (__mmask8) __U); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y, + const int __P) +{ + return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, + (__v16si) __Y, __P, + (__mmask16) __U); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d 
__Y, + const int __P, const int __R) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, __P, + (__mmask8) __U, __R); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, + const int __P, const int __R) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, __P, + (__mmask16) __U, __R); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R) +{ + return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, + (__v2df) __Y, __P, + (__mmask8) -1, __R); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, + const int __P, const int __R) +{ + return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, + (__v2df) __Y, __P, + (__mmask8) __M, __R); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R) +{ + return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, + (__v4sf) __Y, __P, + (__mmask8) -1, __R); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, + const int __P, const int __R) +{ + return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, + (__v4sf) __Y, __P, + (__mmask8) __M, __R); +} + +#else +#define _mm512_cmp_epi64_mask(X, Y, P) \ + ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \ + (__v8di)(__m512i)(Y), (int)(P),\ + (__mmask8)-1)) + +#define _mm512_cmp_epi32_mask(X, Y, P) \ + ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(P),\ + (__mmask16)-1)) 
+ +#define _mm512_cmp_epu64_mask(X, Y, P) \ + ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ + (__v8di)(__m512i)(Y), (int)(P),\ + (__mmask8)-1)) + +#define _mm512_cmp_epu32_mask(X, Y, P) \ + ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(P),\ + (__mmask16)-1)) + +#define _mm512_cmp_round_pd_mask(X, Y, P, R) \ + ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (int)(P),\ + (__mmask8)-1, R)) + +#define _mm512_cmp_round_ps_mask(X, Y, P, R) \ + ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (int)(P),\ + (__mmask16)-1, R)) + +#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \ + ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \ + (__v8di)(__m512i)(Y), (int)(P),\ + (__mmask8)M)) + +#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \ + ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(P),\ + (__mmask16)M)) + +#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \ + ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ + (__v8di)(__m512i)(Y), (int)(P),\ + (__mmask8)M)) + +#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \ + ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(P),\ + (__mmask16)M)) + +#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \ + ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (int)(P),\ + (__mmask8)M, R)) + +#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \ + ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (int)(P),\ + (__mmask16)M, R)) + +#define _mm_cmp_round_sd_mask(X, Y, P, R) \ + ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (int)(P),\ + (__mmask8)-1, R)) + +#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ + ((__mmask8) __builtin_ia32_cmpsd_mask 
((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (int)(P),\ + (M), R)) + +#define _mm_cmp_round_ss_mask(X, Y, P, R) \ + ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (int)(P), \ + (__mmask8)-1, R)) + +#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ + ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (int)(P), \ + (M), R)) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale) +{ + __m512 v1_old = _mm512_setzero_ps (); + __mmask16 mask = 0xFFFF; + + return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old, + __addr, + (__v16si) __index, + mask, __scale); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask, + __m512i __index, float const *__addr, int __scale) +{ + return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old, + __addr, + (__v16si) __index, + __mask, __scale); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale) +{ + __m512d v1_old = _mm512_setzero_pd (); + __mmask8 mask = 0xFF; + + return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old, + __addr, + (__v8si) __index, mask, + __scale); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask, + __m256i __index, double const *__addr, int __scale) +{ + return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old, + __addr, + (__v8si) __index, + __mask, __scale); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale) +{ + __m256 v1_old = 
_mm256_setzero_ps (); + __mmask8 mask = 0xFF; + + return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old, + __addr, + (__v8di) __index, mask, + __scale); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask, + __m512i __index, float const *__addr, int __scale) +{ + return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old, + __addr, + (__v8di) __index, + __mask, __scale); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale) +{ + __m512d v1_old = _mm512_setzero_pd (); + __mmask8 mask = 0xFF; + + return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old, + __addr, + (__v8di) __index, mask, + __scale); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask, + __m512i __index, double const *__addr, int __scale) +{ + return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old, + __addr, + (__v8di) __index, + __mask, __scale); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale) +{ + __m512i v1_old = _mm512_setzero_si512 (); + __mmask16 mask = 0xFFFF; + + return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old, + __addr, + (__v16si) __index, + mask, __scale); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask, + __m512i __index, int const *__addr, int __scale) +{ + return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old, + __addr, + (__v16si) __index, + __mask, __scale); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale) +{ + __m512i v1_old = _mm512_setzero_si512 (); + __mmask8 mask = 0xFF; + + return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old, + __addr, + (__v8si) __index, mask, + __scale); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask, + __m256i __index, long long const *__addr, + int __scale) +{ + return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old, + __addr, + (__v8si) __index, + __mask, __scale); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale) +{ + __m256i v1_old = _mm256_setzero_si256 (); + __mmask8 mask = 0xFF; + + return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old, + __addr, + (__v8di) __index, + mask, __scale); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask, + __m512i __index, int const *__addr, int __scale) +{ + return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old, + __addr, + (__v8di) __index, + __mask, __scale); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale) +{ + __m512i v1_old = _mm512_setzero_si512 (); + __mmask8 mask = 0xFF; + + return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old, + __addr, + (__v8di) __index, mask, + __scale); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask, + __m512i __index, long long const *__addr, + int __scale) +{ + return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old, + __addr, + (__v8di) __index, + 
__mask, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale) +{ + __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF, + (__v16si) __index, (__v16sf) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask, + __m512i __index, __m512 __v1, int __scale) +{ + __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index, + (__v16sf) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1, + int __scale) +{ + __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF, + (__v8si) __index, (__v8df) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask, + __m256i __index, __m512d __v1, int __scale) +{ + __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index, + (__v8df) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale) +{ + __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF, + (__v8di) __index, (__v8sf) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask, + __m512i __index, __m256 __v1, int __scale) +{ + __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index, + (__v8sf) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1, + int __scale) +{ + __builtin_ia32_scatterdiv8df (__addr, 
(__mmask8) 0xFF, + (__v8di) __index, (__v8df) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask, + __m512i __index, __m512d __v1, int __scale) +{ + __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index, + (__v8df) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i32scatter_epi32 (int *__addr, __m512i __index, + __m512i __v1, int __scale) +{ + __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF, + (__v16si) __index, (__v16si) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask, + __m512i __index, __m512i __v1, int __scale) +{ + __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index, + (__v16si) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i32scatter_epi64 (long long *__addr, __m256i __index, + __m512i __v1, int __scale) +{ + __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF, + (__v8si) __index, (__v8di) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask, + __m256i __index, __m512i __v1, int __scale) +{ + __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index, + (__v8di) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i64scatter_epi32 (int *__addr, __m512i __index, + __m256i __v1, int __scale) +{ + __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF, + (__v8di) __index, (__v8si) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask, + 
__m512i __index, __m256i __v1, int __scale) +{ + __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index, + (__v8si) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i64scatter_epi64 (long long *__addr, __m512i __index, + __m512i __v1, int __scale) +{ + __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF, + (__v8di) __index, (__v8di) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask, + __m512i __index, __m512i __v1, int __scale) +{ + __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index, + (__v8di) __v1, __scale); +} +#else +#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \ + (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_setzero_ps(), \ + (float const *)ADDR, \ + (__v16si)(__m512i)INDEX, \ + (__mmask16)0xFFFF, (int)SCALE) + +#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ + (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \ + (float const *)ADDR, \ + (__v16si)(__m512i)INDEX, \ + (__mmask16)MASK, (int)SCALE) + +#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \ + (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_setzero_pd(), \ + (double const *)ADDR, \ + (__v8si)(__m256i)INDEX, \ + (__mmask8)0xFF, (int)SCALE) + +#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ + (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \ + (double const *)ADDR, \ + (__v8si)(__m256i)INDEX, \ + (__mmask8)MASK, (int)SCALE) + +#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \ + (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_setzero_ps(), \ + (float const *)ADDR, \ + (__v8di)(__m512i)INDEX, \ + (__mmask8)0xFF, (int)SCALE) + +#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ + (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \ + (float const *)ADDR, \ + 
(__v8di)(__m512i)INDEX, \ + (__mmask8)MASK, (int)SCALE) + +#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \ + (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_setzero_pd(), \ + (double const *)ADDR, \ + (__v8di)(__m512i)INDEX, \ + (__mmask8)0xFF, (int)SCALE) + +#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ + (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \ + (double const *)ADDR, \ + (__v8di)(__m512i)INDEX, \ + (__mmask8)MASK, (int)SCALE) + +#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \ + (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_setzero_si512 (), \ + (int const *)ADDR, \ + (__v16si)(__m512i)INDEX, \ + (__mmask16)0xFFFF, (int)SCALE) + +#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ + (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \ + (int const *)ADDR, \ + (__v16si)(__m512i)INDEX, \ + (__mmask16)MASK, (int)SCALE) + +#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \ + (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_setzero_si512 (), \ + (long long const *)ADDR, \ + (__v8si)(__m256i)INDEX, \ + (__mmask8)0xFF, (int)SCALE) + +#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ + (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \ + (long long const *)ADDR, \ + (__v8si)(__m256i)INDEX, \ + (__mmask8)MASK, (int)SCALE) + +#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \ + (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_setzero_si256(), \ + (int const *)ADDR, \ + (__v8di)(__m512i)INDEX, \ + (__mmask8)0xFF, (int)SCALE) + +#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ + (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \ + (int const *)ADDR, \ + (__v8di)(__m512i)INDEX, \ + (__mmask8)MASK, (int)SCALE) + +#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \ + (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_setzero_si512 (), \ + (long long const *)ADDR, \ + 
(__v8di)(__m512i)INDEX, \ + (__mmask8)0xFF, (int)SCALE) + +#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ + (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \ + (long long const *)ADDR, \ + (__v8di)(__m512i)INDEX, \ + (__mmask8)MASK, (int)SCALE) + +#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \ + __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)0xFFFF, \ + (__v16si)(__m512i)INDEX, \ + (__v16sf)(__m512)V1, (int)SCALE) + +#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ + __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)MASK, \ + (__v16si)(__m512i)INDEX, \ + (__v16sf)(__m512)V1, (int)SCALE) + +#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \ + __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)0xFF, \ + (__v8si)(__m256i)INDEX, \ + (__v8df)(__m512d)V1, (int)SCALE) + +#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ + __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)MASK, \ + (__v8si)(__m256i)INDEX, \ + (__v8df)(__m512d)V1, (int)SCALE) + +#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \ + __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)0xFF, \ + (__v8di)(__m512i)INDEX, \ + (__v8sf)(__m256)V1, (int)SCALE) + +#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ + __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)MASK, \ + (__v8di)(__m512i)INDEX, \ + (__v8sf)(__m256)V1, (int)SCALE) + +#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \ + __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)0xFF, \ + (__v8di)(__m512i)INDEX, \ + (__v8df)(__m512d)V1, (int)SCALE) + +#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ + __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)MASK, \ + (__v8di)(__m512i)INDEX, \ + (__v8df)(__m512d)V1, (int)SCALE) + +#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \ + __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)0xFFFF, \ + (__v16si)(__m512i)INDEX, \ + 
(__v16si)(__m512i)V1, (int)SCALE) + +#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ + __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)MASK, \ + (__v16si)(__m512i)INDEX, \ + (__v16si)(__m512i)V1, (int)SCALE) + +#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \ + __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)0xFF, \ + (__v8si)(__m256i)INDEX, \ + (__v8di)(__m512i)V1, (int)SCALE) + +#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ + __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)MASK, \ + (__v8si)(__m256i)INDEX, \ + (__v8di)(__m512i)V1, (int)SCALE) + +#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \ + __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)0xFF, \ + (__v8di)(__m512i)INDEX, \ + (__v8si)(__m256i)V1, (int)SCALE) + +#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ + __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)MASK, \ + (__v8di)(__m512i)INDEX, \ + (__v8si)(__m256i)V1, (int)SCALE) + +#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \ + __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)0xFF, \ + (__v8di)(__m512i)INDEX, \ + (__v8di)(__m512i)V1, (int)SCALE) + +#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ + __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)MASK, \ + (__v8di)(__m512i)INDEX, \ + (__v8di)(__m512i)V1, (int)SCALE) +#endif + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline void 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) +{ + __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) +{ + __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) +{ + __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_compress_epi32 
(__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) +{ + __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, + (__mmask16) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P) +{ + return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P) +{ + return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + 
return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P) +{ + return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P) +{ + return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) +{ + return (__m512i) + 
__builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P, + (__v16si) + _mm512_setzero_si512 + (), (__mmask16) __U); +} + +/* Mask arithmetic operations */ +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_kand (__mmask16 __A, __mmask16 __B) +{ + return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_kandn (__mmask16 __A, __mmask16 __B) +{ + return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_kor (__mmask16 __A, __mmask16 __B) +{ + return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); +} + 
+extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_kortestz (__mmask16 __A, __mmask16 __B) +{ + return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A, + (__mmask16) __B); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_kortestc (__mmask16 __A, __mmask16 __B) +{ + return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A, + (__mmask16) __B); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_kxnor (__mmask16 __A, __mmask16 __B) +{ + return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_kxor (__mmask16 __A, __mmask16 __B) +{ + return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_knot (__mmask16 __A) +{ + return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_kunpackb (__mmask16 __A, __mmask16 __B) +{ + return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D, + const int __imm) +{ + return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C, + (__v4si) __D, + __imm, + (__v16si) + _mm512_setzero_si512 (), + __B); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D, + const int __imm) +{ + return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, + (__v4sf) __D, + __imm, + (__v16sf) + _mm512_setzero_ps 
(), __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C, + __m128i __D, const int __imm) +{ + return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C, + (__v4si) __D, + __imm, + (__v16si) __A, + __B); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C, + __m128 __D, const int __imm) +{ + return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, + (__v4sf) __D, + __imm, + (__v16sf) __A, __B); +} +#else +#define _mm512_maskz_insertf32x4(A, X, Y, C) \ + ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ + (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \ + (__mmask8)(A))) + +#define _mm512_maskz_inserti32x4(A, X, Y, C) \ + ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ + (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \ + (__mmask8)(A))) + +#define _mm512_mask_insertf32x4(A, B, X, Y, C) \ + ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ + (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \ + (__mmask8)(B))) + +#define _mm512_mask_inserti32x4(A, B, X, Y, C) \ + ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ + (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \ + (__mmask8)(B))) +#endif + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, + (__v8di) 
__B, + (__v8di) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_epi64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_epu64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxuq512_mask 
((__v8di) __A, + (__v8di) __B, + (__v8di) __W, __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_epu64 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, + 
(__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_epu32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_epu32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) +{ + return (__m512i) 
__builtin_ia32_pminud512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __W, __M); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_unpacklo_ps (__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W) +{ + return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W) +{ + return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W) +{ + return 
(__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W) +{ + return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R) +{ + return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R) +{ + return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R); +} +#else +#define _mm_comi_round_ss(A, B, C, D)\ +__builtin_ia32_vcomiss(A, B, C, D) +#define _mm_comi_round_sd(A, B, C, D)\ +__builtin_ia32_vcomisd(A, B, C, D) +#endif + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sqrt_pd (__m512d __A) +{ + return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_sqrt_ps (__m512 __A) +{ + return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_add_pd (__m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_add_ps (__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + 
(__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sub_pd (__m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sub_ps (__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mul_pd (__m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mul_ps (__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, 
__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_div_pd (__m512d __M, __m512d __V) +{ + return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, + (__v8df) __V, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V) +{ + return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, + (__v8df) __V, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V) +{ + return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, + (__v8df) __V, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_div_ps (__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, + (__v16sf) 
__B, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_pd (__m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_ps (__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_pd (__m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_ps (__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_ps (__mmask16 
__U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_scalef_pd (__m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_scalef_ps (__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) +{ + return (__m512) 
__builtin_ia32_scalefps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) 
__C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern 
__inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsubadd_pd 
(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 
+{ + return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, + 
(__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) +{ + return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + 
_MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, + (__v8df) __B, + -(__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) +{ + return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, + (__v16sf) __B, + -(__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttpd_epi32 (__m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttpd_epu32 (__m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtpd_epi32 (__m512d __A) +{ + return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + 
_MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtpd_epu32 (__m512d __A) +{ + return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttps_epi32 (__m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) +{ + return (__m512i) 
__builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttps_epu32 (__m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtps_epi32 (__m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtps_epu32 (__m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __x86_64__ +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtu64_ss (__m128 __A, unsigned long long __B) +{ + return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtu64_sd (__m128d __A, unsigned long long __B) +{ + return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +#endif + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtu32_ss (__m128 __A, unsigned __B) +{ + return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi32_ps (__m512i __A) +{ + return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) +{ + return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) +{ + return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepu32_ps (__m512i __A) +{ + return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) +{ + return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) +{ + return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm) +{ + return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8di) __C, + __imm, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B, + __m512i __C, const int __imm) +{ + return (__m512d) 
__builtin_ia32_fixupimmpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8di) __C, + __imm, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B, + __m512i __C, const int __imm) +{ + return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8di) __C, + __imm, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm) +{ + return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16si) __C, + __imm, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B, + __m512i __C, const int __imm) +{ + return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16si) __C, + __imm, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B, + __m512i __C, const int __imm) +{ + return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16si) __C, + __imm, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm) +{ + return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, + (__v2df) __B, + (__v2di) __C, __imm, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fixupimm_sd 
(__m128d __A, __mmask8 __U, __m128d __B, + __m128i __C, const int __imm) +{ + return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, + (__v2df) __B, + (__v2di) __C, __imm, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B, + __m128i __C, const int __imm) +{ + return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A, + (__v2df) __B, + (__v2di) __C, + __imm, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm) +{ + return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4si) __C, __imm, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B, + __m128i __C, const int __imm) +{ + return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4si) __C, __imm, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B, + __m128i __C, const int __imm) +{ + return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A, + (__v4sf) __B, + (__v4si) __C, __imm, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +#else +#define _mm512_fixupimm_pd(X, Y, Z, C) \ + ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ + (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \ + ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ + 
(__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \ + ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ + (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_fixupimm_ps(X, Y, Z, C) \ + ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ + (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \ + ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ + (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \ + ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ + (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_fixupimm_sd(X, Y, Z, C) \ + ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ + (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \ + ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ + (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \ + ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ + (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_fixupimm_ss(X, Y, Z, C) \ + ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ + (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \ + ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ + (__mmask8)(U), 
_MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \ + ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ + (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) +#endif + +#ifdef __x86_64__ +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_u64 (__m128 __A) +{ + return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) + __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttss_u64 (__m128 __A) +{ + return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) + __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttss_i64 (__m128 __A) +{ + return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, + _MM_FROUND_CUR_DIRECTION); +} +#endif /* __x86_64__ */ + +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_u32 (__m128 __A) +{ + return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttss_u32 (__m128 __A) +{ + return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttss_i32 (__m128 __A) +{ + return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __x86_64__ +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsd_u64 (__m128d __A) +{ + return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) + __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline unsigned long long +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsd_u64 (__m128d __A) +{ + return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) + __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsd_i64 (__m128d __A) +{ + return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, + _MM_FROUND_CUR_DIRECTION); +} +#endif /* __x86_64__ */ + +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsd_u32 (__m128d __A) +{ + return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsd_u32 (__m128d __A) +{ + return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsd_i32 (__m128d __A) +{ + return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtps_pd (__m256 __A) +{ + return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) +{ + return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) +{ + return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern 
__inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_ps (__m256i __A) +{ + return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) +{ + return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) +{ + return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtpd_ps (__m512d __A) +{ + return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) +{ + return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, + (__v8sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) +{ + return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getexp_ps (__m512 __A) +{ + return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + 
(__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getexp_pd (__m512d __A) +{ + return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, + (__C << 2) | __B, + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getmant_pd 
(__m512d __W, __mmask8 __U, __m512d __A, + _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, + (__C << 2) | __B, + (__v8df) __W, __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A, + _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, + (__C << 2) | __B, + (__v8df) + _mm512_setzero_pd (), + __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, + (__C << 2) | __B, + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A, + _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, + (__C << 2) | __B, + (__v16sf) __W, __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A, + _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, + (__C << 2) | __B, + (__v16sf) + _mm512_setzero_ps (), + __U, + _MM_FROUND_CUR_DIRECTION); +} + +#else +#define _mm512_getmant_pd(X, B, C) \ + ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ + (int)(((C)<<2) | (B)), \ + (__v8df)(__m512d)_mm512_setzero_pd(), \ + (__mmask8)-1,\ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_getmant_pd(W, U, X, B, C) \ + 
((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ + (int)(((C)<<2) | (B)), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U),\ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_getmant_pd(U, X, B, C) \ + ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ + (int)(((C)<<2) | (B)), \ + (__v8df)(__m512d)_mm512_setzero_pd(), \ + (__mmask8)(U),\ + _MM_FROUND_CUR_DIRECTION)) +#define _mm512_getmant_ps(X, B, C) \ + ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ + (int)(((C)<<2) | (B)), \ + (__v16sf)(__m512)_mm512_setzero_ps(), \ + (__mmask16)-1,\ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_getmant_ps(W, U, X, B, C) \ + ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ + (int)(((C)<<2) | (B)), \ + (__v16sf)(__m512)(W), \ + (__mmask16)(U),\ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_getmant_ps(U, X, B, C) \ + ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ + (int)(((C)<<2) | (B)), \ + (__v16sf)(__m512)_mm512_setzero_ps(), \ + (__mmask16)(U),\ + _MM_FROUND_CUR_DIRECTION)) +#define _mm512_getexp_ps(A) \ + ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_getexp_ps(W, U, A) \ + ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_getexp_ps(U, A) \ + ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_getexp_pd(A) \ + ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_getexp_pd(W, U, A) \ + ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define 
_mm512_maskz_getexp_pd(U, A) \ + ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_roundscale_ps (__m512 __A, const int __imm) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, + (__v16sf) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C, + const int __imm) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm, + (__v16sf) __A, + (__mmask16) __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B, + __imm, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_roundscale_pd (__m512d __A, const int __imm) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, + (__v8df) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C, + const int __imm) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm, + (__v8df) __A, + (__mmask8) __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, + __imm, + (__v8df) + 
_mm512_setzero_pd (), + (__mmask8) __A, + _MM_FROUND_CUR_DIRECTION); +} + +#else +#define _mm512_roundscale_ps(A, B) \ + ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\ + (__v16sf)(__m512)(A), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION)) +#define _mm512_mask_roundscale_ps(A, B, C, D) \ + ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \ + (int)(D), \ + (__v16sf)(__m512)(A), \ + (__mmask16)(B), _MM_FROUND_CUR_DIRECTION)) +#define _mm512_maskz_roundscale_ps(A, B, C) \ + ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \ + (int)(C), \ + (__v16sf)_mm512_setzero_ps(),\ + (__mmask16)(A), _MM_FROUND_CUR_DIRECTION)) +#define _mm512_roundscale_pd(A, B) \ + ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\ + (__v8df)(__m512d)(A), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) +#define _mm512_mask_roundscale_pd(A, B, C, D) \ + ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \ + (int)(D), \ + (__v8df)(__m512d)(A), \ + (__mmask8)(B), _MM_FROUND_CUR_DIRECTION)) +#define _mm512_maskz_roundscale_pd(A, B, C) \ + ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \ + (int)(C), \ + (__v8df)_mm512_setzero_pd(),\ + (__mmask8)(A), _MM_FROUND_CUR_DIRECTION)) +#endif + +#ifdef __OPTIMIZE__ +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, __P, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, __P, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, __P, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, __P, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, + (__v2df) __Y, __P, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, + (__v2df) __Y, __P, + (__mmask8) __M, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, + (__v4sf) __Y, __P, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, + (__v4sf) __Y, __P, + (__mmask8) __M, + _MM_FROUND_CUR_DIRECTION); +} + +#else +#define _mm512_cmp_pd_mask(X, Y, P) \ + ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (int)(P),\ + (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) + +#define _mm512_cmp_ps_mask(X, Y, P) \ + ((__mmask16) 
__builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (int)(P),\ + (__mmask16)-1,_MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \ + ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (int)(P),\ + (__mmask8)M, _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \ + ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (int)(P),\ + (__mmask16)M,_MM_FROUND_CUR_DIRECTION)) + +#define _mm_cmp_sd_mask(X, Y, P) \ + ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (int)(P),\ + (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_cmp_sd_mask(M, X, Y, P) \ + ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (int)(P),\ + M,_MM_FROUND_CUR_DIRECTION)) + +#define _mm_cmp_ss_mask(X, Y, P) \ + ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (int)(P), \ + (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_cmp_ss_mask(M, X, Y, P) \ + ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (int)(P), \ + M,_MM_FROUND_CUR_DIRECTION)) +#endif + +#ifdef __DISABLE_AVX512F__ +#undef __DISABLE_AVX512F__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512F__ */ + +#endif /* _AVX512FINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512pfintrin.h b/gcc/config/i386/avx512pfintrin.h new file mode 100644 index 00000000000..4dba640458d --- /dev/null +++ b/gcc/config/i386/avx512pfintrin.h @@ -0,0 +1,130 @@ +/* Copyright (C) 2013 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _AVX512PFINTRIN_H_INCLUDED +#define _AVX512PFINTRIN_H_INCLUDED + +#ifndef __AVX512PF__ +#pragma GCC push_options +#pragma GCC target("avx512pf") +#define __DISABLE_AVX512PF__ +#endif /* __AVX512PF__ */ + +/* Internal data types for implementing the intrinsics. */ +typedef long long __v8di __attribute__ ((__vector_size__ (64))); +typedef int __v16si __attribute__ ((__vector_size__ (64))); + +/* The Intel API is flexible enough that we must allow aliasing with other + vector types, and their scalar components. 
*/ +typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); + +typedef unsigned char __mmask8; +typedef unsigned short __mmask16; + +#ifdef __OPTIMIZE__ +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_prefetch_i32gather_ps (__m512i index, __mmask16 mask, + int const *addr, int scale, int hint) +{ + __builtin_ia32_gatherpfdps (mask, (__v16si) index, addr, scale, hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_prefetch_i64gather_ps (__m512i index, __mmask8 mask, + int const *addr, int scale, int hint) +{ + __builtin_ia32_gatherpfqps (mask, (__v8di) index, addr, scale, hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_prefetch_i32scatter_ps (int const *addr, __m512i index, int scale, + int hint) +{ + __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, addr, scale, + hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_prefetch_i32scatter_ps (int const *addr, __mmask16 mask, + __m512i index, int scale, int hint) +{ + __builtin_ia32_scatterpfdps (mask, (__v16si) index, addr, scale, hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_prefetch_i64scatter_ps (int const *addr, __m512i index, int scale, + int hint) +{ + __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) index, addr, scale, + hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_prefetch_i64scatter_ps (int const *addr, __mmask16 mask, + __m512i index, int scale, int hint) +{ + __builtin_ia32_scatterpfqps (mask, (__v8di) index, addr, scale, hint); +} +#else +#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \ + __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \ 
+ (int const *)ADDR, (int)SCALE, (int)HINT) + +#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \ + __builtin_ia32_gatherpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \ + (int const *)ADDR, (int)SCALE, (int)HINT) + +#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) \ + __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \ + (int const *)ADDR, (int)SCALE, (int)HINT) + +#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \ + __builtin_ia32_scatterpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \ + (int const *)ADDR, (int)SCALE, (int)HINT) + +#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) \ + __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \ + (int const *)ADDR, (int)SCALE, (int)HINT) + +#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \ + __builtin_ia32_scatterpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \ + (int const *)ADDR, (int)SCALE, (int)HINT) +#endif + +#ifdef __DISABLE_AVX512PF__ +#undef __DISABLE_AVX512PF__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512PF__ */ + +#endif /* _AVX512PFINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index c866170bde8..86ad31ea478 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -99,6 +99,15 @@ DEF_VECTOR_TYPE (V16HI, HI) DEF_VECTOR_TYPE (V32QI, QI) DEF_VECTOR_TYPE (V4UDI, UDI, V4DI) DEF_VECTOR_TYPE (V8USI, USI, V8SI) +DEF_VECTOR_TYPE (V16UHI, UHI, V16HI) + +# AVX512F vectors +DEF_VECTOR_TYPE (V32SF, FLOAT) +DEF_VECTOR_TYPE (V16SF, FLOAT) +DEF_VECTOR_TYPE (V8DF, DOUBLE) +DEF_VECTOR_TYPE (V8DI, DI) +DEF_VECTOR_TYPE (V16SI, SI) +DEF_VECTOR_TYPE (V64QI, QI) DEF_POINTER_TYPE (PCCHAR, CHAR, CONST) DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST) @@ -123,21 +132,29 @@ DEF_POINTER_TYPE (PV2SF, V2SF) DEF_POINTER_TYPE (PV4DF, V4DF) DEF_POINTER_TYPE (PV4DI, V4DI) 
DEF_POINTER_TYPE (PV4SF, V4SF) +DEF_POINTER_TYPE (PV8DF, V8DF) DEF_POINTER_TYPE (PV8SF, V8SF) DEF_POINTER_TYPE (PV4SI, V4SI) DEF_POINTER_TYPE (PV8SI, V8SI) +DEF_POINTER_TYPE (PV8DI, V8DI) +DEF_POINTER_TYPE (PV16SI, V16SI) +DEF_POINTER_TYPE (PV16SF, V16SF) DEF_POINTER_TYPE (PCV2SI, V2SI, CONST) DEF_POINTER_TYPE (PCV2DF, V2DF, CONST) DEF_POINTER_TYPE (PCV2SF, V2SF, CONST) DEF_POINTER_TYPE (PCV4DF, V4DF, CONST) DEF_POINTER_TYPE (PCV4SF, V4SF, CONST) +DEF_POINTER_TYPE (PCV8DF, V8DF, CONST) DEF_POINTER_TYPE (PCV8SF, V8SF, CONST) +DEF_POINTER_TYPE (PCV16SF, V16SF, CONST) DEF_POINTER_TYPE (PCV2DI, V2DI, CONST) DEF_POINTER_TYPE (PCV4SI, V4SI, CONST) DEF_POINTER_TYPE (PCV4DI, V4DI, CONST) DEF_POINTER_TYPE (PCV8SI, V8SI, CONST) +DEF_POINTER_TYPE (PCV8DI, V8DI, CONST) +DEF_POINTER_TYPE (PCV16SI, V16SI, CONST) DEF_FUNCTION_TYPE (FLOAT128) DEF_FUNCTION_TYPE (UINT64) @@ -165,6 +182,7 @@ DEF_FUNCTION_TYPE (UINT16, UINT16) DEF_FUNCTION_TYPE (UINT64, PUNSIGNED) DEF_FUNCTION_TYPE (V16QI, PCCHAR) DEF_FUNCTION_TYPE (V16QI, V16QI) +DEF_FUNCTION_TYPE (V16QI, V16SI) DEF_FUNCTION_TYPE (V2DF, PCDOUBLE) DEF_FUNCTION_TYPE (V2DF, V2DF) DEF_FUNCTION_TYPE (V2DF, V2SI) @@ -190,6 +208,8 @@ DEF_FUNCTION_TYPE (V4DF, V2DF) DEF_FUNCTION_TYPE (V4DF, V4DF) DEF_FUNCTION_TYPE (V4DF, V4SF) DEF_FUNCTION_TYPE (V4DF, V4SI) +DEF_FUNCTION_TYPE (V8DF, V8SI) +DEF_FUNCTION_TYPE (V8DF, V8DF) DEF_FUNCTION_TYPE (V4HI, V4HI) DEF_FUNCTION_TYPE (V4SF, PCFLOAT) DEF_FUNCTION_TYPE (V4SF, V2DF) @@ -207,6 +227,7 @@ DEF_FUNCTION_TYPE (V4SI, V4SI) DEF_FUNCTION_TYPE (V4SI, V8HI) DEF_FUNCTION_TYPE (V4SI, V8SI) DEF_FUNCTION_TYPE (V8HI, V16QI) +DEF_FUNCTION_TYPE (V8HI, V8DI) DEF_FUNCTION_TYPE (V8HI, V8HI) DEF_FUNCTION_TYPE (V8QI, V8QI) DEF_FUNCTION_TYPE (V8SF, PCFLOAT) @@ -216,10 +237,15 @@ DEF_FUNCTION_TYPE (V8SF, V4SF) DEF_FUNCTION_TYPE (V8SF, V8SF) DEF_FUNCTION_TYPE (V8SF, V8SI) DEF_FUNCTION_TYPE (V8SF, V8HI) +DEF_FUNCTION_TYPE (V16SF, V16SF) +DEF_FUNCTION_TYPE (V8SI, V8DI) DEF_FUNCTION_TYPE (V8SI, V4SI) +DEF_FUNCTION_TYPE 
(V8SF, V8DF) +DEF_FUNCTION_TYPE (V8SF, V8DF, V8SF, QI) DEF_FUNCTION_TYPE (V8SI, V8SF) DEF_FUNCTION_TYPE (V32QI, V32QI) DEF_FUNCTION_TYPE (V32QI, V16QI) +DEF_FUNCTION_TYPE (V16HI, V16SI) DEF_FUNCTION_TYPE (V16HI, V16HI) DEF_FUNCTION_TYPE (V16HI, V8HI) DEF_FUNCTION_TYPE (V8SI, V8SI) @@ -239,6 +265,28 @@ DEF_FUNCTION_TYPE (V4DI, V8HI) DEF_FUNCTION_TYPE (V4DI, V4SI) DEF_FUNCTION_TYPE (V4DI, PV4DI) DEF_FUNCTION_TYPE (V4DI, V2DI) +DEF_FUNCTION_TYPE (V16SF, FLOAT) +DEF_FUNCTION_TYPE (V16SI, INT) +DEF_FUNCTION_TYPE (V8DF, DOUBLE) +DEF_FUNCTION_TYPE (V8DI, INT64) +DEF_FUNCTION_TYPE (V16SF, V4SF) +DEF_FUNCTION_TYPE (V8DF, V4DF) +DEF_FUNCTION_TYPE (V8DI, V4DI) +DEF_FUNCTION_TYPE (V16QI, V8DI) +DEF_FUNCTION_TYPE (UINT, V4SF) +DEF_FUNCTION_TYPE (UINT64, V4SF) +DEF_FUNCTION_TYPE (UINT, V2DF) +DEF_FUNCTION_TYPE (UINT64, V2DF) +DEF_FUNCTION_TYPE (V16SI, V16SI) +DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, HI) +DEF_FUNCTION_TYPE (V8DI, V8DI) +DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, QI) +DEF_FUNCTION_TYPE (V16SI, PV4SI) +DEF_FUNCTION_TYPE (V16SF, PV4SF) +DEF_FUNCTION_TYPE (V8DI, PV4DI) +DEF_FUNCTION_TYPE (V8DF, PV4DF) +DEF_FUNCTION_TYPE (V8UHI, V8UHI) +DEF_FUNCTION_TYPE (V8USI, V8USI) DEF_FUNCTION_TYPE (DI, V2DI, INT) DEF_FUNCTION_TYPE (DOUBLE, V2DF, INT) @@ -270,6 +318,8 @@ DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI) DEF_FUNCTION_TYPE (V1DI, V2SI, V2SI) DEF_FUNCTION_TYPE (V1DI, V8QI, V8QI) DEF_FUNCTION_TYPE (V2DF, PCV2DF, V2DI) +DEF_FUNCTION_TYPE (V2DF, V2DF, UINT) +DEF_FUNCTION_TYPE (V2DF, V2DF, UINT64) DEF_FUNCTION_TYPE (V2DF, V2DF, DI) DEF_FUNCTION_TYPE (V2DF, V2DF, INT) DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE) @@ -295,16 +345,23 @@ DEF_FUNCTION_TYPE (V2SI, V2SI, V2SI) DEF_FUNCTION_TYPE (V2SI, V4HI, V4HI) DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DI) DEF_FUNCTION_TYPE (V4DF, V4DF, INT) +DEF_FUNCTION_TYPE (V4DF, V8DF, INT) +DEF_FUNCTION_TYPE (V4DF, V8DF, INT, V4DF, QI) DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF) DEF_FUNCTION_TYPE (V4DF, V4DF, V4DI) +DEF_FUNCTION_TYPE (V8DF, V8DF, V8DI) DEF_FUNCTION_TYPE 
(V4HI, V2SI, V2SI) DEF_FUNCTION_TYPE (V4HI, V4HI, INT) DEF_FUNCTION_TYPE (V4HI, V4HI, SI) DEF_FUNCTION_TYPE (V4HI, V4HI, V4HI) DEF_FUNCTION_TYPE (V4HI, V8QI, V8QI) DEF_FUNCTION_TYPE (V4SF, PCV4SF, V4SI) +DEF_FUNCTION_TYPE (V4SF, V4SF, UINT) +DEF_FUNCTION_TYPE (V4SF, V4SF, UINT64) DEF_FUNCTION_TYPE (V4SF, V4SF, DI) DEF_FUNCTION_TYPE (V4SF, V4SF, INT) +DEF_FUNCTION_TYPE (INT, V4SF, V4SF, INT, INT) +DEF_FUNCTION_TYPE (INT, V2DF, V2DF, INT, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, PCV2SF) DEF_FUNCTION_TYPE (V4SF, V4SF, SI) DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF) @@ -331,30 +388,75 @@ DEF_FUNCTION_TYPE (V8QI, V4HI, V4HI) DEF_FUNCTION_TYPE (V8QI, V8QI, V8QI) DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SI) DEF_FUNCTION_TYPE (V8SF, V8SF, INT) +DEF_FUNCTION_TYPE (V16SF, V16SF, INT) +DEF_FUNCTION_TYPE (V4SF, V16SF, INT) +DEF_FUNCTION_TYPE (V4SF, V16SF, INT, V4SF, QI) DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF) DEF_FUNCTION_TYPE (V8SF, V8SF, V8SI) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SI) DEF_FUNCTION_TYPE (V32QI, V16HI, V16HI) DEF_FUNCTION_TYPE (V16HI, V8SI, V8SI) +DEF_FUNCTION_TYPE (V8DF, V8DF, V4DF, INT, V8DF, QI) +DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, QI) +DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, QI) +DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DI, INT, QI, INT) +DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, HI) +DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, HI) +DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI, INT, V16SI, HI) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT, HI) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT, HI, INT) +DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT, QI) +DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT, QI, INT) +DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT, QI) +DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT, QI, INT) +DEF_FUNCTION_TYPE (V16SF, V16SF, V4SF, INT) 
+DEF_FUNCTION_TYPE (V16SF, V16SF, V4SF, INT, V16SF, HI) DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI) DEF_FUNCTION_TYPE (V16HI, V32QI, V32QI) DEF_FUNCTION_TYPE (V16HI, V16HI, V8HI) DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI) DEF_FUNCTION_TYPE (V16HI, V16HI, INT) +DEF_FUNCTION_TYPE (V16HI, V16SF, INT) +DEF_FUNCTION_TYPE (V16HI, V16SF, INT, V16HI, HI) +DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, INT, V16HI, HI) DEF_FUNCTION_TYPE (V16HI, V16HI, SI) DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, INT) DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, INT) DEF_FUNCTION_TYPE (V8SI, V4DF, V4DF) DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI) +DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI) +DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI, INT) +DEF_FUNCTION_TYPE (V4SI, V16SI, INT) +DEF_FUNCTION_TYPE (V4SI, V16SI, INT, V4SI, QI) DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI) +DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI) +DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, INT, V16SI, HI) DEF_FUNCTION_TYPE (V8SI, V16HI, V16HI) DEF_FUNCTION_TYPE (V8SI, V8SI, INT) DEF_FUNCTION_TYPE (V8SI, V8SI, SI) +DEF_FUNCTION_TYPE (V16SI, V16SI, SI) +DEF_FUNCTION_TYPE (V16SI, V16SI, INT) +DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI, V16SI, HI) +DEF_FUNCTION_TYPE (V16SI, V16SI, INT, V16SI, HI) DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI) DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI) +DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI) +DEF_FUNCTION_TYPE (V16SI, V8DF, V8DF) +DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, INT, V8DI, QI) +DEF_FUNCTION_TYPE (V8DI, V8DI, V4DI, INT, V8DI, QI) DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI) DEF_FUNCTION_TYPE (V4UDI, V8USI, V8USI) DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI) +DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI) DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI) +DEF_FUNCTION_TYPE (V4DI, V8DI, INT) +DEF_FUNCTION_TYPE (V4DI, V8DI, INT, V4DI, QI) +DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI, V8DI, QI) +DEF_FUNCTION_TYPE (V8DI, V8DI, INT, V8DI, QI) DEF_FUNCTION_TYPE (V4DI, V4DI, INT) DEF_FUNCTION_TYPE (V2DI, V4DI, INT) DEF_FUNCTION_TYPE (VOID, PVOID, INT64) @@ -362,8 +464,10 @@ DEF_FUNCTION_TYPE (VOID, PCHAR, V16QI) 
DEF_FUNCTION_TYPE (VOID, PCHAR, V32QI) DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF) DEF_FUNCTION_TYPE (VOID, PDOUBLE, V4DF) +DEF_FUNCTION_TYPE (VOID, PDOUBLE, V8DF) DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF) DEF_FUNCTION_TYPE (VOID, PFLOAT, V8SF) +DEF_FUNCTION_TYPE (VOID, PFLOAT, V16SF) DEF_FUNCTION_TYPE (VOID, PINT, INT) DEF_FUNCTION_TYPE (VOID, PLONGLONG, LONGLONG) DEF_FUNCTION_TYPE (VOID, PULONGLONG, ULONGLONG) @@ -374,6 +478,34 @@ DEF_FUNCTION_TYPE (VOID, PV4DI, V4DI) DEF_FUNCTION_TYPE (VOID, PV4SF, V4SF) DEF_FUNCTION_TYPE (VOID, PV8SF, V8SF) DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED) +DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI) + +# Instructions returning mask +DEF_FUNCTION_TYPE (HI, HI) +DEF_FUNCTION_TYPE (HI, HI, HI) +DEF_FUNCTION_TYPE (HI, HI, INT) +DEF_FUNCTION_TYPE (QI, V8DI, V8DI) +DEF_FUNCTION_TYPE (QI, V8DI, V8DI, QI) +DEF_FUNCTION_TYPE (HI, V16SI, V16SI) +DEF_FUNCTION_TYPE (HI, V16SI, V16SI, HI) +DEF_FUNCTION_TYPE (QI, V8DI, V8DI, INT) +DEF_FUNCTION_TYPE (QI, V8DI, V8DI, INT, QI) +DEF_FUNCTION_TYPE (HI, V16SI, V16SI, INT) +DEF_FUNCTION_TYPE (HI, V16SI, V16SI, INT ,HI) +DEF_FUNCTION_TYPE (QI, V8DF, V8DF, INT) +DEF_FUNCTION_TYPE (QI, V8DF, V8DF, INT, QI) +DEF_FUNCTION_TYPE (QI, V8DF, V8DF, INT, QI, INT) +DEF_FUNCTION_TYPE (HI, V16SF, V16SF, INT) +DEF_FUNCTION_TYPE (HI, V16SF, V16SF, INT, HI) +DEF_FUNCTION_TYPE (HI, V16SF, V16SF, INT, HI, INT) +DEF_FUNCTION_TYPE (QI, V2DF, V2DF, INT) +DEF_FUNCTION_TYPE (QI, V2DF, V2DF, INT, QI) +DEF_FUNCTION_TYPE (QI, V2DF, V2DF, INT, QI, INT) +DEF_FUNCTION_TYPE (QI, V4SF, V4SF, INT) +DEF_FUNCTION_TYPE (QI, V4SF, V4SF, INT, QI) +DEF_FUNCTION_TYPE (QI, V4SF, V4SF, INT, QI, INT) +DEF_FUNCTION_TYPE (V16SI, HI) +DEF_FUNCTION_TYPE (V8DI, QI) DEF_FUNCTION_TYPE (INT, V16QI, V16QI, INT) DEF_FUNCTION_TYPE (UCHAR, UINT, UINT, UINT) @@ -413,11 +545,69 @@ DEF_FUNCTION_TYPE (V8SF, V8SF, V4SF, INT) DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, INT) DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SF) DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI, INT) +DEF_FUNCTION_TYPE 
(V8DF, V8DF, V8DF, V8DF) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF) DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, INT) DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, INT) DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI) DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, INT) DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI, INT) + +# Instructions with masking +DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, QI) +DEF_FUNCTION_TYPE (V8DF, V8SF, V8DF, QI) +DEF_FUNCTION_TYPE (V8DF, V8SI, V8DF, QI) +DEF_FUNCTION_TYPE (V8DI, V8SI, V8DI, QI) +DEF_FUNCTION_TYPE (V8DI, V8HI, V8DI, QI) +DEF_FUNCTION_TYPE (V8DI, V16QI, V8DI, QI) +DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, QI) +DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, V8DF) +DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, V8DF, QI) +DEF_FUNCTION_TYPE (V8DF, V8DF, V8DI, V8DF, QI) +DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF, QI) +DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, HI) +DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI) +DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, QI) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, HI) +DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, HI) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, HI) +DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, V16SF) +DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, V16SF, HI) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SI, V16SF, HI) +DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI) +DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI) +DEF_FUNCTION_TYPE (V16SF, V4SF, V16SF, HI) +DEF_FUNCTION_TYPE (V8DF, V4DF, V8DF, QI) +DEF_FUNCTION_TYPE (V8DF, V2DF, V8DF, QI) +DEF_FUNCTION_TYPE (V16SI, V4SI, V16SI, HI) +DEF_FUNCTION_TYPE (V16SI, SI, V16SI, HI) +DEF_FUNCTION_TYPE (V16SI, V16HI, V16SI, HI) +DEF_FUNCTION_TYPE (V16SI, V16QI, V16SI, HI) +DEF_FUNCTION_TYPE (V8SI, V8DF, V8SI, QI) +DEF_FUNCTION_TYPE (V8DI, V4DI, V8DI, QI) +DEF_FUNCTION_TYPE (V8DI, V2DI, V8DI, QI) +DEF_FUNCTION_TYPE (V8DI, DI, V8DI, QI) +DEF_FUNCTION_TYPE (V16SF, PCV16SF, V16SF, HI) +DEF_FUNCTION_TYPE (V8DF, PCV8DF, V8DF, QI) +DEF_FUNCTION_TYPE (V16SI, PCV16SI, V16SI, HI) +DEF_FUNCTION_TYPE (V8DI, PCV8DI, V8DI, QI) 
+DEF_FUNCTION_TYPE (V2DF, PCDOUBLE, V2DF, QI) +DEF_FUNCTION_TYPE (V4SF, PCFLOAT, V4SF, QI) +DEF_FUNCTION_TYPE (V16QI, V16SI, V16QI, HI) +DEF_FUNCTION_TYPE (V16HI, V16SI, V16HI, HI) +DEF_FUNCTION_TYPE (V8SI, V8DI, V8SI, QI) +DEF_FUNCTION_TYPE (V8HI, V8DI, V8HI, QI) +DEF_FUNCTION_TYPE (V16QI, V8DI, V16QI, QI) +DEF_FUNCTION_TYPE (VOID, PV8DF, V8DF, QI) +DEF_FUNCTION_TYPE (VOID, PV16SF, V16SF, HI) +DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI, QI) +DEF_FUNCTION_TYPE (VOID, PV16SI, V16SI, HI) +DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF, QI) +DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF, QI) +DEF_FUNCTION_TYPE (V16SI, V16SF, V16SI, HI) +DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, INT, QI) +DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT, HI) + DEF_FUNCTION_TYPE (VOID, PCVOID, UNSIGNED, UNSIGNED) DEF_FUNCTION_TYPE (VOID, PV2DF, V2DI, V2DF) DEF_FUNCTION_TYPE (VOID, PV4DF, V4DI, V4DF) @@ -439,6 +629,13 @@ DEF_FUNCTION_TYPE (V8UHI, V8UHI, V8UHI, V8UHI) DEF_FUNCTION_TYPE (V16UQI, V16UQI, V16UQI, V16UQI) DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI) DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI) +DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI) +DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI) +DEF_FUNCTION_TYPE (V8DF, V8DF, V8DI, V8DF) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SI, V16SF) +DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, V4SF, QI) +DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT, V2DF, QI) +DEF_FUNCTION_TYPE (V8DI, V16SI, V16SI, V8DI, QI) DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT) DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI) @@ -451,6 +648,43 @@ DEF_FUNCTION_TYPE (V8QI, QI, QI, QI, QI, QI, QI, QI, QI) DEF_FUNCTION_TYPE (UCHAR, UCHAR, UINT, UINT, PUNSIGNED) DEF_FUNCTION_TYPE (UCHAR, UCHAR, ULONGLONG, ULONGLONG, PULONGLONG) +# Instructions with rounding +DEF_FUNCTION_TYPE (UINT64, V2DF, INT) +DEF_FUNCTION_TYPE (UINT64, V4SF, INT) +DEF_FUNCTION_TYPE (UINT, V2DF, INT) +DEF_FUNCTION_TYPE (UINT, V4SF, INT) +DEF_FUNCTION_TYPE (INT64, V2DF, INT) +DEF_FUNCTION_TYPE (INT64, V4SF, INT) +DEF_FUNCTION_TYPE (INT, V2DF, 
INT) +DEF_FUNCTION_TYPE (INT, V4SF, INT) +DEF_FUNCTION_TYPE (V2DF, V2DF, UINT64, INT) +DEF_FUNCTION_TYPE (V4SF, V4SF, UINT64, INT) +DEF_FUNCTION_TYPE (V4SF, V4SF, UINT, INT) +DEF_FUNCTION_TYPE (V2DF, V2DF, INT64, INT) +DEF_FUNCTION_TYPE (V4SF, V4SF, INT64, INT) +DEF_FUNCTION_TYPE (V4SF, V4SF, INT, INT) +DEF_FUNCTION_TYPE (V16SI, V16SF, V16SI, HI, INT) +DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, HI, INT) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, HI, INT) +DEF_FUNCTION_TYPE (V16SF, V16HI, V16SF, HI, INT) +DEF_FUNCTION_TYPE (V8SI, V8DF, V8SI, QI, INT) +DEF_FUNCTION_TYPE (V8SF, V8DF, V8SF, QI, INT) +DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, QI, INT) +DEF_FUNCTION_TYPE (V8DF, V8SF, V8DF, QI, INT) +DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, HI, INT) +DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF, QI, INT) +DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI, INT) +DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI, INT) +DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI, INT) +DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, QI, INT) +DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, INT) + +DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, HI, INT) +DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, QI, INT) +DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, V4SF, QI, INT) +DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT, V2DF, QI, INT) +DEF_FUNCTION_TYPE (V8DI, V8DI, SI, V8DI, V8DI) + DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE, V4SI, V2DF, INT) DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V4SI, V4DF, INT) DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V8SI, V4DF, INT) @@ -472,6 +706,30 @@ DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V2DI, V4SI, INT) DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V4DI, V4SI, INT) DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V4DI, V8SI, INT) +DEF_FUNCTION_TYPE (V16SF, V16SF, PCFLOAT, V16SI, HI, INT) +DEF_FUNCTION_TYPE (V16SF, V16SF, PCFLOAT, V8DI, HI, INT) +DEF_FUNCTION_TYPE (V8DF, V8DF, PCDOUBLE, V8SI, QI, INT) +DEF_FUNCTION_TYPE (V8DF, V8DF, PCDOUBLE, V16SI, QI, INT) +DEF_FUNCTION_TYPE (V8SF, V8SF, PCFLOAT, V8DI, QI, INT) 
+DEF_FUNCTION_TYPE (V8DF, V8DF, PCDOUBLE, V8DI, QI, INT) +DEF_FUNCTION_TYPE (V16SI, V16SI, PCINT, V16SI, HI, INT) +DEF_FUNCTION_TYPE (V16SI, V16SI, PCINT, V8DI, HI, INT) +DEF_FUNCTION_TYPE (V8DI, V8DI, PCINT64, V8SI, QI, INT) +DEF_FUNCTION_TYPE (V8DI, V8DI, PCINT64, V16SI, QI, INT) +DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V8DI, QI, INT) +DEF_FUNCTION_TYPE (V8DI, V8DI, PCINT64, V8DI, QI, INT) +DEF_FUNCTION_TYPE (VOID, PFLOAT, HI, V16SI, V16SF, INT) +DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V8SI, V8DF, INT) +DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V8DI, V8SF, INT) +DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V8DI, V8DF, INT) +DEF_FUNCTION_TYPE (VOID, PINT, HI, V16SI, V16SI, INT) +DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8SI, V8DI, INT) +DEF_FUNCTION_TYPE (VOID, PINT, QI, V8DI, V8SI, INT) +DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8DI, V8DI, INT) + +DEF_FUNCTION_TYPE (VOID, HI, V16SI, PCINT, INT, INT) +DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT, INT, INT) + DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V4DF_FTYPE_V4DF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF, ROUND) @@ -479,6 +737,7 @@ DEF_FUNCTION_TYPE_ALIAS (V8SF_FTYPE_V8SF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V2DF_V2DF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V8SI_FTYPE_V4DF_V4DF, ROUND) +DEF_FUNCTION_TYPE_ALIAS (V16SI_FTYPE_V8DF_V8DF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V8SI_FTYPE_V8SF, ROUND) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index dd48cc51656..95ebf52d2b5 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -27922,12 +27922,334 @@ enum ix86_builtins IX86_BUILTIN_GATHERDIV4SI, IX86_BUILTIN_GATHERDIV8SI, + /* AVX512F */ + IX86_BUILTIN_ADDPD512, + IX86_BUILTIN_ADDPS512, + IX86_BUILTIN_ALIGND512, + IX86_BUILTIN_ALIGNQ512, + IX86_BUILTIN_BLENDMD512, + IX86_BUILTIN_BLENDMPD512, + IX86_BUILTIN_BLENDMPS512, + IX86_BUILTIN_BLENDMQ512, + IX86_BUILTIN_BROADCASTF32X4_512, + IX86_BUILTIN_BROADCASTF64X4_512, + 
IX86_BUILTIN_BROADCASTI32X4_512, + IX86_BUILTIN_BROADCASTI64X4_512, + IX86_BUILTIN_BROADCASTSD512, + IX86_BUILTIN_BROADCASTSS512, + IX86_BUILTIN_CMPD512, + IX86_BUILTIN_CMPPD512, + IX86_BUILTIN_CMPPS512, + IX86_BUILTIN_CMPQ512, + IX86_BUILTIN_CMPSD_MASK, + IX86_BUILTIN_CMPSS_MASK, + IX86_BUILTIN_COMIDF, + IX86_BUILTIN_COMISF, + IX86_BUILTIN_COMPRESSPD512, + IX86_BUILTIN_COMPRESSPDSTORE512, + IX86_BUILTIN_COMPRESSPS512, + IX86_BUILTIN_COMPRESSPSSTORE512, + IX86_BUILTIN_CVTDQ2PD512, + IX86_BUILTIN_CVTDQ2PS512, + IX86_BUILTIN_CVTPD2DQ512, + IX86_BUILTIN_CVTPD2PS512, + IX86_BUILTIN_CVTPD2UDQ512, + IX86_BUILTIN_CVTPH2PS512, + IX86_BUILTIN_CVTPS2DQ512, + IX86_BUILTIN_CVTPS2PD512, + IX86_BUILTIN_CVTPS2PH512, + IX86_BUILTIN_CVTPS2UDQ512, + IX86_BUILTIN_CVTSI2SD64, + IX86_BUILTIN_CVTSI2SS32, + IX86_BUILTIN_CVTSI2SS64, + IX86_BUILTIN_CVTTPD2DQ512, + IX86_BUILTIN_CVTTPD2UDQ512, + IX86_BUILTIN_CVTTPS2DQ512, + IX86_BUILTIN_CVTTPS2UDQ512, + IX86_BUILTIN_CVTUDQ2PD512, + IX86_BUILTIN_CVTUDQ2PS512, + IX86_BUILTIN_CVTUSI2SD32, + IX86_BUILTIN_CVTUSI2SD64, + IX86_BUILTIN_CVTUSI2SS32, + IX86_BUILTIN_CVTUSI2SS64, + IX86_BUILTIN_DIVPD512, + IX86_BUILTIN_DIVPS512, + IX86_BUILTIN_EXPANDPD512, + IX86_BUILTIN_EXPANDPD512Z, + IX86_BUILTIN_EXPANDPDLOAD512, + IX86_BUILTIN_EXPANDPDLOAD512Z, + IX86_BUILTIN_EXPANDPS512, + IX86_BUILTIN_EXPANDPS512Z, + IX86_BUILTIN_EXPANDPSLOAD512, + IX86_BUILTIN_EXPANDPSLOAD512Z, + IX86_BUILTIN_EXTRACTF32X4, + IX86_BUILTIN_EXTRACTF64X4, + IX86_BUILTIN_EXTRACTI32X4, + IX86_BUILTIN_EXTRACTI64X4, + IX86_BUILTIN_FIXUPIMMPD512_MASK, + IX86_BUILTIN_FIXUPIMMPD512_MASKZ, + IX86_BUILTIN_FIXUPIMMPS512_MASK, + IX86_BUILTIN_FIXUPIMMPS512_MASKZ, + IX86_BUILTIN_FIXUPIMMSD128_MASK, + IX86_BUILTIN_FIXUPIMMSD128_MASKZ, + IX86_BUILTIN_FIXUPIMMSS128_MASK, + IX86_BUILTIN_FIXUPIMMSS128_MASKZ, + IX86_BUILTIN_GETEXPPD512, + IX86_BUILTIN_GETEXPPS512, + IX86_BUILTIN_GETMANTPD512, + IX86_BUILTIN_GETMANTPS512, + IX86_BUILTIN_INSERTF32X4, + IX86_BUILTIN_INSERTF64X4, + 
IX86_BUILTIN_INSERTI32X4, + IX86_BUILTIN_INSERTI64X4, + IX86_BUILTIN_LOADAPD512, + IX86_BUILTIN_LOADAPS512, + IX86_BUILTIN_LOADDQUDI512, + IX86_BUILTIN_LOADDQUSI512, + IX86_BUILTIN_LOADUPD512, + IX86_BUILTIN_LOADUPS512, + IX86_BUILTIN_MAXPD512, + IX86_BUILTIN_MAXPS512, + IX86_BUILTIN_MINPD512, + IX86_BUILTIN_MINPS512, + IX86_BUILTIN_MOVAPD512, + IX86_BUILTIN_MOVAPS512, + IX86_BUILTIN_MOVDDUP512, + IX86_BUILTIN_MOVDQA32LOAD512, + IX86_BUILTIN_MOVDQA32STORE512, + IX86_BUILTIN_MOVDQA32_512, + IX86_BUILTIN_MOVDQA64LOAD512, + IX86_BUILTIN_MOVDQA64STORE512, + IX86_BUILTIN_MOVDQA64_512, + IX86_BUILTIN_MOVNTDQ512, + IX86_BUILTIN_MOVNTPD512, + IX86_BUILTIN_MOVNTPS512, + IX86_BUILTIN_MOVSHDUP512, + IX86_BUILTIN_MOVSLDUP512, + IX86_BUILTIN_MULPD512, + IX86_BUILTIN_MULPS512, + IX86_BUILTIN_PABSD512, + IX86_BUILTIN_PABSQ512, + IX86_BUILTIN_PADDD512, + IX86_BUILTIN_PADDQ512, + IX86_BUILTIN_PANDD512, + IX86_BUILTIN_PANDND512, + IX86_BUILTIN_PANDNQ512, + IX86_BUILTIN_PANDQ512, + IX86_BUILTIN_PBROADCASTD512, + IX86_BUILTIN_PBROADCASTD512_GPR, + IX86_BUILTIN_PBROADCASTMB512, + IX86_BUILTIN_PBROADCASTMW512, + IX86_BUILTIN_PBROADCASTQ512, + IX86_BUILTIN_PBROADCASTQ512_GPR, + IX86_BUILTIN_PBROADCASTQ512_MEM, + IX86_BUILTIN_PCMPEQD512_MASK, + IX86_BUILTIN_PCMPEQQ512_MASK, + IX86_BUILTIN_PCMPGTD512_MASK, + IX86_BUILTIN_PCMPGTQ512_MASK, + IX86_BUILTIN_PCOMPRESSD512, + IX86_BUILTIN_PCOMPRESSDSTORE512, + IX86_BUILTIN_PCOMPRESSQ512, + IX86_BUILTIN_PCOMPRESSQSTORE512, + IX86_BUILTIN_PEXPANDD512, + IX86_BUILTIN_PEXPANDD512Z, + IX86_BUILTIN_PEXPANDDLOAD512, + IX86_BUILTIN_PEXPANDDLOAD512Z, + IX86_BUILTIN_PEXPANDQ512, + IX86_BUILTIN_PEXPANDQ512Z, + IX86_BUILTIN_PEXPANDQLOAD512, + IX86_BUILTIN_PEXPANDQLOAD512Z, + IX86_BUILTIN_PMAXSD512, + IX86_BUILTIN_PMAXSQ512, + IX86_BUILTIN_PMAXUD512, + IX86_BUILTIN_PMAXUQ512, + IX86_BUILTIN_PMINSD512, + IX86_BUILTIN_PMINSQ512, + IX86_BUILTIN_PMINUD512, + IX86_BUILTIN_PMINUQ512, + IX86_BUILTIN_PMOVDB512, + IX86_BUILTIN_PMOVDW512, + IX86_BUILTIN_PMOVQB512, + 
IX86_BUILTIN_PMOVQD512, + IX86_BUILTIN_PMOVQW512, + IX86_BUILTIN_PMOVSDB512, + IX86_BUILTIN_PMOVSDW512, + IX86_BUILTIN_PMOVSQB512, + IX86_BUILTIN_PMOVSQD512, + IX86_BUILTIN_PMOVSQW512, + IX86_BUILTIN_PMOVSXBD512, + IX86_BUILTIN_PMOVSXBQ512, + IX86_BUILTIN_PMOVSXDQ512, + IX86_BUILTIN_PMOVSXWD512, + IX86_BUILTIN_PMOVSXWQ512, + IX86_BUILTIN_PMOVUSDB512, + IX86_BUILTIN_PMOVUSDW512, + IX86_BUILTIN_PMOVUSQB512, + IX86_BUILTIN_PMOVUSQD512, + IX86_BUILTIN_PMOVUSQW512, + IX86_BUILTIN_PMOVZXBD512, + IX86_BUILTIN_PMOVZXBQ512, + IX86_BUILTIN_PMOVZXDQ512, + IX86_BUILTIN_PMOVZXWD512, + IX86_BUILTIN_PMOVZXWQ512, + IX86_BUILTIN_PMULDQ512, + IX86_BUILTIN_PMULLD512, + IX86_BUILTIN_PMULUDQ512, + IX86_BUILTIN_PORD512, + IX86_BUILTIN_PORQ512, + IX86_BUILTIN_PROLD512, + IX86_BUILTIN_PROLQ512, + IX86_BUILTIN_PROLVD512, + IX86_BUILTIN_PROLVQ512, + IX86_BUILTIN_PRORD512, + IX86_BUILTIN_PRORQ512, + IX86_BUILTIN_PRORVD512, + IX86_BUILTIN_PRORVQ512, + IX86_BUILTIN_PSHUFD512, + IX86_BUILTIN_PSLLD512, + IX86_BUILTIN_PSLLDI512, + IX86_BUILTIN_PSLLQ512, + IX86_BUILTIN_PSLLQI512, + IX86_BUILTIN_PSLLVV16SI, + IX86_BUILTIN_PSLLVV8DI, + IX86_BUILTIN_PSRAD512, + IX86_BUILTIN_PSRADI512, + IX86_BUILTIN_PSRAQ512, + IX86_BUILTIN_PSRAQI512, + IX86_BUILTIN_PSRAVV16SI, + IX86_BUILTIN_PSRAVV8DI, + IX86_BUILTIN_PSRLD512, + IX86_BUILTIN_PSRLDI512, + IX86_BUILTIN_PSRLQ512, + IX86_BUILTIN_PSRLQI512, + IX86_BUILTIN_PSRLVV16SI, + IX86_BUILTIN_PSRLVV8DI, + IX86_BUILTIN_PSUBD512, + IX86_BUILTIN_PSUBQ512, + IX86_BUILTIN_PTESTMD512, + IX86_BUILTIN_PTESTMQ512, + IX86_BUILTIN_PTESTNMD512, + IX86_BUILTIN_PTESTNMQ512, + IX86_BUILTIN_PUNPCKHDQ512, + IX86_BUILTIN_PUNPCKHQDQ512, + IX86_BUILTIN_PUNPCKLDQ512, + IX86_BUILTIN_PUNPCKLQDQ512, + IX86_BUILTIN_PXORD512, + IX86_BUILTIN_PXORQ512, + IX86_BUILTIN_RCP14PD512, + IX86_BUILTIN_RCP14PS512, + IX86_BUILTIN_RNDSCALEPD, + IX86_BUILTIN_RNDSCALEPS, + IX86_BUILTIN_RSQRT14PD512, + IX86_BUILTIN_RSQRT14PS512, + IX86_BUILTIN_SCALEFPD512, + IX86_BUILTIN_SCALEFPS512, + 
IX86_BUILTIN_SHUFPD512, + IX86_BUILTIN_SHUFPS512, + IX86_BUILTIN_SHUF_F32x4, + IX86_BUILTIN_SHUF_F64x2, + IX86_BUILTIN_SHUF_I32x4, + IX86_BUILTIN_SHUF_I64x2, IX86_BUILTIN_SQRTPD512, - IX86_BUILTIN_EXP2PS, + IX86_BUILTIN_SQRTPD512_MASK, + IX86_BUILTIN_SQRTPS512_MASK, IX86_BUILTIN_SQRTPS_NR512, - - /* Alternate 4 element gather for the vectorizer where - all operands are 32-byte wide. */ + IX86_BUILTIN_STOREAPD512, + IX86_BUILTIN_STOREAPS512, + IX86_BUILTIN_STOREDQUDI512, + IX86_BUILTIN_STOREDQUSI512, + IX86_BUILTIN_STOREUPD512, + IX86_BUILTIN_STOREUPS512, + IX86_BUILTIN_SUBPD512, + IX86_BUILTIN_SUBPS512, + IX86_BUILTIN_UCMPD512, + IX86_BUILTIN_UCMPQ512, + IX86_BUILTIN_UNPCKHPD512, + IX86_BUILTIN_UNPCKHPS512, + IX86_BUILTIN_UNPCKLPD512, + IX86_BUILTIN_UNPCKLPS512, + IX86_BUILTIN_VCVTSD2SI32, + IX86_BUILTIN_VCVTSD2SI64, + IX86_BUILTIN_VCVTSD2USI32, + IX86_BUILTIN_VCVTSD2USI64, + IX86_BUILTIN_VCVTSS2SI32, + IX86_BUILTIN_VCVTSS2SI64, + IX86_BUILTIN_VCVTSS2USI32, + IX86_BUILTIN_VCVTSS2USI64, + IX86_BUILTIN_VCVTTSD2SI32, + IX86_BUILTIN_VCVTTSD2SI64, + IX86_BUILTIN_VCVTTSD2USI32, + IX86_BUILTIN_VCVTTSD2USI64, + IX86_BUILTIN_VCVTTSS2SI32, + IX86_BUILTIN_VCVTTSS2SI64, + IX86_BUILTIN_VCVTTSS2USI32, + IX86_BUILTIN_VCVTTSS2USI64, + IX86_BUILTIN_VFMADDPD512_MASK, + IX86_BUILTIN_VFMADDPD512_MASK3, + IX86_BUILTIN_VFMADDPD512_MASKZ, + IX86_BUILTIN_VFMADDPS512_MASK, + IX86_BUILTIN_VFMADDPS512_MASK3, + IX86_BUILTIN_VFMADDPS512_MASKZ, + IX86_BUILTIN_VFMADDSUBPD512_MASK, + IX86_BUILTIN_VFMADDSUBPD512_MASK3, + IX86_BUILTIN_VFMADDSUBPD512_MASKZ, + IX86_BUILTIN_VFMADDSUBPS512_MASK, + IX86_BUILTIN_VFMADDSUBPS512_MASK3, + IX86_BUILTIN_VFMADDSUBPS512_MASKZ, + IX86_BUILTIN_VFMSUBADDPD512_MASK3, + IX86_BUILTIN_VFMSUBADDPS512_MASK3, + IX86_BUILTIN_VFMSUBPD512_MASK3, + IX86_BUILTIN_VFMSUBPS512_MASK3, + IX86_BUILTIN_VFNMADDPD512_MASK, + IX86_BUILTIN_VFNMADDPS512_MASK, + IX86_BUILTIN_VFNMSUBPD512_MASK, + IX86_BUILTIN_VFNMSUBPD512_MASK3, + IX86_BUILTIN_VFNMSUBPS512_MASK, + 
IX86_BUILTIN_VFNMSUBPS512_MASK3, + IX86_BUILTIN_VPCLZCNTD512, + IX86_BUILTIN_VPCLZCNTQ512, + IX86_BUILTIN_VPCONFLICTD512, + IX86_BUILTIN_VPCONFLICTQ512, + IX86_BUILTIN_VPERMDF512, + IX86_BUILTIN_VPERMDI512, + IX86_BUILTIN_VPERMI2VARD512, + IX86_BUILTIN_VPERMI2VARPD512, + IX86_BUILTIN_VPERMI2VARPS512, + IX86_BUILTIN_VPERMI2VARQ512, + IX86_BUILTIN_VPERMILPD512, + IX86_BUILTIN_VPERMILPS512, + IX86_BUILTIN_VPERMILVARPD512, + IX86_BUILTIN_VPERMILVARPS512, + IX86_BUILTIN_VPERMT2VARD512, + IX86_BUILTIN_VPERMT2VARD512_MASKZ, + IX86_BUILTIN_VPERMT2VARPD512, + IX86_BUILTIN_VPERMT2VARPD512_MASKZ, + IX86_BUILTIN_VPERMT2VARPS512, + IX86_BUILTIN_VPERMT2VARPS512_MASKZ, + IX86_BUILTIN_VPERMT2VARQ512, + IX86_BUILTIN_VPERMT2VARQ512_MASKZ, + IX86_BUILTIN_VPERMVARDF512, + IX86_BUILTIN_VPERMVARDI512, + IX86_BUILTIN_VPERMVARSF512, + IX86_BUILTIN_VPERMVARSI512, + IX86_BUILTIN_VTERNLOGD512_MASK, + IX86_BUILTIN_VTERNLOGD512_MASKZ, + IX86_BUILTIN_VTERNLOGQ512_MASK, + IX86_BUILTIN_VTERNLOGQ512_MASKZ, + + /* Mask arithmetic operations */ + IX86_BUILTIN_KAND16, + IX86_BUILTIN_KANDN16, + IX86_BUILTIN_KNOT16, + IX86_BUILTIN_KOR16, + IX86_BUILTIN_KORTESTC16, + IX86_BUILTIN_KORTESTZ16, + IX86_BUILTIN_KUNPCKBW, + IX86_BUILTIN_KXNOR16, + IX86_BUILTIN_KXOR16, + + /* Alternate 4 and 8 element gather/scatter for the vectorizer + where all operands are 32-byte or 64-byte wide respectively. 
*/ IX86_BUILTIN_GATHERALTSIV4DF, IX86_BUILTIN_GATHERALTDIV8SF, IX86_BUILTIN_GATHERALTSIV4DI, @@ -27943,6 +28265,28 @@ enum ix86_builtins IX86_BUILTIN_GATHER3SIV16SF, IX86_BUILTIN_GATHER3SIV16SI, IX86_BUILTIN_GATHER3SIV8DF, + IX86_BUILTIN_GATHER3SIV8DI, + IX86_BUILTIN_SCATTERDIV16SF, + IX86_BUILTIN_SCATTERDIV16SI, + IX86_BUILTIN_SCATTERDIV8DF, + IX86_BUILTIN_SCATTERDIV8DI, + IX86_BUILTIN_SCATTERSIV16SF, + IX86_BUILTIN_SCATTERSIV16SI, + IX86_BUILTIN_SCATTERSIV8DF, + IX86_BUILTIN_SCATTERSIV8DI, + + /* AVX512PF */ + IX86_BUILTIN_GATHERPFDPS, + IX86_BUILTIN_GATHERPFQPS, + IX86_BUILTIN_SCATTERPFDPS, + IX86_BUILTIN_SCATTERPFQPS, + IX86_BUILTIN_EXP2PD_MASK, + IX86_BUILTIN_EXP2PS_MASK, + IX86_BUILTIN_EXP2PS, + IX86_BUILTIN_RCP28PD, + IX86_BUILTIN_RCP28PS, + IX86_BUILTIN_RSQRT28PD, + IX86_BUILTIN_RSQRT28PS, /* TFmode support builtins. */ IX86_BUILTIN_INFQ, @@ -28497,6 +28841,39 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI }, + /* AVX512F */ + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, 
UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID }, { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID }, { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT }, @@ -29348,6 +29725,322 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, + + /* AVX512F */ + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, 
"__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI }, + { OPTION_MASK_ISA_AVX512F, 
CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", 
IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", 
IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512F, 
CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, 
"__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", 
IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", 
IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) 
V8DI_FTYPE_V8DI_V2DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) 
V8DF_FTYPE_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, 
"__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) 
V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, 
"__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI }, + + { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, 
"__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF }, + { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND }, + + /* Mask arithmetic operations */ + { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI }, + { OPTION_MASK_ISA_AVX512F, 
CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI }, +}; + +/* Builtins with rounding support. */ +static const struct builtin_description bdesc_round_args[] = +{ + /* AVX512F */ + { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, 
"__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, 
(int) V16SF_FTYPE_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", 
IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", 
IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", 
IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + + /* AVX512ER */ + { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512ER, 
CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT }, }; /* FMA4 and XOP. */ @@ -29794,6 +30487,18 @@ ix86_init_mmx_sse_builtins (void) def_builtin_const (d->mask, d->name, ftype, d->code); } + /* Add all builtins with rounding. */ + for (i = 0, d = bdesc_round_args; + i < ARRAY_SIZE (bdesc_round_args); + i++, d++) + { + if (d->name == 0) + continue; + + ftype = (enum ix86_builtin_func_type) d->flag; + def_builtin_const (d->mask, d->name, ftype, d->code); + } + /* pcmpestr[im] insns. */ for (i = 0, d = bdesc_pcmpestr; i < ARRAY_SIZE (bdesc_pcmpestr); @@ -29962,6 +30667,101 @@ ix86_init_mmx_sse_builtins (void) V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT, IX86_BUILTIN_GATHERALTDIV8SI); + /* AVX512F */ + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf", + V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT, + IX86_BUILTIN_GATHER3SIV16SF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df", + V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT, + IX86_BUILTIN_GATHER3SIV8DF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf", + V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT, + IX86_BUILTIN_GATHER3DIV16SF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df", + V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT, + IX86_BUILTIN_GATHER3DIV8DF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si", + V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT, + IX86_BUILTIN_GATHER3SIV16SI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di", + V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT, + 
IX86_BUILTIN_GATHER3SIV8DI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si", + V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT, + IX86_BUILTIN_GATHER3DIV16SI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di", + V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT, + IX86_BUILTIN_GATHER3DIV8DI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ", + V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT, + IX86_BUILTIN_GATHER3ALTSIV8DF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ", + V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT, + IX86_BUILTIN_GATHER3ALTDIV16SF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ", + V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT, + IX86_BUILTIN_GATHER3ALTSIV8DI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ", + V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT, + IX86_BUILTIN_GATHER3ALTDIV16SI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf", + VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT, + IX86_BUILTIN_SCATTERSIV16SF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df", + VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT, + IX86_BUILTIN_SCATTERSIV8DF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf", + VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT, + IX86_BUILTIN_SCATTERDIV16SF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df", + VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT, + IX86_BUILTIN_SCATTERDIV8DF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si", + VOID_FTYPE_PINT_HI_V16SI_V16SI_INT, + IX86_BUILTIN_SCATTERSIV16SI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di", + VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT, + IX86_BUILTIN_SCATTERSIV8DI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si", + VOID_FTYPE_PINT_QI_V8DI_V8SI_INT, + IX86_BUILTIN_SCATTERDIV16SI); + + def_builtin (OPTION_MASK_ISA_AVX512F, 
"__builtin_ia32_scatterdiv8di", + VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT, + IX86_BUILTIN_SCATTERDIV8DI); + + /* AVX512PF */ + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps", + VOID_FTYPE_HI_V16SI_PCINT_INT_INT, + IX86_BUILTIN_GATHERPFDPS); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps", + VOID_FTYPE_QI_V8DI_PCINT_INT_INT, + IX86_BUILTIN_GATHERPFQPS); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps", + VOID_FTYPE_HI_V16SI_PCINT_INT_INT, + IX86_BUILTIN_SCATTERPFDPS); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps", + VOID_FTYPE_QI_V8DI_PCINT_INT_INT, + IX86_BUILTIN_SCATTERPFQPS); + /* RTM. */ def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort", VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT); @@ -32364,12 +33164,13 @@ ix86_expand_args_builtin (const struct builtin_description *d, rtx pat, real_target; unsigned int i, nargs; unsigned int nargs_constant = 0; + unsigned int mask_pos = 0; int num_memory = 0; struct { rtx op; enum machine_mode mode; - } args[4]; + } args[6]; bool last_arg_count = false; enum insn_code icode = d->icode; const struct insn_data_d *insn_p = &insn_data[icode]; @@ -32389,6 +33190,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, return ix86_expand_sse_round (d, exp, target); case V4SI_FTYPE_V2DF_V2DF_ROUND: case V8SI_FTYPE_V4DF_V4DF_ROUND: + case V16SI_FTYPE_V8DF_V8DF_ROUND: return ix86_expand_sse_round_vec_pack_sfix (d, exp, target); case INT_FTYPE_V8SF_V8SF_PTEST: case INT_FTYPE_V4DI_V4DI_PTEST: @@ -32467,6 +33269,32 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V4DI_FTYPE_V8HI: case V4DI_FTYPE_V4SI: case V4DI_FTYPE_V2DI: + case HI_FTYPE_HI: + case UINT_FTYPE_V2DF: + case UINT_FTYPE_V4SF: + case UINT64_FTYPE_V2DF: + case UINT64_FTYPE_V4SF: + case V16QI_FTYPE_V8DI: + case V16HI_FTYPE_V16SI: + case V16SI_FTYPE_HI: + case V16SI_FTYPE_V16SI: + case V16SI_FTYPE_INT: + case V16SF_FTYPE_FLOAT: + case V16SF_FTYPE_V4SF: + 
case V16SF_FTYPE_V16SF: + case V8HI_FTYPE_V8DI: + case V8UHI_FTYPE_V8UHI: + case V8SI_FTYPE_V8DI: + case V8USI_FTYPE_V8USI: + case V8SF_FTYPE_V8DF: + case V8DI_FTYPE_QI: + case V8DI_FTYPE_INT64: + case V8DI_FTYPE_V4DI: + case V8DI_FTYPE_V8DI: + case V8DF_FTYPE_DOUBLE: + case V8DF_FTYPE_V4DF: + case V8DF_FTYPE_V8DF: + case V8DF_FTYPE_V8SI: nargs = 1; break; case V4SF_FTYPE_V4SF_VEC_MERGE: @@ -32475,6 +33303,9 @@ ix86_expand_args_builtin (const struct builtin_description *d, case FLOAT128_FTYPE_FLOAT128_FLOAT128: case V16QI_FTYPE_V16QI_V16QI: case V16QI_FTYPE_V8HI_V8HI: + case V16SI_FTYPE_V16SI_V16SI: + case V16SF_FTYPE_V16SF_V16SF: + case V16SF_FTYPE_V16SF_V16SI: case V8QI_FTYPE_V8QI_V8QI: case V8QI_FTYPE_V4HI_V4HI: case V8HI_FTYPE_V8HI_V8HI: @@ -32482,6 +33313,9 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V8HI_FTYPE_V4SI_V4SI: case V8SF_FTYPE_V8SF_V8SF: case V8SF_FTYPE_V8SF_V8SI: + case V8DI_FTYPE_V8DI_V8DI: + case V8DF_FTYPE_V8DF_V8DF: + case V8DF_FTYPE_V8DF_V8DI: case V4SI_FTYPE_V4SI_V4SI: case V4SI_FTYPE_V8HI_V8HI: case V4SI_FTYPE_V4SF_V4SF: @@ -32495,6 +33329,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V4SF_FTYPE_V4SF_V4SI: case V4SF_FTYPE_V4SF_V2SI: case V4SF_FTYPE_V4SF_V2DF: + case V4SF_FTYPE_V4SF_UINT: + case V4SF_FTYPE_V4SF_UINT64: case V4SF_FTYPE_V4SF_DI: case V4SF_FTYPE_V4SF_SI: case V2DI_FTYPE_V2DI_V2DI: @@ -32511,6 +33347,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V2DF_FTYPE_V2DF_V2DI: case V2DF_FTYPE_V2DF_DI: case V2DF_FTYPE_V2DF_SI: + case V2DF_FTYPE_V2DF_UINT: + case V2DF_FTYPE_V2DF_UINT64: case V2SF_FTYPE_V2SF_V2SF: case V1DI_FTYPE_V1DI_V1DI: case V1DI_FTYPE_V8QI_V8QI: @@ -32526,6 +33364,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V4DI_FTYPE_V4DI_V4DI: case V4DI_FTYPE_V8SI_V8SI: case V4UDI_FTYPE_V8USI_V8USI: + case QI_FTYPE_V8DI_V8DI: + case HI_FTYPE_V16SI_V16SI: if (comparison == UNKNOWN) return ix86_expand_binop_builtin (icode, exp, 
target); nargs = 2; @@ -32563,6 +33403,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, case UINT_FTYPE_UINT_UCHAR: case UINT16_FTYPE_UINT16_INT: case UINT8_FTYPE_UINT8_INT: + case HI_FTYPE_HI_HI: + case V16SI_FTYPE_V8DF_V8DF: nargs = 2; break; case V2DI_FTYPE_V2DI_INT_CONVERT: @@ -32577,12 +33419,16 @@ ix86_expand_args_builtin (const struct builtin_description *d, break; case V8HI_FTYPE_V8HI_INT: case V8HI_FTYPE_V8SF_INT: + case V16HI_FTYPE_V16SF_INT: case V8HI_FTYPE_V4SF_INT: case V8SF_FTYPE_V8SF_INT: + case V4SF_FTYPE_V16SF_INT: + case V16SF_FTYPE_V16SF_INT: case V4SI_FTYPE_V4SI_INT: case V4SI_FTYPE_V8SI_INT: case V4HI_FTYPE_V4HI_INT: case V4DF_FTYPE_V4DF_INT: + case V4DF_FTYPE_V8DF_INT: case V4SF_FTYPE_V4SF_INT: case V4SF_FTYPE_V8SF_INT: case V2DI_FTYPE_V2DI_INT: @@ -32590,8 +33436,12 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V2DF_FTYPE_V4DF_INT: case V16HI_FTYPE_V16HI_INT: case V8SI_FTYPE_V8SI_INT: + case V16SI_FTYPE_V16SI_INT: + case V4SI_FTYPE_V16SI_INT: case V4DI_FTYPE_V4DI_INT: case V2DI_FTYPE_V4DI_INT: + case V4DI_FTYPE_V8DI_INT: + case HI_FTYPE_HI_INT: nargs = 2; nargs_constant = 1; break; @@ -32601,6 +33451,46 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V4SF_FTYPE_V4SF_V4SF_V4SF: case V2DF_FTYPE_V2DF_V2DF_V2DF: case V32QI_FTYPE_V32QI_V32QI_V32QI: + case HI_FTYPE_V16SI_V16SI_HI: + case QI_FTYPE_V8DI_V8DI_QI: + case V16HI_FTYPE_V16SI_V16HI_HI: + case V16QI_FTYPE_V16SI_V16QI_HI: + case V16QI_FTYPE_V8DI_V16QI_QI: + case V16SF_FTYPE_V16SF_V16SF_HI: + case V16SF_FTYPE_V16SF_V16SF_V16SF: + case V16SF_FTYPE_V16SF_V16SI_V16SF: + case V16SF_FTYPE_V16SI_V16SF_HI: + case V16SF_FTYPE_V16SI_V16SF_V16SF: + case V16SF_FTYPE_V4SF_V16SF_HI: + case V16SI_FTYPE_SI_V16SI_HI: + case V16SI_FTYPE_V16HI_V16SI_HI: + case V16SI_FTYPE_V16QI_V16SI_HI: + case V16SI_FTYPE_V16SF_V16SI_HI: + case V16SI_FTYPE_V16SI_V16SI_HI: + case V16SI_FTYPE_V16SI_V16SI_V16SI: + case V16SI_FTYPE_V4SI_V16SI_HI: + case 
V2DI_FTYPE_V2DI_V2DI_V2DI: + case V4DI_FTYPE_V4DI_V4DI_V4DI: + case V8DF_FTYPE_V2DF_V8DF_QI: + case V8DF_FTYPE_V4DF_V8DF_QI: + case V8DF_FTYPE_V8DF_V8DF_QI: + case V8DF_FTYPE_V8DF_V8DF_V8DF: + case V8DF_FTYPE_V8DF_V8DI_V8DF: + case V8DF_FTYPE_V8DI_V8DF_V8DF: + case V8DF_FTYPE_V8SF_V8DF_QI: + case V8DF_FTYPE_V8SI_V8DF_QI: + case V8DI_FTYPE_DI_V8DI_QI: + case V8DI_FTYPE_V16QI_V8DI_QI: + case V8DI_FTYPE_V2DI_V8DI_QI: + case V8DI_FTYPE_V4DI_V8DI_QI: + case V8DI_FTYPE_V8DI_V8DI_QI: + case V8DI_FTYPE_V8DI_V8DI_V8DI: + case V8DI_FTYPE_V8HI_V8DI_QI: + case V8DI_FTYPE_V8SI_V8DI_QI: + case V8HI_FTYPE_V8DI_V8HI_QI: + case V8SF_FTYPE_V8DF_V8SF_QI: + case V8SI_FTYPE_V8DF_V8SI_QI: + case V8SI_FTYPE_V8DI_V8SI_QI: nargs = 3; break; case V32QI_FTYPE_V32QI_V32QI_INT: @@ -32614,11 +33504,20 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V8SF_FTYPE_V8SF_V4SF_INT: case V4SI_FTYPE_V4SI_V4SI_INT: case V4DF_FTYPE_V4DF_V4DF_INT: + case V16SF_FTYPE_V16SF_V16SF_INT: + case V16SF_FTYPE_V16SF_V4SF_INT: + case V16SI_FTYPE_V16SI_V4SI_INT: case V4DF_FTYPE_V4DF_V2DF_INT: case V4SF_FTYPE_V4SF_V4SF_INT: case V2DI_FTYPE_V2DI_V2DI_INT: case V4DI_FTYPE_V4DI_V2DI_INT: case V2DF_FTYPE_V2DF_V2DF_INT: + case QI_FTYPE_V8DI_V8DI_INT: + case QI_FTYPE_V8DF_V8DF_INT: + case QI_FTYPE_V2DF_V2DF_INT: + case QI_FTYPE_V4SF_V4SF_INT: + case HI_FTYPE_V16SI_V16SI_INT: + case HI_FTYPE_V16SF_V16SF_INT: nargs = 3; nargs_constant = 1; break; @@ -32641,11 +33540,36 @@ ix86_expand_args_builtin (const struct builtin_description *d, nargs = 3; nargs_constant = 2; break; + case V16SF_FTYPE_V16SF_V16SF_V16SF_HI: + case V16SF_FTYPE_V16SF_V16SI_V16SF_HI: + case V16SF_FTYPE_V16SI_V16SF_V16SF_HI: + case V16SI_FTYPE_V16SI_V16SI_V16SI_HI: + case V16SI_FTYPE_V16SI_V4SI_V16SI_HI: + case V2DF_FTYPE_V2DF_V2DF_V2DF_QI: + case V2DF_FTYPE_V2DF_V4SF_V2DF_QI: + case V4SF_FTYPE_V4SF_V2DF_V4SF_QI: + case V4SF_FTYPE_V4SF_V4SF_V4SF_QI: + case V8DF_FTYPE_V8DF_V8DF_V8DF_QI: + case V8DF_FTYPE_V8DF_V8DI_V8DF_QI: + case 
V8DF_FTYPE_V8DI_V8DF_V8DF_QI: + case V8DI_FTYPE_V16SI_V16SI_V8DI_QI: + case V8DI_FTYPE_V8DI_SI_V8DI_V8DI: + case V8DI_FTYPE_V8DI_V2DI_V8DI_QI: + case V8DI_FTYPE_V8DI_V8DI_V8DI_QI: + nargs = 4; + break; case V2DF_FTYPE_V2DF_V2DF_V2DI_INT: case V4DF_FTYPE_V4DF_V4DF_V4DI_INT: case V4SF_FTYPE_V4SF_V4SF_V4SI_INT: case V8SF_FTYPE_V8SF_V8SF_V8SI_INT: + case V16SF_FTYPE_V16SF_V16SF_V16SI_INT: + nargs = 4; + nargs_constant = 1; + break; + case QI_FTYPE_V2DF_V2DF_INT_QI: + case QI_FTYPE_V4SF_V4SF_INT_QI: nargs = 4; + mask_pos = 1; nargs_constant = 1; break; case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: @@ -32656,6 +33580,51 @@ ix86_expand_args_builtin (const struct builtin_description *d, case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG: nargs = 4; break; + case QI_FTYPE_V8DI_V8DI_INT_QI: + case HI_FTYPE_V16SI_V16SI_INT_HI: + case QI_FTYPE_V8DF_V8DF_INT_QI: + case HI_FTYPE_V16SF_V16SF_INT_HI: + mask_pos = 1; + nargs = 4; + nargs_constant = 1; + break; + case V8DF_FTYPE_V8DF_INT_V8DF_QI: + case V16SF_FTYPE_V16SF_INT_V16SF_HI: + case V16HI_FTYPE_V16SF_INT_V16HI_HI: + case V16SI_FTYPE_V16SI_INT_V16SI_HI: + case V4SI_FTYPE_V16SI_INT_V4SI_QI: + case V4DI_FTYPE_V8DI_INT_V4DI_QI: + case V4DF_FTYPE_V8DF_INT_V4DF_QI: + case V4SF_FTYPE_V16SF_INT_V4SF_QI: + case V8DI_FTYPE_V8DI_INT_V8DI_QI: + nargs = 4; + mask_pos = 2; + nargs_constant = 1; + break; + case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI: + case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI: + case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI: + case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI: + case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI: + case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI: + case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI: + case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI: + case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI: + case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI: + nargs = 5; + mask_pos = 2; + nargs_constant = 1; + break; + case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI: + case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI: + case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI: + case 
V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI: + case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI: + nargs = 5; + mask_pos = 1; + nargs_constant = 1; + break; + default: gcc_unreachable (); } @@ -32702,7 +33671,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, op = copy_to_reg (op); } } - else if ((nargs - i) <= nargs_constant) + else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || + (!mask_pos && (nargs - i) <= nargs_constant)) { if (!match) switch (icode) @@ -32712,6 +33682,13 @@ ix86_expand_args_builtin (const struct builtin_description *d, error ("the last argument must be an 1-bit immediate"); return const0_rtx; + case CODE_FOR_avx512f_cmpv8di3_mask: + case CODE_FOR_avx512f_cmpv16si3_mask: + case CODE_FOR_avx512f_ucmpv8di3_mask: + case CODE_FOR_avx512f_ucmpv16si3_mask: + error ("the last argument must be a 3-bit immediate"); + return const0_rtx; + case CODE_FOR_sse4_1_roundsd: case CODE_FOR_sse4_1_roundss: @@ -32728,6 +33705,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, case CODE_FOR_sse4_1_blendps: case CODE_FOR_avx_blendpd256: case CODE_FOR_avx_vpermilv4df: + case CODE_FOR_avx512f_getmantv8df_mask: + case CODE_FOR_avx512f_getmantv16sf_mask: error ("the last argument must be a 4-bit immediate"); return const0_rtx; @@ -32737,6 +33716,10 @@ ix86_expand_args_builtin (const struct builtin_description *d, case CODE_FOR_xop_vpermil2v4sf3: case CODE_FOR_xop_vpermil2v4df3: case CODE_FOR_xop_vpermil2v8sf3: + case CODE_FOR_avx512f_vinsertf32x4_mask: + case CODE_FOR_avx512f_vinserti32x4_mask: + case CODE_FOR_avx512f_vextractf32x4_mask: + case CODE_FOR_avx512f_vextracti32x4_mask: error ("the last argument must be a 2-bit immediate"); return const0_rtx; @@ -32746,6 +33729,10 @@ ix86_expand_args_builtin (const struct builtin_description *d, case CODE_FOR_avx_vinsertf128v4df: case CODE_FOR_avx_vinsertf128v8sf: case CODE_FOR_avx_vinsertf128v8si: + case CODE_FOR_avx512f_vinsertf64x4_mask: + case CODE_FOR_avx512f_vinserti64x4_mask: + case 
CODE_FOR_avx512f_vextractf64x4_mask: + case CODE_FOR_avx512f_vextracti64x4_mask: error ("the last argument must be a 1-bit immediate"); return const0_rtx; @@ -32755,14 +33742,19 @@ ix86_expand_args_builtin (const struct builtin_description *d, case CODE_FOR_avx_cmpv4sf3: case CODE_FOR_avx_cmpv4df3: case CODE_FOR_avx_cmpv8sf3: + case CODE_FOR_avx512f_cmpv8df3_mask: + case CODE_FOR_avx512f_cmpv16sf3_mask: + case CODE_FOR_avx512f_vmcmpv2df3_mask: + case CODE_FOR_avx512f_vmcmpv4sf3_mask: error ("the last argument must be a 5-bit immediate"); return const0_rtx; - default: + default: switch (nargs_constant) { case 2: - if ((nargs - i) == nargs_constant) + if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || + (!mask_pos && (nargs - i) == nargs_constant)) { error ("the next to last argument must be an 8-bit immediate"); break; @@ -32818,6 +33810,14 @@ ix86_expand_args_builtin (const struct builtin_description *d, pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, args[2].op, args[3].op); break; + case 5: + pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, + args[2].op, args[3].op, args[4].op); + case 6: + pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, + args[2].op, args[3].op, args[4].op, + args[5].op); + break; default: gcc_unreachable (); } @@ -32829,6 +33829,363 @@ ix86_expand_args_builtin (const struct builtin_description *d, return target; } +/* Transform pattern of following layout: + (parallel [ + set (A B) + (unspec [C] UNSPEC_EMBEDDED_ROUNDING)]) + ]) + into: + (set (A B)) + + Or: + (parallel [ A B + ... + (unspec [C] UNSPEC_EMBEDDED_ROUNDING) + ... + ]) + into: + (parallel [ A B ... 
]) */ + +static rtx +ix86_erase_embedded_rounding (rtx pat) +{ + if (GET_CODE (pat) == INSN) + pat = PATTERN (pat); + + gcc_assert (GET_CODE (pat) == PARALLEL); + + if (XVECLEN (pat, 0) == 2) + { + rtx p0 = XVECEXP (pat, 0, 0); + rtx p1 = XVECEXP (pat, 0, 1); + + gcc_assert (GET_CODE (p0) == SET + && GET_CODE (p1) == UNSPEC + && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING); + + return p0; + } + else + { + rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0)); + int i = 0; + int j = 0; + + for (; i < XVECLEN (pat, 0); ++i) + { + rtx elem = XVECEXP (pat, 0, i); + if (GET_CODE (elem) != UNSPEC + || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING) + res [j++] = elem; + } + + /* No more than 1 occurrence was removed. */ + gcc_assert (j >= XVECLEN (pat, 0) - 1); + + return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res)); + } +} + +/* Subroutine of ix86_expand_round_builtin to take care of comi insns + with rounding. */ +static rtx +ix86_expand_sse_comi_round (const struct builtin_description *d, + tree exp, rtx target) +{ + rtx pat, set_dst; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + tree arg3 = CALL_EXPR_ARG (exp, 3); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + rtx op3 = expand_normal (arg3); + enum insn_code icode = d->icode; + const struct insn_data_d *insn_p = &insn_data[icode]; + enum machine_mode mode0 = insn_p->operand[0].mode; + enum machine_mode mode1 = insn_p->operand[1].mode; + enum rtx_code comparison = UNEQ; + bool need_ucomi = false; + + /* See avxintrin.h for values. 
*/ + enum rtx_code comi_comparisons[32] = + { + UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT, + UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE, + UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT + }; + bool need_ucomi_values[32] = + { + true, false, false, true, true, false, false, true, + true, false, false, true, true, false, false, true, + false, true, true, false, false, true, true, false, + false, true, true, false, false, true, true, false + }; + + if (!CONST_INT_P (op2)) + { + error ("the third argument must be comparison constant"); + return const0_rtx; + } + if (INTVAL (op2) < 0 || INTVAL (op2) >= 32) + { + error ("incorrect comparison mode"); + return const0_rtx; + } + + if (!insn_p->operand[2].predicate (op3, SImode)) + { + error ("incorrect rounding operand"); + return const0_rtx; + } + + comparison = comi_comparisons[INTVAL (op2)]; + need_ucomi = need_ucomi_values[INTVAL (op2)]; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + target = gen_reg_rtx (SImode); + emit_move_insn (target, const0_rtx); + target = gen_rtx_SUBREG (QImode, target, 0); + + if ((optimize && !register_operand (op0, mode0)) + || !insn_p->operand[0].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if ((optimize && !register_operand (op1, mode1)) + || !insn_p->operand[1].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + if (need_ucomi) + icode = icode == CODE_FOR_sse_comi_round + ? CODE_FOR_sse_ucomi_round + : CODE_FOR_sse2_ucomi_round; + + pat = GEN_FCN (icode) (op0, op1, op3); + if (! pat) + return 0; + + /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */ + if (INTVAL (op3) == NO_ROUND) + { + pat = ix86_erase_embedded_rounding (pat); + if (!
pat) + return 0; + + set_dst = SET_DEST (pat); + } + else + { + gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET); + set_dst = SET_DEST (XVECEXP (pat, 0, 0)); + } + + emit_insn (pat); + emit_insn (gen_rtx_SET (VOIDmode, + gen_rtx_STRICT_LOW_PART (VOIDmode, target), + gen_rtx_fmt_ee (comparison, QImode, + set_dst, + const0_rtx))); + + return SUBREG_REG (target); +} + +static rtx +ix86_expand_round_builtin (const struct builtin_description *d, + tree exp, rtx target) +{ + rtx pat; + unsigned int i, nargs; + struct + { + rtx op; + enum machine_mode mode; + } args[6]; + enum insn_code icode = d->icode; + const struct insn_data_d *insn_p = &insn_data[icode]; + enum machine_mode tmode = insn_p->operand[0].mode; + unsigned int nargs_constant = 0; + unsigned int redundant_embed_rnd = 0; + + switch ((enum ix86_builtin_func_type) d->flag) + { + case UINT64_FTYPE_V2DF_INT: + case UINT64_FTYPE_V4SF_INT: + case UINT_FTYPE_V2DF_INT: + case UINT_FTYPE_V4SF_INT: + case INT64_FTYPE_V2DF_INT: + case INT64_FTYPE_V4SF_INT: + case INT_FTYPE_V2DF_INT: + case INT_FTYPE_V4SF_INT: + nargs = 2; + break; + case V4SF_FTYPE_V4SF_UINT_INT: + case V4SF_FTYPE_V4SF_UINT64_INT: + case V2DF_FTYPE_V2DF_UINT64_INT: + case V4SF_FTYPE_V4SF_INT_INT: + case V4SF_FTYPE_V4SF_INT64_INT: + case V2DF_FTYPE_V2DF_INT64_INT: + nargs = 3; + break; + case V8SF_FTYPE_V8DF_V8SF_QI_INT: + case V8DF_FTYPE_V8DF_V8DF_QI_INT: + case V8SI_FTYPE_V8DF_V8SI_QI_INT: + case V16SF_FTYPE_V16SF_V16SF_HI_INT: + case V16SF_FTYPE_V16SI_V16SF_HI_INT: + case V16SI_FTYPE_V16SF_V16SI_HI_INT: + case V8DF_FTYPE_V8SF_V8DF_QI_INT: + case V16SF_FTYPE_V16HI_V16SF_HI_INT: + nargs = 4; + break; + case INT_FTYPE_V4SF_V4SF_INT_INT: + case INT_FTYPE_V2DF_V2DF_INT_INT: + return ix86_expand_sse_comi_round (d, exp, target); + case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT: + case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT: + case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT: + case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT: + case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT: + case 
V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT: + nargs = 5; + break; + case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT: + case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT: + nargs_constant = 4; + nargs = 5; + break; + case QI_FTYPE_V8DF_V8DF_INT_QI_INT: + case QI_FTYPE_V2DF_V2DF_INT_QI_INT: + case HI_FTYPE_V16SF_V16SF_INT_HI_INT: + case QI_FTYPE_V4SF_V4SF_INT_QI_INT: + nargs_constant = 3; + nargs = 5; + break; + case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT: + case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT: + nargs = 6; + nargs_constant = 4; + break; + case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT: + case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT: + case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT: + case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT: + nargs = 6; + nargs_constant = 3; + break; + default: + gcc_unreachable (); + } + gcc_assert (nargs <= ARRAY_SIZE (args)); + + if (optimize + || target == 0 + || GET_MODE (target) != tmode + || !insn_p->operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + + for (i = 0; i < nargs; i++) + { + tree arg = CALL_EXPR_ARG (exp, i); + rtx op = expand_normal (arg); + enum machine_mode mode = insn_p->operand[i + 1].mode; + bool match = insn_p->operand[i + 1].predicate (op, mode); + + if (i == nargs - nargs_constant) + { + if (!match) + { + switch (icode) + { + case CODE_FOR_avx512f_getmantv8df_mask_round: + case CODE_FOR_avx512f_getmantv16sf_mask_round: + error ("the immediate argument must be a 4-bit immediate"); + return const0_rtx; + case CODE_FOR_avx512f_cmpv8df3_mask_round: + case CODE_FOR_avx512f_cmpv16sf3_mask_round: + case CODE_FOR_avx512f_vmcmpv2df3_mask_round: + case CODE_FOR_avx512f_vmcmpv4sf3_mask_round: + error ("the immediate argument must be a 5-bit immediate"); + return const0_rtx; + default: + error ("the immediate argument must be an 8-bit immediate"); + return const0_rtx; + } + } + } + else if (i == nargs-1) + { + if (!insn_p->operand[nargs].predicate (op, SImode)) + { + error ("incorrect rounding operand"); + return const0_rtx; + } + + /* If there is 
no rounding use normal version of the pattern. */ + if (INTVAL (op) == NO_ROUND) + redundant_embed_rnd = 1; + } + else + { + if (VECTOR_MODE_P (mode)) + op = safe_vector_operand (op, mode); + + if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) + { + if (optimize || !match) + op = copy_to_mode_reg (mode, op); + } + else + { + op = copy_to_reg (op); + op = simplify_gen_subreg (mode, op, GET_MODE (op), 0); + } + } + + args[i].op = op; + args[i].mode = mode; + } + + switch (nargs) + { + case 1: + pat = GEN_FCN (icode) (target, args[0].op); + break; + case 2: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op); + break; + case 3: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, + args[2].op); + break; + case 4: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, + args[2].op, args[3].op); + break; + case 5: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, + args[2].op, args[3].op, args[4].op); + case 6: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, + args[2].op, args[3].op, args[4].op, + args[5].op); + break; + default: + gcc_unreachable (); + } + + if (!pat) + return 0; + + if (redundant_embed_rnd) + pat = ix86_erase_embedded_rounding (pat); + + emit_insn (pat); + return target; +} + /* Subroutine of ix86_expand_builtin to take care of special insns with variable number of operands. 
*/ @@ -32882,6 +34239,10 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case V4DF_FTYPE_PCDOUBLE: case V2DF_FTYPE_PCDOUBLE: case VOID_FTYPE_PVOID: + case V16SI_FTYPE_PV4SI: + case V16SF_FTYPE_PV4SF: + case V8DI_FTYPE_PV4DI: + case V8DF_FTYPE_PV4DF: nargs = 1; klass = load; memory = 0; @@ -32896,12 +34257,15 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, } break; case VOID_FTYPE_PV2SF_V4SF: + case VOID_FTYPE_PV8DI_V8DI: case VOID_FTYPE_PV4DI_V4DI: case VOID_FTYPE_PV2DI_V2DI: case VOID_FTYPE_PCHAR_V32QI: case VOID_FTYPE_PCHAR_V16QI: + case VOID_FTYPE_PFLOAT_V16SF: case VOID_FTYPE_PFLOAT_V8SF: case VOID_FTYPE_PFLOAT_V4SF: + case VOID_FTYPE_PDOUBLE_V8DF: case VOID_FTYPE_PDOUBLE_V4DF: case VOID_FTYPE_PDOUBLE_V2DF: case VOID_FTYPE_PLONGLONG_LONGLONG: @@ -32958,11 +34322,27 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case VOID_FTYPE_PV4DI_V4DI_V4DI: case VOID_FTYPE_PV4SI_V4SI_V4SI: case VOID_FTYPE_PV2DI_V2DI_V2DI: + case VOID_FTYPE_PV8DF_V8DF_QI: + case VOID_FTYPE_PV16SF_V16SF_HI: + case VOID_FTYPE_PV8DI_V8DI_QI: + case VOID_FTYPE_PV16SI_V16SI_HI: + case VOID_FTYPE_PDOUBLE_V2DF_QI: + case VOID_FTYPE_PFLOAT_V4SF_QI: nargs = 2; klass = store; /* Reserve memory operand for target. 
*/ memory = ARRAY_SIZE (args); break; + case V16SF_FTYPE_PCV16SF_V16SF_HI: + case V16SI_FTYPE_PCV16SI_V16SI_HI: + case V8DF_FTYPE_PCV8DF_V8DF_QI: + case V8DI_FTYPE_PCV8DI_V8DI_QI: + case V2DF_FTYPE_PCDOUBLE_V2DF_QI: + case V4SF_FTYPE_PCFLOAT_V4SF_QI: + nargs = 3; + klass = load; + memory = 0; + break; case VOID_FTYPE_UINT_UINT_UINT: case VOID_FTYPE_UINT64_UINT_UINT: case UCHAR_FTYPE_UINT_UINT_UINT: @@ -33061,9 +34441,13 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, if (VECTOR_MODE_P (mode)) op = safe_vector_operand (op, mode); - gcc_assert (GET_MODE (op) == mode - || GET_MODE (op) == VOIDmode); - op = copy_to_mode_reg (mode, op); + if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) + op = copy_to_mode_reg (mode, op); + else + { + op = copy_to_reg (op); + op = simplify_gen_subreg (mode, op, GET_MODE (op), 0); + } } } @@ -33804,6 +35188,38 @@ addcarryx: emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG))); return 0; + case IX86_BUILTIN_KORTESTC16: + icode = CODE_FOR_kortestchi; + mode0 = HImode; + mode1 = CCCmode; + goto kortest; + + case IX86_BUILTIN_KORTESTZ16: + icode = CODE_FOR_kortestzhi; + mode0 = HImode; + mode1 = CCZmode; + + kortest: + arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */ + arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */ + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + op0 = copy_to_reg (op0); + op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0); + op1 = copy_to_reg (op1); + op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0); + + target = gen_reg_rtx (QImode); + emit_insn (gen_rtx_SET (mode0, target, const0_rtx)); + + /* Emit kortest. */ + emit_insn (GEN_FCN (icode) (op0, op1)); + /* And use setcc to return result from flags. 
*/ + ix86_expand_setcc (target, EQ, + gen_rtx_REG (mode1, FLAGS_REG), const0_rtx); + return target; + case IX86_BUILTIN_GATHERSIV2DF: icode = CODE_FOR_avx2_gathersiv2df; goto gather_gen; @@ -33864,8 +35280,83 @@ addcarryx: case IX86_BUILTIN_GATHERALTDIV8SI: icode = CODE_FOR_avx2_gatherdiv8si; goto gather_gen; + case IX86_BUILTIN_GATHER3SIV16SF: + icode = CODE_FOR_avx512f_gathersiv16sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV8DF: + icode = CODE_FOR_avx512f_gathersiv8df; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV16SF: + icode = CODE_FOR_avx512f_gatherdiv16sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV8DF: + icode = CODE_FOR_avx512f_gatherdiv8df; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV16SI: + icode = CODE_FOR_avx512f_gathersiv16si; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV8DI: + icode = CODE_FOR_avx512f_gathersiv8di; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV16SI: + icode = CODE_FOR_avx512f_gatherdiv16si; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV8DI: + icode = CODE_FOR_avx512f_gatherdiv8di; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTSIV8DF: + icode = CODE_FOR_avx512f_gathersiv8df; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTDIV16SF: + icode = CODE_FOR_avx512f_gatherdiv16sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTSIV8DI: + icode = CODE_FOR_avx512f_gathersiv8di; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTDIV16SI: + icode = CODE_FOR_avx512f_gatherdiv16si; + goto gather_gen; + case IX86_BUILTIN_SCATTERSIV16SF: + icode = CODE_FOR_avx512f_scattersiv16sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV8DF: + icode = CODE_FOR_avx512f_scattersiv8df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV16SF: + icode = CODE_FOR_avx512f_scatterdiv16sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV8DF: + icode = CODE_FOR_avx512f_scatterdiv8df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV16SI: + icode = CODE_FOR_avx512f_scattersiv16si; + goto scatter_gen; + case 
IX86_BUILTIN_SCATTERSIV8DI: + icode = CODE_FOR_avx512f_scattersiv8di; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV16SI: + icode = CODE_FOR_avx512f_scatterdiv16si; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV8DI: + icode = CODE_FOR_avx512f_scatterdiv8di; + goto scatter_gen; + case IX86_BUILTIN_GATHERPFDPS: + icode = CODE_FOR_avx512pf_gatherpfv16si; + goto vec_prefetch_gen; + case IX86_BUILTIN_GATHERPFQPS: + icode = CODE_FOR_avx512pf_gatherpfv8di; + goto vec_prefetch_gen; + case IX86_BUILTIN_SCATTERPFDPS: + icode = CODE_FOR_avx512pf_scatterpfv16si; + goto vec_prefetch_gen; + case IX86_BUILTIN_SCATTERPFQPS: + icode = CODE_FOR_avx512pf_scatterpfv8di; + goto vec_prefetch_gen; gather_gen: + rtx half; + rtx (*gen) (rtx, rtx); + arg0 = CALL_EXPR_ARG (exp, 0); arg1 = CALL_EXPR_ARG (exp, 1); arg2 = CALL_EXPR_ARG (exp, 2); @@ -33888,20 +35379,46 @@ addcarryx: else subtarget = target; - if (fcode == IX86_BUILTIN_GATHERALTSIV4DF - || fcode == IX86_BUILTIN_GATHERALTSIV4DI) + switch (fcode) { - rtx half = gen_reg_rtx (V4SImode); + case IX86_BUILTIN_GATHER3ALTSIV8DF: + case IX86_BUILTIN_GATHER3ALTSIV8DI: + half = gen_reg_rtx (V8SImode); + if (!nonimmediate_operand (op2, V16SImode)) + op2 = copy_to_mode_reg (V16SImode, op2); + emit_insn (gen_vec_extract_lo_v16si (half, op2)); + op2 = half; + break; + case IX86_BUILTIN_GATHERALTSIV4DF: + case IX86_BUILTIN_GATHERALTSIV4DI: + half = gen_reg_rtx (V4SImode); if (!nonimmediate_operand (op2, V8SImode)) op2 = copy_to_mode_reg (V8SImode, op2); emit_insn (gen_vec_extract_lo_v8si (half, op2)); op2 = half; - } - else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF - || fcode == IX86_BUILTIN_GATHERALTDIV8SI) - { - rtx (*gen) (rtx, rtx); - rtx half = gen_reg_rtx (mode0); + break; + case IX86_BUILTIN_GATHER3ALTDIV16SF: + case IX86_BUILTIN_GATHER3ALTDIV16SI: + half = gen_reg_rtx (mode0); + if (mode0 == V8SFmode) + gen = gen_vec_extract_lo_v16sf; + else + gen = gen_vec_extract_lo_v16si; + if (!nonimmediate_operand (op0, GET_MODE (op0))) + op0 
= copy_to_mode_reg (GET_MODE (op0), op0); + emit_insn (gen (half, op0)); + op0 = half; + if (GET_MODE (op3) != VOIDmode) + { + if (!nonimmediate_operand (op3, GET_MODE (op3))) + op3 = copy_to_mode_reg (GET_MODE (op3), op3); + emit_insn (gen (half, op3)); + op3 = half; + } + break; + case IX86_BUILTIN_GATHERALTDIV8SF: + case IX86_BUILTIN_GATHERALTDIV8SI: + half = gen_reg_rtx (mode0); if (mode0 == V4SFmode) gen = gen_vec_extract_lo_v8sf; else @@ -33910,10 +35427,16 @@ addcarryx: op0 = copy_to_mode_reg (GET_MODE (op0), op0); emit_insn (gen (half, op0)); op0 = half; - if (!nonimmediate_operand (op3, GET_MODE (op3))) - op3 = copy_to_mode_reg (GET_MODE (op3), op3); - emit_insn (gen (half, op3)); - op3 = half; + if (GET_MODE (op3) != VOIDmode) + { + if (!nonimmediate_operand (op3, GET_MODE (op3))) + op3 = copy_to_mode_reg (GET_MODE (op3), op3); + emit_insn (gen (half, op3)); + op3 = half; + } + break; + default: + break; } /* Force memory operand only with base register here. But we @@ -33927,11 +35450,19 @@ addcarryx: op1 = copy_to_mode_reg (Pmode, op1); if (!insn_data[icode].operand[3].predicate (op2, mode2)) op2 = copy_to_mode_reg (mode2, op2); - if (!insn_data[icode].operand[4].predicate (op3, mode3)) - op3 = copy_to_mode_reg (mode3, op3); + if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode) + { + if (!insn_data[icode].operand[4].predicate (op3, mode3)) + op3 = copy_to_mode_reg (mode3, op3); + } + else + { + op3 = copy_to_reg (op3); + op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0); + } if (!insn_data[icode].operand[5].predicate (op4, mode4)) { - error ("last argument must be scale 1, 2, 4, 8"); + error ("the last argument must be scale 1, 2, 4, 8"); return const0_rtx; } @@ -33941,7 +35472,12 @@ addcarryx: previous contents. 
*/ if (optimize) { - if (TREE_CODE (arg3) == VECTOR_CST) + if (TREE_CODE (arg3) == INTEGER_CST) + { + if (integer_all_onesp (arg3)) + op0 = pc_rtx; + } + else if (TREE_CODE (arg3) == VECTOR_CST) { unsigned int negative = 0; for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i) @@ -33957,7 +35493,8 @@ addcarryx: if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3))) op0 = pc_rtx; } - else if (TREE_CODE (arg3) == SSA_NAME) + else if (TREE_CODE (arg3) == SSA_NAME + && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE) { /* Recognize also when mask is like: __v2df src = _mm_setzero_pd (); @@ -34002,22 +35539,146 @@ addcarryx: return const0_rtx; emit_insn (pat); - if (fcode == IX86_BUILTIN_GATHERDIV8SF - || fcode == IX86_BUILTIN_GATHERDIV8SI) + switch (fcode) { - enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode - ? V4SFmode : V4SImode; + case IX86_BUILTIN_GATHER3DIV16SF: if (target == NULL_RTX) - target = gen_reg_rtx (tmode); - if (tmode == V4SFmode) - emit_insn (gen_vec_extract_lo_v8sf (target, subtarget)); - else - emit_insn (gen_vec_extract_lo_v8si (target, subtarget)); + target = gen_reg_rtx (V8SFmode); + emit_insn (gen_vec_extract_lo_v16sf (target, subtarget)); + break; + case IX86_BUILTIN_GATHER3DIV16SI: + if (target == NULL_RTX) + target = gen_reg_rtx (V8SImode); + emit_insn (gen_vec_extract_lo_v16si (target, subtarget)); + break; + case IX86_BUILTIN_GATHERDIV8SF: + if (target == NULL_RTX) + target = gen_reg_rtx (V4SFmode); + emit_insn (gen_vec_extract_lo_v8sf (target, subtarget)); + break; + case IX86_BUILTIN_GATHERDIV8SI: + if (target == NULL_RTX) + target = gen_reg_rtx (V4SImode); + emit_insn (gen_vec_extract_lo_v8si (target, subtarget)); + break; + default: + target = subtarget; + break; + } + return target; + + scatter_gen: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + arg3 = CALL_EXPR_ARG (exp, 3); + arg4 = CALL_EXPR_ARG (exp, 4); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = 
expand_normal (arg2); + op3 = expand_normal (arg3); + op4 = expand_normal (arg4); + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + mode4 = insn_data[icode].operand[4].mode; + + /* Force memory operand only with base register here. But we + don't want to do it on memory operand for other builtin + functions. */ + op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1)); + + if (!insn_data[icode].operand[0].predicate (op0, Pmode)) + op0 = copy_to_mode_reg (Pmode, op0); + + if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode) + { + if (!insn_data[icode].operand[1].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); } else - target = subtarget; + { + op1 = copy_to_reg (op1); + op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0); + } - return target; + if (!insn_data[icode].operand[2].predicate (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + + if (!insn_data[icode].operand[3].predicate (op3, mode3)) + op3 = copy_to_mode_reg (mode3, op3); + + if (!insn_data[icode].operand[4].predicate (op4, mode4)) + { + error ("the last argument must be scale 1, 2, 4, 8"); + return const0_rtx; + } + + pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); + if (! 
pat) + return const0_rtx; + + emit_insn (pat); + return 0; + + vec_prefetch_gen: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + arg3 = CALL_EXPR_ARG (exp, 3); + arg4 = CALL_EXPR_ARG (exp, 4); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + op3 = expand_normal (arg3); + op4 = expand_normal (arg4); + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode3 = insn_data[icode].operand[3].mode; + mode4 = insn_data[icode].operand[4].mode; + + if (GET_MODE (op0) == mode0 + || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx)) + { + if (!insn_data[icode].operand[0].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + } + else if (op0 != constm1_rtx) + { + op0 = copy_to_reg (op0); + op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0); + } + + if (!insn_data[icode].operand[1].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + /* Force memory operand only with base register here. But we + don't want to do it on memory operand for other builtin + functions. */ + op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1)); + + if (!insn_data[icode].operand[2].predicate (op2, Pmode)) + op2 = copy_to_mode_reg (Pmode, op2); + + if (!insn_data[icode].operand[3].predicate (op3, mode3)) + { + error ("the forth argument must be scale 1, 2, 4, 8"); + return const0_rtx; + } + + if (!insn_data[icode].operand[4].predicate (op4, mode4)) + { + error ("the last argument must be hint 0 or 1"); + return const0_rtx; + } + + pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); + if (! 
pat) + return const0_rtx; + + emit_insn (pat); + + return 0; case IX86_BUILTIN_XABORT: icode = CODE_FOR_xabort; @@ -34061,6 +35722,10 @@ addcarryx: if (d->code == fcode) return ix86_expand_sse_comi (d, exp, target); + for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++) + if (d->code == fcode) + return ix86_expand_round_builtin (d, exp, target); + for (i = 0, d = bdesc_pcmpestr; i < ARRAY_SIZE (bdesc_pcmpestr); i++, d++) diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index e825c34a256..fa75a30ba79 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -42,6 +42,14 @@ #include <avx2intrin.h> +#include <avx512fintrin.h> + +#include <avx512erintrin.h> + +#include <avx512pfintrin.h> + +#include <avx512cdintrin.h> + #include <lzcntintrin.h> #include <bmiintrin.h> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ed9467b04bb..240a964a47e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,12 +1,26 @@ 2013-12-31 Alexander Ivchenko <alexander.ivchenko@intel.com> - Maxim Kuznetsov <maxim.kuznetsov@intel.com> - Sergey Lega <sergey.s.lega@intel.com> - Anna Tikhonova <anna.tikhonova@intel.com> - Ilya Tocar <ilya.tocar@intel.com> - Andrey Turetskiy <andrey.turetskiy@intel.com> - Ilya Verbin <ilya.verbin@intel.com> - Kirill Yukhin <kirill.yukhin@intel.com> - Michael Zolotukhin <michael.v.zolotukhin@intel.com> + Maxim Kuznetsov <maxim.kuznetsov@intel.com> + Sergey Lega <sergey.s.lega@intel.com> + Anna Tikhonova <anna.tikhonova@intel.com> + Ilya Tocar <ilya.tocar@intel.com> + Andrey Turetskiy <andrey.turetskiy@intel.com> + Ilya Verbin <ilya.verbin@intel.com> + Kirill Yukhin <kirill.yukhin@intel.com> + Michael Zolotukhin <michael.v.zolotukhin@intel.com> + + * gcc.target/i386/avx-1.c: Extend to AVX-512. + * gcc.target/i386/sse-22.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. 
+ +2013-12-31 Alexander Ivchenko <alexander.ivchenko@intel.com> + Maxim Kuznetsov <maxim.kuznetsov@intel.com> + Sergey Lega <sergey.s.lega@intel.com> + Anna Tikhonova <anna.tikhonova@intel.com> + Ilya Tocar <ilya.tocar@intel.com> + Andrey Turetskiy <andrey.turetskiy@intel.com> + Ilya Verbin <ilya.verbin@intel.com> + Kirill Yukhin <kirill.yukhin@intel.com> + Michael Zolotukhin <michael.v.zolotukhin@intel.com> * gcc.target/i386/pr49002-2.c: allow vmovapd generation. diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index 7496746aec8..75b6f04e24b 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -166,6 +166,181 @@ /* rtmintrin.h */ #define __builtin_ia32_xabort(I) __builtin_ia32_xabort(0) +/* avx512fintrin.h */ +#define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 1) +#define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E) +#define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E) +#define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D) +#define __builtin_ia32_cmppd512_mask(A, B, F, D, E) __builtin_ia32_cmppd512_mask(A, B, 1, D, 5) +#define __builtin_ia32_cmpps512_mask(A, B, F, D, E) __builtin_ia32_cmpps512_mask(A, B, 1, D, 5) +#define __builtin_ia32_cmpq512_mask(A, B, E, D) __builtin_ia32_cmpq512_mask(A, B, 1, D) +#define __builtin_ia32_cmpsd_mask(A, B, F, D, E) __builtin_ia32_cmpsd_mask(A, B, 1, D, 5) +#define __builtin_ia32_cmpss_mask(A, B, F, D, E) __builtin_ia32_cmpss_mask(A, B, 1, D, 5) +#define __builtin_ia32_cvtdq2ps512_mask(A, B, C, D) __builtin_ia32_cvtdq2ps512_mask(A, B, C, 1) +#define __builtin_ia32_cvtpd2dq512_mask(A, B, C, D) __builtin_ia32_cvtpd2dq512_mask(A, B, C, 1) +#define __builtin_ia32_cvtpd2ps512_mask(A, B, C, D) 
__builtin_ia32_cvtpd2ps512_mask(A, B, C, 1) +#define __builtin_ia32_cvtpd2udq512_mask(A, B, C, D) __builtin_ia32_cvtpd2udq512_mask(A, B, C, 1) +#define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 1) +#define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 5) +#define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 1) +#define __builtin_ia32_cvtsd2ss_mask(A, B, C, D, E) __builtin_ia32_cvtsd2ss_mask(A, B, C, D, 1) +#define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 1) +#define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 1) +#define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 1) +#define __builtin_ia32_cvtss2sd_mask(A, B, C, D, E) __builtin_ia32_cvtss2sd_mask(A, B, C, D, 5) +#define __builtin_ia32_cvttpd2dq512_mask(A, B, C, D) __builtin_ia32_cvttpd2dq512_mask(A, B, C, 5) +#define __builtin_ia32_cvttpd2udq512_mask(A, B, C, D) __builtin_ia32_cvttpd2udq512_mask(A, B, C, 5) +#define __builtin_ia32_cvttps2dq512_mask(A, B, C, D) __builtin_ia32_cvttps2dq512_mask(A, B, C, 5) +#define __builtin_ia32_cvttps2udq512_mask(A, B, C, D) __builtin_ia32_cvttps2udq512_mask(A, B, C, 5) +#define __builtin_ia32_cvtudq2ps512_mask(A, B, C, D) __builtin_ia32_cvtudq2ps512_mask(A, B, C, 1) +#define __builtin_ia32_cvtusi2sd64(A, B, C) __builtin_ia32_cvtusi2sd64(A, B, 1) +#define __builtin_ia32_cvtusi2ss32(A, B, C) __builtin_ia32_cvtusi2ss32(A, B, 1) +#define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 1) +#define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 1) +#define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D) +#define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, 
D) +#define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D) +#define __builtin_ia32_extracti64x4_mask(A, E, C, D) __builtin_ia32_extracti64x4_mask(A, 1, C, D) +#define __builtin_ia32_fixupimmpd512_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmpd512_mask(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmpd512_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmpd512_maskz(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmps512_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmps512_mask(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmps512_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmps512_maskz(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmsd_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmsd_mask(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmsd_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmsd_maskz(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmss_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmss_mask(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmss_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmss_maskz(A, B, C, 1, E, 5) +#define __builtin_ia32_gatherdiv8df(A, B, C, D, F) __builtin_ia32_gatherdiv8df(A, B, C, D, 1) +#define __builtin_ia32_gatherdiv8di(A, B, C, D, F) __builtin_ia32_gatherdiv8di(A, B, C, D, 1) +#define __builtin_ia32_gatherdiv16sf(A, B, C, D, F) __builtin_ia32_gatherdiv16sf(A, B, C, D, 1) +#define __builtin_ia32_gatherdiv16si(A, B, C, D, F) __builtin_ia32_gatherdiv16si(A, B, C, D, 1) +#define __builtin_ia32_gathersiv16sf(A, B, C, D, F) __builtin_ia32_gathersiv16sf(A, B, C, D, 1) +#define __builtin_ia32_gathersiv16si(A, B, C, D, F) __builtin_ia32_gathersiv16si(A, B, C, D, 1) +#define __builtin_ia32_gathersiv8df(A, B, C, D, F) __builtin_ia32_gathersiv8df(A, B, C, D, 1) +#define __builtin_ia32_gathersiv8di(A, B, C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 1) +#define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 5) +#define __builtin_ia32_getexpps512_mask(A, 
B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 5) +#define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 5) +#define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 5) +#define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E) +#define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E) +#define __builtin_ia32_inserti32x4_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_mask(A, B, 1, D, E) +#define __builtin_ia32_inserti64x4_mask(A, B, F, D, E) __builtin_ia32_inserti64x4_mask(A, B, 1, D, E) +#define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 5) +#define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 5) +#define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 5) +#define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 5) +#define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 1) +#define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D) +#define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D) +#define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D) +#define __builtin_ia32_prolq512_mask(A, E, C, D) __builtin_ia32_prolq512_mask(A, 1, C, D) +#define __builtin_ia32_prord512_mask(A, E, C, D) __builtin_ia32_prord512_mask(A, 1, C, D) +#define __builtin_ia32_prorq512_mask(A, E, C, D) __builtin_ia32_prorq512_mask(A, 1, C, D) +#define __builtin_ia32_pshufd512_mask(A, E, C, D) __builtin_ia32_pshufd512_mask(A, 1, C, D) +#define __builtin_ia32_pslldi512_mask(A, E, C, D) __builtin_ia32_pslldi512_mask(A, 1, C, D) 
+#define __builtin_ia32_psllqi512_mask(A, E, C, D) __builtin_ia32_psllqi512_mask(A, 1, C, D) +#define __builtin_ia32_psradi512_mask(A, E, C, D) __builtin_ia32_psradi512_mask(A, 1, C, D) +#define __builtin_ia32_psraqi512_mask(A, E, C, D) __builtin_ia32_psraqi512_mask(A, 1, C, D) +#define __builtin_ia32_psrldi512_mask(A, E, C, D) __builtin_ia32_psrldi512_mask(A, 1, C, D) +#define __builtin_ia32_psrlqi512_mask(A, E, C, D) __builtin_ia32_psrlqi512_mask(A, 1, C, D) +#define __builtin_ia32_pternlogd512_mask(A, B, C, F, E) __builtin_ia32_pternlogd512_mask(A, B, C, 1, E) +#define __builtin_ia32_pternlogd512_maskz(A, B, C, F, E) __builtin_ia32_pternlogd512_maskz(A, B, C, 1, E) +#define __builtin_ia32_pternlogq512_mask(A, B, C, F, E) __builtin_ia32_pternlogq512_mask(A, B, C, 1, E) +#define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E) +#define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 5) +#define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 5) +#define __builtin_ia32_rndscalesd_mask(A, B, I, D, E, F) __builtin_ia32_rndscalesd_mask(A, B, 1, D, E, 5) +#define __builtin_ia32_rndscaless_mask(A, B, I, D, E, F) __builtin_ia32_rndscaless_mask(A, B, 1, D, E, 5) +#define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 1) +#define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 1) +#define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 1) +#define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 1) +#define __builtin_ia32_scatterdiv16si(A, B, C, D, F) __builtin_ia32_scatterdiv16si(A, B, C, D, 1) +#define __builtin_ia32_scattersiv16sf(A, B, C, D, F) __builtin_ia32_scattersiv16sf(A, B, C, D, 
1) +#define __builtin_ia32_scattersiv16si(A, B, C, D, F) __builtin_ia32_scattersiv16si(A, B, C, D, 1) +#define __builtin_ia32_scattersiv8df(A, B, C, D, F) __builtin_ia32_scattersiv8df(A, B, C, D, 1) +#define __builtin_ia32_scattersiv8di(A, B, C, D, F) __builtin_ia32_scattersiv8di(A, B, C, D, 1) +#define __builtin_ia32_shuf_f32x4_mask(A, B, F, D, E) __builtin_ia32_shuf_f32x4_mask(A, B, 1, D, E) +#define __builtin_ia32_shuf_f64x2_mask(A, B, F, D, E) __builtin_ia32_shuf_f64x2_mask(A, B, 1, D, E) +#define __builtin_ia32_shuf_i32x4_mask(A, B, F, D, E) __builtin_ia32_shuf_i32x4_mask(A, B, 1, D, E) +#define __builtin_ia32_shuf_i64x2_mask(A, B, F, D, E) __builtin_ia32_shuf_i64x2_mask(A, B, 1, D, E) +#define __builtin_ia32_shufpd512_mask(A, B, F, D, E) __builtin_ia32_shufpd512_mask(A, B, 1, D, E) +#define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E) +#define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 1) +#define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 1) +#define __builtin_ia32_sqrtsd_mask(A, B, C, D, E) __builtin_ia32_sqrtsd_mask(A, B, C, D, 1) +#define __builtin_ia32_sqrtss_mask(A, B, C, D, E) __builtin_ia32_sqrtss_mask(A, B, C, D, 1) +#define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 1) +#define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D) +#define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D) +#define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 5) +#define __builtin_ia32_vcomiss(A, B, C, D) __builtin_ia32_vcomiss(A, B, 1, 5) +#define __builtin_ia32_vcvtph2ps512_mask(A, B, C, D) __builtin_ia32_vcvtph2ps512_mask(A, B, C, 5) +#define __builtin_ia32_vcvtps2ph512_mask(A, E, C, D) __builtin_ia32_vcvtps2ph512_mask(A, 1, C, 
D) +#define __builtin_ia32_vcvtsd2si32(A, B) __builtin_ia32_vcvtsd2si32(A, 1) +#define __builtin_ia32_vcvtsd2si64(A, B) __builtin_ia32_vcvtsd2si64(A, 1) +#define __builtin_ia32_vcvtsd2usi32(A, B) __builtin_ia32_vcvtsd2usi32(A, 1) +#define __builtin_ia32_vcvtsd2usi64(A, B) __builtin_ia32_vcvtsd2usi64(A, 1) +#define __builtin_ia32_vcvtss2si32(A, B) __builtin_ia32_vcvtss2si32(A, 1) +#define __builtin_ia32_vcvtss2si64(A, B) __builtin_ia32_vcvtss2si64(A, 1) +#define __builtin_ia32_vcvtss2usi32(A, B) __builtin_ia32_vcvtss2usi32(A, 1) +#define __builtin_ia32_vcvtss2usi64(A, B) __builtin_ia32_vcvtss2usi64(A, 1) +#define __builtin_ia32_vcvttsd2si32(A, B) __builtin_ia32_vcvttsd2si32(A, 5) +#define __builtin_ia32_vcvttsd2si64(A, B) __builtin_ia32_vcvttsd2si64(A, 5) +#define __builtin_ia32_vcvttsd2usi32(A, B) __builtin_ia32_vcvttsd2usi32(A, 5) +#define __builtin_ia32_vcvttsd2usi64(A, B) __builtin_ia32_vcvttsd2usi64(A, 5) +#define __builtin_ia32_vcvttss2si32(A, B) __builtin_ia32_vcvttss2si32(A, 5) +#define __builtin_ia32_vcvttss2si64(A, B) __builtin_ia32_vcvttss2si64(A, 5) +#define __builtin_ia32_vcvttss2usi32(A, B) __builtin_ia32_vcvttss2usi32(A, 5) +#define __builtin_ia32_vcvttss2usi64(A, B) __builtin_ia32_vcvttss2usi64(A, 5) +#define __builtin_ia32_vfmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfmaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddpd512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmaddpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddpd512_maskz(A, B, C, D, 1) +#define __builtin_ia32_vfmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddps512_maskz(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 1) +#define 
__builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 1) +#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 1) +#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsubps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, 1) +#define __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubps512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfnmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddps512_mask(A, B, C, D, 1) 
+#define __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfnmsubps512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D) +#define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D) +#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask(A, B, C, 1) +#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask(A, B, C, 1) +#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask(A, B, C, 1) +#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask(A, B, C, 1) +#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask(A, B, C, 1) +#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask(A, B, C, 1) +#define __builtin_ia32_gatherpfdps(A, B, C, D, E) __builtin_ia32_gatherpfdps(A, B, C, 1, 1) +#define __builtin_ia32_gatherpfqps(A, B, C, D, E) __builtin_ia32_gatherpfqps(A, B, C, 1, 1) +#define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps(A, B, C, 1, 1) +#define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps(A, B, C, 1, 1) + #include <wmmintrin.h> #include <immintrin.h> #include <mm3dnow.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 8e4c4bd3ebd..6f625ad11c9 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -30,6 +30,10 @@ type _CONCAT(_,func) (op1_type A, int const I, int const L) \ { return func (A, imm1, imm2); } +#define test_1y(func, type, op1_type, imm1, imm2, imm3) \ + 
type _CONCAT(_,func) (op1_type A, int const I, int const L, int const R)\ + { return func (A, imm1, imm2, imm3); } + #define test_2(func, type, op1_type, op2_type, imm) \ type _CONCAT(_,func) (op1_type A, op2_type B, int const I) \ { return func (A, B, imm); } @@ -38,19 +42,64 @@ type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ { return func (A, B, imm1, imm2); } +#define test_2y(func, type, op1_type, op2_type, imm1, imm2, imm3) \ + type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L,\ + int const R) \ + { return func (A, B, imm1, imm2, imm3); } + +#define test_2vx(func, op1_type, op2_type, imm1, imm2) \ + _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ + { func (A, B, imm1, imm2); } + #define test_3(func, type, op1_type, op2_type, op3_type, imm) \ type _CONCAT(_,func) (op1_type A, op2_type B, \ op3_type C, int const I) \ { return func (A, B, C, imm); } +#define test_3x(func, type, op1_type, op2_type, op3_type, imm1, imm2) \ + type _CONCAT(_,func) (op1_type A, op2_type B, \ + op3_type C, int const I, int const L) \ + { return func (A, B, C, imm1, imm2); } + +#define test_3y(func, type, op1_type, op2_type, op3_type, imm1, imm2, imm3) \ + type _CONCAT(_,func) (op1_type A, op2_type B, \ + op3_type C, int const I, int const L, int const R) \ + { return func (A, B, C, imm1, imm2, imm3); } + +#define test_3v(func, op1_type, op2_type, op3_type, imm) \ + _CONCAT(_,func) (op1_type A, op2_type B, \ + op3_type C, int const I) \ + { func (A, B, C, imm); } + +#define test_3vx(func, op1_type, op2_type, op3_type, imm1, imm2) \ + _CONCAT(_,func) (op1_type A, op2_type B, \ + op3_type C, int const I, int const L) \ + { func (A, B, C, imm1, imm2); } + #define test_4(func, type, op1_type, op2_type, op3_type, op4_type, imm) \ type _CONCAT(_,func) (op1_type A, op2_type B, \ op3_type C, op4_type D, int const I) \ { return func (A, B, C, D, imm); } +#define test_4x(func, type, op1_type, op2_type, op3_type, op4_type, imm1, 
imm2) \ + type _CONCAT(_,func) (op1_type A, op2_type B, \ + op3_type C, op4_type D, int const I, int const L) \ + { return func (A, B, C, D, imm1, imm2); } + +#define test_4y(func, type, op1_type, op2_type, op3_type, op4_type, imm1, imm2, imm3) \ + type _CONCAT(_,func) (op1_type A, op2_type B, op3_type C, \ + op4_type D, int const I, int const L, int const R) \ + { return func (A, B, C, D, imm1, imm2, imm3); } + + +#define test_4v(func, op1_type, op2_type, op3_type, op4_type, imm) \ + _CONCAT(_,func) (op1_type A, op2_type B, \ + op3_type C, op4_type D, int const I) \ + { func (A, B, C, D, imm); } + #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512pf,avx512er,avx512cd") #endif /* Following intrinsics require immediate arguments. 
They @@ -163,9 +212,9 @@ test_4 (_mm_cmpestro, int, __m128i, int, __m128i, int, 1) test_4 (_mm_cmpestrs, int, __m128i, int, __m128i, int, 1) test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) -/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM) */ +/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf") #endif #include <immintrin.h> test_1 (_cvtss_sh, unsigned short, float, 1) @@ -248,6 +297,375 @@ test_2 ( _mm256_i64gather_epi32, __m128i, int const *, __m256i, 1) /* rtmintrin.h */ test_0 ( _xabort, void, 1) +/* avx512fintrin.h */ +test_1 (_mm512_cvt_roundepi32_ps, __m512, __m512i, 1) +test_1 (_mm512_cvt_roundepu32_ps, __m512, __m512i, 1) +test_1 (_mm512_cvt_roundpd_epi32, __m256i, __m512d, 1) +test_1 (_mm512_cvt_roundpd_epu32, __m256i, __m512d, 1) +test_1 (_mm512_cvt_roundpd_ps, __m256, __m512d, 1) +test_1 (_mm512_cvt_roundph_ps, __m512, __m256i, 5) +test_1 (_mm512_cvt_roundps_epi32, __m512i, __m512, 1) +test_1 (_mm512_cvt_roundps_epu32, __m512i, __m512, 1) +test_1 (_mm512_cvt_roundps_pd, __m512d, __m256, 5) +test_1 (_mm512_cvtps_ph, __m256i, __m512, 1) +test_1 (_mm512_cvtt_roundpd_epi32, __m256i, __m512d, 5) +test_1 (_mm512_cvtt_roundpd_epu32, __m256i, __m512d, 5) +test_1 (_mm512_cvtt_roundps_epi32, __m512i, __m512, 5) +test_1 (_mm512_cvtt_roundps_epu32, __m512i, __m512, 5) +test_1 (_mm512_extractf32x4_ps, __m128, __m512, 1) +test_1 (_mm512_extractf64x4_pd, __m256d, __m512d, 1) +test_1 (_mm512_extracti32x4_epi32, __m128i, __m512i, 1) +test_1 (_mm512_extracti64x4_epi64, __m256i, __m512i, 1) +test_1 (_mm512_getexp_round_pd, __m512d, __m512d, 5) +test_1 (_mm512_getexp_round_ps, __m512, __m512, 5) +test_1y (_mm512_getmant_round_pd, __m512d, __m512d, 1, 1, 5) +test_1y (_mm512_getmant_round_ps, __m512, __m512, 1, 1, 5) +test_1 (_mm512_permute_pd, __m512d, __m512d, 1) +test_1 
(_mm512_permute_ps, __m512, __m512, 1) +test_1 (_mm512_permutex_epi64, __m512i, __m512i, 1) +test_1 (_mm512_permutex_pd, __m512d, __m512d, 1) +test_1 (_mm512_rol_epi32, __m512i, __m512i, 1) +test_1 (_mm512_rol_epi64, __m512i, __m512i, 1) +test_1 (_mm512_ror_epi32, __m512i, __m512i, 1) +test_1 (_mm512_ror_epi64, __m512i, __m512i, 1) +test_1 (_mm512_shuffle_epi32, __m512i, __m512i, 1) +test_1 (_mm512_slli_epi32, __m512i, __m512i, 1) +test_1 (_mm512_slli_epi64, __m512i, __m512i, 1) +test_1 (_mm512_sqrt_round_pd, __m512d, __m512d, 1) +test_1 (_mm512_sqrt_round_ps, __m512, __m512, 1) +test_1 (_mm512_srai_epi32, __m512i, __m512i, 1) +test_1 (_mm512_srai_epi64, __m512i, __m512i, 1) +test_1 (_mm512_srli_epi32, __m512i, __m512i, 1) +test_1 (_mm512_srli_epi64, __m512i, __m512i, 1) +test_1 (_mm_cvt_roundsd_i32, int, __m128d, 1) +test_1 (_mm_cvt_roundsd_u32, unsigned, __m128d, 1) +test_1 (_mm_cvt_roundss_i32, int, __m128, 1) +test_1 (_mm_cvt_roundss_u32, unsigned, __m128, 1) +test_1 (_mm_cvtt_roundsd_i32, int, __m128d, 5) +test_1 (_mm_cvtt_roundsd_u32, unsigned, __m128d, 5) +test_1 (_mm_cvtt_roundss_i32, int, __m128, 5) +test_1 (_mm_cvtt_roundss_u32, unsigned, __m128, 5) +test_1x (_mm512_getmant_pd, __m512d, __m512d, 1, 1) +test_1x (_mm512_getmant_ps, __m512, __m512, 1, 1) +test_1x (_mm_cvt_roundi32_ss, __m128, __m128, 1, 1) +test_2 (_mm512_add_round_pd, __m512d, __m512d, __m512d, 1) +test_2 (_mm512_add_round_ps, __m512, __m512, __m512, 1) +test_2 (_mm512_alignr_epi32, __m512i, __m512i, __m512i, 1) +test_2 (_mm512_alignr_epi64, __m512i, __m512i, __m512i, 1) +test_2 (_mm512_cmp_epi32_mask, __mmask16, __m512i, __m512i, 1) +test_2 (_mm512_cmp_epi64_mask, __mmask8, __m512i, __m512i, 1) +test_2 (_mm512_cmp_epu32_mask, __mmask16, __m512i, __m512i, 1) +test_2 (_mm512_cmp_epu64_mask, __mmask8, __m512i, __m512i, 1) +test_2 (_mm512_cmp_pd_mask, __mmask8, __m512d, __m512d, 1) +test_2 (_mm512_cmp_ps_mask, __mmask16, __m512, __m512, 1) +test_2 (_mm512_div_round_pd, __m512d, __m512d, 
__m512d, 1) +test_2 (_mm512_div_round_ps, __m512, __m512, __m512, 1) +test_2 (_mm512_i32gather_epi32, __m512i, __m512i, void const *, 1) +test_2 (_mm512_i32gather_epi64, __m512i, __m256i, void const *, 1) +test_2 (_mm512_i32gather_pd, __m512d, __m256i, void const *, 1) +test_2 (_mm512_i32gather_ps, __m512, __m512i, void const *, 1) +test_2 (_mm512_i64gather_epi32, __m256i, __m512i, void const *, 1) +test_2 (_mm512_i64gather_epi64, __m512i, __m512i, void const *, 1) +test_2 (_mm512_i64gather_pd, __m512d, __m512i, void const *, 1) +test_2 (_mm512_i64gather_ps, __m256, __m512i, void const *, 1) +test_2 (_mm512_insertf32x4, __m512, __m512, __m128, 1) +test_2 (_mm512_insertf64x4, __m512d, __m512d, __m256d, 1) +test_2 (_mm512_inserti32x4, __m512i, __m512i, __m128i, 1) +test_2 (_mm512_inserti64x4, __m512i, __m512i, __m256i, 1) +test_2 (_mm512_maskz_cvt_roundepi32_ps, __m512, __mmask16, __m512i, 1) +test_2 (_mm512_maskz_cvt_roundepu32_ps, __m512, __mmask16, __m512i, 1) +test_2 (_mm512_maskz_cvt_roundpd_epi32, __m256i, __mmask8, __m512d, 1) +test_2 (_mm512_maskz_cvt_roundpd_epu32, __m256i, __mmask8, __m512d, 1) +test_2 (_mm512_maskz_cvt_roundpd_ps, __m256, __mmask8, __m512d, 1) +test_2 (_mm512_maskz_cvt_roundph_ps, __m512, __mmask16, __m256i, 5) +test_2 (_mm512_maskz_cvt_roundps_epi32, __m512i, __mmask16, __m512, 1) +test_2 (_mm512_maskz_cvt_roundps_epu32, __m512i, __mmask16, __m512, 1) +test_2 (_mm512_maskz_cvt_roundps_pd, __m512d, __mmask8, __m256, 5) +test_2 (_mm512_maskz_cvtps_ph, __m256i, __mmask16, __m512, 1) +test_2 (_mm512_maskz_cvtt_roundpd_epi32, __m256i, __mmask8, __m512d, 5) +test_2 (_mm512_maskz_cvtt_roundpd_epu32, __m256i, __mmask8, __m512d, 5) +test_2 (_mm512_maskz_cvtt_roundps_epi32, __m512i, __mmask16, __m512, 5) +test_2 (_mm512_maskz_cvtt_roundps_epu32, __m512i, __mmask16, __m512, 5) +test_2 (_mm512_maskz_extractf32x4_ps, __m128, __mmask8, __m512, 1) +test_2 (_mm512_maskz_extractf64x4_pd, __m256d, __mmask8, __m512d, 1) +test_2 
(_mm512_maskz_extracti32x4_epi32, __m128i, __mmask8, __m512i, 1) +test_2 (_mm512_maskz_extracti64x4_epi64, __m256i, __mmask8, __m512i, 1) +test_2 (_mm512_maskz_getexp_round_pd, __m512d, __mmask8, __m512d, 5) +test_2 (_mm512_maskz_getexp_round_ps, __m512, __mmask16, __m512, 5) +test_2y (_mm512_maskz_getmant_round_pd, __m512d, __mmask8, __m512d, 1, 1, 5) +test_2y (_mm512_maskz_getmant_round_ps, __m512, __mmask16, __m512, 1, 1, 5) +test_2 (_mm512_maskz_permute_pd, __m512d, __mmask8, __m512d, 1) +test_2 (_mm512_maskz_permute_ps, __m512, __mmask16, __m512, 1) +test_2 (_mm512_maskz_permutex_epi64, __m512i, __mmask8, __m512i, 1) +test_2 (_mm512_maskz_permutex_pd, __m512d, __mmask8, __m512d, 1) +test_2 (_mm512_maskz_rol_epi32, __m512i, __mmask16, __m512i, 1) +test_2 (_mm512_maskz_rol_epi64, __m512i, __mmask8, __m512i, 1) +test_2 (_mm512_maskz_ror_epi32, __m512i, __mmask16, __m512i, 1) +test_2 (_mm512_maskz_ror_epi64, __m512i, __mmask8, __m512i, 1) +test_2 (_mm512_maskz_shuffle_epi32, __m512i, __mmask16, __m512i, 1) +test_2 (_mm512_maskz_slli_epi32, __m512i, __mmask16, __m512i, 1) +test_2 (_mm512_maskz_slli_epi64, __m512i, __mmask8, __m512i, 1) +test_2 (_mm512_maskz_sqrt_round_pd, __m512d, __mmask8, __m512d, 1) +test_2 (_mm512_maskz_sqrt_round_ps, __m512, __mmask16, __m512, 1) +test_2 (_mm512_maskz_srai_epi32, __m512i, __mmask16, __m512i, 1) +test_2 (_mm512_maskz_srai_epi64, __m512i, __mmask8, __m512i, 1) +test_2 (_mm512_maskz_srli_epi32, __m512i, __mmask16, __m512i, 1) +test_2 (_mm512_maskz_srli_epi64, __m512i, __mmask8, __m512i, 1) +test_2 (_mm512_max_round_pd, __m512d, __m512d, __m512d, 5) +test_2 (_mm512_max_round_ps, __m512, __m512, __m512, 5) +test_2 (_mm512_min_round_pd, __m512d, __m512d, __m512d, 5) +test_2 (_mm512_min_round_ps, __m512, __m512, __m512, 5) +test_2 (_mm512_mul_round_pd, __m512d, __m512d, __m512d, 1) +test_2 (_mm512_mul_round_ps, __m512, __m512, __m512, 1) +test_2 (_mm512_scalef_round_pd, __m512d, __m512d, __m512d, 1) +test_2 (_mm512_scalef_round_ps, 
__m512, __m512, __m512, 1) +test_2 (_mm512_shuffle_f32x4, __m512, __m512, __m512, 1) +test_2 (_mm512_shuffle_f64x2, __m512d, __m512d, __m512d, 1) +test_2 (_mm512_shuffle_i32x4, __m512i, __m512i, __m512i, 1) +test_2 (_mm512_shuffle_i64x2, __m512i, __m512i, __m512i, 1) +test_2 (_mm512_shuffle_pd, __m512d, __m512d, __m512d, 1) +test_2 (_mm512_shuffle_ps, __m512, __m512, __m512, 1) +test_2 (_mm512_sub_round_pd, __m512d, __m512d, __m512d, 1) +test_2 (_mm512_sub_round_ps, __m512, __m512, __m512, 1) +test_2 (_mm_cmp_sd_mask, __mmask8, __m128d, __m128d, 1) +test_2 (_mm_cmp_ss_mask, __mmask8, __m128, __m128, 1) +#ifdef __x86_64__ +test_2 (_mm_cvt_roundi64_sd, __m128d, __m128d, long long, 1) +test_2 (_mm_cvt_roundi64_ss, __m128, __m128, long long, 1) +#endif +test_2 (_mm_cvt_roundu32_ss, __m128, __m128, unsigned, 1) +#ifdef __x86_64__ +test_2 (_mm_cvt_roundu64_sd, __m128d, __m128d, unsigned long long, 1) +test_2 (_mm_cvt_roundu64_ss, __m128, __m128, unsigned long long, 1) +#endif +test_2x (_mm512_cmp_round_pd_mask, __mmask8, __m512d, __m512d, 1, 5) +test_2x (_mm512_cmp_round_ps_mask, __mmask16, __m512, __m512, 1, 5) +test_2x (_mm512_maskz_roundscale_round_pd, __m512d, __mmask8, __m512d, 1, 5) +test_2x (_mm512_maskz_roundscale_round_ps, __m512, __mmask16, __m512, 1, 5) +test_2x (_mm_cmp_round_sd_mask, __mmask8, __m128d, __m128d, 1, 5) +test_2x (_mm_cmp_round_ss_mask, __mmask8, __m128, __m128, 1, 5) +test_2x (_mm_comi_round_sd, int, __m128d, __m128d, 1, 5) +test_2x (_mm_comi_round_ss, int, __m128, __m128, 1, 5) +test_3 (_mm512_fmadd_round_pd, __m512d, __m512d, __m512d, __m512d, 1) +test_3 (_mm512_fmadd_round_ps, __m512, __m512, __m512, __m512, 1) +test_3 (_mm512_fmaddsub_round_pd, __m512d, __m512d, __m512d, __m512d, 1) +test_3 (_mm512_fmaddsub_round_ps, __m512, __m512, __m512, __m512, 1) +test_3 (_mm512_fmsub_round_pd, __m512d, __m512d, __m512d, __m512d, 1) +test_3 (_mm512_fmsub_round_ps, __m512, __m512, __m512, __m512, 1) +test_3 (_mm512_fmsubadd_round_pd, __m512d, __m512d, 
__m512d, __m512d, 1) +test_3 (_mm512_fmsubadd_round_ps, __m512, __m512, __m512, __m512, 1) +test_3 (_mm512_fnmadd_round_pd, __m512d, __m512d, __m512d, __m512d, 1) +test_3 (_mm512_fnmadd_round_ps, __m512, __m512, __m512, __m512, 1) +test_3 (_mm512_fnmsub_round_pd, __m512d, __m512d, __m512d, __m512d, 1) +test_3 (_mm512_fnmsub_round_ps, __m512, __m512, __m512, __m512, 1) +test_3 (_mm512_mask_cmp_epi32_mask, __mmask16, __mmask16, __m512i, __m512i, 1) +test_3 (_mm512_mask_cmp_epi64_mask, __mmask8, __mmask8, __m512i, __m512i, 1) +test_3 (_mm512_mask_cmp_epu32_mask, __mmask16, __mmask16, __m512i, __m512i, 1) +test_3 (_mm512_mask_cmp_epu64_mask, __mmask8, __mmask8, __m512i, __m512i, 1) +test_3 (_mm512_mask_cmp_pd_mask, __mmask8, __mmask8, __m512d, __m512d, 1) +test_3 (_mm512_mask_cmp_ps_mask, __mmask16, __mmask16, __m512, __m512, 1) +test_3 (_mm512_mask_cvt_roundepi32_ps, __m512, __m512, __mmask16, __m512i, 1) +test_3 (_mm512_mask_cvt_roundepu32_ps, __m512, __m512, __mmask16, __m512i, 1) +test_3 (_mm512_mask_cvt_roundpd_epi32, __m256i, __m256i, __mmask8, __m512d, 1) +test_3 (_mm512_mask_cvt_roundpd_epu32, __m256i, __m256i, __mmask8, __m512d, 1) +test_3 (_mm512_mask_cvt_roundpd_ps, __m256, __m256, __mmask8, __m512d, 1) +test_3 (_mm512_mask_cvt_roundph_ps, __m512, __m512, __mmask16, __m256i, 5) +test_3 (_mm512_mask_cvt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 1) +test_3 (_mm512_mask_cvt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 1) +test_3 (_mm512_mask_cvt_roundps_pd, __m512d, __m512d, __mmask8, __m256, 5) +test_3 (_mm512_mask_cvtps_ph, __m256i, __m256i, __mmask16, __m512, 1) +test_3 (_mm512_mask_cvtt_roundpd_epi32, __m256i, __m256i, __mmask8, __m512d, 5) +test_3 (_mm512_mask_cvtt_roundpd_epu32, __m256i, __m256i, __mmask8, __m512d, 5) +test_3 (_mm512_mask_cvtt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 5) +test_3 (_mm512_mask_cvtt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 5) +test_3 (_mm512_mask_extractf32x4_ps, __m128, __m128, 
__mmask8, __m512, 1) +test_3 (_mm512_mask_extractf64x4_pd, __m256d, __m256d, __mmask8, __m512d, 1) +test_3 (_mm512_mask_extracti32x4_epi32, __m128i, __m128i, __mmask8, __m512i, 1) +test_3 (_mm512_mask_extracti64x4_epi64, __m256i, __m256i, __mmask8, __m512i, 1) +test_3 (_mm512_mask_getexp_round_pd, __m512d, __m512d, __mmask8, __m512d, 5) +test_3 (_mm512_mask_getexp_round_ps, __m512, __m512, __mmask16, __m512, 5) +test_3y (_mm512_mask_getmant_round_pd, __m512d, __m512d, __mmask8, __m512d, 1, 1, 5) +test_3y (_mm512_mask_getmant_round_ps, __m512, __m512, __mmask16, __m512, 1, 1, 5) +test_3 (_mm512_mask_permute_pd, __m512d, __m512d, __mmask8, __m512d, 1) +test_3 (_mm512_mask_permute_ps, __m512, __m512, __mmask16, __m512, 1) +test_3 (_mm512_mask_permutex_epi64, __m512i, __m512i, __mmask8, __m512i, 1) +test_3 (_mm512_mask_permutex_pd, __m512d, __m512d, __mmask8, __m512d, 1) +test_3 (_mm512_mask_rol_epi32, __m512i, __m512i, __mmask16, __m512i, 1) +test_3 (_mm512_mask_rol_epi64, __m512i, __m512i, __mmask8, __m512i, 1) +test_3 (_mm512_mask_ror_epi32, __m512i, __m512i, __mmask16, __m512i, 1) +test_3 (_mm512_mask_ror_epi64, __m512i, __m512i, __mmask8, __m512i, 1) +test_3 (_mm512_mask_shuffle_epi32, __m512i, __m512i, __mmask16, __m512i, 1) +test_3 (_mm512_mask_slli_epi32, __m512i, __m512i, __mmask16, __m512i, 1) +test_3 (_mm512_mask_slli_epi64, __m512i, __m512i, __mmask8, __m512i, 1) +test_3 (_mm512_mask_sqrt_round_pd, __m512d, __m512d, __mmask8, __m512d, 1) +test_3 (_mm512_mask_sqrt_round_ps, __m512, __m512, __mmask16, __m512, 1) +test_3 (_mm512_mask_srai_epi32, __m512i, __m512i, __mmask16, __m512i, 1) +test_3 (_mm512_mask_srai_epi64, __m512i, __m512i, __mmask8, __m512i, 1) +test_3 (_mm512_mask_srli_epi32, __m512i, __m512i, __mmask16, __m512i, 1) +test_3 (_mm512_mask_srli_epi64, __m512i, __m512i, __mmask8, __m512i, 1) +test_3 (_mm512_maskz_add_round_pd, __m512d, __mmask8, __m512d, __m512d, 1) +test_3 (_mm512_maskz_add_round_ps, __m512, __mmask16, __m512, __m512, 1) +test_3 
(_mm512_maskz_alignr_epi32, __m512i, __mmask16, __m512i, __m512i, 1) +test_3 (_mm512_maskz_alignr_epi64, __m512i, __mmask8, __m512i, __m512i, 1) +test_3 (_mm512_maskz_div_round_pd, __m512d, __mmask8, __m512d, __m512d, 1) +test_3 (_mm512_maskz_div_round_ps, __m512, __mmask16, __m512, __m512, 1) +test_3 (_mm512_maskz_insertf32x4, __m512, __mmask16, __m512, __m128, 1) +test_3 (_mm512_maskz_insertf64x4, __m512d, __mmask8, __m512d, __m256d, 1) +test_3 (_mm512_maskz_inserti32x4, __m512i, __mmask16, __m512i, __m128i, 1) +test_3 (_mm512_maskz_inserti64x4, __m512i, __mmask8, __m512i, __m256i, 1) +test_3 (_mm512_maskz_max_round_pd, __m512d, __mmask8, __m512d, __m512d, 5) +test_3 (_mm512_maskz_max_round_ps, __m512, __mmask16, __m512, __m512, 5) +test_3 (_mm512_maskz_min_round_pd, __m512d, __mmask8, __m512d, __m512d, 5) +test_3 (_mm512_maskz_min_round_ps, __m512, __mmask16, __m512, __m512, 5) +test_3 (_mm512_maskz_mul_round_pd, __m512d, __mmask8, __m512d, __m512d, 1) +test_3 (_mm512_maskz_mul_round_ps, __m512, __mmask16, __m512, __m512, 1) +test_3 (_mm512_maskz_scalef_round_pd, __m512d, __mmask8, __m512d, __m512d, 1) +test_3 (_mm512_maskz_scalef_round_ps, __m512, __mmask16, __m512, __m512, 1) +test_3 (_mm512_maskz_shuffle_f32x4, __m512, __mmask16, __m512, __m512, 1) +test_3 (_mm512_maskz_shuffle_f64x2, __m512d, __mmask8, __m512d, __m512d, 1) +test_3 (_mm512_maskz_shuffle_i32x4, __m512i, __mmask16, __m512i, __m512i, 1) +test_3 (_mm512_maskz_shuffle_i64x2, __m512i, __mmask8, __m512i, __m512i, 1) +test_3 (_mm512_maskz_shuffle_pd, __m512d, __mmask8, __m512d, __m512d, 1) +test_3 (_mm512_maskz_shuffle_ps, __m512, __mmask16, __m512, __m512, 1) +test_3 (_mm512_maskz_sub_round_pd, __m512d, __mmask8, __m512d, __m512d, 1) +test_3 (_mm512_maskz_sub_round_ps, __m512, __mmask16, __m512, __m512, 1) +test_3 (_mm512_ternarylogic_epi32, __m512i, __m512i, __m512i, __m512i, 1) +test_3 (_mm512_ternarylogic_epi64, __m512i, __m512i, __m512i, __m512i, 1) +test_3 (_mm_mask_cmp_sd_mask, __mmask8, 
__mmask8, __m128d, __m128d, 1) +test_3 (_mm_mask_cmp_ss_mask, __mmask8, __mmask8, __m128, __m128, 1) +test_3v (_mm512_i32scatter_epi32, void *, __m512i, __m512i, 1) +test_3v (_mm512_i32scatter_epi64, void *, __m256i, __m512i, 1) +test_3v (_mm512_i32scatter_pd, void *, __m256i, __m512d, 1) +test_3v (_mm512_i32scatter_ps, void *, __m512i, __m512, 1) +test_3v (_mm512_i64scatter_epi32, void *, __m512i, __m256i, 1) +test_3v (_mm512_i64scatter_epi64, void *, __m512i, __m512i, 1) +test_3v (_mm512_i64scatter_pd, void *, __m512i, __m512d, 1) +test_3v (_mm512_i64scatter_ps, void *, __m512i, __m256, 1) +test_3x (_mm512_mask_roundscale_round_pd, __m512d, __m512d, __mmask8, __m512d, 1, 5) +test_3x (_mm512_mask_roundscale_round_ps, __m512, __m512, __mmask16, __m512, 1, 5) +test_3x (_mm512_mask_cmp_round_pd_mask, __mmask8, __mmask8, __m512d, __m512d, 1, 5) +test_3x (_mm512_mask_cmp_round_ps_mask, __mmask16, __mmask16, __m512, __m512, 1, 5) +test_3x (_mm_fixupimm_round_sd, __m128d, __m128d, __m128d, __m128i, 1, 5) +test_3x (_mm_fixupimm_round_ss, __m128, __m128, __m128, __m128i, 1, 5) +test_3x (_mm_mask_cmp_round_sd_mask, __mmask8, __mmask8, __m128d, __m128d, 1, 5) +test_3x (_mm_mask_cmp_round_ss_mask, __mmask8, __mmask8, __m128, __m128, 1, 5) +test_4 (_mm512_mask3_fmadd_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1) +test_4 (_mm512_mask3_fmadd_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1) +test_4 (_mm512_mask3_fmaddsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1) +test_4 (_mm512_mask3_fmaddsub_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1) +test_4 (_mm512_mask3_fmsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1) +test_4 (_mm512_mask3_fmsub_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1) +test_4 (_mm512_mask3_fmsubadd_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1) +test_4 (_mm512_mask3_fmsubadd_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1) +test_4 (_mm512_mask3_fnmadd_round_pd, __m512d, 
__m512d, __m512d, __m512d, __mmask8, 1) +test_4 (_mm512_mask3_fnmadd_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1) +test_4 (_mm512_mask3_fnmsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1) +test_4 (_mm512_mask3_fnmsub_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1) +test_4 (_mm512_mask_add_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_add_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_alignr_epi32, __m512i, __m512i, __mmask16, __m512i, __m512i, 1) +test_4 (_mm512_mask_alignr_epi64, __m512i, __m512i, __mmask8, __m512i, __m512i, 1) +test_4 (_mm512_mask_div_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_div_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_fmadd_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_fmadd_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_fmaddsub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_fmaddsub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_fmsub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_fmsub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_fmsubadd_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_fmsubadd_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_fnmadd_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_fnmadd_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_fnmsub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_fnmsub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_i32gather_epi32, __m512i, __m512i, __mmask16, __m512i, void const *, 1) +test_4 (_mm512_mask_i32gather_epi64, __m512i, __m512i, __mmask8, __m256i, void 
const *, 1) +test_4 (_mm512_mask_i32gather_pd, __m512d, __m512d, __mmask8, __m256i, void const *, 1) +test_4 (_mm512_mask_i32gather_ps, __m512, __m512, __mmask16, __m512i, void const *, 1) +test_4 (_mm512_mask_i64gather_epi32, __m256i, __m256i, __mmask8, __m512i, void const *, 1) +test_4 (_mm512_mask_i64gather_epi64, __m512i, __m512i, __mmask8, __m512i, void const *, 1) +test_4 (_mm512_mask_i64gather_pd, __m512d, __m512d, __mmask8, __m512i, void const *, 1) +test_4 (_mm512_mask_i64gather_ps, __m256, __m256, __mmask8, __m512i, void const *, 1) +test_4 (_mm512_mask_insertf32x4, __m512, __m512, __mmask16, __m512, __m128, 1) +test_4 (_mm512_mask_insertf64x4, __m512d, __m512d, __mmask8, __m512d, __m256d, 1) +test_4 (_mm512_mask_inserti32x4, __m512i, __m512i, __mmask16, __m512i, __m128i, 1) +test_4 (_mm512_mask_inserti64x4, __m512i, __m512i, __mmask8, __m512i, __m256i, 1) +test_4 (_mm512_mask_max_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 5) +test_4 (_mm512_mask_max_round_ps, __m512, __m512, __mmask16, __m512, __m512, 5) +test_4 (_mm512_mask_min_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 5) +test_4 (_mm512_mask_min_round_ps, __m512, __m512, __mmask16, __m512, __m512, 5) +test_4 (_mm512_mask_mul_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_mul_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_scalef_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_scalef_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_shuffle_f32x4, __m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_shuffle_f64x2, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_shuffle_i32x4, __m512i, __m512i, __mmask16, __m512i, __m512i, 1) +test_4 (_mm512_mask_shuffle_i64x2, __m512i, __m512i, __mmask8, __m512i, __m512i, 1) +test_4 (_mm512_mask_shuffle_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_shuffle_ps, 
__m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_sub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) +test_4 (_mm512_mask_sub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1) +test_4 (_mm512_mask_ternarylogic_epi32, __m512i, __m512i, __mmask16, __m512i, __m512i, 1) +test_4 (_mm512_mask_ternarylogic_epi64, __m512i, __m512i, __mmask8, __m512i, __m512i, 1) +test_4 (_mm512_maskz_fmadd_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1) +test_4 (_mm512_maskz_fmadd_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1) +test_4 (_mm512_maskz_fmaddsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1) +test_4 (_mm512_maskz_fmaddsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1) +test_4 (_mm512_maskz_fmsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1) +test_4 (_mm512_maskz_fmsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1) +test_4 (_mm512_maskz_fmsubadd_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1) +test_4 (_mm512_maskz_fmsubadd_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1) +test_4 (_mm512_maskz_fnmadd_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1) +test_4 (_mm512_maskz_fnmadd_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1) +test_4 (_mm512_maskz_fnmsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1) +test_4 (_mm512_maskz_fnmsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1) +test_4 (_mm512_maskz_ternarylogic_epi32, __m512i, __mmask16, __m512i, __m512i, __m512i, 1) +test_4 (_mm512_maskz_ternarylogic_epi64, __m512i, __mmask8, __m512i, __m512i, __m512i, 1) +test_4v (_mm512_mask_i32scatter_epi32, void *, __mmask16, __m512i, __m512i, 1) +test_4v (_mm512_mask_i32scatter_epi64, void *, __mmask8, __m256i, __m512i, 1) +test_4v (_mm512_mask_i32scatter_pd, void *, __mmask8, __m256i, __m512d, 1) +test_4v (_mm512_mask_i32scatter_ps, void *, __mmask16, __m512i, __m512, 1) +test_4v (_mm512_mask_i64scatter_epi32, void *, __mmask8, __m512i, 
__m256i, 1) +test_4v (_mm512_mask_i64scatter_epi64, void *, __mmask8, __m512i, __m512i, 1) +test_4v (_mm512_mask_i64scatter_pd, void *, __mmask8, __m512i, __m512d, 1) +test_4v (_mm512_mask_i64scatter_ps, void *, __mmask8, __m512i, __m256, 1) +test_4x (_mm512_mask_fixupimm_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512i, 1, 5) +test_4x (_mm512_mask_fixupimm_round_ps, __m512, __m512, __mmask16, __m512, __m512i, 1, 5) +test_4x (_mm512_maskz_fixupimm_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512i, 1, 5) +test_4x (_mm512_maskz_fixupimm_round_ps, __m512, __mmask16, __m512, __m512, __m512i, 1, 5) +test_4x (_mm_mask_fixupimm_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128i, 1, 5) +test_4x (_mm_mask_fixupimm_round_ss, __m128, __m128, __mmask8, __m128, __m128i, 1, 5) +test_4x (_mm_maskz_fixupimm_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128i, 1, 5) +test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 5) + +/* avx512pfintrin.h */ +test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, 1) +test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, 1) +test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, 1) +test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i, 1, 1) + +/* avx512erintrin.h */ +test_1 (_mm512_exp2a23_round_pd, __m512d, __m512d, 1) +test_1 (_mm512_exp2a23_round_ps, __m512, __m512, 1) +test_1 (_mm512_rcp28_round_pd, __m512d, __m512d, 1) +test_1 (_mm512_rcp28_round_ps, __m512, __m512, 1) +test_1 (_mm512_rsqrt28_round_pd, __m512d, __m512d, 1) +test_1 (_mm512_rsqrt28_round_ps, __m512, __m512, 1) +test_2 (_mm512_maskz_exp2a23_round_pd, __m512d, __mmask8, __m512d, 1) +test_2 (_mm512_maskz_exp2a23_round_ps, __m512, __mmask16, __m512, 1) +test_2 (_mm512_maskz_rcp28_round_pd, __m512d, __mmask8, __m512d, 1) +test_2 (_mm512_maskz_rcp28_round_ps, __m512, __mmask16, __m512, 1) +test_2 
(_mm512_maskz_rsqrt28_round_pd, __m512d, __mmask8, __m512d, 1) +test_2 (_mm512_maskz_rsqrt28_round_ps, __m512, __mmask16, __m512, 1) +test_3 (_mm512_mask_exp2a23_round_pd, __m512d, __m512d, __mmask8, __m512d, 1) +test_3 (_mm512_mask_exp2a23_round_ps, __m512, __m512, __mmask16, __m512, 1) +test_3 (_mm512_mask_rcp28_round_pd, __m512d, __m512d, __mmask8, __m512d, 1) +test_3 (_mm512_mask_rcp28_round_ps, __m512, __m512, __mmask16, __m512, 1) +test_3 (_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 1) +test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 1) + /* wmmintrin.h (AES/PCLMUL). */ #ifdef DIFFERENT_PRAGMAS #pragma GCC target ("aes,pclmul") diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 069f8e7cb80..f993c07cdf1 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -183,7 +183,180 @@ /* rtmintrin.h */ #define __builtin_ia32_xabort(M) __builtin_ia32_xabort(1) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt") +/* avx512fintrin.h */ +#define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 1) +#define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E) +#define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E) +#define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D) +#define __builtin_ia32_cmppd512_mask(A, B, F, D, E) __builtin_ia32_cmppd512_mask(A, B, 1, D, 5) +#define __builtin_ia32_cmpps512_mask(A, B, F, D, E) __builtin_ia32_cmpps512_mask(A, B, 1, D, 5) +#define __builtin_ia32_cmpq512_mask(A, B, E, D) __builtin_ia32_cmpq512_mask(A, B, 1, D) +#define __builtin_ia32_cmpsd_mask(A, B, F, 
D, E) __builtin_ia32_cmpsd_mask(A, B, 1, D, 5) +#define __builtin_ia32_cmpss_mask(A, B, F, D, E) __builtin_ia32_cmpss_mask(A, B, 1, D, 5) +#define __builtin_ia32_cvtdq2ps512_mask(A, B, C, D) __builtin_ia32_cvtdq2ps512_mask(A, B, C, 1) +#define __builtin_ia32_cvtpd2dq512_mask(A, B, C, D) __builtin_ia32_cvtpd2dq512_mask(A, B, C, 1) +#define __builtin_ia32_cvtpd2ps512_mask(A, B, C, D) __builtin_ia32_cvtpd2ps512_mask(A, B, C, 1) +#define __builtin_ia32_cvtpd2udq512_mask(A, B, C, D) __builtin_ia32_cvtpd2udq512_mask(A, B, C, 1) +#define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 1) +#define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 5) +#define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 1) +#define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 1) +#define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 1) +#define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 1) +#define __builtin_ia32_cvttpd2dq512_mask(A, B, C, D) __builtin_ia32_cvttpd2dq512_mask(A, B, C, 5) +#define __builtin_ia32_cvttpd2udq512_mask(A, B, C, D) __builtin_ia32_cvttpd2udq512_mask(A, B, C, 5) +#define __builtin_ia32_cvttps2dq512_mask(A, B, C, D) __builtin_ia32_cvttps2dq512_mask(A, B, C, 5) +#define __builtin_ia32_cvttps2udq512_mask(A, B, C, D) __builtin_ia32_cvttps2udq512_mask(A, B, C, 5) +#define __builtin_ia32_cvtudq2ps512_mask(A, B, C, D) __builtin_ia32_cvtudq2ps512_mask(A, B, C, 1) +#define __builtin_ia32_cvtusi2sd64(A, B, C) __builtin_ia32_cvtusi2sd64(A, B, 1) +#define __builtin_ia32_cvtusi2ss32(A, B, C) __builtin_ia32_cvtusi2ss32(A, B, 1) +#define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 1) +#define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 1) 
+#define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D) +#define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D) +#define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D) +#define __builtin_ia32_extracti64x4_mask(A, E, C, D) __builtin_ia32_extracti64x4_mask(A, 1, C, D) +#define __builtin_ia32_fixupimmpd512_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmpd512_mask(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmpd512_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmpd512_maskz(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmps512_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmps512_mask(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmps512_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmps512_maskz(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmsd_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmsd_mask(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmsd_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmsd_maskz(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmss_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmss_mask(A, B, C, 1, E, 5) +#define __builtin_ia32_fixupimmss_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmss_maskz(A, B, C, 1, E, 5) +#define __builtin_ia32_gatherdiv8df(A, B, C, D, F) __builtin_ia32_gatherdiv8df(A, B, C, D, 1) +#define __builtin_ia32_gatherdiv8di(A, B, C, D, F) __builtin_ia32_gatherdiv8di(A, B, C, D, 1) +#define __builtin_ia32_gatherdiv16sf(A, B, C, D, F) __builtin_ia32_gatherdiv16sf(A, B, C, D, 1) +#define __builtin_ia32_gatherdiv16si(A, B, C, D, F) __builtin_ia32_gatherdiv16si(A, B, C, D, 1) +#define __builtin_ia32_gathersiv16sf(A, B, C, D, F) __builtin_ia32_gathersiv16sf(A, B, C, D, 1) +#define __builtin_ia32_gathersiv16si(A, B, C, D, F) __builtin_ia32_gathersiv16si(A, B, C, D, 1) +#define __builtin_ia32_gathersiv8df(A, B, C, D, F) __builtin_ia32_gathersiv8df(A, B, C, D, 1) +#define __builtin_ia32_gathersiv8di(A, B, 
C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 1) +#define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 5) +#define __builtin_ia32_getexpps512_mask(A, B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 5) +#define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 5) +#define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 5) +#define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E) +#define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E) +#define __builtin_ia32_inserti32x4_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_mask(A, B, 1, D, E) +#define __builtin_ia32_inserti64x4_mask(A, B, F, D, E) __builtin_ia32_inserti64x4_mask(A, B, 1, D, E) +#define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 5) +#define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 5) +#define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 5) +#define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 5) +#define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 1) +#define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D) +#define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D) +#define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D) +#define __builtin_ia32_prolq512_mask(A, E, C, D) __builtin_ia32_prolq512_mask(A, 1, C, D) +#define __builtin_ia32_prord512_mask(A, E, C, D) __builtin_ia32_prord512_mask(A, 1, C, D) +#define __builtin_ia32_prorq512_mask(A, E, C, D) __builtin_ia32_prorq512_mask(A, 1, C, D) 
+#define __builtin_ia32_pshufd512_mask(A, E, C, D) __builtin_ia32_pshufd512_mask(A, 1, C, D) +#define __builtin_ia32_pslldi512_mask(A, E, C, D) __builtin_ia32_pslldi512_mask(A, 1, C, D) +#define __builtin_ia32_psllqi512_mask(A, E, C, D) __builtin_ia32_psllqi512_mask(A, 1, C, D) +#define __builtin_ia32_psradi512_mask(A, E, C, D) __builtin_ia32_psradi512_mask(A, 1, C, D) +#define __builtin_ia32_psraqi512_mask(A, E, C, D) __builtin_ia32_psraqi512_mask(A, 1, C, D) +#define __builtin_ia32_psrldi512_mask(A, E, C, D) __builtin_ia32_psrldi512_mask(A, 1, C, D) +#define __builtin_ia32_psrlqi512_mask(A, E, C, D) __builtin_ia32_psrlqi512_mask(A, 1, C, D) +#define __builtin_ia32_pternlogd512_mask(A, B, C, F, E) __builtin_ia32_pternlogd512_mask(A, B, C, 1, E) +#define __builtin_ia32_pternlogd512_maskz(A, B, C, F, E) __builtin_ia32_pternlogd512_maskz(A, B, C, 1, E) +#define __builtin_ia32_pternlogq512_mask(A, B, C, F, E) __builtin_ia32_pternlogq512_mask(A, B, C, 1, E) +#define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E) +#define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 5) +#define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 5) +#define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 1) +#define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 1) +#define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 1) +#define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 1) +#define __builtin_ia32_scatterdiv16si(A, B, C, D, F) __builtin_ia32_scatterdiv16si(A, B, C, D, 1) +#define __builtin_ia32_scattersiv16sf(A, B, C, D, F) __builtin_ia32_scattersiv16sf(A, B, C, D, 1) +#define 
__builtin_ia32_scattersiv16si(A, B, C, D, F) __builtin_ia32_scattersiv16si(A, B, C, D, 1) +#define __builtin_ia32_scattersiv8df(A, B, C, D, F) __builtin_ia32_scattersiv8df(A, B, C, D, 1) +#define __builtin_ia32_scattersiv8di(A, B, C, D, F) __builtin_ia32_scattersiv8di(A, B, C, D, 1) +#define __builtin_ia32_shuf_f32x4_mask(A, B, F, D, E) __builtin_ia32_shuf_f32x4_mask(A, B, 1, D, E) +#define __builtin_ia32_shuf_f64x2_mask(A, B, F, D, E) __builtin_ia32_shuf_f64x2_mask(A, B, 1, D, E) +#define __builtin_ia32_shuf_i32x4_mask(A, B, F, D, E) __builtin_ia32_shuf_i32x4_mask(A, B, 1, D, E) +#define __builtin_ia32_shuf_i64x2_mask(A, B, F, D, E) __builtin_ia32_shuf_i64x2_mask(A, B, 1, D, E) +#define __builtin_ia32_shufpd512_mask(A, B, F, D, E) __builtin_ia32_shufpd512_mask(A, B, 1, D, E) +#define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E) +#define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 1) +#define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 1) +#define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 1) +#define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D) +#define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D) +#define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 5) +#define __builtin_ia32_vcomiss(A, B, C, D) __builtin_ia32_vcomiss(A, B, 1, 5) +#define __builtin_ia32_vcvtph2ps512_mask(A, B, C, D) __builtin_ia32_vcvtph2ps512_mask(A, B, C, 5) +#define __builtin_ia32_vcvtps2ph512_mask(A, E, C, D) __builtin_ia32_vcvtps2ph512_mask(A, 1, C, D) +#define __builtin_ia32_vcvtsd2si32(A, B) __builtin_ia32_vcvtsd2si32(A, 1) +#define __builtin_ia32_vcvtsd2si64(A, B) __builtin_ia32_vcvtsd2si64(A, 1) +#define __builtin_ia32_vcvtsd2usi32(A, B) 
__builtin_ia32_vcvtsd2usi32(A, 1) +#define __builtin_ia32_vcvtsd2usi64(A, B) __builtin_ia32_vcvtsd2usi64(A, 1) +#define __builtin_ia32_vcvtss2si32(A, B) __builtin_ia32_vcvtss2si32(A, 1) +#define __builtin_ia32_vcvtss2si64(A, B) __builtin_ia32_vcvtss2si64(A, 1) +#define __builtin_ia32_vcvtss2usi32(A, B) __builtin_ia32_vcvtss2usi32(A, 1) +#define __builtin_ia32_vcvtss2usi64(A, B) __builtin_ia32_vcvtss2usi64(A, 1) +#define __builtin_ia32_vcvttsd2si32(A, B) __builtin_ia32_vcvttsd2si32(A, 5) +#define __builtin_ia32_vcvttsd2si64(A, B) __builtin_ia32_vcvttsd2si64(A, 5) +#define __builtin_ia32_vcvttsd2usi32(A, B) __builtin_ia32_vcvttsd2usi32(A, 5) +#define __builtin_ia32_vcvttsd2usi64(A, B) __builtin_ia32_vcvttsd2usi64(A, 5) +#define __builtin_ia32_vcvttss2si32(A, B) __builtin_ia32_vcvttss2si32(A, 5) +#define __builtin_ia32_vcvttss2si64(A, B) __builtin_ia32_vcvttss2si64(A, 5) +#define __builtin_ia32_vcvttss2usi32(A, B) __builtin_ia32_vcvttss2usi32(A, 5) +#define __builtin_ia32_vcvttss2usi64(A, B) __builtin_ia32_vcvttss2usi64(A, 5) +#define __builtin_ia32_vfmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfmaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddpd512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmaddpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddpd512_maskz(A, B, C, D, 1) +#define __builtin_ia32_vfmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddps512_maskz(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 1) +#define 
__builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 1) +#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsubps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, 1) +#define __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubps512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 1) +#define __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfnmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddps512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask3(A, 
B, C, D, 1) +#define __builtin_ia32_vfnmsubps512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask(A, B, C, D, 1) +#define __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, 1) +#define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D) +#define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D) + +/* avx512pfintrin.h */ +#define __builtin_ia32_gatherpfdps(A, B, C, D, E) __builtin_ia32_gatherpfdps(A, B, C, 1, 1) +#define __builtin_ia32_gatherpfqps(A, B, C, D, E) __builtin_ia32_gatherpfqps(A, B, C, 1, 1) +#define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps(A, B, C, 1, 1) +#define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps(A, B, C, 1, 1) + +/* avx512erintrin.h */ +#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask (A, B, C, 1) +#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask (A, B, C, 1) +#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask (A, B, C, 1) +#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask (A, B, C, 1) +#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask (A, B, C, 1) +#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask (A, B, C, 1) + +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512pf,avx512cd") #include <wmmintrin.h> #include <smmintrin.h> #include <mm3dnow.h> |