diff options
author | hjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-05-02 19:20:43 +0000 |
---|---|---|
committer | hjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-05-02 19:20:43 +0000 |
commit | 875a66b2f9a8edf3b1867ae1a33d0db4cfc6747b (patch) | |
tree | b2be3d60f7c3dd6b0d3583b7d3d356ec6e047075 /gcc | |
parent | fba7ae09296f045c7a02c72db005fa95b78504d5 (diff) | |
download | gcc-875a66b2f9a8edf3b1867ae1a33d0db4cfc6747b.tar.gz |
2008-05-02 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (ix86_special_builtin_type): New.
(bdesc_special_args): Likewise.
(ix86_expand_special_args_builtin): Likewise.
(ix86_init_mmx_sse_builtins): Updated.
(ix86_expand_builtin): Updated.
(ix86_expand_store_builtin): Removed.
(ix86_expand_unop_builtin): Likewise.
* config/i386/mm3dnow.h (__v2sf): Moved to ...
* config/i386/mmintrin.h (__v2sf): Here.
* config/i386/xmmintrin.h (_mm_loadh_pi): Replace __v2si with
const __v2sf.
(_mm_loadl_pi): Likewise.
(_mm_storeh_pi): Replace __v2si with __v2sf.
(_mm_storel_pi): Likewise.
* doc/extend.texi: Correct __builtin_ia32_loadhps,
__builtin_ia32_loadlps, __builtin_ia32_storehps,
__builtin_ia32_storelps, __builtin_ia32_loadhpd and
__builtin_ia32_loadlpd.
2008-05-02 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (ix86_builtin_type): Add FLOAT_FTYPE_FLOAT,
V4SF_FTYPE_V4SF_VEC_MERGE and V2DF_FTYPE_V2DF_VEC_MERGE.
(bdesc_args): Updated. Add scalar SSE builtins with vec_merge.
(ix86_init_mmx_sse_builtins): Updated.
(ix86_expand_args_builtin): Likewise.
(ix86_expand_builtin): Likewise.
(ix86_expand_unop1_builtin): Renamed to ...
(ix86_expand_unop_vec_merge_builtin): This.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@134886 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 35 |
-rw-r--r-- | gcc/config/i386/i386.c | 571 |
-rw-r--r-- | gcc/config/i386/mm3dnow.h | 3 |
-rw-r--r-- | gcc/config/i386/mmintrin.h | 1 |
-rw-r--r-- | gcc/config/i386/xmmintrin.h | 8 |
-rw-r--r-- | gcc/doc/extend.texi | 12 |
6 files changed, 368 insertions, 262 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 28a39b347cd..221a8fa87de 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,38 @@ +2008-05-02 H.J. Lu <hongjiu.lu@intel.com> + + * config/i386/i386.c (ix86_special_builtin_type): New. + (bdesc_special_args): Likewise. + (ix86_expand_special_args_builtin): Likewise. + (ix86_init_mmx_sse_builtins): Updated. + (ix86_expand_builtin): Updated. + (ix86_expand_store_builtin): Removed. + (ix86_expand_unop_builtin): Likewise. + + * config/i386/mm3dnow.h (__v2sf): Moved to ... + * config/i386/mmintrin.h (__v2sf): Here. + + * config/i386/xmmintrin.h (_mm_loadh_pi): Replace __v2si with + const __v2sf. + (_mm_loadl_pi): Likewise. + (_mm_storeh_pi): Replace __v2si with __v2sf. + (_mm_storel_pi): Likewise. + + * doc/extend.texi: Correct __builtin_ia32_loadhps, + __builtin_ia32_loadlps, __builtin_ia32_storehps, + __builtin_ia32_storelps, __builtin_ia32_loadhpd and + __builtin_ia32_loadlpd. + +2008-05-02 H.J. Lu <hongjiu.lu@intel.com> + + * config/i386/i386.c (ix86_builtin_type): Add FLOAT_FTYPE_FLOAT, + V4SF_FTYPE_V4SF_VEC_MERGE and V2DF_FTYPE_V2DF_VEC_MERGE. + (bdesc_args): Updated. Add scalar SSE builtins with vec_merge. + (ix86_init_mmx_sse_builtins): Updated. + (ix86_expand_args_builtin): Likewise. + (ix86_expand_builtin): Likewise. + (ix86_expand_unop1_builtin): Renamed to ... + (ix86_expand_unop_vec_merge_builtin): This. 
+ 2008-05-01 Jan Hubicka <jh@suse.cz> PR bootstrap/36100 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 80a0fd75f5f..d9fc4bd694b 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -17984,11 +17984,32 @@ static const struct builtin_description bdesc_pcmpistr[] = { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode }, }; +/* Special builtin types */ +enum ix86_special_builtin_type +{ + SPECIAL_FTYPE_UNKNOWN, + VOID_FTYPE_VOID, + V16QI_FTYPE_PCCHAR, + V4SF_FTYPE_PCFLOAT, + V2DF_FTYPE_PCDOUBLE, + V4SF_FTYPE_V4SF_PCV2SF, + V2DF_FTYPE_V2DF_PCDOUBLE, + V2DI_FTYPE_PV2DI, + VOID_FTYPE_PV2SF_V4SF, + VOID_FTYPE_PV2DI_V2DI, + VOID_FTYPE_PCHAR_V16QI, + VOID_FTYPE_PFLOAT_V4SF, + VOID_FTYPE_PDOUBLE_V2DF, + VOID_FTYPE_PDI_DI, + VOID_FTYPE_PINT_INT +}; + /* Builtin types */ enum ix86_builtin_type { FTYPE_UNKNOWN, FLOAT128_FTYPE_FLOAT128, + FLOAT_FTYPE_FLOAT, FLOAT128_FTYPE_FLOAT128_FLOAT128, INT64_FTYPE_V4SF, INT64_FTYPE_V2DF, @@ -18007,6 +18028,7 @@ enum ix86_builtin_type V4SI_FTYPE_V2DF, V4HI_FTYPE_V4HI, V4SF_FTYPE_V4SF, + V4SF_FTYPE_V4SF_VEC_MERGE, V4SF_FTYPE_V4SI, V4SF_FTYPE_V2DF, V2DI_FTYPE_V2DI, @@ -18014,6 +18036,7 @@ enum ix86_builtin_type V2DI_FTYPE_V8HI, V2DI_FTYPE_V4SI, V2DF_FTYPE_V2DF, + V2DF_FTYPE_V2DF_VEC_MERGE, V2DF_FTYPE_V4SI, V2DF_FTYPE_V4SF, V2DF_FTYPE_V2SI, @@ -18098,6 +18121,54 @@ enum ix86_builtin_type V2DI_FTYPE_V2DI_V2DI_UINT_UINT }; +/* Special builtins with variable number of arguments. */ +static const struct builtin_description bdesc_special_args[] = +{ + /* MMX */ + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, + + /* 3DNow! 
*/ + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, + + /* SSE */ + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF }, + + /* SSE or 3DNow!A */ + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI }, + + /* SSE2 */ + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI }, + { OPTION_MASK_ISA_SSE2, 
CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, + + /* SSE3 */ + { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, + + /* SSE4.1 */ + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI }, + + /* SSE4A */ + { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, + { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, +}; + /* Builtins with variable number of arguments. 
*/ static const struct builtin_description bdesc_args[] = { @@ -18263,9 +18334,11 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI }, { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT }, + { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, /* SSE MMX or 3Dnow!A */ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, @@ -18442,6 +18515,12 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", 
IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE }, + /* SSE2 MMX */ { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI }, @@ -18882,12 +18961,14 @@ ix86_init_mmx_sse_builtins (void) tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); tree pchar_type_node = build_pointer_type (char_type_node); - tree pcchar_type_node = build_pointer_type ( - build_type_variant (char_type_node, 1, 0)); + tree pcchar_type_node + = build_pointer_type (build_type_variant (char_type_node, 1, 0)); tree pfloat_type_node = build_pointer_type (float_type_node); - tree pcfloat_type_node = build_pointer_type ( - build_type_variant (float_type_node, 1, 0)); - tree pv2si_type_node = build_pointer_type (V2SI_type_node); + tree pcfloat_type_node + = build_pointer_type (build_type_variant (float_type_node, 1, 0)); + tree pv2sf_type_node = build_pointer_type (V2SF_type_node); + tree pcv2sf_type_node + = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0)); tree pv2di_type_node = build_pointer_type (V2DI_type_node); tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); @@ -18964,13 +19045,12 @@ ix86_init_mmx_sse_builtins (void) pchar_type_node, NULL_TREE); tree v4sf_ftype_pcfloat = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); - /* @@@ the type is bogus */ - tree v4sf_ftype_v4sf_pv2si + tree 
v4sf_ftype_v4sf_pcv2sf = build_function_type_list (V4SF_type_node, - V4SF_type_node, pv2si_type_node, NULL_TREE); - tree void_ftype_pv2si_v4sf + V4SF_type_node, pcv2sf_type_node, NULL_TREE); + tree void_ftype_pv2sf_v4sf = build_function_type_list (void_type_node, - pv2si_type_node, V4SF_type_node, NULL_TREE); + pv2sf_type_node, V4SF_type_node, NULL_TREE); tree void_ftype_pfloat_v4sf = build_function_type_list (void_type_node, pfloat_type_node, V4SF_type_node, NULL_TREE); @@ -19356,6 +19436,10 @@ ix86_init_mmx_sse_builtins (void) long_long_unsigned_type_node, long_long_unsigned_type_node, NULL_TREE); + tree float_ftype_float + = build_function_type_list (float_type_node, + float_type_node, + NULL_TREE); tree ftype; @@ -19400,6 +19484,67 @@ ix86_init_mmx_sse_builtins (void) def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ); } + /* Add all special builtins with variable number of operands. */ + for (i = 0, d = bdesc_special_args; + i < ARRAY_SIZE (bdesc_special_args); + i++, d++) + { + tree type; + + if (d->name == 0) + continue; + + switch ((enum ix86_special_builtin_type) d->flag) + { + case VOID_FTYPE_VOID: + type = void_ftype_void; + break; + case V16QI_FTYPE_PCCHAR: + type = v16qi_ftype_pcchar; + break; + case V4SF_FTYPE_PCFLOAT: + type = v4sf_ftype_pcfloat; + break; + case V2DI_FTYPE_PV2DI: + type = v2di_ftype_pv2di; + break; + case V2DF_FTYPE_PCDOUBLE: + type = v2df_ftype_pcdouble; + break; + case V4SF_FTYPE_V4SF_PCV2SF: + type = v4sf_ftype_v4sf_pcv2sf; + break; + case V2DF_FTYPE_V2DF_PCDOUBLE: + type = v2df_ftype_v2df_pcdouble; + break; + case VOID_FTYPE_PV2SF_V4SF: + type = void_ftype_pv2sf_v4sf; + break; + case VOID_FTYPE_PV2DI_V2DI: + type = void_ftype_pv2di_v2di; + break; + case VOID_FTYPE_PCHAR_V16QI: + type = void_ftype_pchar_v16qi; + break; + case VOID_FTYPE_PFLOAT_V4SF: + type = void_ftype_pfloat_v4sf; + break; + case VOID_FTYPE_PDOUBLE_V2DF: + type = void_ftype_pdouble_v2df; + break; + case 
VOID_FTYPE_PDI_DI: + type = void_ftype_pdi_di; + break; + case VOID_FTYPE_PINT_INT: + type = void_ftype_pint_int; + break; + default: + gcc_unreachable (); + } + + def_builtin (d->mask, d->name, type, d->code); + } + /* Add all builtins with variable number of operands. */ for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); @@ -19412,6 +19557,9 @@ ix86_init_mmx_sse_builtins (void) switch ((enum ix86_builtin_type) d->flag) { + case FLOAT_FTYPE_FLOAT: + type = float_ftype_float; + break; case INT64_FTYPE_V4SF: type = int64_ftype_v4sf; break; @@ -19461,6 +19609,7 @@ ix86_init_mmx_sse_builtins (void) type = v4si_ftype_v2df; break; case V4SF_FTYPE_V4SF: + case V4SF_FTYPE_V4SF_VEC_MERGE: type = v4sf_ftype_v4sf; break; case V4SF_FTYPE_V4SI: @@ -19497,6 +19646,7 @@ ix86_init_mmx_sse_builtins (void) type = v2df_ftype_v4sf; break; case V2DF_FTYPE_V2DF: + case V2DF_FTYPE_V2DF_VEC_MERGE: type = v2df_ftype_v2df; break; case V2DF_FTYPE_V2SI: @@ -19741,9 +19891,6 @@ ix86_init_mmx_sse_builtins (void) def_builtin_const (d->mask, d->name, ftype, d->code); } - /* Add the remaining MMX insns with somewhat more complicated types. */ - def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); - /* comi/ucomi insns. 
*/ for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) if (d->mask == OPTION_MASK_ISA_SSE2) @@ -19755,64 +19902,22 @@ ix86_init_mmx_sse_builtins (void) for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++) def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code); + /* SSE */ def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); + /* SSE or 3DNow!A */ def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); - def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); - def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); - - def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); - def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); - def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); - def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); - - def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); - def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); - def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); - - def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); - def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); - def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", 
v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); - - ftype = build_function_type_list (float_type_node, - float_type_node, - NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF); - - /* Original 3DNow! */ - def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); - /* SSE2 */ def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); - - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD); - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD); - - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); - - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); - /* SSE3. 
*/ def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR); def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT); - def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); - - /* SSE4.1. */ - def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA); /* AES */ if (TARGET_AES) @@ -19833,10 +19938,6 @@ ix86_init_mmx_sse_builtins (void) def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128); } - /* AMDFAM10 SSE4A New built-ins */ - def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD); - def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS); - /* Access to the vec_init patterns. */ ftype = build_function_type_list (V2SI_type_node, integer_type_node, integer_type_node, NULL_TREE); @@ -20243,71 +20344,12 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, return target; } -/* Subroutine of ix86_expand_builtin to take care of stores. */ +/* Subroutine of ix86_expand_args_builtin to take care of scalar unop + insns with vec_merge. 
*/ static rtx -ix86_expand_store_builtin (enum insn_code icode, tree exp) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - enum machine_mode mode0 = insn_data[icode].operand[0].mode; - enum machine_mode mode1 = insn_data[icode].operand[1].mode; - - if (VECTOR_MODE_P (mode1)) - op1 = safe_vector_operand (op1, mode1); - - op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); - op1 = copy_to_mode_reg (mode1, op1); - - pat = GEN_FCN (icode) (op0, op1); - if (pat) - emit_insn (pat); - return 0; -} - -/* Subroutine of ix86_expand_builtin to take care of unop insns. */ - -static rtx -ix86_expand_unop_builtin (enum insn_code icode, tree exp, - rtx target, int do_load) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - rtx op0 = expand_normal (arg0); - enum machine_mode tmode = insn_data[icode].operand[0].mode; - enum machine_mode mode0 = insn_data[icode].operand[1].mode; - - if (optimize || !target - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - if (do_load) - op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); - else - { - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - - if ((optimize && !register_operand (op0, mode0)) - || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - } - - pat = GEN_FCN (icode) (target, op0); - if (! pat) - return 0; - emit_insn (pat); - return target; -} - -/* Subroutine of ix86_expand_builtin to take care of three special unop insns: - sqrtss, sqrtsd, rsqrtss, rsqrtsf, rcpss. 
*/ - -static rtx -ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target) +ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp, + rtx target) { rtx pat; tree arg0 = CALL_EXPR_ARG (exp, 0); @@ -20417,6 +20459,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, switch ((enum ix86_builtin_type) d->flag) { case FLOAT128_FTYPE_FLOAT128: + case FLOAT_FTYPE_FLOAT: case INT64_FTYPE_V4SF: case INT64_FTYPE_V2DF: case INT_FTYPE_V16QI: @@ -20452,6 +20495,9 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V2SF_FTYPE_V2SI: nargs = 1; break; + case V4SF_FTYPE_V4SF_VEC_MERGE: + case V2DF_FTYPE_V2DF_VEC_MERGE: + return ix86_expand_unop_vec_merge_builtin (icode, exp, target); case FLOAT128_FTYPE_FLOAT128_FLOAT128: case V16QI_FTYPE_V16QI_V16QI: case V16QI_FTYPE_V8HI_V8HI: @@ -20699,6 +20745,148 @@ ix86_expand_args_builtin (const struct builtin_description *d, return target; } +/* Subroutine of ix86_expand_builtin to take care of special insns + with variable number of operands. 
*/ + +static rtx +ix86_expand_special_args_builtin (const struct builtin_description *d, + tree exp, rtx target) +{ + tree arg; + rtx pat, op; + unsigned int i, nargs, arg_adjust, memory; + struct + { + rtx op; + enum machine_mode mode; + } args[2]; + enum insn_code icode = d->icode; + bool last_arg_constant = false; + const struct insn_data *insn_p = &insn_data[icode]; + enum machine_mode tmode = insn_p->operand[0].mode; + enum { load, store } class; + + switch ((enum ix86_special_builtin_type) d->flag) + { + case VOID_FTYPE_VOID: + emit_insn (GEN_FCN (icode) (target)); + return 0; + case V2DI_FTYPE_PV2DI: + case V16QI_FTYPE_PCCHAR: + case V4SF_FTYPE_PCFLOAT: + case V2DF_FTYPE_PCDOUBLE: + nargs = 1; + class = load; + memory = 0; + break; + case VOID_FTYPE_PV2SF_V4SF: + case VOID_FTYPE_PV2DI_V2DI: + case VOID_FTYPE_PCHAR_V16QI: + case VOID_FTYPE_PFLOAT_V4SF: + case VOID_FTYPE_PDOUBLE_V2DF: + case VOID_FTYPE_PDI_DI: + case VOID_FTYPE_PINT_INT: + nargs = 1; + class = store; + /* Reserve memory operand for target. */ + memory = ARRAY_SIZE (args); + break; + case V4SF_FTYPE_V4SF_PCV2SF: + case V2DF_FTYPE_V2DF_PCDOUBLE: + nargs = 2; + class = load; + memory = 1; + break; + default: + gcc_unreachable (); + } + + gcc_assert (nargs <= ARRAY_SIZE (args)); + + if (class == store) + { + arg = CALL_EXPR_ARG (exp, 0); + op = expand_normal (arg); + gcc_assert (target == 0); + target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op)); + arg_adjust = 1; + } + else + { + arg_adjust = 0; + if (optimize + || target == 0 + || GET_MODE (target) != tmode + || ! 
(*insn_p->operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + } + + for (i = 0; i < nargs; i++) + { + enum machine_mode mode = insn_p->operand[i + 1].mode; + bool match; + + arg = CALL_EXPR_ARG (exp, i + arg_adjust); + op = expand_normal (arg); + match = (*insn_p->operand[i + 1].predicate) (op, mode); + + if (last_arg_constant && (i + 1) == nargs) + { + if (!match) + switch (icode) + { + default: + error ("the last argument must be an 8-bit immediate"); + return const0_rtx; + } + } + else + { + if (i == memory) + { + /* This must be the memory operand. */ + op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op)); + gcc_assert (GET_MODE (op) == mode + || GET_MODE (op) == VOIDmode); + } + else + { + /* This must be register. */ + if (VECTOR_MODE_P (mode)) + op = safe_vector_operand (op, mode); + + gcc_assert (GET_MODE (op) == mode + || GET_MODE (op) == VOIDmode); + op = copy_to_mode_reg (mode, op); + } + } + + args[i].op = op; + args[i].mode = mode; + } + + switch (nargs) + { + case 1: + pat = GEN_FCN (icode) (target, args[0].op); + break; + case 2: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op); + break; + case 3: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, + args[2].op); + break; + default: + gcc_unreachable (); + } + + if (! pat) + return 0; + emit_insn (pat); + return class == store ? 0 : target; +} + /* Subroutine of ix86_expand_builtin to take care of comi insns. 
*/ static rtx @@ -21134,19 +21322,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); tree arg0, arg1, arg2; rtx op0, op1, op2, pat; - enum machine_mode tmode, mode0, mode1, mode2; + enum machine_mode mode0, mode1, mode2; unsigned int fcode = DECL_FUNCTION_CODE (fndecl); switch (fcode) { - case IX86_BUILTIN_EMMS: - emit_insn (gen_mmx_emms ()); - return 0; - - case IX86_BUILTIN_SFENCE: - emit_insn (gen_sse_sfence ()); - return 0; - case IX86_BUILTIN_MASKMOVQ: case IX86_BUILTIN_MASKMOVDQU: icode = (fcode == IX86_BUILTIN_MASKMOVQ @@ -21178,75 +21358,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, emit_insn (pat); return 0; - case IX86_BUILTIN_RSQRTF: - return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target); - - case IX86_BUILTIN_SQRTSS: - return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target); - case IX86_BUILTIN_RSQRTSS: - return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target); - case IX86_BUILTIN_RCPSS: - return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target); - - case IX86_BUILTIN_LOADUPS: - return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1); - - case IX86_BUILTIN_STOREUPS: - return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp); - - case IX86_BUILTIN_LOADHPS: - case IX86_BUILTIN_LOADLPS: - case IX86_BUILTIN_LOADHPD: - case IX86_BUILTIN_LOADLPD: - icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps - : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps - : fcode == IX86_BUILTIN_LOADHPD ? 
CODE_FOR_sse2_loadhpd - : CODE_FOR_sse2_loadlpd); - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - tmode = insn_data[icode].operand[0].mode; - mode0 = insn_data[icode].operand[1].mode; - mode1 = insn_data[icode].operand[2].mode; - - op0 = force_reg (mode0, op0); - op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1)); - if (optimize || target == 0 - || GET_MODE (target) != tmode - || !register_operand (target, tmode)) - target = gen_reg_rtx (tmode); - pat = GEN_FCN (icode) (target, op0, op1); - if (! pat) - return 0; - emit_insn (pat); - return target; - - case IX86_BUILTIN_STOREHPS: - case IX86_BUILTIN_STORELPS: - icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps - : CODE_FOR_sse_storelps); - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - mode0 = insn_data[icode].operand[0].mode; - mode1 = insn_data[icode].operand[1].mode; - - op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); - op1 = force_reg (mode1, op1); - - pat = GEN_FCN (icode) (op0, op1); - if (! 
pat) - return 0; - emit_insn (pat); - return const0_rtx; - - case IX86_BUILTIN_MOVNTPS: - return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp); - case IX86_BUILTIN_MOVNTQ: - return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp); - case IX86_BUILTIN_LDMXCSR: op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); target = assign_386_stack_local (SImode, SLOT_VIRTUAL); @@ -21259,24 +21370,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, emit_insn (gen_sse_stmxcsr (target)); return copy_to_mode_reg (SImode, target); - case IX86_BUILTIN_FEMMS: - emit_insn (gen_mmx_femms ()); - return NULL_RTX; - - case IX86_BUILTIN_SQRTSD: - return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target); - case IX86_BUILTIN_LOADUPD: - return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1); - case IX86_BUILTIN_STOREUPD: - return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp); - - case IX86_BUILTIN_MFENCE: - emit_insn (gen_sse2_mfence ()); - return 0; - case IX86_BUILTIN_LFENCE: - emit_insn (gen_sse2_lfence ()); - return 0; - case IX86_BUILTIN_CLFLUSH: arg0 = CALL_EXPR_ARG (exp, 0); op0 = expand_normal (arg0); @@ -21287,18 +21380,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, emit_insn (gen_sse2_clflush (op0)); return 0; - case IX86_BUILTIN_MOVNTPD: - return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp); - case IX86_BUILTIN_MOVNTDQ: - return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp); - case IX86_BUILTIN_MOVNTI: - return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp); - - case IX86_BUILTIN_LOADDQU: - return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1); - case IX86_BUILTIN_STOREDQU: - return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp); - case IX86_BUILTIN_MONITOR: arg0 = CALL_EXPR_ARG (exp, 0); arg1 = CALL_EXPR_ARG (exp, 1); @@ -21330,20 +21411,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, 
emit_insn (gen_sse3_mwait (op0, op1)); return 0; - case IX86_BUILTIN_LDDQU: - return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp, - target, 1); - - case IX86_BUILTIN_MOVNTDQA: - return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp, - target, 1); - - case IX86_BUILTIN_MOVNTSD: - return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp); - - case IX86_BUILTIN_MOVNTSS: - return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp); - case IX86_BUILTIN_VEC_INIT_V2SI: case IX86_BUILTIN_VEC_INIT_V4HI: case IX86_BUILTIN_VEC_INIT_V8QI: @@ -21388,6 +21455,12 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, break; } + for (i = 0, d = bdesc_special_args; + i < ARRAY_SIZE (bdesc_special_args); + i++, d++) + if (d->code == fcode) + return ix86_expand_special_args_builtin (d, exp, target); + for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++) diff --git a/gcc/config/i386/mm3dnow.h b/gcc/config/i386/mm3dnow.h index dd1c871936e..96056e28e31 100644 --- a/gcc/config/i386/mm3dnow.h +++ b/gcc/config/i386/mm3dnow.h @@ -34,9 +34,6 @@ #include <mmintrin.h> -/* Internal data types for implementing the intrinsics. */ -typedef float __v2sf __attribute__ ((__vector_size__ (8))); - extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_femms (void) { diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h index 1c09be30e1a..3c135e61766 100644 --- a/gcc/config/i386/mmintrin.h +++ b/gcc/config/i386/mmintrin.h @@ -43,6 +43,7 @@ typedef int __v2si __attribute__ ((__vector_size__ (8))); typedef short __v4hi __attribute__ ((__vector_size__ (8))); typedef char __v8qi __attribute__ ((__vector_size__ (8))); typedef long long __v1di __attribute__ ((__vector_size__ (8))); +typedef float __v2sf __attribute__ ((__vector_size__ (8))); /* Empty the multimedia state. 
*/ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index fcfdaf98915..8d9b761bcaf 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -745,14 +745,14 @@ _mm_unpacklo_ps (__m128 __A, __m128 __B) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadh_pi (__m128 __A, __m64 const *__P) { - return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (__v2si *)__P); + return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P); } /* Stores the upper two SPFP values of A into P. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeh_pi (__m64 *__P, __m128 __A) { - __builtin_ia32_storehps ((__v2si *)__P, (__v4sf)__A); + __builtin_ia32_storehps ((__v2sf *)__P, (__v4sf)__A); } /* Moves the upper two values of B into the lower two values of A. */ @@ -774,14 +774,14 @@ _mm_movelh_ps (__m128 __A, __m128 __B) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadl_pi (__m128 __A, __m64 const *__P) { - return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (__v2si *)__P); + return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (const __v2sf *)__P); } /* Stores the lower two SPFP values of A into P. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storel_pi (__m64 *__P, __m128 __A) { - __builtin_ia32_storelps ((__v2si *)__P, (__v4sf)__A); + __builtin_ia32_storelps ((__v2sf *)__P, (__v4sf)__A); } /* Creates a 4-bit mask from the most significant bits of the SPFP values. */ diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 7c5ad9bc4f7..f3c6c574a42 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -7647,13 +7647,13 @@ Generates the @code{movups} machine instruction as a store to memory. 
Generates the @code{movss} machine instruction as a load from memory. @item void __builtin_ia32_storess (float *, v4sf) Generates the @code{movss} machine instruction as a store to memory. -@item v4sf __builtin_ia32_loadhps (v4sf, v2si *) +@item v4sf __builtin_ia32_loadhps (v4sf, const v2sf *) Generates the @code{movhps} machine instruction as a load from memory. -@item v4sf __builtin_ia32_loadlps (v4sf, v2si *) +@item v4sf __builtin_ia32_loadlps (v4sf, const v2sf *) Generates the @code{movlps} machine instruction as a load from memory -@item void __builtin_ia32_storehps (v4sf, v2si *) +@item void __builtin_ia32_storehps (v2sf *, v4sf) Generates the @code{movhps} machine instruction as a store to memory. -@item void __builtin_ia32_storelps (v4sf, v2si *) +@item void __builtin_ia32_storelps (v2sf *, v4sf) Generates the @code{movlps} machine instruction as a store to memory. @end table @@ -7755,8 +7755,8 @@ v8hi __builtin_ia32_pmulhuw128 (v8hi, v8hi) void __builtin_ia32_maskmovdqu (v16qi, v16qi) v2df __builtin_ia32_loadupd (double *) void __builtin_ia32_storeupd (double *, v2df) -v2df __builtin_ia32_loadhpd (v2df, double *) -v2df __builtin_ia32_loadlpd (v2df, double *) +v2df __builtin_ia32_loadhpd (v2df, double const *) +v2df __builtin_ia32_loadlpd (v2df, double const *) int __builtin_ia32_movmskpd (v2df) int __builtin_ia32_pmovmskb128 (v16qi) void __builtin_ia32_movnti (int *, int) |