summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>2008-05-02 19:20:43 +0000
committerhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>2008-05-02 19:20:43 +0000
commit875a66b2f9a8edf3b1867ae1a33d0db4cfc6747b (patch)
treeb2be3d60f7c3dd6b0d3583b7d3d356ec6e047075 /gcc
parentfba7ae09296f045c7a02c72db005fa95b78504d5 (diff)
downloadgcc-875a66b2f9a8edf3b1867ae1a33d0db4cfc6747b.tar.gz
2008-05-02 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (ix86_special_builtin_type): New. (bdesc_special_args): Likewise. (ix86_expand_special_args_builtin): Likewise. (ix86_init_mmx_sse_builtins): Updated. (ix86_expand_builtin): Updated. (ix86_expand_store_builtin): Removed. (ix86_expand_unop_builtin): Likewise. * config/i386/mm3dnow.h (__v2sf): Moved to ... * config/i386/mmintrin.h (__v2sf): Here. * config/i386/xmmintrin.h (_mm_loadh_pi): Replace __v2si with const __v2sf. (_mm_loadl_pi): Likewise. (_mm_storeh_pi): Replace __v2si with __v2sf. (_mm_storel_pi): Likewise. * doc/extend.texi: Correct __builtin_ia32_loadhps, __builtin_ia32_loadlps, __builtin_ia32_storehps, __builtin_ia32_storelps, __builtin_ia32_loadhpd and __builtin_ia32_loadlpd. 2008-05-02 H.J. Lu <hongjiu.lu@intel.com> * config/i386/i386.c (ix86_builtin_type): Add FLOAT_FTYPE_FLOAT, V4SF_FTYPE_V4SF_VEC_MERGE and V2DF_FTYPE_V2DF_VEC_MERGE. (bdesc_args): Updated. Add scalar SSE builtins with vec_merge. (ix86_init_mmx_sse_builtins): Updated. (ix86_expand_args_builtin): Likewise. (ix86_expand_builtin): Likewise. (ix86_expand_unop1_builtin): Renamed to ... (ix86_expand_unop_vec_merge_builtin): This. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@134886 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog35
-rw-r--r--gcc/config/i386/i386.c571
-rw-r--r--gcc/config/i386/mm3dnow.h3
-rw-r--r--gcc/config/i386/mmintrin.h1
-rw-r--r--gcc/config/i386/xmmintrin.h8
-rw-r--r--gcc/doc/extend.texi12
6 files changed, 368 insertions, 262 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 28a39b347cd..221a8fa87de 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,38 @@
+2008-05-02 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/i386.c (ix86_special_builtin_type): New.
+ (bdesc_special_args): Likewise.
+ (ix86_expand_special_args_builtin): Likewise.
+ (ix86_init_mmx_sse_builtins): Updated.
+ (ix86_expand_builtin): Updated.
+ (ix86_expand_store_builtin): Removed.
+ (ix86_expand_unop_builtin): Likewise.
+
+ * config/i386/mm3dnow.h (__v2sf): Moved to ...
+ * config/i386/mmintrin.h (__v2sf): Here.
+
+ * config/i386/xmmintrin.h (_mm_loadh_pi): Replace __v2si with
+ const __v2sf.
+ (_mm_loadl_pi): Likewise.
+ (_mm_storeh_pi): Replace __v2si with __v2sf.
+ (_mm_storel_pi): Likewise.
+
+ * doc/extend.texi: Correct __builtin_ia32_loadhps,
+ __builtin_ia32_loadlps, __builtin_ia32_storehps,
+ __builtin_ia32_storelps, __builtin_ia32_loadhpd and
+ __builtin_ia32_loadlpd.
+
+2008-05-02 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/i386.c (ix86_builtin_type): Add FLOAT_FTYPE_FLOAT,
+ V4SF_FTYPE_V4SF_VEC_MERGE and V2DF_FTYPE_V2DF_VEC_MERGE.
+ (bdesc_args): Updated. Add scalar SSE builtins with vec_merge.
+ (ix86_init_mmx_sse_builtins): Updated.
+ (ix86_expand_args_builtin): Likewise.
+ (ix86_expand_builtin): Likewise.
+ (ix86_expand_unop1_builtin): Renamed to ...
+ (ix86_expand_unop_vec_merge_builtin): This.
+
2008-05-01 Jan Hubicka <jh@suse.cz>
PR bootstrap/36100
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 80a0fd75f5f..d9fc4bd694b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -17984,11 +17984,32 @@ static const struct builtin_description bdesc_pcmpistr[] =
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
+/* Special builtin types */
+enum ix86_special_builtin_type
+{
+ SPECIAL_FTYPE_UNKNOWN,
+ VOID_FTYPE_VOID,
+ V16QI_FTYPE_PCCHAR,
+ V4SF_FTYPE_PCFLOAT,
+ V2DF_FTYPE_PCDOUBLE,
+ V4SF_FTYPE_V4SF_PCV2SF,
+ V2DF_FTYPE_V2DF_PCDOUBLE,
+ V2DI_FTYPE_PV2DI,
+ VOID_FTYPE_PV2SF_V4SF,
+ VOID_FTYPE_PV2DI_V2DI,
+ VOID_FTYPE_PCHAR_V16QI,
+ VOID_FTYPE_PFLOAT_V4SF,
+ VOID_FTYPE_PDOUBLE_V2DF,
+ VOID_FTYPE_PDI_DI,
+ VOID_FTYPE_PINT_INT
+};
+
/* Builtin types */
enum ix86_builtin_type
{
FTYPE_UNKNOWN,
FLOAT128_FTYPE_FLOAT128,
+ FLOAT_FTYPE_FLOAT,
FLOAT128_FTYPE_FLOAT128_FLOAT128,
INT64_FTYPE_V4SF,
INT64_FTYPE_V2DF,
@@ -18007,6 +18028,7 @@ enum ix86_builtin_type
V4SI_FTYPE_V2DF,
V4HI_FTYPE_V4HI,
V4SF_FTYPE_V4SF,
+ V4SF_FTYPE_V4SF_VEC_MERGE,
V4SF_FTYPE_V4SI,
V4SF_FTYPE_V2DF,
V2DI_FTYPE_V2DI,
@@ -18014,6 +18036,7 @@ enum ix86_builtin_type
V2DI_FTYPE_V8HI,
V2DI_FTYPE_V4SI,
V2DF_FTYPE_V2DF,
+ V2DF_FTYPE_V2DF_VEC_MERGE,
V2DF_FTYPE_V4SI,
V2DF_FTYPE_V4SF,
V2DF_FTYPE_V2SI,
@@ -18098,6 +18121,54 @@ enum ix86_builtin_type
V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
+/* Special builtins with variable number of arguments. */
+static const struct builtin_description bdesc_special_args[] =
+{
+ /* MMX */
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ /* 3DNow! */
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ /* SSE */
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
+
+ /* SSE or 3DNow!A */
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
+
+ /* SSE2 */
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
+
+ /* SSE3 */
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+
+ /* SSE4.1 */
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
+
+ /* SSE4A */
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+};
+
/* Builtins with variable number of arguments. */
static const struct builtin_description bdesc_args[] =
{
@@ -18263,9 +18334,11 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
/* SSE MMX or 3Dnow!A */
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
@@ -18442,6 +18515,12 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
+
/* SSE2 MMX */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
@@ -18882,12 +18961,14 @@ ix86_init_mmx_sse_builtins (void)
tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
tree pchar_type_node = build_pointer_type (char_type_node);
- tree pcchar_type_node = build_pointer_type (
- build_type_variant (char_type_node, 1, 0));
+ tree pcchar_type_node
+ = build_pointer_type (build_type_variant (char_type_node, 1, 0));
tree pfloat_type_node = build_pointer_type (float_type_node);
- tree pcfloat_type_node = build_pointer_type (
- build_type_variant (float_type_node, 1, 0));
- tree pv2si_type_node = build_pointer_type (V2SI_type_node);
+ tree pcfloat_type_node
+ = build_pointer_type (build_type_variant (float_type_node, 1, 0));
+ tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
+ tree pcv2sf_type_node
+ = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
tree pv2di_type_node = build_pointer_type (V2DI_type_node);
tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
@@ -18964,13 +19045,12 @@ ix86_init_mmx_sse_builtins (void)
pchar_type_node, NULL_TREE);
tree v4sf_ftype_pcfloat
= build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
- /* @@@ the type is bogus */
- tree v4sf_ftype_v4sf_pv2si
+ tree v4sf_ftype_v4sf_pcv2sf
= build_function_type_list (V4SF_type_node,
- V4SF_type_node, pv2si_type_node, NULL_TREE);
- tree void_ftype_pv2si_v4sf
+ V4SF_type_node, pcv2sf_type_node, NULL_TREE);
+ tree void_ftype_pv2sf_v4sf
= build_function_type_list (void_type_node,
- pv2si_type_node, V4SF_type_node, NULL_TREE);
+ pv2sf_type_node, V4SF_type_node, NULL_TREE);
tree void_ftype_pfloat_v4sf
= build_function_type_list (void_type_node,
pfloat_type_node, V4SF_type_node, NULL_TREE);
@@ -19356,6 +19436,10 @@ ix86_init_mmx_sse_builtins (void)
long_long_unsigned_type_node,
long_long_unsigned_type_node,
NULL_TREE);
+ tree float_ftype_float
+ = build_function_type_list (float_type_node,
+ float_type_node,
+ NULL_TREE);
tree ftype;
@@ -19400,6 +19484,67 @@ ix86_init_mmx_sse_builtins (void)
def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
}
+ /* Add all special builtins with variable number of operands. */
+ for (i = 0, d = bdesc_special_args;
+ i < ARRAY_SIZE (bdesc_special_args);
+ i++, d++)
+ {
+ tree type;
+
+ if (d->name == 0)
+ continue;
+
+ switch ((enum ix86_special_builtin_type) d->flag)
+ {
+ case VOID_FTYPE_VOID:
+ type = void_ftype_void;
+ break;
+ case V16QI_FTYPE_PCCHAR:
+ type = v16qi_ftype_pcchar;
+ break;
+ case V4SF_FTYPE_PCFLOAT:
+ type = v4sf_ftype_pcfloat;
+ break;
+ case V2DI_FTYPE_PV2DI:
+ type = v2di_ftype_pv2di;
+ break;
+ case V2DF_FTYPE_PCDOUBLE:
+ type = v2df_ftype_pcdouble;
+ break;
+ case V4SF_FTYPE_V4SF_PCV2SF:
+ type = v4sf_ftype_v4sf_pcv2sf;
+ break;
+ case V2DF_FTYPE_V2DF_PCDOUBLE:
+ type = v2df_ftype_v2df_pcdouble;
+ break;
+ case VOID_FTYPE_PV2SF_V4SF:
+ type = void_ftype_pv2sf_v4sf;
+ break;
+ case VOID_FTYPE_PV2DI_V2DI:
+ type = void_ftype_pv2di_v2di;
+ break;
+ case VOID_FTYPE_PCHAR_V16QI:
+ type = void_ftype_pchar_v16qi;
+ break;
+ case VOID_FTYPE_PFLOAT_V4SF:
+ type = void_ftype_pfloat_v4sf;
+ break;
+ case VOID_FTYPE_PDOUBLE_V2DF:
+ type = void_ftype_pdouble_v2df;
+ break;
+ case VOID_FTYPE_PDI_DI:
+ type = void_ftype_pdi_di;
+ break;
+ case VOID_FTYPE_PINT_INT:
+ type = void_ftype_pint_int;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+
/* Add all builtins with variable number of operands. */
for (i = 0, d = bdesc_args;
i < ARRAY_SIZE (bdesc_args);
@@ -19412,6 +19557,9 @@ ix86_init_mmx_sse_builtins (void)
switch ((enum ix86_builtin_type) d->flag)
{
+ case FLOAT_FTYPE_FLOAT:
+ type = float_ftype_float;
+ break;
case INT64_FTYPE_V4SF:
type = int64_ftype_v4sf;
break;
@@ -19461,6 +19609,7 @@ ix86_init_mmx_sse_builtins (void)
type = v4si_ftype_v2df;
break;
case V4SF_FTYPE_V4SF:
+ case V4SF_FTYPE_V4SF_VEC_MERGE:
type = v4sf_ftype_v4sf;
break;
case V4SF_FTYPE_V4SI:
@@ -19497,6 +19646,7 @@ ix86_init_mmx_sse_builtins (void)
type = v2df_ftype_v4sf;
break;
case V2DF_FTYPE_V2DF:
+ case V2DF_FTYPE_V2DF_VEC_MERGE:
type = v2df_ftype_v2df;
break;
case V2DF_FTYPE_V2SI:
@@ -19741,9 +19891,6 @@ ix86_init_mmx_sse_builtins (void)
def_builtin_const (d->mask, d->name, ftype, d->code);
}
- /* Add the remaining MMX insns with somewhat more complicated types. */
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
-
/* comi/ucomi insns. */
for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
if (d->mask == OPTION_MASK_ISA_SSE2)
@@ -19755,64 +19902,22 @@ ix86_init_mmx_sse_builtins (void)
for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
+ /* SSE */
def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
+ /* SSE or 3DNow!A */
def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
-
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
-
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
- def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
- def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
-
- def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
- def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
- def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
-
- ftype = build_function_type_list (float_type_node,
- float_type_node,
- NULL_TREE);
- def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
-
- /* Original 3DNow! */
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
-
/* SSE2 */
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
-
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
-
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
-
- def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
-
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
-
/* SSE3. */
def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
- def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
-
- /* SSE4.1. */
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
/* AES */
if (TARGET_AES)
@@ -19833,10 +19938,6 @@ ix86_init_mmx_sse_builtins (void)
def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
}
- /* AMDFAM10 SSE4A New built-ins */
- def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
- def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
-
/* Access to the vec_init patterns. */
ftype = build_function_type_list (V2SI_type_node, integer_type_node,
integer_type_node, NULL_TREE);
@@ -20243,71 +20344,12 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
return target;
}
-/* Subroutine of ix86_expand_builtin to take care of stores. */
+/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
+ insns with vec_merge. */
static rtx
-ix86_expand_store_builtin (enum insn_code icode, tree exp)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- enum machine_mode mode0 = insn_data[icode].operand[0].mode;
- enum machine_mode mode1 = insn_data[icode].operand[1].mode;
-
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
- op1 = copy_to_mode_reg (mode1, op1);
-
- pat = GEN_FCN (icode) (op0, op1);
- if (pat)
- emit_insn (pat);
- return 0;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of unop insns. */
-
-static rtx
-ix86_expand_unop_builtin (enum insn_code icode, tree exp,
- rtx target, int do_load)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- rtx op0 = expand_normal (arg0);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
-
- if (optimize || !target
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- if (do_load)
- op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
- else
- {
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
-
- if ((optimize && !register_operand (op0, mode0))
- || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- }
-
- pat = GEN_FCN (icode) (target, op0);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
- sqrtss, sqrtsd, rsqrtss, rsqrtsf, rcpss. */
-
-static rtx
-ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
+ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
+ rtx target)
{
rtx pat;
tree arg0 = CALL_EXPR_ARG (exp, 0);
@@ -20417,6 +20459,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
switch ((enum ix86_builtin_type) d->flag)
{
case FLOAT128_FTYPE_FLOAT128:
+ case FLOAT_FTYPE_FLOAT:
case INT64_FTYPE_V4SF:
case INT64_FTYPE_V2DF:
case INT_FTYPE_V16QI:
@@ -20452,6 +20495,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2SF_FTYPE_V2SI:
nargs = 1;
break;
+ case V4SF_FTYPE_V4SF_VEC_MERGE:
+ case V2DF_FTYPE_V2DF_VEC_MERGE:
+ return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
case FLOAT128_FTYPE_FLOAT128_FLOAT128:
case V16QI_FTYPE_V16QI_V16QI:
case V16QI_FTYPE_V8HI_V8HI:
@@ -20699,6 +20745,148 @@ ix86_expand_args_builtin (const struct builtin_description *d,
return target;
}
+/* Subroutine of ix86_expand_builtin to take care of special insns
+ with variable number of operands. */
+
+static rtx
+ix86_expand_special_args_builtin (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ tree arg;
+ rtx pat, op;
+ unsigned int i, nargs, arg_adjust, memory;
+ struct
+ {
+ rtx op;
+ enum machine_mode mode;
+ } args[2];
+ enum insn_code icode = d->icode;
+ bool last_arg_constant = false;
+ const struct insn_data *insn_p = &insn_data[icode];
+ enum machine_mode tmode = insn_p->operand[0].mode;
+ enum { load, store } class;
+
+ switch ((enum ix86_special_builtin_type) d->flag)
+ {
+ case VOID_FTYPE_VOID:
+ emit_insn (GEN_FCN (icode) (target));
+ return 0;
+ case V2DI_FTYPE_PV2DI:
+ case V16QI_FTYPE_PCCHAR:
+ case V4SF_FTYPE_PCFLOAT:
+ case V2DF_FTYPE_PCDOUBLE:
+ nargs = 1;
+ class = load;
+ memory = 0;
+ break;
+ case VOID_FTYPE_PV2SF_V4SF:
+ case VOID_FTYPE_PV2DI_V2DI:
+ case VOID_FTYPE_PCHAR_V16QI:
+ case VOID_FTYPE_PFLOAT_V4SF:
+ case VOID_FTYPE_PDOUBLE_V2DF:
+ case VOID_FTYPE_PDI_DI:
+ case VOID_FTYPE_PINT_INT:
+ nargs = 1;
+ class = store;
+ /* Reserve memory operand for target. */
+ memory = ARRAY_SIZE (args);
+ break;
+ case V4SF_FTYPE_V4SF_PCV2SF:
+ case V2DF_FTYPE_V2DF_PCDOUBLE:
+ nargs = 2;
+ class = load;
+ memory = 1;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (nargs <= ARRAY_SIZE (args));
+
+ if (class == store)
+ {
+ arg = CALL_EXPR_ARG (exp, 0);
+ op = expand_normal (arg);
+ gcc_assert (target == 0);
+ target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
+ arg_adjust = 1;
+ }
+ else
+ {
+ arg_adjust = 0;
+ if (optimize
+ || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_p->operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ }
+
+ for (i = 0; i < nargs; i++)
+ {
+ enum machine_mode mode = insn_p->operand[i + 1].mode;
+ bool match;
+
+ arg = CALL_EXPR_ARG (exp, i + arg_adjust);
+ op = expand_normal (arg);
+ match = (*insn_p->operand[i + 1].predicate) (op, mode);
+
+ if (last_arg_constant && (i + 1) == nargs)
+ {
+ if (!match)
+ switch (icode)
+ {
+ default:
+ error ("the last argument must be an 8-bit immediate");
+ return const0_rtx;
+ }
+ }
+ else
+ {
+ if (i == memory)
+ {
+ /* This must be the memory operand. */
+ op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
+ gcc_assert (GET_MODE (op) == mode
+ || GET_MODE (op) == VOIDmode);
+ }
+ else
+ {
+ /* This must be register. */
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
+
+ gcc_assert (GET_MODE (op) == mode
+ || GET_MODE (op) == VOIDmode);
+ op = copy_to_mode_reg (mode, op);
+ }
+ }
+
+ args[i].op = op;
+ args[i].mode = mode;
+ }
+
+ switch (nargs)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (target, args[0].op);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
+ args[2].op);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return class == store ? 0 : target;
+}
+
/* Subroutine of ix86_expand_builtin to take care of comi insns. */
static rtx
@@ -21134,19 +21322,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
tree arg0, arg1, arg2;
rtx op0, op1, op2, pat;
- enum machine_mode tmode, mode0, mode1, mode2;
+ enum machine_mode mode0, mode1, mode2;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
switch (fcode)
{
- case IX86_BUILTIN_EMMS:
- emit_insn (gen_mmx_emms ());
- return 0;
-
- case IX86_BUILTIN_SFENCE:
- emit_insn (gen_sse_sfence ());
- return 0;
-
case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
icode = (fcode == IX86_BUILTIN_MASKMOVQ
@@ -21178,75 +21358,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
emit_insn (pat);
return 0;
- case IX86_BUILTIN_RSQRTF:
- return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
-
- case IX86_BUILTIN_SQRTSS:
- return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
- case IX86_BUILTIN_RSQRTSS:
- return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
- case IX86_BUILTIN_RCPSS:
- return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
-
- case IX86_BUILTIN_LOADUPS:
- return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
-
- case IX86_BUILTIN_STOREUPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
-
- case IX86_BUILTIN_LOADHPS:
- case IX86_BUILTIN_LOADLPS:
- case IX86_BUILTIN_LOADHPD:
- case IX86_BUILTIN_LOADLPD:
- icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
- : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
- : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
- : CODE_FOR_sse2_loadlpd);
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
-
- op0 = force_reg (mode0, op0);
- op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
- if (optimize || target == 0
- || GET_MODE (target) != tmode
- || !register_operand (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case IX86_BUILTIN_STOREHPS:
- case IX86_BUILTIN_STORELPS:
- icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
- : CODE_FOR_sse_storelps);
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- mode0 = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
-
- op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
- op1 = force_reg (mode1, op1);
-
- pat = GEN_FCN (icode) (op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return const0_rtx;
-
- case IX86_BUILTIN_MOVNTPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
- case IX86_BUILTIN_MOVNTQ:
- return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
-
case IX86_BUILTIN_LDMXCSR:
op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
@@ -21259,24 +21370,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
emit_insn (gen_sse_stmxcsr (target));
return copy_to_mode_reg (SImode, target);
- case IX86_BUILTIN_FEMMS:
- emit_insn (gen_mmx_femms ());
- return NULL_RTX;
-
- case IX86_BUILTIN_SQRTSD:
- return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
- case IX86_BUILTIN_LOADUPD:
- return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
- case IX86_BUILTIN_STOREUPD:
- return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
-
- case IX86_BUILTIN_MFENCE:
- emit_insn (gen_sse2_mfence ());
- return 0;
- case IX86_BUILTIN_LFENCE:
- emit_insn (gen_sse2_lfence ());
- return 0;
-
case IX86_BUILTIN_CLFLUSH:
arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
@@ -21287,18 +21380,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
emit_insn (gen_sse2_clflush (op0));
return 0;
- case IX86_BUILTIN_MOVNTPD:
- return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
- case IX86_BUILTIN_MOVNTDQ:
- return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
- case IX86_BUILTIN_MOVNTI:
- return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
-
- case IX86_BUILTIN_LOADDQU:
- return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
- case IX86_BUILTIN_STOREDQU:
- return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
-
case IX86_BUILTIN_MONITOR:
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
@@ -21330,20 +21411,6 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
emit_insn (gen_sse3_mwait (op0, op1));
return 0;
- case IX86_BUILTIN_LDDQU:
- return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
- target, 1);
-
- case IX86_BUILTIN_MOVNTDQA:
- return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
- target, 1);
-
- case IX86_BUILTIN_MOVNTSD:
- return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
-
- case IX86_BUILTIN_MOVNTSS:
- return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
-
case IX86_BUILTIN_VEC_INIT_V2SI:
case IX86_BUILTIN_VEC_INIT_V4HI:
case IX86_BUILTIN_VEC_INIT_V8QI:
@@ -21388,6 +21455,12 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
break;
}
+ for (i = 0, d = bdesc_special_args;
+ i < ARRAY_SIZE (bdesc_special_args);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_special_args_builtin (d, exp, target);
+
for (i = 0, d = bdesc_args;
i < ARRAY_SIZE (bdesc_args);
i++, d++)
diff --git a/gcc/config/i386/mm3dnow.h b/gcc/config/i386/mm3dnow.h
index dd1c871936e..96056e28e31 100644
--- a/gcc/config/i386/mm3dnow.h
+++ b/gcc/config/i386/mm3dnow.h
@@ -34,9 +34,6 @@
#include <mmintrin.h>
-/* Internal data types for implementing the intrinsics. */
-typedef float __v2sf __attribute__ ((__vector_size__ (8)));
-
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_femms (void)
{
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
index 1c09be30e1a..3c135e61766 100644
--- a/gcc/config/i386/mmintrin.h
+++ b/gcc/config/i386/mmintrin.h
@@ -43,6 +43,7 @@ typedef int __v2si __attribute__ ((__vector_size__ (8)));
typedef short __v4hi __attribute__ ((__vector_size__ (8)));
typedef char __v8qi __attribute__ ((__vector_size__ (8)));
typedef long long __v1di __attribute__ ((__vector_size__ (8)));
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
/* Empty the multimedia state. */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index fcfdaf98915..8d9b761bcaf 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -745,14 +745,14 @@ _mm_unpacklo_ps (__m128 __A, __m128 __B)
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadh_pi (__m128 __A, __m64 const *__P)
{
- return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (__v2si *)__P);
+ return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P);
}
/* Stores the upper two SPFP values of A into P. */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeh_pi (__m64 *__P, __m128 __A)
{
- __builtin_ia32_storehps ((__v2si *)__P, (__v4sf)__A);
+ __builtin_ia32_storehps ((__v2sf *)__P, (__v4sf)__A);
}
/* Moves the upper two values of B into the lower two values of A. */
@@ -774,14 +774,14 @@ _mm_movelh_ps (__m128 __A, __m128 __B)
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pi (__m128 __A, __m64 const *__P)
{
- return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (__v2si *)__P);
+ return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (const __v2sf *)__P);
}
/* Stores the lower two SPFP values of A into P. */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_pi (__m64 *__P, __m128 __A)
{
- __builtin_ia32_storelps ((__v2si *)__P, (__v4sf)__A);
+ __builtin_ia32_storelps ((__v2sf *)__P, (__v4sf)__A);
}
/* Creates a 4-bit mask from the most significant bits of the SPFP values. */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7c5ad9bc4f7..f3c6c574a42 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -7647,13 +7647,13 @@ Generates the @code{movups} machine instruction as a store to memory.
Generates the @code{movss} machine instruction as a load from memory.
@item void __builtin_ia32_storess (float *, v4sf)
Generates the @code{movss} machine instruction as a store to memory.
-@item v4sf __builtin_ia32_loadhps (v4sf, v2si *)
+@item v4sf __builtin_ia32_loadhps (v4sf, const v2sf *)
Generates the @code{movhps} machine instruction as a load from memory.
-@item v4sf __builtin_ia32_loadlps (v4sf, v2si *)
+@item v4sf __builtin_ia32_loadlps (v4sf, const v2sf *)
Generates the @code{movlps} machine instruction as a load from memory
-@item void __builtin_ia32_storehps (v4sf, v2si *)
+@item void __builtin_ia32_storehps (v2sf *, v4sf)
Generates the @code{movhps} machine instruction as a store to memory.
-@item void __builtin_ia32_storelps (v4sf, v2si *)
+@item void __builtin_ia32_storelps (v2sf *, v4sf)
Generates the @code{movlps} machine instruction as a store to memory.
@end table
@@ -7755,8 +7755,8 @@ v8hi __builtin_ia32_pmulhuw128 (v8hi, v8hi)
void __builtin_ia32_maskmovdqu (v16qi, v16qi)
v2df __builtin_ia32_loadupd (double *)
void __builtin_ia32_storeupd (double *, v2df)
-v2df __builtin_ia32_loadhpd (v2df, double *)
-v2df __builtin_ia32_loadlpd (v2df, double *)
+v2df __builtin_ia32_loadhpd (v2df, double const *)
+v2df __builtin_ia32_loadlpd (v2df, double const *)
int __builtin_ia32_movmskpd (v2df)
int __builtin_ia32_pmovmskb128 (v16qi)
void __builtin_ia32_movnti (int *, int)