summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog71
-rw-r--r--gcc/config/i386/constraints.md7
-rw-r--r--gcc/config/i386/i386-modes.def8
-rw-r--r--gcc/config/i386/i386.c358
-rw-r--r--gcc/config/i386/i386.h5
-rw-r--r--gcc/config/i386/i386.md2
-rw-r--r--gcc/config/i386/smmintrin.h4
-rw-r--r--gcc/config/i386/sse.md243
8 files changed, 672 insertions, 26 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 904fe7d7a34..c7ade0af844 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,53 @@
+2007-06-02 H.J. Lu <hongjiu.lu@intel.com>
+ Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/i386.h (enum reg_class) [SSE_FIRST_REG]: New.
+ (SSE_CLASS_P): Use reg_class_subset_p between SSE_REGS.
+ (REG_CLASS_NAMES): Add "FIRST_SSE_REG" string.
+ (REG_CLASS_CONTENTS): Add members of FIRST_SSE_REG class.
+ * config/i386/constraints.md ("z"): New register constraint
+ for members of SSE_FIRST_REG class.
+ * config/i386/i386-modes.def (CCA, CCC, CCO, CCS): New compare modes.
+ * config/i386/i386.c (regclass_map): Change class of %xmm0 to
+ SSE_FIRST_REG class.
+ (put_condition_code) [EQ, NE]: Output suffixes for new compare modes.
+ (ix86_cc_modes_compatible): Handle CCA, CCC, CCO and CCS modes.
+ (IX86_BUILTIN_PCMPESTRI128): New for SSE4.2.
+ (IX86_BUILTIN_PCMPESTRM128): Likewise.
+ (IX86_BUILTIN_PCMPESTRA128): Likewise.
+ (IX86_BUILTIN_PCMPESTRC128): Likewise.
+ (IX86_BUILTIN_PCMPESTRO128): Likewise.
+ (IX86_BUILTIN_PCMPESTRS128): Likewise.
+ (IX86_BUILTIN_PCMPESTRZ128): Likewise.
+ (IX86_BUILTIN_PCMPISTRI128): Likewise.
+ (IX86_BUILTIN_PCMPISTRM128): Likewise.
+ (IX86_BUILTIN_PCMPISTRA128): Likewise.
+ (IX86_BUILTIN_PCMPISTRC128): Likewise.
+ (IX86_BUILTIN_PCMPISTRO128): Likewise.
+ (IX86_BUILTIN_PCMPISTRS128): Likewise.
+ (IX86_BUILTIN_PCMPISTRZ128): Likewise.
+ (struct builtin_description): Change "flag" field to unsigned.
+ (bdesc_pcmpestr): New builtin description table.
+ (bdesc_pcmpistr): Likewise.
+ (ix86_init_mmx_sse_builtins): Define int_ftype_v16qi_int_v16qi_int_int,
+ v16qi_ftype_v16qi_int_v16qi_int_int and int_ftype_v16qi_v16qi_int.
+ Initialize pcmp[ei]str[im] insns for SSE4.2.
+ (ix86_expand_sse_pcmpestr): New subroutine of ix86_expand_builtin.
+ (ix86_expand_sse_pcmpistr): Likewise.
+ (ix86_expand_builtin): Expand pcmp[ei]str[im] builtins for SSE4.2.
+ * config/i386/i386.md (UNSPEC_PCMPESTR): New for SSE4.2.
+ (UNSPEC_PCMPISTR): Likewise.
+ * config/i386/sse.md (sse4_2_pcmpestr): New insn patern and splitter.
+ (sse4_2_pcmpestri):New isns pattern.
+ (sse4_2_pcmpestrm): Likewise.
+ (sse4_2_pcmpestr_cconly): Likewise.
+ (sse4_2_pcmpistr): New insn patern and splitter.
+ (sse4_2_pcmpistri):New isns pattern.
+ (sse4_2_pcmpistrm): Likewise.
+ (sse4_2_pcmpistr_cconly): Likewise.
+ * config/i386/smmintrin.h: Enable pcmp[ei]str[im] intrinsics
+ in SSE4.2.
+
2007-06-01 David Daney <ddaney@avtrex.com>
* config/mips/mips.c (mips_output_mi_thunk): Only load gp if not
@@ -48,10 +98,8 @@
2007-05-31 H.J. Lu <hongjiu.lu@intel.com>
- * config.gcc (i[34567]86-*-*): Add nmmintrin.h to
- extra_headers.
+ * config.gcc (i[34567]86-*-*): Add nmmintrin.h to extra_headers.
(x86_64-*-*): Likewise.
-
* config/i386/i386.c (OPTION_MASK_ISA_MMX_UNSET): New.
(OPTION_MASK_ISA_3DNOW_UNSET): Likewise.
(OPTION_MASK_ISA_SSE_UNSET): Likewise.
@@ -63,8 +111,7 @@
(OPTION_MASK_ISA_SSE4): Likewise.
(OPTION_MASK_ISA_SSE4_UNSET): Likewise.
(OPTION_MASK_ISA_SSE4A_UNSET): Likewise.
- (ix86_handle_option): Use OPTION_MASK_ISA_*_UNSET. Handle
- SSE4.2.
+ (ix86_handle_option): Use OPTION_MASK_ISA_*_UNSET. Handle SSE4.2.
(override_options): Support SSE4.2.
(ix86_build_const_vector): Support SImode and DImode.
(ix86_build_signbit_mask): Likewise.
@@ -79,32 +126,22 @@
(ix86_expand_crc32): Likewise.
(ix86_init_mmx_sse_builtins): Support SSE4.2.
(ix86_expand_builtin): Likewise.
-
* config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Define
__SSE4_2__ for -msse4.2.
-
* config/i386/i386.md (UNSPEC_CRC32): New for SSE4.2.
(CRC32MODE): Likewise.
(crc32modesuffix): Likewise.
(crc32modeconstraint): Likewise.
(sse4_2_crc32<mode>): Likewise.
(sse4_2_crc32di): Likewise.
-
* config/i386/i386.opt (msse4.2): New for SSE4.2.
(msse4): Likewise.
-
- * config/i386/nmmintrin.h: New. The dummy SSE4.2 intrinsic header
- file.
-
+ * config/i386/nmmintrin.h: New. The dummy SSE4.2 intrinsic header file.
* config/i386/smmintrin.h: Add SSE4.2 intrinsics.
-
- * config/i386/sse.md (sse4_2_gtv2di3): New pattern for
- SSE4.2.
+ * config/i386/sse.md (sse4_2_gtv2di3): New pattern for SSE4.2.
(vcond<mode>): Use SSEMODEI instead of SSEMODE124.
(vcondu<mode>): Likewise.
-
* doc/extend.texi: Document SSE4.2 built-in functions.
-
* doc/invoke.texi: Document -msse4.2/-msse4.
2007-05-31 Zdenek Dvorak <dvorakz@suse.cz>
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index da700219ed8..281d01ff907 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -19,8 +19,8 @@
;; Boston, MA 02110-1301, USA.
;;; Unused letters:
-;;; B H TU W
-;;; h jk vw z
+;;; B H TU W
+;;; h jk vw
;; Integer register constraints.
;; It is not necessary to define 'r' here.
@@ -83,6 +83,9 @@
(define_register_constraint "x" "TARGET_SSE ? SSE_REGS : NO_REGS"
"Any SSE register.")
+(define_register_constraint "z" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
+ "First SSE register (@code{%xmm0}).")
+
;; We use the Y prefix to denote any number of conditional register sets:
;; 2 SSE2 enabled
;; i SSE2 inter-unit moves enabled
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 2efccda0aec..c772e06685c 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -53,11 +53,19 @@ ADJUST_ALIGNMENT (XF, TARGET_128BIT_LONG_DOUBLE ? 16 : 4);
mode is used to simulate comparisons of (a-b) and (a+b)
against zero using sub/cmp/add operations.
+ Add CCA to indicate that only the Above flag is valid.
+ Add CCC to indicate that only the Carry flag is valid.
+ Add CCO to indicate that only the Overflow flag is valid.
+ Add CCS to indicate that only the Sign flag is valid.
Add CCZ to indicate that only the Zero flag is valid. */
CC_MODE (CCGC);
CC_MODE (CCGOC);
CC_MODE (CCNO);
+CC_MODE (CCA);
+CC_MODE (CCC);
+CC_MODE (CCO);
+CC_MODE (CCS);
CC_MODE (CCZ);
CC_MODE (CCFP);
CC_MODE (CCFPU);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f4cdf170990..593db77a4e0 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1278,12 +1278,16 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
NON_Q_REGS,
/* flags, fpsr, fpcr, frame */
NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
- SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
+ /* SSE registers */
+ SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
SSE_REGS, SSE_REGS,
+ /* MMX registers */
MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
MMX_REGS, MMX_REGS,
+ /* REX registers */
NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
+ /* SSE REX registers */
SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
SSE_REGS, SSE_REGS,
};
@@ -8160,10 +8164,50 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
switch (code)
{
case EQ:
- suffix = "e";
+ switch (mode)
+ {
+ case CCAmode:
+ suffix = "a";
+ break;
+
+ case CCCmode:
+ suffix = "c";
+ break;
+
+ case CCOmode:
+ suffix = "o";
+ break;
+
+ case CCSmode:
+ suffix = "s";
+ break;
+
+ default:
+ suffix = "e";
+ }
break;
case NE:
- suffix = "ne";
+ switch (mode)
+ {
+ case CCAmode:
+ suffix = "na";
+ break;
+
+ case CCCmode:
+ suffix = "nc";
+ break;
+
+ case CCOmode:
+ suffix = "no";
+ break;
+
+ case CCSmode:
+ suffix = "ns";
+ break;
+
+ default:
+ suffix = "ne";
+ }
break;
case GT:
gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
@@ -10991,6 +11035,10 @@ ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
case CCGCmode:
case CCGOCmode:
case CCNOmode:
+ case CCAmode:
+ case CCCmode:
+ case CCOmode:
+ case CCSmode:
case CCZmode:
switch (m2)
{
@@ -11001,6 +11049,10 @@ ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
case CCGCmode:
case CCGOCmode:
case CCNOmode:
+ case CCAmode:
+ case CCCmode:
+ case CCOmode:
+ case CCSmode:
case CCZmode:
return CCmode;
}
@@ -16736,6 +16788,21 @@ enum ix86_builtins
IX86_BUILTIN_CRC32SI,
IX86_BUILTIN_CRC32DI,
+ IX86_BUILTIN_PCMPESTRI128,
+ IX86_BUILTIN_PCMPESTRM128,
+ IX86_BUILTIN_PCMPESTRA128,
+ IX86_BUILTIN_PCMPESTRC128,
+ IX86_BUILTIN_PCMPESTRO128,
+ IX86_BUILTIN_PCMPESTRS128,
+ IX86_BUILTIN_PCMPESTRZ128,
+ IX86_BUILTIN_PCMPISTRI128,
+ IX86_BUILTIN_PCMPISTRM128,
+ IX86_BUILTIN_PCMPISTRA128,
+ IX86_BUILTIN_PCMPISTRC128,
+ IX86_BUILTIN_PCMPISTRO128,
+ IX86_BUILTIN_PCMPISTRS128,
+ IX86_BUILTIN_PCMPISTRZ128,
+
IX86_BUILTIN_PCMPGTQ,
IX86_BUILTIN_MAX
@@ -16790,7 +16857,7 @@ struct builtin_description
const char *const name;
const enum ix86_builtins code;
const enum rtx_code comparison;
- const unsigned int flag;
+ const int flag;
};
static const struct builtin_description bdesc_comi[] =
@@ -16829,6 +16896,30 @@ static const struct builtin_description bdesc_ptest[] =
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
};
+static const struct builtin_description bdesc_pcmpestr[] =
+{
+ /* SSE4.2 */
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, 0, 0 },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, 0, 0 },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, 0, (int) CCAmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, 0, (int) CCCmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, 0, (int) CCOmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, 0, (int) CCSmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, 0, (int) CCZmode },
+};
+
+static const struct builtin_description bdesc_pcmpistr[] =
+{
+ /* SSE4.2 */
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, 0, 0 },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, 0, 0 },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, 0, (int) CCAmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, 0, (int) CCCmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, 0, (int) CCOmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, 0, (int) CCSmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, 0, (int) CCZmode },
+};
+
static const struct builtin_description bdesc_crc32[] =
{
/* SSE4.2 */
@@ -17591,6 +17682,28 @@ ix86_init_mmx_sse_builtins (void)
= build_function_type_list (integer_type_node,
V2DI_type_node, V2DI_type_node,
NULL_TREE);
+ tree int_ftype_v16qi_int_v16qi_int_int
+ = build_function_type_list (integer_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v16qi_ftype_v16qi_int_v16qi_int_int
+ = build_function_type_list (V16QI_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree int_ftype_v16qi_v16qi_int
+ = build_function_type_list (integer_type_node,
+ V16QI_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ NULL_TREE);
tree float80_type;
tree float128_type;
@@ -17781,6 +17894,30 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (d->mask, d->name, type, d->code);
}
+ /* pcmpestr[im] insns. */
+ for (i = 0, d = bdesc_pcmpestr;
+ i < ARRAY_SIZE (bdesc_pcmpestr);
+ i++, d++)
+ {
+ if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
+ else
+ ftype = int_ftype_v16qi_int_v16qi_int_int;
+ def_builtin (d->mask, d->name, ftype, d->code);
+ }
+
+ /* pcmpistr[im] insns. */
+ for (i = 0, d = bdesc_pcmpistr;
+ i < ARRAY_SIZE (bdesc_pcmpistr);
+ i++, d++)
+ {
+ if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ ftype = v16qi_ftype_v16qi_v16qi_int;
+ else
+ ftype = int_ftype_v16qi_v16qi_int;
+ def_builtin (d->mask, d->name, ftype, d->code);
+ }
+
/* Add the remaining MMX insns with somewhat more complicated types. */
def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
@@ -18562,6 +18699,207 @@ ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
return SUBREG_REG (target);
}
+/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpestr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ tree arg3 = CALL_EXPR_ARG (exp, 3);
+ tree arg4 = CALL_EXPR_ARG (exp, 4);
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx op3 = expand_normal (arg3);
+ rtx op4 = expand_normal (arg4);
+ enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modei3 = insn_data[d->icode].operand[3].mode;
+ modev4 = insn_data[d->icode].operand[4].mode;
+ modei5 = insn_data[d->icode].operand[5].mode;
+ modeimm = insn_data[d->icode].operand[6].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev4))
+ op2 = safe_vector_operand (op2, modev4);
+
+ if ((optimize && !register_operand (op0, modev2))
+ || !(*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if ((optimize && !register_operand (op1, modei3))
+ || !(*insn_data[d->icode].operand[3].predicate) (op1, modei3))
+ op1 = copy_to_mode_reg (modei3, op1);
+ if ((optimize && !register_operand (op2, modev4))
+ || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
+ op2 = copy_to_mode_reg (modev4, op2);
+ if ((optimize && !register_operand (op3, modei5))
+ || !(*insn_data[d->icode].operand[5].predicate) (op3, modei5))
+ op3 = copy_to_mode_reg (modei5, op3);
+
+ if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
+ {
+ error ("the fifth argument must be a 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPESTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG (d->flag, FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
+
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpistr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modev3 = insn_data[d->icode].operand[3].mode;
+ modeimm = insn_data[d->icode].operand[4].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev3))
+ op1 = safe_vector_operand (op1, modev3);
+
+ if ((optimize && !register_operand (op0, modev2))
+ || !(*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if ((optimize && !register_operand (op1, modev3))
+ || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
+ op1 = copy_to_mode_reg (modev3, op1);
+
+ if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
+ {
+ error ("the third argument must be a 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPISTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG (d->flag, FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
+
/* Return the integer constant in ARG. Constrain it to be in the range
of the subparts of VEC_TYPE; issue an error if not. */
@@ -19392,6 +19730,18 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if (d->code == fcode)
return ix86_expand_crc32 (d->icode, exp, target);
+ for (i = 0, d = bdesc_pcmpestr;
+ i < ARRAY_SIZE (bdesc_pcmpestr);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_pcmpestr (d, exp, target);
+
+ for (i = 0, d = bdesc_pcmpistr;
+ i < ARRAY_SIZE (bdesc_pcmpistr);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_pcmpistr (d, exp, target);
+
gcc_unreachable ();
}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9bdb3882623..6d351f65753 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1268,6 +1268,7 @@ enum reg_class
GENERAL_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp %esp %r8 - %r15*/
FP_TOP_REG, FP_SECOND_REG, /* %st(0) %st(1) */
FLOAT_REGS,
+ SSE_FIRST_REG,
SSE_REGS,
MMX_REGS,
FP_TOP_SSE_REGS,
@@ -1286,7 +1287,7 @@ enum reg_class
#define FLOAT_CLASS_P(CLASS) \
reg_class_subset_p ((CLASS), FLOAT_REGS)
#define SSE_CLASS_P(CLASS) \
- ((CLASS) == SSE_REGS)
+ reg_class_subset_p ((CLASS), SSE_REGS)
#define MMX_CLASS_P(CLASS) \
((CLASS) == MMX_REGS)
#define MAYBE_INTEGER_CLASS_P(CLASS) \
@@ -1314,6 +1315,7 @@ enum reg_class
"GENERAL_REGS", \
"FP_TOP_REG", "FP_SECOND_REG", \
"FLOAT_REGS", \
+ "FIRST_SSE_REG", \
"SSE_REGS", \
"MMX_REGS", \
"FP_TOP_SSE_REGS", \
@@ -1341,6 +1343,7 @@ enum reg_class
{ 0x1100ff, 0x1fe0 }, /* GENERAL_REGS */ \
{ 0x100, 0x0 }, { 0x0200, 0x0 },/* FP_TOP_REG, FP_SECOND_REG */\
{ 0xff00, 0x0 }, /* FLOAT_REGS */ \
+ { 0x200000, 0x0 }, /* FIRST_SSE_REG */ \
{ 0x1fe00000,0x1fe000 }, /* SSE_REGS */ \
{ 0xe0000000, 0x1f }, /* MMX_REGS */ \
{ 0x1fe00100,0x1fe000 }, /* FP_TOP_SSE_REG */ \
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 38e41bccb08..3d101772f05 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -176,6 +176,8 @@
; For SSE4.2 support
(UNSPEC_CRC32 143)
+ (UNSPEC_PCMPESTR 144)
+ (UNSPEC_PCMPISTR 145)
])
(define_constants
diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h
index d0f365b1f84..01943cd2bb7 100644
--- a/gcc/config/i386/smmintrin.h
+++ b/gcc/config/i386/smmintrin.h
@@ -603,7 +603,7 @@ _mm_stream_load_si128 (__m128i *__X)
/* Intrinsics for text/string processing. */
-#if 0
+#ifdef __OPTIMIZE__
static __inline __m128i __attribute__((__always_inline__))
_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
{
@@ -652,7 +652,7 @@ _mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
/* Intrinsics for text/string processing and reading values of
EFlags. */
-#if 0
+#ifdef __OPTIMIZE__
static __inline int __attribute__((__always_inline__))
_mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
{
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 76351926127..da7e582b9ab 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6383,3 +6383,246 @@
[(set_attr "type" "ssecvt")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V4SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Intel SSE4.2 string/text processing instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_and_split "sse4_2_pcmpestr"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 2 "register_operand" "x,x")
+ (match_operand:SI 3 "register_operand" "a,a")
+ (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
+ (match_operand:SI 5 "register_operand" "d,d")
+ (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPESTR))
+ (set (match_operand:V16QI 1 "register_operand" "=z,z")
+ (unspec:V16QI
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)
+ (match_dup 6)]
+ UNSPEC_PCMPESTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)
+ (match_dup 6)]
+ UNSPEC_PCMPESTR))]
+ "TARGET_SSE4_2
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+ int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
+ int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
+
+ if (ecx)
+ emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
+ operands[3], operands[4],
+ operands[5], operands[6]));
+ if (xmm0)
+ emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
+ operands[3], operands[4],
+ operands[5], operands[6]));
+ if (flags && !(ecx || xmm0))
+ emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
+ operands[4], operands[5],
+ operands[6]));
+ DONE;
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpestri"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:SI 2 "register_operand" "a,a")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:SI 4 "register_operand" "d,d")
+ (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPESTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)]
+ UNSPEC_PCMPESTR))]
+ "TARGET_SSE4_2"
+ "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpestrm"
+ [(set (match_operand:V16QI 0 "register_operand" "=z,z")
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:SI 2 "register_operand" "a,a")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:SI 4 "register_operand" "d,d")
+ (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPESTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)]
+ UNSPEC_PCMPESTR))]
+ "TARGET_SSE4_2"
+ "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpestr_cconly"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
+ (match_operand:SI 1 "register_operand" "a,a,a,a")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
+ (match_operand:SI 3 "register_operand" "d,d,d,d")
+ (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
+ UNSPEC_PCMPESTR))
+ (clobber (match_scratch:SI 5 "=c,c,X,X"))
+ (clobber (match_scratch:V16QI 6 "=X,X,z,z"))]
+ "TARGET_SSE4_2"
+ "@
+ pcmpestri\t{%4, %2, %0|%0, %2, %4}
+ pcmpestri\t{%4, %2, %0|%0, %2, %4}
+ pcmpestrm\t{%4, %2, %0|%0, %2, %4}
+ pcmpestrm\t{%4, %2, %0|%0, %2, %4}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load,none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "sse4_2_pcmpistr"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 2 "register_operand" "x,x")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPISTR))
+ (set (match_operand:V16QI 1 "register_operand" "=z,z")
+ (unspec:V16QI
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)]
+ UNSPEC_PCMPISTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)]
+ UNSPEC_PCMPISTR))]
+ "TARGET_SSE4_2
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+ int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
+ int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
+
+ if (ecx)
+ emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
+ operands[3], operands[4]));
+ if (xmm0)
+ emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
+ operands[3], operands[4]));
+ if (flags && !(ecx || xmm0))
+ emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
+ operands[4]));
+ DONE;
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpistri"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPISTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMPISTR))]
+ "TARGET_SSE4_2"
+ "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpistrm"
+ [(set (match_operand:V16QI 0 "register_operand" "=z,z")
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPISTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMPISTR))]
+ "TARGET_SSE4_2"
+ "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpistr_cconly"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
+ (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
+ (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
+ UNSPEC_PCMPISTR))
+ (clobber (match_scratch:SI 3 "=c,c,X,X"))
+ (clobber (match_scratch:V16QI 4 "=X,X,z,z"))]
+ "TARGET_SSE4_2"
+ "@
+ pcmpistri\t{%2, %1, %0|%0, %1, %2}
+ pcmpistri\t{%2, %1, %0|%0, %1, %2}
+ pcmpistrm\t{%2, %1, %0|%0, %1, %2}
+ pcmpistrm\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load,none,load")
+ (set_attr "mode" "TI")])