path: root/gcc/config/rs6000/vsx.md
Diffstat (limited to 'gcc/config/rs6000/vsx.md')
-rw-r--r--  gcc/config/rs6000/vsx.md | 469
1 file changed, 399 insertions(+), 70 deletions(-)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 35be5dead64..00d76563f37 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -73,6 +73,13 @@
(TF "FLOAT128_VECTOR_P (TFmode)")
TI])
+(define_mode_attr VSX_XXBR [(V8HI "h")
+ (V4SI "w")
+ (V4SF "w")
+ (V2DF "d")
+ (V2DI "d")
+ (V1TI "q")])
+
;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
(V8HI "vw4")
@@ -273,6 +280,9 @@
(define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
+;; Vector reverse byte modes
+(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
+
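Taken together, the VSX_XXBR mode attribute and the VEC_REVB iterator added above let the revb_<mode> expander near the end of this patch form the name of the matching ISA 3.0 byte-reverse pattern: "h" selects xxbrh for V8HI, "w" selects xxbrw for V4SI/V4SF, "d" selects xxbrd for V2DI/V2DF, and "q" selects xxbrq for V1TI.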
;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
@@ -408,6 +418,10 @@
UNSPEC_VCMPNEZW
UNSPEC_XXEXTRACTUW
UNSPEC_XXINSERTW
+ UNSPEC_VSX_FIRST_MATCH_INDEX
+ UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
+ UNSPEC_VSX_FIRST_MISMATCH_INDEX
+ UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
])
;; VSX moves
@@ -1108,7 +1122,7 @@
"lxvw4x %x0,%y1"
[(set_attr "type" "vecload")])
-(define_insn "vsx_ld_elemrev_v8hi"
+(define_expand "vsx_ld_elemrev_v8hi"
[(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
(vec_select:V8HI
(match_operand:V8HI 1 "memory_operand" "Z")
@@ -1116,22 +1130,94 @@
(const_int 5) (const_int 4)
(const_int 3) (const_int 2)
(const_int 1) (const_int 0)])))]
+ "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
+{
+ if (!TARGET_P9_VECTOR)
+ {
+ rtx tmp = gen_reg_rtx (V4SImode);
+ rtx subreg, subreg2, perm[16], pcv;
+ /* 2 is leftmost element in register.  */
+ unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
+ int i;
+
+ subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
+ emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
+ subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
+
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (reorder[i]);
+
+ pcv = force_reg (V16QImode,
+ gen_rtx_CONST_VECTOR (V16QImode,
+ gen_rtvec_v (16, perm)));
+ emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
+ subreg2, pcv));
+ DONE;
+ }
+})
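The fallback above composes a word-element-reversed load with a vperm that swaps the halfwords within each word. A minimal host-side C sketch of that composition, under an assumed little-endian byte-index model of the register (wrev[j] models the V4SI element-reversed load; the 15 - reorder[j] accounts for vperm's big-endian byte numbering of the control vector; none of this is GCC code, it is only an illustration), checks that the result is the desired halfword-element-reversed load:

    #include <assert.h>
    #include <stdio.h>

    int
    main (void)
    {
      unsigned char mem[16], wrev[16], out[16];
      /* Same table as in the expander above.  */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};

      for (int j = 0; j < 16; j++)
        mem[j] = j;

      /* V4SI element-reversed load: word lanes reversed, bytes within
         each word kept in memory order.  */
      for (int j = 0; j < 16; j++)
        wrev[j] = mem[12 - 4 * (j / 4) + j % 4];

      /* vperm: the control vector indexes bytes in big-endian order,
         hence 15 - reorder[j] in this little-endian byte view.  */
      for (int j = 0; j < 16; j++)
        out[j] = wrev[15 - reorder[j]];

      /* Expect the halfword-element-reversed load of mem.  */
      for (int j = 0; j < 16; j++)
        assert (out[j] == mem[14 - 2 * (j / 2) + j % 2]);

      puts ("v8hi elemrev fallback permutation verified");
      return 0;
    }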
+
+(define_insn "*vsx_ld_elemrev_v8hi_internal"
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "memory_operand" "Z")
+ (parallel [(const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
"lxvh8x %x0,%y1"
[(set_attr "type" "vecload")])
-(define_insn "vsx_ld_elemrev_v16qi"
+(define_expand "vsx_ld_elemrev_v16qi"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "memory_operand" "Z")
+ (parallel [(const_int 15) (const_int 14)
+ (const_int 13) (const_int 12)
+ (const_int 11) (const_int 10)
+ (const_int 9) (const_int 8)
+ (const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
+ "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
+{
+ if (!TARGET_P9_VECTOR)
+ {
+ rtx tmp = gen_reg_rtx (V4SImode);
+ rtx subreg, subreg2, perm[16], pcv;
+ /* 3 is leftmost element in register.  */
+ unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
+ int i;
+
+ subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
+ emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
+ subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
+
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (reorder[i]);
+
+ pcv = force_reg (V16QImode,
+ gen_rtx_CONST_VECTOR (V16QImode,
+ gen_rtvec_v (16, perm)));
+ emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
+ subreg2, pcv));
+ DONE;
+ }
+})
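The same host-side check as for the V8HI case applies here: with this byte-granularity table, out[j] = wrev[15 - reorder[j]] reduces to mem[15 - j], i.e. the full byte-element-reversed load this expander implements.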
+
+(define_insn "*vsx_ld_elemrev_v16qi_internal"
[(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
(vec_select:V16QI
- (match_operand:V16QI 1 "memory_operand" "Z")
- (parallel [(const_int 15) (const_int 14)
- (const_int 13) (const_int 12)
- (const_int 11) (const_int 10)
- (const_int 9) (const_int 8)
- (const_int 7) (const_int 6)
- (const_int 5) (const_int 4)
- (const_int 3) (const_int 2)
- (const_int 1) (const_int 0)])))]
+ (match_operand:V16QI 1 "memory_operand" "Z")
+ (parallel [(const_int 15) (const_int 14)
+ (const_int 13) (const_int 12)
+ (const_int 11) (const_int 10)
+ (const_int 9) (const_int 8)
+ (const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
"lxvb16x %x0,%y1"
[(set_attr "type" "vecload")])
@@ -1139,8 +1225,8 @@
(define_insn "vsx_st_elemrev_v2df"
[(set (match_operand:V2DF 0 "memory_operand" "=Z")
(vec_select:V2DF
- (match_operand:V2DF 1 "vsx_register_operand" "wa")
- (parallel [(const_int 1) (const_int 0)])))]
+ (match_operand:V2DF 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
"stxvd2x %x1,%y0"
[(set_attr "type" "vecstore")])
@@ -1148,8 +1234,8 @@
(define_insn "vsx_st_elemrev_v2di"
[(set (match_operand:V2DI 0 "memory_operand" "=Z")
(vec_select:V2DI
- (match_operand:V2DI 1 "vsx_register_operand" "wa")
- (parallel [(const_int 1) (const_int 0)])))]
+ (match_operand:V2DI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
"stxvd2x %x1,%y0"
[(set_attr "type" "vecstore")])
@@ -1157,9 +1243,9 @@
(define_insn "vsx_st_elemrev_v4sf"
[(set (match_operand:V4SF 0 "memory_operand" "=Z")
(vec_select:V4SF
- (match_operand:V4SF 1 "vsx_register_operand" "wa")
- (parallel [(const_int 3) (const_int 2)
- (const_int 1) (const_int 0)])))]
+ (match_operand:V4SF 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
"stxvw4x %x1,%y0"
[(set_attr "type" "vecstore")])
@@ -1174,30 +1260,98 @@
"stxvw4x %x1,%y0"
[(set_attr "type" "vecstore")])
-(define_insn "vsx_st_elemrev_v8hi"
+(define_expand "vsx_st_elemrev_v8hi"
[(set (match_operand:V8HI 0 "memory_operand" "=Z")
(vec_select:V8HI
- (match_operand:V8HI 1 "vsx_register_operand" "wa")
- (parallel [(const_int 7) (const_int 6)
- (const_int 5) (const_int 4)
- (const_int 3) (const_int 2)
- (const_int 1) (const_int 0)])))]
+ (match_operand:V8HI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
+ "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
+{
+ if (!TARGET_P9_VECTOR)
+ {
+ rtx subreg, perm[16], pcv;
+ rtx tmp = gen_reg_rtx (V8HImode);
+ /* 2 is leftmost element in register.  */
+ unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
+ int i;
+
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (reorder[i]);
+
+ pcv = force_reg (V16QImode,
+ gen_rtx_CONST_VECTOR (V16QImode,
+ gen_rtvec_v (16, perm)));
+ emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
+ operands[1], pcv));
+ subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
+ emit_insn (gen_vsx_st_elemrev_v4si (subreg, operands[0]));
+ DONE;
+ }
+})
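Note that both reorder[] tables are involutions (reorder[reorder[i]] == i for every i), which is why the store expanders can reuse the load tables unchanged: the vperm is simply applied before the V4SI element-reversed store rather than after the load.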
+
+(define_insn "*vsx_st_elemrev_v8hi_internal"
+ [(set (match_operand:V8HI 0 "memory_operand" "=Z")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
"stxvh8x %x1,%y0"
[(set_attr "type" "vecstore")])
-(define_insn "vsx_st_elemrev_v16qi"
+(define_expand "vsx_st_elemrev_v16qi"
[(set (match_operand:V16QI 0 "memory_operand" "=Z")
(vec_select:V16QI
- (match_operand:V16QI 1 "vsx_register_operand" "wa")
- (parallel [(const_int 15) (const_int 14)
- (const_int 13) (const_int 12)
- (const_int 11) (const_int 10)
- (const_int 9) (const_int 8)
- (const_int 7) (const_int 6)
- (const_int 5) (const_int 4)
- (const_int 3) (const_int 2)
- (const_int 1) (const_int 0)])))]
+ (match_operand:V16QI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 15) (const_int 14)
+ (const_int 13) (const_int 12)
+ (const_int 11) (const_int 10)
+ (const_int 9) (const_int 8)
+ (const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
+ "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
+{
+ if (!TARGET_P9_VECTOR)
+ {
+ rtx subreg, perm[16], pcv;
+ rtx tmp = gen_reg_rtx (V16QImode);
+ /* 3 is leftmost element in register.  */
+ unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
+ int i;
+
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (reorder[i]);
+
+ pcv = force_reg (V16QImode,
+ gen_rtx_CONST_VECTOR (V16QImode,
+ gen_rtvec_v (16, perm)));
+ emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
+ operands[1], pcv));
+ subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
+ emit_insn (gen_vsx_st_elemrev_v4si (subreg, operands[0]));
+ DONE;
+ }
+})
+
+(define_insn "*vsx_st_elemrev_v16qi_internal"
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 15) (const_int 14)
+ (const_int 13) (const_int 12)
+ (const_int 11) (const_int 10)
+ (const_int 9) (const_int 8)
+ (const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
"stxvb16x %x1,%y0"
[(set_attr "type" "vecstore")])
@@ -4054,9 +4208,9 @@
;; ISA 3.0 Binary Floating-Point Support
;; VSX Scalar Extract Exponent Quad-Precision
-(define_insn "xsxexpqp"
+(define_insn "xsxexpqp_<mode>"
[(set (match_operand:DI 0 "altivec_register_operand" "=v")
- (unspec:DI [(match_operand:KF 1 "altivec_register_operand" "v")]
+ (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
UNSPEC_VSX_SXEXPDP))]
"TARGET_P9_VECTOR"
"xsxexpqp %0,%1"
@@ -4072,9 +4226,9 @@
[(set_attr "type" "integer")])
;; VSX Scalar Extract Significand Quad-Precision
-(define_insn "xsxsigqp"
+(define_insn "xsxsigqp_<mode>"
[(set (match_operand:TI 0 "altivec_register_operand" "=v")
- (unspec:TI [(match_operand:KF 1 "altivec_register_operand" "v")]
+ (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
UNSPEC_VSX_SXSIG))]
"TARGET_P9_VECTOR"
"xsxsigqp %0,%1"
@@ -4090,20 +4244,21 @@
[(set_attr "type" "integer")])
;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
-(define_insn "xsiexpqpf"
- [(set (match_operand:KF 0 "altivec_register_operand" "=v")
- (unspec:KF [(match_operand:KF 1 "altivec_register_operand" "v")
- (match_operand:DI 2 "altivec_register_operand" "v")]
+(define_insn "xsiexpqpf_<mode>"
+ [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+ (unspec:IEEE128
+ [(match_operand:IEEE128 1 "altivec_register_operand" "v")
+ (match_operand:DI 2 "altivec_register_operand" "v")]
UNSPEC_VSX_SIEXPQP))]
"TARGET_P9_VECTOR"
"xsiexpqp %0,%1,%2"
[(set_attr "type" "vecmove")])
;; VSX Scalar Insert Exponent Quad-Precision
-(define_insn "xsiexpqp"
- [(set (match_operand:KF 0 "altivec_register_operand" "=v")
- (unspec:KF [(match_operand:TI 1 "altivec_register_operand" "v")
- (match_operand:DI 2 "altivec_register_operand" "v")]
+(define_insn "xsiexpqp_<mode>"
+ [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+ (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
+ (match_operand:DI 2 "altivec_register_operand" "v")]
UNSPEC_VSX_SIEXPQP))]
"TARGET_P9_VECTOR"
"xsiexpqp %0,%1,%2"
@@ -4162,11 +4317,11 @@
;; (Has side effect of setting the lt bit if operand 1 is negative,
;; setting the eq bit if any of the conditions tested by operand 2
;; are satisfied, and clearing the gt and unordered bits to zero.)
-(define_expand "xststdcqp"
+(define_expand "xststdcqp_<mode>"
[(set (match_dup 3)
(compare:CCFP
- (unspec:KF
- [(match_operand:KF 1 "altivec_register_operand" "v")
+ (unspec:IEEE128
+ [(match_operand:IEEE128 1 "altivec_register_operand" "v")
(match_operand:SI 2 "u7bit_cint_operand" "n")]
UNSPEC_VSX_STSTDC)
(const_int 0)))
@@ -4200,11 +4355,11 @@
})
;; The VSX Scalar Test Negative Quad-Precision
-(define_expand "xststdcnegqp"
+(define_expand "xststdcnegqp_<mode>"
[(set (match_dup 2)
(compare:CCFP
- (unspec:KF
- [(match_operand:KF 1 "altivec_register_operand" "v")
+ (unspec:IEEE128
+ [(match_operand:IEEE128 1 "altivec_register_operand" "v")
(const_int 0)]
UNSPEC_VSX_STSTDC)
(const_int 0)))
@@ -4234,11 +4389,12 @@
operands[3] = CONST0_RTX (SImode);
})
-(define_insn "*xststdcqp"
+(define_insn "*xststdcqp_<mode>"
[(set (match_operand:CCFP 0 "" "=y")
(compare:CCFP
- (unspec:KF [(match_operand:KF 1 "altivec_register_operand" "v")
- (match_operand:SI 2 "u7bit_cint_operand" "n")]
+ (unspec:IEEE128
+ [(match_operand:IEEE128 1 "altivec_register_operand" "v")
+ (match_operand:SI 2 "u7bit_cint_operand" "n")]
UNSPEC_VSX_STSTDC)
(const_int 0)))]
"TARGET_P9_VECTOR"
@@ -4335,6 +4491,149 @@
"vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
[(set_attr "type" "vecsimple")])
+;; Return first position of match between vectors
+(define_expand "first_match_index_<mode>"
+ [(match_operand:SI 0 "register_operand")
+ (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
+ (match_operand:VSX_EXTRACT_I 2 "register_operand")]
+ UNSPEC_VSX_FIRST_MATCH_INDEX)]
+ "TARGET_P9_VECTOR"
+{
+ int sh;
+
+ rtx cmp_result = gen_reg_rtx (<MODE>mode);
+ rtx not_result = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
+ operands[2]));
+ emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
+
+ sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
+
+ if (<MODE>mode == V16QImode)
+ emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
+ else
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
+ emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
+ }
+ DONE;
+})
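Note the shift count: GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2 happens to equal log2 of the element size for the 2- and 4-byte elements that reach the else branch (the 1-byte V16QI case is handled separately), so the arithmetic shift converts the byte count returned by vctzlsbb into an element index.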
+
+;; Return first position of match between vectors or end of string (EOS)
+(define_expand "first_match_or_eos_index_<mode>"
+ [(match_operand:SI 0 "register_operand")
+ (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
+ (match_operand:VSX_EXTRACT_I 2 "register_operand")]
+ UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
+ "TARGET_P9_VECTOR"
+{
+ int sh;
+ rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
+ rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
+ rtx cmpz_result = gen_reg_rtx (<MODE>mode);
+ rtx and_result = gen_reg_rtx (<MODE>mode);
+ rtx result = gen_reg_rtx (<MODE>mode);
+ rtx vzero = gen_reg_rtx (<MODE>mode);
+
+ /* Vector with zeros in elements that correspond to zeros in operands. */
+ emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
+ emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
+ emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
+ emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
+
+ /* Vector with ones in elements that do not match.  */
+ emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
+ operands[2]));
+
+ /* Create vector with ones in elements where there was a zero in one of
+ the source elements or where the elements match.  */
+ emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
+ sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
+
+ if (<MODE>mode == V16QImode)
+ emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
+ else
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_vctzlsbb_<mode> (tmp, result));
+ emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
+ }
+ DONE;
+})
+
+;; Return first position of mismatch between vectors
+(define_expand "first_mismatch_index_<mode>"
+ [(match_operand:SI 0 "register_operand")
+ (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
+ (match_operand:VSX_EXTRACT_I 2 "register_operand")]
+ UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
+ "TARGET_P9_VECTOR"
+{
+ int sh;
+ rtx cmp_result = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
+ operands[2]));
+ sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
+
+ if (<MODE>mode == V16QImode)
+ emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
+ else
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
+ emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
+ }
+ DONE;
+})
+
+;; Return first position of mismatch between vectors or end of string (EOS)
+(define_expand "first_mismatch_or_eos_index_<mode>"
+ [(match_operand:SI 0 "register_operand")
+ (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
+ (match_operand:VSX_EXTRACT_I 2 "register_operand")]
+ UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
+ "TARGET_P9_VECTOR"
+{
+ int sh;
+ rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
+ rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
+ rtx cmpz_result = gen_reg_rtx (<MODE>mode);
+ rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
+ rtx and_result = gen_reg_rtx (<MODE>mode);
+ rtx result = gen_reg_rtx (<MODE>mode);
+ rtx vzero = gen_reg_rtx (<MODE>mode);
+
+ /* Vector with zeros in elements that correspond to zeros in operands. */
+ emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
+
+ emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
+ emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
+ emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
+
+ /* Vector with ones in elements that match.  */
+ emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
+ operands[2]));
+ emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
+
+ /* Create vector with ones in elements where there was a zero in one of
+ the source elements or where the elements do not match.  */
+ emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
+ sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
+
+ if (<MODE>mode == V16QImode)
+ emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
+ else
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_vctzlsbb_<mode> (tmp, result));
+ emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
+ }
+ DONE;
+})
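As a scalar reference for what the four expanders above compute, here is a hedged C reading of the emitted RTL for byte elements (the helper names are made up for illustration; for halfword and word elements the byte index from vctzlsbb is additionally shifted down to an element index, as shown in the expanders). Note in particular that the non-EOS match variant, being built from the complement of vcmpnez, only reports matches of nonzero elements:

    #include <stdio.h>

    /* first_match_index: complement of vcmpnez, then count -- the first
       i where a[i] == b[i] and the elements are nonzero, else n.  */
    static int
    first_match_index (const unsigned char *a, const unsigned char *b, int n)
    {
      for (int i = 0; i < n; i++)
        if (a[i] == b[i] && a[i] != 0)
          return i;
      return n;
    }

    /* first_match_or_eos_index: a match or a zero element in either
       operand stops the scan.  */
    static int
    first_match_or_eos_index (const unsigned char *a, const unsigned char *b,
                              int n)
    {
      for (int i = 0; i < n; i++)
        if (a[i] == b[i] || a[i] == 0 || b[i] == 0)
          return i;
      return n;
    }

    /* first_mismatch_index: plain vcmpne, then count.  */
    static int
    first_mismatch_index (const unsigned char *a, const unsigned char *b,
                          int n)
    {
      for (int i = 0; i < n; i++)
        if (a[i] != b[i])
          return i;
      return n;
    }

    /* first_mismatch_or_eos_index: a mismatch or a zero element in
       either operand stops the scan.  */
    static int
    first_mismatch_or_eos_index (const unsigned char *a,
                                 const unsigned char *b, int n)
    {
      for (int i = 0; i < n; i++)
        if (a[i] != b[i] || a[i] == 0 || b[i] == 0)
          return i;
      return n;
    }

    int
    main (void)
    {
      unsigned char a[16] = "ridl", b[16] = "riddle";
      printf ("%d %d %d %d\n",
              first_match_index (a, b, 16),             /* 0 */
              first_match_or_eos_index (a, b, 16),      /* 0 */
              first_mismatch_index (a, b, 16),          /* 3 */
              first_mismatch_or_eos_index (a, b, 16));  /* 3 */
      return 0;
    }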
+
;; Load VSX Vector with Length
(define_expand "lxvl"
[(set (match_dup 3)
@@ -4441,12 +4740,12 @@
DONE;
})
-;; Vector Compare Not Equal Byte
+;; Vector Compare Not Equal Byte (specified as not+eq)
(define_insn "vcmpneb"
[(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
- (unspec:V16QI [(match_operand:V16QI 1 "altivec_register_operand" "v")
- (match_operand:V16QI 2 "altivec_register_operand" "v")]
- UNSPEC_VCMPNEB))]
+ (not:V16QI
+ (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
+ (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
"TARGET_P9_VECTOR"
"vcmpneb %0,%1,%2"
[(set_attr "type" "vecsimple")])
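Expressing vcmpneb as canonical (not (eq ...)) RTL instead of an unspec gives combine and the RTL simplifiers something they can reason about, for example allowing a following complement to cancel the not; vcmpneh and vcmpnew below are rewritten the same way.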
@@ -4462,12 +4761,12 @@
"vcmpnezb %0,%1,%2"
[(set_attr "type" "vecsimple")])
-;; Vector Compare Not Equal Half Word
+;; Vector Compare Not Equal Half Word (specified as not+eq)
(define_insn "vcmpneh"
[(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
- (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
- (match_operand:V8HI 2 "altivec_register_operand" "v")]
- UNSPEC_VCMPNEH))]
+ (not:V8HI
+ (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
+ (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
"TARGET_P9_VECTOR"
"vcmpneh %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -4482,13 +4781,12 @@
"vcmpnezh %0,%1,%2"
[(set_attr "type" "vecsimple")])
-;; Vector Compare Not Equal Word
+;; Vector Compare Not Equal Word (specified as not+eq)
(define_insn "vcmpnew"
[(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
- (unspec:V4SI
- [(match_operand:V4SI 1 "altivec_register_operand" "v")
- (match_operand:V4SI 2 "altivec_register_operand" "v")]
- UNSPEC_VCMPNEH))]
+ (not:V4SI
+ (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
+ (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
"TARGET_P9_VECTOR"
"vcmpnew %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -4514,10 +4812,10 @@
[(set_attr "type" "vecsimple")])
;; Vector Count Trailing Zero Least-Significant Bits Byte
-(define_insn "vctzlsbb"
+(define_insn "vctzlsbb_<mode>"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
- [(match_operand:V16QI 1 "altivec_register_operand" "v")]
+ [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
UNSPEC_VCTZLSBB))]
"TARGET_P9_VECTOR"
"vctzlsbb %0,%1"
@@ -4776,6 +5074,37 @@
"xxbrw %x0,%x1"
[(set_attr "type" "vecperm")])
+;; Swap all bytes in each element of vector
+(define_expand "revb_<mode>"
+ [(set (match_operand:VEC_REVB 0 "vsx_register_operand")
+ (bswap:VEC_REVB (match_operand:VEC_REVB 1 "vsx_register_operand")))]
+ ""
+{
+ if (TARGET_P9_VECTOR)
+ emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
+ else
+ {
+ /* Want to have the elements in reverse order relative
+ to the endian mode in use, i.e. in LE mode, put elements
+ in BE order. */
+ rtx sel = swap_endian_selector_for_mode (<MODE>mode);
+ emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
+ operands[1], sel));
+ }
+
+ DONE;
+})
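For example, revb_v2di expands to a single xxbrd on ISA 3.0 targets via the VSX_XXBR mapping, while on earlier VSX targets it falls back to a vperm whose control vector, supplied by swap_endian_selector_for_mode, reverses the bytes within each doubleword element.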
+
+;; Reversing bytes in vector char is just a NOP.
+(define_expand "revb_v16qi"
+ [(set (match_operand:V16QI 0 "vsx_register_operand")
+ (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
+ ""
+{
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
[(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")