summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: jules <jules@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-07-30 13:48:43 +0000
committer: jules <jules@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-07-30 13:48:43 +0000
commit: 4c0b79b4b59ab8bf642094b120fb96233dde2cbe (patch)
tree: c8819622d1848dc3712e103c9c91945b13243dd6
parent: d17a14689eb22a93cf4ca95ad6c80439f0320a90 (diff)
download: gcc-4c0b79b4b59ab8bf642094b120fb96233dde2cbe.tar.gz
gcc/
* config/arm/neon.md (V_ext): New mode attribute.
(neon_vget_lane<mode>): Replace with define_expand.
(neon_vget_lane<mode>_sext_internal)
(neon_vget_lane<mode>_zext_internal): New define_insns for double
and quad precision vectors.
(neon_vget_lanedi): Add bounds check. Remove dead comment.
* config/arm/neon.ml (get_lane): Make 32-bit get-lane intrinsics
have typeless 32-bit result.

gcc/testsuite/
* gcc.target/arm/neon/*.c: Regenerate.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@127061 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog 11
-rw-r--r-- gcc/config/arm/neon.md 131
-rw-r--r-- gcc/config/arm/neon.ml 2
-rw-r--r-- gcc/testsuite/ChangeLog 4
-rw-r--r-- gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c 2
-rw-r--r-- gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c 2
-rw-r--r-- gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c 2
-rw-r--r-- gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c 2
-rw-r--r-- gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c 2
-rw-r--r-- gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c 2
10 files changed, 118 insertions, 42 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 73cc1e46367..e8aff1eeef4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2007-07-30 Julian Brown <julian@codesourcery.com>
+
+ * config/arm/neon.md (V_ext): New mode attribute.
+ (neon_vget_lane<mode>): Replace with define_expand.
+ (neon_vget_lane<mode>_sext_internal)
+ (neon_vget_lane<mode>_zext_internal): New define_insns for double
+ and quad precision vectors.
+ (neon_vget_lanedi): Add bounds check. Remove dead comment.
+ * config/arm/neon.ml (get_lane): Make 32-bit get-lane intrinsics
+ have typeless 32-bit result.
+
2007-07-30 Andrew Pinski <andrew_pinski@playstation.sony.com>
PR tree-opt/32527
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 06b9b3c762e..1b09ead3af4 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -266,6 +266,14 @@
(V2SF "SF") (V4SF "SF")
(DI "DI") (V2DI "DI")])
+;; Element modes for vector extraction, padded up to register size.
+
+(define_mode_attr V_ext [(V8QI "SI") (V16QI "SI")
+ (V4HI "SI") (V8HI "SI")
+ (V2SI "SI") (V4SI "SI")
+ (V2SF "SF") (V4SF "SF")
+ (DI "DI") (V2DI "DI")])
+
;; Mode of pair of elements for each vector mode, to define transfer
;; size for structure lane/dup loads and stores.
(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI")
@@ -2385,63 +2393,71 @@
DONE;
})
-;; FIXME: 32-bit element sizes are a bit funky (should be output as .32 not
-;; .u32), but the assembler should cope with that.
-
-(define_insn "neon_vget_lane<mode>"
- [(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
- (unspec:<V_elem> [(match_operand:VD 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")
- (match_operand:SI 3 "immediate_operand" "i")]
- UNSPEC_VGET_LANE))]
+(define_insn "neon_vget_lane<mode>_sext_internal"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (sign_extend:SI
+ (vec_select:<V_elem>
+ (match_operand:VD 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_NEON"
-{
- neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
- return "vmov%?.%t3%#<V_sz_elem>\t%0, %P1[%c2]";
-}
+ "vmov%?.s<V_sz_elem>\t%0, %P1[%c2]"
[(set_attr "predicable" "yes")
(set_attr "neon_type" "neon_bp_simple")]
)
-; Operand 2 (lane number) is ignored because we can only extract the zeroth lane
-; with this insn. Operand 3 (info word) is ignored because it does nothing
-; useful with 64-bit elements.
+(define_insn "neon_vget_lane<mode>_zext_internal"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:<V_elem>
+ (match_operand:VD 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_NEON"
+ "vmov%?.u<V_sz_elem>\t%0, %P1[%c2]"
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
-(define_insn "neon_vget_lanedi"
- [(set (match_operand:DI 0 "s_register_operand" "=r")
- (unspec:DI [(match_operand:DI 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")
- (match_operand:SI 3 "immediate_operand" "i")]
- UNSPEC_VGET_LANE))]
+(define_insn "neon_vget_lane<mode>_sext_internal"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (sign_extend:SI
+ (vec_select:<V_elem>
+ (match_operand:VQ 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[2], 0, 1);
- return "vmov%?\t%Q0, %R0, %P1 @ di";
+ rtx ops[3];
+ int regno = REGNO (operands[1]);
+ unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
+ unsigned int elt = INTVAL (operands[2]);
+
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
+ ops[2] = GEN_INT (elt % halfelts);
+ output_asm_insn ("vmov%?.s<V_sz_elem>\t%0, %P1[%c2]", ops);
+
+ return "";
}
[(set_attr "predicable" "yes")
(set_attr "neon_type" "neon_bp_simple")]
)
-(define_insn "neon_vget_lane<mode>"
- [(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
- (unspec:<V_elem> [(match_operand:VQ 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")
- (match_operand:SI 3 "immediate_operand" "i")]
- UNSPEC_VGET_LANE))]
+(define_insn "neon_vget_lane<mode>_zext_internal"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:<V_elem>
+ (match_operand:VQ 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_NEON"
{
- rtx ops[4];
+ rtx ops[3];
int regno = REGNO (operands[1]);
unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
unsigned int elt = INTVAL (operands[2]);
- neon_lane_bounds (operands[2], 0, halfelts * 2);
-
ops[0] = operands[0];
ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
ops[2] = GEN_INT (elt % halfelts);
- ops[3] = operands[3];
- output_asm_insn ("vmov%?.%t3%#<V_sz_elem>\t%0, %P1[%c2]", ops);
+ output_asm_insn ("vmov%?.u<V_sz_elem>\t%0, %P1[%c2]", ops);
return "";
}
@@ -2449,6 +2465,51 @@
(set_attr "neon_type" "neon_bp_simple")]
)
+(define_expand "neon_vget_lane<mode>"
+ [(match_operand:<V_ext> 0 "s_register_operand" "")
+ (match_operand:VDQW 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT magic = INTVAL (operands[3]);
+ rtx insn;
+
+ neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
+
+ if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
+ insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);
+ else
+ {
+ if ((magic & 1) != 0)
+ insn = gen_neon_vget_lane<mode>_sext_internal (operands[0], operands[1],
+ operands[2]);
+ else
+ insn = gen_neon_vget_lane<mode>_zext_internal (operands[0], operands[1],
+ operands[2]);
+ }
+ emit_insn (insn);
+ DONE;
+})
+
+; Operand 3 (info word) is ignored because it does nothing useful with 64-bit
+; elements.
+
+(define_insn "neon_vget_lanedi"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VGET_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[2], 0, 1);
+ return "vmov%?\t%Q0, %R0, %P1 @ di";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
+
(define_insn "neon_vget_lanev2di"
[(set (match_operand:DI 0 "s_register_operand" "=r")
(unspec:DI [(match_operand:V2DI 1 "s_register_operand" "w")
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
index 39807a48d37..a68c64539da 100644
--- a/gcc/config/arm/neon.ml
+++ b/gcc/config/arm/neon.ml
@@ -611,7 +611,7 @@ let shift_insert shape elt =
let get_lane shape elt =
let vtype = type_for_elt shape elt in
Arity2 (vtype 0, vtype 1, vtype 2),
- (match elt with P8 -> U8 | P16 -> U16 | x -> x)
+ (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x)
let set_lane shape elt =
let vtype = type_for_elt shape elt in
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f60df69a862..b7681c36194 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2007-07-30 Julian Brown <julian@codesourcery.com>
+
+ * gcc.target/arm/neon/v*.c: Regenerate.
+
2007-07-30 Paolo Carlini <pcarlini@suse.de>
PR c++/32108
diff --git a/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c b/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c
index aa4dad6ecb8..4d0561b1ed0 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c
@@ -15,5 +15,5 @@ void test_vgetQ_lanef32 (void)
out_float32_t = vgetq_lane_f32 (arg0_float32x4_t, 1);
}
-/* { dg-final { scan-assembler "vmov\.f32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c b/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c
index 551fd28dd37..0f87fdb3b16 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c
@@ -15,5 +15,5 @@ void test_vgetQ_lanes32 (void)
out_int32_t = vgetq_lane_s32 (arg0_int32x4_t, 1);
}
-/* { dg-final { scan-assembler "vmov\.s32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c b/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c
index e9191726620..5a9344a808a 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c
@@ -15,5 +15,5 @@ void test_vgetQ_laneu32 (void)
out_uint32_t = vgetq_lane_u32 (arg0_uint32x4_t, 1);
}
-/* { dg-final { scan-assembler "vmov\.u32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c b/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c
index 3f0a02798a4..e469c6ec40b 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c
@@ -15,5 +15,5 @@ void test_vget_lanef32 (void)
out_float32_t = vget_lane_f32 (arg0_float32x2_t, 1);
}
-/* { dg-final { scan-assembler "vmov\.f32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c b/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c
index 441b623e834..50b8f40cb5a 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c
@@ -15,5 +15,5 @@ void test_vget_lanes32 (void)
out_int32_t = vget_lane_s32 (arg0_int32x2_t, 1);
}
-/* { dg-final { scan-assembler "vmov\.s32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c b/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c
index 13d33801808..fd09ad4d0d2 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c
@@ -15,5 +15,5 @@ void test_vget_laneu32 (void)
out_uint32_t = vget_lane_u32 (arg0_uint32x2_t, 1);
}
-/* { dg-final { scan-assembler "vmov\.u32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */