diff options
author | jules <jules@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-07-30 13:48:43 +0000 |
---|---|---|
committer | jules <jules@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-07-30 13:48:43 +0000 |
commit | 4c0b79b4b59ab8bf642094b120fb96233dde2cbe (patch) | |
tree | c8819622d1848dc3712e103c9c91945b13243dd6 | |
parent | d17a14689eb22a93cf4ca95ad6c80439f0320a90 (diff) | |
download | gcc-4c0b79b4b59ab8bf642094b120fb96233dde2cbe.tar.gz |
gcc/
* config/arm/neon.md (V_ext): New mode attribute.
(neon_vget_lane<mode>): Replace with define_expand.
(neon_vget_lane<mode>_sext_internal)
(neon_vget_lane<mode>_zext_internal): New define_insns for double
and quad precision vectors.
(neon_vget_lanedi): Add bounds check. Remove dead comment.
* config/arm/neon.ml (get_lane): Make 32-bit get-lane intrinsics
have typeless 32-bit result.
gcc/testsuite/
* gcc.target/arm/neon/*.c: Regenerate.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@127061 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 11 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 131 | ||||
-rw-r--r-- | gcc/config/arm/neon.ml | 2 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c | 2 |
10 files changed, 118 insertions, 42 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 73cc1e46367..e8aff1eeef4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2007-07-30 Julian Brown <julian@codesourcery.com> + + * config/arm/neon.md (V_ext): New mode attribute. + (neon_vget_lane<mode>): Replace with define_expand. + (neon_vget_lane<mode>_sext_internal) + (neon_vget_lane<mode>_zext_internal): New define_insns for double + and quad precision vectors. + (neon_vget_lanedi): Add bounds check. Remove dead comment. + * config/arm/neon.ml (get_lane): Make 32-bit get-lane intrinsics + have typeless 32-bit result. + 2007-07-30 Andrew Pinski <andrew_pinski@playstation.sony.com> PR tree-opt/32527 diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 06b9b3c762e..1b09ead3af4 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -266,6 +266,14 @@ (V2SF "SF") (V4SF "SF") (DI "DI") (V2DI "DI")]) +;; Element modes for vector extraction, padded up to register size. + +(define_mode_attr V_ext [(V8QI "SI") (V16QI "SI") + (V4HI "SI") (V8HI "SI") + (V2SI "SI") (V4SI "SI") + (V2SF "SF") (V4SF "SF") + (DI "DI") (V2DI "DI")]) + ;; Mode of pair of elements for each vector mode, to define transfer ;; size for structure lane/dup loads and stores. (define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") @@ -2385,63 +2393,71 @@ DONE; }) -;; FIXME: 32-bit element sizes are a bit funky (should be output as .32 not -;; .u32), but the assembler should cope with that. - -(define_insn "neon_vget_lane<mode>" - [(set (match_operand:<V_elem> 0 "s_register_operand" "=r") - (unspec:<V_elem> [(match_operand:VD 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VGET_LANE))] +(define_insn "neon_vget_lane<mode>_sext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extend:SI + (vec_select:<V_elem> + (match_operand:VD 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_NEON" -{ - neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode)); - return "vmov%?.%t3%#<V_sz_elem>\t%0, %P1[%c2]"; -} + "vmov%?.s<V_sz_elem>\t%0, %P1[%c2]" [(set_attr "predicable" "yes") (set_attr "neon_type" "neon_bp_simple")] ) -; Operand 2 (lane number) is ignored because we can only extract the zeroth lane -; with this insn. Operand 3 (info word) is ignored because it does nothing -; useful with 64-bit elements. +(define_insn "neon_vget_lane<mode>_zext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (vec_select:<V_elem> + (match_operand:VD 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" + "vmov%?.u<V_sz_elem>\t%0, %P1[%c2]" + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) -(define_insn "neon_vget_lanedi" - [(set (match_operand:DI 0 "s_register_operand" "=r") - (unspec:DI [(match_operand:DI 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VGET_LANE))] +(define_insn "neon_vget_lane<mode>_sext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extend:SI + (vec_select:<V_elem> + (match_operand:VQ 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_NEON" { - neon_lane_bounds (operands[2], 0, 1); - return "vmov%?\t%Q0, %R0, %P1 @ di"; + rtx ops[3]; + int regno = REGNO (operands[1]); + unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; + unsigned int elt = INTVAL (operands[2]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); + ops[2] = GEN_INT (elt % halfelts); + output_asm_insn ("vmov%?.s<V_sz_elem>\t%0, %P1[%c2]", ops); + + return ""; } [(set_attr "predicable" "yes") (set_attr "neon_type" "neon_bp_simple")] ) -(define_insn "neon_vget_lane<mode>" - [(set (match_operand:<V_elem> 0 "s_register_operand" "=r") - (unspec:<V_elem> [(match_operand:VQ 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VGET_LANE))] +(define_insn "neon_vget_lane<mode>_zext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (vec_select:<V_elem> + (match_operand:VQ 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_NEON" { - rtx ops[4]; + rtx ops[3]; int regno = REGNO (operands[1]); unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; unsigned int elt = INTVAL (operands[2]); - neon_lane_bounds (operands[2], 0, halfelts * 2); - ops[0] = operands[0]; ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); ops[2] = GEN_INT (elt % halfelts); - ops[3] = operands[3]; - output_asm_insn ("vmov%?.%t3%#<V_sz_elem>\t%0, %P1[%c2]", ops); + output_asm_insn ("vmov%?.u<V_sz_elem>\t%0, %P1[%c2]", ops); return ""; } @@ -2449,6 +2465,51 @@ (set_attr "neon_type" "neon_bp_simple")] ) +(define_expand "neon_vget_lane<mode>" + [(match_operand:<V_ext> 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + HOST_WIDE_INT magic = INTVAL (operands[3]); + rtx insn; + + neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode)); + + if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32) + insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]); + else + { + if ((magic & 1) != 0) + insn = gen_neon_vget_lane<mode>_sext_internal (operands[0], operands[1], + operands[2]); + else + insn = gen_neon_vget_lane<mode>_zext_internal (operands[0], operands[1], + operands[2]); + } + emit_insn (insn); + DONE; +}) + +; Operand 3 (info word) is ignored because it does nothing useful with 64-bit +; elements. + +(define_insn "neon_vget_lanedi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VGET_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, 1); + return "vmov%?\t%Q0, %R0, %P1 @ di"; +} + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) + (define_insn "neon_vget_lanev2di" [(set (match_operand:DI 0 "s_register_operand" "=r") (unspec:DI [(match_operand:V2DI 1 "s_register_operand" "w") diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml index 39807a48d37..a68c64539da 100644 --- a/gcc/config/arm/neon.ml +++ b/gcc/config/arm/neon.ml @@ -611,7 +611,7 @@ let shift_insert shape elt = let get_lane shape elt = let vtype = type_for_elt shape elt in Arity2 (vtype 0, vtype 1, vtype 2), - (match elt with P8 -> U8 | P16 -> U16 | x -> x) + (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x) let set_lane shape elt = let vtype = type_for_elt shape elt in diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f60df69a862..b7681c36194 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2007-07-30 Julian Brown <julian@codesourcery.com> + + * gcc.target/arm/neon/v*.c: Regenerate. + 2007-07-30 Paolo Carlini <pcarlini@suse.de> PR c++/32108 diff --git a/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c b/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c index aa4dad6ecb8..4d0561b1ed0 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c +++ b/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c @@ -15,5 +15,5 @@ void test_vgetQ_lanef32 (void) out_float32_t = vgetq_lane_f32 (arg0_float32x4_t, 1); } -/* { dg-final { scan-assembler "vmov\.f32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c b/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c index 551fd28dd37..0f87fdb3b16 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c +++ b/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c @@ -15,5 +15,5 @@ void test_vgetQ_lanes32 (void) out_int32_t = vgetq_lane_s32 (arg0_int32x4_t, 1); } -/* { dg-final { scan-assembler "vmov\.s32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c b/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c index e9191726620..5a9344a808a 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c +++ b/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c @@ -15,5 +15,5 @@ void test_vgetQ_laneu32 (void) out_uint32_t = vgetq_lane_u32 (arg0_uint32x4_t, 1); } -/* { dg-final { scan-assembler "vmov\.u32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c b/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c index 3f0a02798a4..e469c6ec40b 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c +++ b/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c @@ -15,5 +15,5 @@ void test_vget_lanef32 (void) out_float32_t = vget_lane_f32 (arg0_float32x2_t, 1); } -/* { dg-final { scan-assembler "vmov\.f32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c b/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c index 441b623e834..50b8f40cb5a 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c +++ b/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c @@ -15,5 +15,5 @@ void test_vget_lanes32 (void) out_int32_t = vget_lane_s32 (arg0_int32x2_t, 1); } -/* { dg-final { scan-assembler "vmov\.s32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c b/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c index 13d33801808..fd09ad4d0d2 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c +++ b/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c @@ -15,5 +15,5 @@ void test_vget_laneu32 (void) out_uint32_t = vget_lane_u32 (arg0_uint32x2_t, 1); } -/* { dg-final { scan-assembler "vmov\.u32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ |