diff options
author | uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-04-18 19:48:09 +0000 |
---|---|---|
committer | uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-04-18 19:48:09 +0000 |
commit | dcab66ecf79d6ab0a8cd2d67f4992c2d7090a805 (patch) | |
tree | 7808b3601bef4427b90a5eb67e5725f71fc0343a | |
parent | 89fa2e3c3bd3adcf47fb934d34f51d2dd2bf8b9f (diff) | |
download | gcc-dcab66ecf79d6ab0a8cd2d67f4992c2d7090a805.tar.gz |
* config/i386/i386.h (SSE_VEC_FLOAT_MODE_P): Remove.
(AVX_FLOAT_MODE_P): Ditto.
(AVX128_VEC_FLOAT_MODE_P): Ditto.
(AVX256_VEC_FLOAT_MODE_P): Ditto.
(AVX_VEC_FLOAT_MODE_P): Ditto.
* config/i386/i386.md (UNSPEC_MASKLOAD): Remove.
(UNSPEC_MASKSTORE): Ditto.
* config/i386/sse.md (<sse>_movmsk<ssemodesuffix><avxmodesuffix>):
Merge from <sse>_movmsk<ssemodesuffix> and
avx_movmsk<ssemodesuffix>256. Use VF mode iterator.
(*sse2_maskmovdqu): Merge with *sse2_maskmovdqu_rex64. Use P mode
iterator.
(avx_maskload<ssemodesuffix><avxmodesuffix>): New expander.
(avx_maskstore<ssemodesuffix><avxmodesuffix>): Ditto.
(*avx_maskmov<ssemodesuffix><avxmodesuffix>): New insn.
testsuite/ChangeLog:
* gcc.target/i386/sse2-maskmovdqu.c: New test.
* gcc.target/i386/avx-vmaskmovdqu.c: Ditto.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@172669 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 49 |
-rw-r--r-- | gcc/config/i386/i386.h | 16 |
-rw-r--r-- | gcc/config/i386/i386.md | 2 |
-rw-r--r-- | gcc/config/i386/sse.md | 83 |
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 |
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-vmaskmovdqu.c | 8 |
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse2-maskmovdqu.c | 44 |
7 files changed, 119 insertions, 88 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 718056a85f9..9a56f9a1ce5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2011-04-18 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/i386.h (SSE_VEC_FLOAT_MODE_P): Remove. + (AVX_FLOAT_MODE_P): Ditto. + (AVX128_VEC_FLOAT_MODE_P): Ditto. + (AVX256_VEC_FLOAT_MODE_P): Ditto. + (AVX_VEC_FLOAT_MODE_P): Ditto. + * config/i386/i386.md (UNSPEC_MASKLOAD): Remove. + (UNSPEC_MASKSTORE): Ditto. + * config/i386/sse.md (<sse>_movmsk<ssemodesuffix><avxmodesuffix>): + Merge from <sse>_movmsk<ssemodesuffix> and + avx_movmsk<ssemodesuffix>256. Use VF mode iterator. + (*sse2_maskmovdqu): Merge with *sse2_maskmovdqu_rex64. Use P mode + iterator. + (avx_maskload<ssemodesuffix><avxmodesuffix>): New expander. + (avx_maskstore<ssemodesuffix><avxmodesuffix>): Ditto. + (*avx_maskmov<ssemodesuffix><avxmodesuffix>): New insn. + 2011-04-18 Jan Hubicka <jh@suse.cz> * ipa-inline.c (inline_small_functions): Fix pasto in previous patch. @@ -10,8 +28,7 @@ (inline_small_functions): Move program size estimates here; actually process whole queue even when unit growth has been met. (to properly compute inline_failed reasons and for the - case unit size decrease.) Revisit comments on recursive - inlining. + case unit size decrease.) Revisit comments on recursive inlining. (ipa_inline): Remove unit summary code; first inline hot calls of functions called once, cold calls next. (order, nnodes): Remove unused variables. @@ -164,17 +181,16 @@ want_inline_small_function_p. (cgraph_decide_recursive_inlining): Rename to... (recursive_inlining): Use can_inline_edge_p and - want_inline_self_recursive_call_p; simplify and - remove no longer valid FIXME. + want_inline_self_recursive_call_p; simplify and remove no longer + valid FIXME. (cgraph_set_inline_failed): Remove. (add_new_edges_to_heap): Use can_inline_edge_p and want_inline_small_function_p. (cgraph_decide_inlining_of_small_functions): Rename to ... (inline_small_functions): ... 
this one; cleanup; use - can/want predicates; cleanup debug ouput; work edges - till fibheap is exhausted and do not stop once unit - growth is reached; remove later loop processing remaining - edges. + can/want predicates; cleanup debug ouput; work edges till fibheap + is exhausted and do not stop once unit growth is reached; remove + later loop processing remaining edges. (cgraph_flatten): Rename to ... (flatten_function): ... this one; use can_inline_edge_p and can_early_inline_edge_p predicates. @@ -183,8 +199,7 @@ inlining functions called once; simplify the pass. (cgraph_perform_always_inlining): Rename to ... (inline_always_inline_functions): ... this one; use - DECL_DISREGARD_INLINE_LIMITS; use can_inline_edge_p - predicate + DECL_DISREGARD_INLINE_LIMITS; use can_inline_edge_p predicate. (cgraph_decide_inlining_incrementally): Rename to ... (early_inline_small_functions): ... this one; simplify using new predicates; cleanup; make dumps prettier. @@ -290,8 +305,8 @@ (initialize_inline_failed): Move here from cgraph.c. * tree-sra.c: Include ipa-inline.h. (ipa_sra_preliminary_function_checks): Update. - * Makefile.in: (cgraph.o, cgraphbuild.o): Add dependency on - ipa-inline.h + * Makefile.in (cgraph.o, cgraphbuild.o): Add dependency on + ipa-inline.h. 2011-04-16 Uros Bizjak <ubizjak@gmail.com> @@ -618,7 +633,7 @@ Eric Weddington <eric.weddington@atmel.com> Georg-Johann Lay <avr@gjlay.de> - * config/avr/avr.c: ("insn-codes.h", "optabs.h", "langhooks.h"): + * config/avr/avr.c ("insn-codes.h", "optabs.h", "langhooks.h"): New Includes (avr_init_builtins, avr_expand_builtin, avr_expand_delay_cycles, avr_expand_unop_builtin, @@ -3256,7 +3271,7 @@ 2010-08-13 Vladimir Makarov <vmakarov@redhat.com> - * ira-build.c: (ira_create_object): Remove initialization of + * ira-build.c (ira_create_object): Remove initialization of OBJECT_PROFITABLE_HARD_REGS. Initialize OBJECT_ADD_DATA. 
(ira_create_allocno): Remove initialization of ALLOCNO_MEM_OPTIMIZED_DEST, ALLOCNO_MEM_OPTIMIZED_DEST_P, @@ -3382,7 +3397,7 @@ ira_object_conflict_iter_cond. (FOR_EACH_OBJECT_CONFLICT): Don't use ira_object_conflict_iter_next. - * ira-live.c: (process_single_reg_class_operands): Call + * ira-live.c (process_single_reg_class_operands): Call ira_init_register_move_cost_if_necessary. Use ira_register_move_cost instead of ira_get_register_move_cost. @@ -6172,7 +6187,7 @@ 2011-03-14 Andreas Tobler <andreast@fgznet.ch> - * config/rs6000/freebsd.h: (RELOCATABLE_NEEDS_FIXUP): Define in + * config/rs6000/freebsd.h (RELOCATABLE_NEEDS_FIXUP): Define in terms of target_flags_explicit. Adjust copyright year. * config.gcc: Add FreeBSD PowerPC soft-float libgcc bits. @@ -10408,7 +10423,7 @@ 2011-01-07 Jan Hubicka <jh@suse.cz> - * doc/invoke.texi: (-flto, -fuse-linker-plugin): Update defaults + * doc/invoke.texi (-flto, -fuse-linker-plugin): Update defaults and no longer claim that gold is required for linker plugin. * configure: Regenerate. * gcc.c (PLUGIN_COND): New macro. 
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 511429a727a..10fc1260b58 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1328,22 +1328,6 @@ enum reg_class #define SSE_FLOAT_MODE_P(MODE) \ ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode)) -#define SSE_VEC_FLOAT_MODE_P(MODE) \ - ((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode)) - -#define AVX_FLOAT_MODE_P(MODE) \ - (TARGET_AVX && ((MODE) == SFmode || (MODE) == DFmode)) - -#define AVX128_VEC_FLOAT_MODE_P(MODE) \ - (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode)) - -#define AVX256_VEC_FLOAT_MODE_P(MODE) \ - (TARGET_AVX && ((MODE) == V8SFmode || (MODE) == V4DFmode)) - -#define AVX_VEC_FLOAT_MODE_P(MODE) \ - (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode \ - || (MODE) == V8SFmode || (MODE) == V4DFmode)) - #define FMA4_VEC_FLOAT_MODE_P(MODE) \ (TARGET_FMA4 && ((MODE) == V4SFmode || (MODE) == V2DFmode \ || (MODE) == V8SFmode || (MODE) == V4DFmode)) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index f896bc9b903..0e0ceed4260 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -224,8 +224,6 @@ UNSPEC_VPERMIL UNSPEC_VPERMIL2 UNSPEC_VPERMIL2F128 - UNSPEC_MASKLOAD - UNSPEC_MASKSTORE UNSPEC_CAST UNSPEC_VTESTP UNSPEC_VCVTPH2PS diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 928bf784031..63da5dfe955 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -6893,23 +6893,12 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "avx_movmsk<ssemodesuffix>256" +(define_insn "<sse>_movmsk<ssemodesuffix><avxmodesuffix>" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI - [(match_operand:AVX256MODEF2P 1 "register_operand" "x")] + [(match_operand:VF 1 "register_operand" "x")] UNSPEC_MOVMSK))] - "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)" - "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "prefix" "vex") - 
(set_attr "mode" "<MODE>")]) - -(define_insn "<sse>_movmsk<ssemodesuffix>" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI - [(match_operand:SSEMODEF2P 1 "register_operand" "x")] - UNSPEC_MOVMSK))] - "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" + "" "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "maybe_vex") @@ -6935,35 +6924,18 @@ "TARGET_SSE2") (define_insn "*sse2_maskmovdqu" - [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D")) - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") - (match_operand:V16QI 2 "register_operand" "x") - (mem:V16QI (match_dup 0))] - UNSPEC_MASKMOV))] - "TARGET_SSE2 && !TARGET_64BIT" - ;; @@@ check ordering of operands in intel/nonintel syntax - "%vmaskmovdqu\t{%2, %1|%1, %2}" - [(set_attr "type" "ssemov") - (set_attr "prefix_data16" "1") - ;; The implicit %rdi operand confuses default length_vex computation. - (set_attr "length_vex" "3") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) - -(define_insn "*sse2_maskmovdqu_rex64" - [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D")) + [(set (mem:V16QI (match_operand:P 0 "register_operand" "D")) (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") (match_operand:V16QI 2 "register_operand" "x") (mem:V16QI (match_dup 0))] UNSPEC_MASKMOV))] - "TARGET_SSE2 && TARGET_64BIT" - ;; @@@ check ordering of operands in intel/nonintel syntax + "TARGET_SSE2" "%vmaskmovdqu\t{%2, %1|%1, %2}" [(set_attr "type" "ssemov") (set_attr "prefix_data16" "1") ;; The implicit %rdi operand confuses default length_vex computation. (set (attr "length_vex") - (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 
3 + 1 : 2 + 1"))) + (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))"))) (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -10349,28 +10321,33 @@ (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) -(define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "memory_operand" "m") - (match_operand:<avxpermvecmode> 2 "register_operand" "x") +(define_expand "avx_maskload<ssemodesuffix><avxmodesuffix>" + [(set (match_operand:VF 0 "register_operand" "") + (unspec:VF + [(match_operand:<avxpermvecmode> 2 "register_operand" "") + (match_operand:VF 1 "memory_operand" "") (match_dup 0)] - UNSPEC_MASKLOAD))] - "TARGET_AVX" - "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}" - [(set_attr "type" "sselog1") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "<MODE>")]) + UNSPEC_MASKMOV))] + "TARGET_AVX") -(define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>" - [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m") - (unspec:AVXMODEF2P - [(match_operand:<avxpermvecmode> 1 "register_operand" "x") - (match_operand:AVXMODEF2P 2 "register_operand" "x") +(define_expand "avx_maskstore<ssemodesuffix><avxmodesuffix>" + [(set (match_operand:VF 0 "memory_operand" "") + (unspec:VF + [(match_operand:<avxpermvecmode> 1 "register_operand" "") + (match_operand:VF 2 "register_operand" "") (match_dup 0)] - UNSPEC_MASKSTORE))] - "TARGET_AVX" + UNSPEC_MASKMOV))] + "TARGET_AVX") + +(define_insn "*avx_maskmov<ssemodesuffix><avxmodesuffix>" + [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m") + (unspec:VF + [(match_operand:<avxpermvecmode> 1 "register_operand" "x,x") + (match_operand:VF 2 "nonimmediate_operand" "m,x") + (match_dup 0)] + UNSPEC_MASKMOV))] + "TARGET_AVX + && (REG_P (operands[0]) == MEM_P (operands[2]))" "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") (set_attr "prefix_extra" "1") diff 
--git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 58bf81ac2a8..5295ec5809f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2011-04-18 Uros Bizjak <ubizjak@gmail.com> + + * gcc.target/i386/sse2-maskmovdqu.c: New test. + * gcc.target/i386/avx-vmaskmovdqu.c: Ditto. + 2011-04-18 Tobias Burnus <burnus@net-b.de> PR fortran/18918 diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovdqu.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovdqu.c new file mode 100644 index 00000000000..24b5bba77d5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovdqu.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse2-maskmovdqu.c" diff --git a/gcc/testsuite/gcc.target/i386/sse2-maskmovdqu.c b/gcc/testsuite/gcc.target/i386/sse2-maskmovdqu.c new file mode 100644 index 00000000000..b401c85b3bb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-maskmovdqu.c @@ -0,0 +1,44 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-options "-O2 -msse2" } */ + +#ifndef CHECK_H +#define CHECK_H "sse2-check.h" +#endif + +#ifndef TEST +#define TEST sse2_test +#endif + +#include CHECK_H + +#include <emmintrin.h> + +#ifndef MASK +#define MASK 0x7986 +#endif + +#define mask_v(pos) (((MASK & (0x1 << (pos))) >> (pos)) << 7) + +void static +TEST (void) +{ + __m128i src, mask; + char s[16] = { 1,-2,3,-4,5,-6,7,-8,9,-10,11,-12,13,-14,15,-16 }; + char m[16]; + + char u[20] = { 0 }; + int i; + + for (i = 0; i < 16; i++) + m[i] = mask_v (i); + + src = _mm_loadu_si128 ((__m128i *)s); + mask = _mm_loadu_si128 ((__m128i *)m); + + _mm_maskmoveu_si128 (src, mask, u+3); + + for (i = 0; i < 16; i++) + if (u[i+3] != (m[i] ? s[i] : 0)) + abort (); +} |