diff options
author | uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-10-15 21:34:24 +0000 |
---|---|---|
committer | uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-10-15 21:34:24 +0000 |
commit | e6aadd1824b00ffa9f54544d9136c66e67008180 (patch) | |
tree | a7a9719301269da3137e26ab2c249faeb1661832 | |
parent | cc5aea0c4435a310e34b6abeed58591099ebc4b2 (diff) | |
download | gcc-e6aadd1824b00ffa9f54544d9136c66e67008180.tar.gz |
Backport from mainline
2012-10-15 Uros Bizjak <ubizjak@gmail.com>
* config/i386/sse.md (UNSPEC_MOVU): Remove.
(UNSPEC_LOADU): New.
(UNSPEC_STOREU): Ditto.
(<sse>_movu<ssemodesuffix><avxsizesuffix>): Split to ...
(<sse>_loadu<ssemodesuffix><avxsizesuffix>): ... this and ...
(<sse>_storeu<ssemodesuffix><avxsizesuffix>) ... this.
(<sse2>_movdqu<avxsizesuffix>): Split to ...
(<sse2>_loaddqu<avxsizesuffix>): ... this and ...
(<sse2>_storedqu<avxsizesuffix>): ... this.
(*sse4_2_pcmpestr_unaligned): Update.
(*sse4_2_pcmpistr_unaligned): Ditto.
* config/i386/i386.c (ix86_avx256_split_vector_move_misalign): Use
gen_avx_load{dqu,ups,upd}256 to load from unaligned memory and
gen_avx_store{dqu,ups,upd}256 to store to unaligned memory.
(ix86_expand_vector_move_misalign): Use gen_sse_loadups or
gen_sse2_load{dqu,upd} to load from unaligned memory and
gen_sse_loadups or gen_sse2_store{dqu,upd}256 to store to
unaligned memory.
(struct builtin_description bdesc_spec) <IX86_BUILTIN_LOADUPS>:
Use CODE_FOR_sse_loadups.
<IX86_BUILTIN_LOADUPD>: Use CODE_FOR_sse2_loadupd.
<IX86_BUILTIN_LOADDQU>: Use CODE_FOR_sse2_loaddqu.
<IX86_BUILTIN_STOREUPS>: Use CODE_FOR_sse_storeups.
<IX86_BUILTIN_STOREUPD>: Use CODE_FOR_sse2_storeupd.
<IX86_BUILTIN_STOREDQU>: Use CODE_FOR_sse2_storedqu.
<IX86_BUILTIN_LOADUPS256>: Use CODE_FOR_avx_loadups256.
<IX86_BUILTIN_LOADUPD256>: Use CODE_FOR_avx_loadupd256.
<IX86_BUILTIN_LOADDQU256>: Use CODE_FOR_avx_loaddqu256.
<IX86_BUILTIN_STOREUPS256>: Use CODE_FOR_avx_storeups256.
<IX86_BUILTIN_STOREUPD256>: Use CODE_FOR_avx_storeupd256.
<IX86_BUILTIN_STOREDQU256>: Use CODE_FOR_avx_storedqu256.
testsuite/ChangeLog:
Backport from mainline
2012-10-15 Uros Bizjak <ubizjak@gmail.com>
* gcc.target/i386/avx256-unaligned-load-1.c: Update asm scan patterns.
* gcc.target/i386/avx256-unaligned-load-2.c: Ditto.
* gcc.target/i386/avx256-unaligned-load-3.c: Ditto.
* gcc.target/i386/avx256-unaligned-load-4.c: Ditto.
* gcc.target/i386/avx256-unaligned-store-1.c: Ditto.
* gcc.target/i386/avx256-unaligned-store-2.c: Ditto.
* gcc.target/i386/avx256-unaligned-store-3.c: Ditto.
* gcc.target/i386/avx256-unaligned-store-4.c: Ditto.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-4_7-branch@192477 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 59 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 134 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 51 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c | 4 |
12 files changed, 207 insertions, 77 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index acac5028ac8..90ff65a3252 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,41 @@ +2012-09-15 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2012-10-15 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/sse.md (UNSPEC_MOVU): Remove. + (UNSPEC_LOADU): New. + (UNSPEC_STOREU): Ditto. + (<sse>_movu<ssemodesuffix><avxsizesuffix>): Split to ... + (<sse>_loadu<ssemodesuffix><avxsizesuffix>): ... this and ... + (<sse>_storeu<ssemodesuffix><avxsizesuffix>) ... this. + (<sse2>_movdqu<avxsizesuffix>): Split to ... + (<sse2>_loaddqu<avxsizesuffix>): ... this and ... + (<sse2>_storedqu<avxsizesuffix>): ... this. + (*sse4_2_pcmpestr_unaligned): Update. + (*sse4_2_pcmpistr_unaligned): Ditto. + + * config/i386/i386.c (ix86_avx256_split_vector_move_misalign): Use + gen_avx_load{dqu,ups,upd}256 to load from unaligned memory and + gen_avx_store{dqu,ups,upd}256 to store to unaligned memory. + (ix86_expand_vector_move_misalign): Use gen_sse_loadups or + gen_sse2_load{dqu,upd} to load from unaligned memory and + gen_sse_loadups or gen_sse2_store{dqu,upd}256 to store to + unaligned memory. + (struct builtin_description bdesc_spec) <IX86_BUILTIN_LOADUPS>: + Use CODE_FOR_sse_loadups. + <IX86_BUILTIN_LOADUPD>: Use CODE_FOR_sse2_loadupd. + <IX86_BUILTIN_LOADDQU>: Use CODE_FOR_sse2_loaddqu. + <IX86_BUILTIN_STOREUPS>: Use CODE_FOR_sse_storeups. + <IX86_BUILTIN_STOREUPD>: Use CODE_FOR_sse2_storeupd. + <IX86_BUILTIN_STOREDQU>: Use CODE_FOR_sse2_storedqu. + <IX86_BUILTIN_LOADUPS256>: Use CODE_FOR_avx_loadups256. + <IX86_BUILTIN_LOADUPD256>: Use CODE_FOR_avx_loadupd256. + <IX86_BUILTIN_LOADDQU256>: Use CODE_FOR_avx_loaddqu256. + <IX86_BUILTIN_STOREUPS256>: Use CODE_FOR_avx_storeups256. + <IX86_BUILTIN_STOREUPD256>: Use CODE_FOR_avx_storeupd256. + <IX86_BUILTIN_STOREDQU256>: Use CODE_FOR_avx_storedqu256. 
+ 2012-10-15 Steven Bosscher <steven@gcc.gnu.org> Backport from trunk (r190222): @@ -25,7 +63,7 @@ * config.gcc (*-*-openbsd4.[3-9]|*-*-openbsd[5-9]*): Set default_use_cxa_atexit to yes. - + 2012-10-05 John David Anglin <dave.anglin@nrc-cnrc.gc.ca> * config/pa/pa.md: Adjust unamed HImode add insn pattern. @@ -42,7 +80,7 @@ Backported from mainline 2012-10-03 Andrew W. Nosenko <andrew.w.nosenko@gmail.com> - * config/i386/driver-i386.c (host_detect_local_cpu): Fix logic + * config/i386/driver-i386.c (host_detect_local_cpu): Fix logic in SSE and YMM state support check for -march=native. 2012-10-03 Alexandre Oliva <aoliva@redhat.com> @@ -107,8 +145,7 @@ PR target/54703 * simplify-rtx.c (simplify_binary_operation_1): Perform - (x - (x & y)) -> (x & ~y) optimization only for integral - modes. + (x - (x & y)) -> (x & ~y) optimization only for integral modes. 2012-09-24 Eric Botcazou <ebotcazou@adacore.com> @@ -186,12 +223,12 @@ Backport from mainline 2012-09-07 Andi Kleen <ak@linux.intel.com> - * gcc/lto-streamer.h (res_pair): Add. - (lto_file_decl_data): Replace resolutions with respairs. - Add max_index. - * gcc/lto/lto.c (lto_resolution_read): Remove max_index. Add rp. - Initialize respairs. - (lto_file_finalize): Set up resolutions vector lazily from respairs. + * gcc/lto-streamer.h (res_pair): Add. + (lto_file_decl_data): Replace resolutions with respairs. + Add max_index. + * gcc/lto/lto.c (lto_resolution_read): Remove max_index. Add rp. + Initialize respairs. + (lto_file_finalize): Set up resolutions vector lazily from respairs. 2012-09-14 Walter Lee <walt@tilera.com> @@ -257,7 +294,7 @@ 2012-09-12 Christian Bruel <christian.bruel@st.com> - * config/sh/newlib.h (NO_IMPLICIT_EXTERN_C): Define. + * config/sh/newlib.h (NO_IMPLICIT_EXTERN_C): Define. 
2012-09-12 Jakub Jelinek <jakub@redhat.com> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c2c6cd02c7f..9ebdeabfe87 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -15708,7 +15708,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) { rtx m; rtx (*extract) (rtx, rtx, rtx); - rtx (*move_unaligned) (rtx, rtx); + rtx (*load_unaligned) (rtx, rtx); + rtx (*store_unaligned) (rtx, rtx); enum machine_mode mode; switch (GET_MODE (op0)) @@ -15717,39 +15718,52 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) gcc_unreachable (); case V32QImode: extract = gen_avx_vextractf128v32qi; - move_unaligned = gen_avx_movdqu256; + load_unaligned = gen_avx_loaddqu256; + store_unaligned = gen_avx_storedqu256; mode = V16QImode; break; case V8SFmode: extract = gen_avx_vextractf128v8sf; - move_unaligned = gen_avx_movups256; + load_unaligned = gen_avx_loadups256; + store_unaligned = gen_avx_storeups256; mode = V4SFmode; break; case V4DFmode: extract = gen_avx_vextractf128v4df; - move_unaligned = gen_avx_movupd256; + load_unaligned = gen_avx_loadupd256; + store_unaligned = gen_avx_storeupd256; mode = V2DFmode; break; } - if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD) + if (MEM_P (op1)) { - rtx r = gen_reg_rtx (mode); - m = adjust_address (op1, mode, 0); - emit_move_insn (r, m); - m = adjust_address (op1, mode, 16); - r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m); - emit_move_insn (op0, r); + if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD) + { + rtx r = gen_reg_rtx (mode); + m = adjust_address (op1, mode, 0); + emit_move_insn (r, m); + m = adjust_address (op1, mode, 16); + r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m); + emit_move_insn (op0, r); + } + else + emit_insn (load_unaligned (op0, op1)); } - else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE) + else if (MEM_P (op0)) { - m = adjust_address (op0, mode, 0); - emit_insn (extract (m, op1, const0_rtx)); - m = adjust_address (op0, mode, 16); - emit_insn (extract (m, op1, 
const1_rtx)); + if (TARGET_AVX256_SPLIT_UNALIGNED_STORE) + { + m = adjust_address (op0, mode, 0); + emit_insn (extract (m, op1, const0_rtx)); + m = adjust_address (op0, mode, 16); + emit_insn (extract (m, op1, const1_rtx)); + } + else + emit_insn (store_unaligned (op0, op1)); } else - emit_insn (move_unaligned (op0, op1)); + gcc_unreachable (); } /* Implement the movmisalign patterns for SSE. Non-SSE modes go @@ -15808,6 +15822,7 @@ void ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) { rtx op0, op1, m; + rtx (*move_unaligned) (rtx, rtx); op0 = operands[0]; op1 = operands[1]; @@ -15824,14 +15839,28 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) /* If we're optimizing for size, movups is the smallest. */ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) { + if (MEM_P (op1)) + move_unaligned = gen_sse_loadups; + else if (MEM_P (op0)) + move_unaligned = gen_sse_storeups; + else + gcc_unreachable (); + op0 = gen_lowpart (V4SFmode, op0); op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); + emit_insn (move_unaligned (op0, op1)); return; } + if (MEM_P (op1)) + move_unaligned = gen_sse2_loaddqu; + else if (MEM_P (op0)) + move_unaligned = gen_sse2_storedqu; + else + gcc_unreachable (); + op0 = gen_lowpart (V16QImode, op0); op1 = gen_lowpart (V16QImode, op1); - emit_insn (gen_sse2_movdqu (op0, op1)); + emit_insn (move_unaligned (op0, op1)); break; case 32: op0 = gen_lowpart (V32QImode, op0); @@ -15849,7 +15878,14 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) switch (mode) { case V4SFmode: - emit_insn (gen_sse_movups (op0, op1)); + if (MEM_P (op1)) + move_unaligned = gen_sse_loadups; + else if (MEM_P (op0)) + move_unaligned = gen_sse_storeups; + else + gcc_unreachable (); + + emit_insn (move_unaligned (op0, op1)); break; case V8SFmode: ix86_avx256_split_vector_move_misalign (op0, op1); @@ -15857,12 +15893,26 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx 
operands[]) case V2DFmode: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) { + if (MEM_P (op1)) + move_unaligned = gen_sse_loadups; + else if (MEM_P (op0)) + move_unaligned = gen_sse_storeups; + else + gcc_unreachable (); + op0 = gen_lowpart (V4SFmode, op0); op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); + emit_insn (move_unaligned (op0, op1)); return; } - emit_insn (gen_sse2_movupd (op0, op1)); + if (MEM_P (op1)) + move_unaligned = gen_sse2_loadupd; + else if (MEM_P (op0)) + move_unaligned = gen_sse2_storeupd; + else + gcc_unreachable (); + + emit_insn (move_unaligned (op0, op1)); break; case V4DFmode: ix86_avx256_split_vector_move_misalign (op0, op1); @@ -15887,7 +15937,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) { op0 = gen_lowpart (V4SFmode, op0); op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); + emit_insn (gen_sse_loadups (op0, op1)); return; } @@ -15898,7 +15948,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) { op0 = gen_lowpart (V16QImode, op0); op1 = gen_lowpart (V16QImode, op1); - emit_insn (gen_sse2_movdqu (op0, op1)); + emit_insn (gen_sse2_loaddqu (op0, op1)); return; } @@ -15910,7 +15960,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) { op0 = gen_lowpart (V2DFmode, op0); op1 = gen_lowpart (V2DFmode, op1); - emit_insn (gen_sse2_movupd (op0, op1)); + emit_insn (gen_sse2_loadupd (op0, op1)); return; } @@ -15945,7 +15995,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) { op0 = gen_lowpart (V4SFmode, op0); op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); + emit_insn (gen_sse_loadups (op0, op1)); return; } @@ -15970,7 +16020,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) { op0 = gen_lowpart (V4SFmode, op0); op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); + emit_insn (gen_sse_storeups (op0, op1)); 
return; } @@ -15981,7 +16031,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) { op0 = gen_lowpart (V16QImode, op0); op1 = gen_lowpart (V16QImode, op1); - emit_insn (gen_sse2_movdqu (op0, op1)); + emit_insn (gen_sse2_storedqu (op0, op1)); return; } @@ -15991,7 +16041,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) { op0 = gen_lowpart (V2DFmode, op0); op1 = gen_lowpart (V2DFmode, op1); - emit_insn (gen_sse2_movupd (op0, op1)); + emit_insn (gen_sse2_storeupd (op0, op1)); } else { @@ -16009,7 +16059,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL) { op0 = gen_lowpart (V4SFmode, op0); - emit_insn (gen_sse_movups (op0, op1)); + emit_insn (gen_sse_storeups (op0, op1)); } else { @@ -25999,9 +26049,9 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, /* SSE */ - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, @@ -26015,14 
+26065,14 @@ static const struct builtin_description bdesc_special_args[] = /* SSE2 */ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT }, { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, { 
OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, @@ -26047,12 +26097,12 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, + { 
OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI }, diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 1917cb660a5..0621d61d68a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -21,7 +21,8 @@ (define_c_enum "unspec" [ ;; SSE UNSPEC_MOVNT - UNSPEC_MOVU + UNSPEC_LOADU + UNSPEC_STOREU ;; SSE3 UNSPEC_LDDQU @@ -580,23 +581,51 @@ DONE; }) -(define_insn "<sse>_movu<ssemodesuffix><avxsizesuffix>" - [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m") +(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>" + [(set (match_operand:VF 0 "register_operand" "=x") (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] - "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + [(match_operand:VF 1 "memory_operand" "m")] + UNSPEC_LOADU))] + "TARGET_SSE" "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "movu" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) -(define_insn "<sse2>_movdqu<avxsizesuffix>" - [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m") - (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] - "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>" + [(set (match_operand:VF 0 "memory_operand" "=m") + 
(unspec:VF + [(match_operand:VF 1 "register_operand" "x")] + UNSPEC_STOREU))] + "TARGET_SSE" + "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "movu" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "<MODE>")]) + +(define_insn "<sse2>_loaddqu<avxsizesuffix>" + [(set (match_operand:VI1 0 "register_operand" "=x") + (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")] + UNSPEC_LOADU))] + "TARGET_SSE2" + "%vmovdqu\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "movu" "1") + (set (attr "prefix_data16") + (if_then_else + (match_test "TARGET_AVX") + (const_string "*") + (const_string "1"))) + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "<sse2>_storedqu<avxsizesuffix>" + [(set (match_operand:VI1 0 "memory_operand" "=m") + (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")] + UNSPEC_STOREU))] + "TARGET_SSE2" "%vmovdqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "movu" "1") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e45406f1158..412b0cb1d74 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,17 @@ +2012-10-15 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2012-10-15 Uros Bizjak <ubizjak@gmail.com> + + * gcc.target/i386/avx256-unaligned-load-1.c: Update asm scan patterns. + * gcc.target/i386/avx256-unaligned-load-2.c: Ditto. + * gcc.target/i386/avx256-unaligned-load-3.c: Ditto. + * gcc.target/i386/avx256-unaligned-load-4.c: Ditto. + * gcc.target/i386/avx256-unaligned-store-1.c: Ditto. + * gcc.target/i386/avx256-unaligned-store-2.c: Ditto. + * gcc.target/i386/avx256-unaligned-store-3.c: Ditto. + * gcc.target/i386/avx256-unaligned-store-4.c: Ditto. 
+ 2012-10-15 Richard Guenther <rguenther@suse.de> PR tree-optimization/54920 diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c index c2511c643b4..e7eef6d7a90 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c @@ -14,6 +14,6 @@ avx_test (void) c[i] = a[i] * b[i+3]; } -/* { dg-final { scan-assembler-not "avx_movups256/1" } } */ -/* { dg-final { scan-assembler "sse_movups/1" } } */ +/* { dg-final { scan-assembler-not "avx_loadups256" } } */ +/* { dg-final { scan-assembler "sse_loadups" } } */ /* { dg-final { scan-assembler "vinsertf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c index 9d7167304e3..3f4fbf76479 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c @@ -24,6 +24,6 @@ avx_test (void) } } -/* { dg-final { scan-assembler-not "avx_movdqu256/1" } } */ -/* { dg-final { scan-assembler "sse2_movdqu/1" } } */ +/* { dg-final { scan-assembler-not "avx_loaddqu256" } } */ +/* { dg-final { scan-assembler "sse2_loaddqu" } } */ /* { dg-final { scan-assembler "vinsert.128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c index efb5f573fae..b0e0e79bdd8 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c @@ -14,6 +14,6 @@ avx_test (void) c[i] = a[i] * b[i+3]; } -/* { dg-final { scan-assembler-not "avx_movupd256/1" } } */ -/* { dg-final { scan-assembler "sse2_movupd/1" } } */ +/* { dg-final { scan-assembler-not "avx_loadupd256" } } */ +/* { dg-final { scan-assembler "sse2_loadupd" } } */ /* { dg-final { scan-assembler "vinsertf128" } } */ diff --git 
a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c index 7c015a8b90a..b3927be70ab 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c @@ -14,6 +14,6 @@ avx_test (void) b[i] = a[i+3] * 2; } -/* { dg-final { scan-assembler "avx_movups256/1" } } */ -/* { dg-final { scan-assembler-not "avx_movups/1" } } */ +/* { dg-final { scan-assembler "avx_loadups256" } } */ +/* { dg-final { scan-assembler-not "sse_loadups" } } */ /* { dg-final { scan-assembler-not "vinsertf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c index 0b5839669a7..1a53ba14a00 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c @@ -17,6 +17,6 @@ avx_test (void) d[i] = c[i] * 20.0; } -/* { dg-final { scan-assembler-not "avx_movups256/2" } } */ +/* { dg-final { scan-assembler-not "avx_storeups256" } } */ /* { dg-final { scan-assembler "vmovups.*\\*movv4sf_internal/3" } } */ /* { dg-final { scan-assembler "vextractf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c index eac460fef97..e98d1b684de 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c @@ -24,6 +24,6 @@ avx_test (void) } } -/* { dg-final { scan-assembler-not "avx_movdqu256/2" } } */ +/* { dg-final { scan-assembler-not "avx_storedqu256" } } */ /* { dg-final { scan-assembler "vmovdqu.*\\*movv16qi_internal/3" } } */ /* { dg-final { scan-assembler "vextract.128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c index 753625892d7..26c993be7e9 100644 --- 
a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c @@ -17,6 +17,6 @@ avx_test (void) d[i] = c[i] * 20.0; } -/* { dg-final { scan-assembler-not "avx_movupd256/2" } } */ +/* { dg-final { scan-assembler-not "avx_storeupd256" } } */ /* { dg-final { scan-assembler "vmovupd.*\\*movv2df_internal/3" } } */ /* { dg-final { scan-assembler "vextractf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c index 39b6f3bef16..6d734faa25e 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c @@ -14,7 +14,7 @@ avx_test (void) b[i+3] = a[i] * c[i]; } -/* { dg-final { scan-assembler "avx_movups256/2" } } */ -/* { dg-final { scan-assembler-not "avx_movups/2" } } */ +/* { dg-final { scan-assembler "avx_storeups256" } } */ +/* { dg-final { scan-assembler-not "sse_storeups" } } */ /* { dg-final { scan-assembler-not "\\*avx_movv4sf_internal/3" } } */ /* { dg-final { scan-assembler-not "vextractf128" } } */ |