diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2017-11-05 17:19:35 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@linaro.org> | 2017-11-05 17:19:35 +0000 |
commit | 648f8fc59b2cc39abd24f4c22388b346cdebcc31 (patch) | |
tree | 3a07eccc4c22b265261edd75c9ec3910d9c626f5 /gcc/config/i386/sse.md | |
parent | 7bef5b82e4109778a0988d20e19e1ed29dadd835 (diff) | |
parent | 8c089b5c15a7b35644750ca393f1e66071ad9aa9 (diff) | |
download | gcc-648f8fc59b2cc39abd24f4c22388b346cdebcc31.tar.gz |
Merge trunk into sve
Diffstat (limited to 'gcc/config/i386/sse.md')
-rw-r--r-- | gcc/config/i386/sse.md | 556 |
1 files changed, 194 insertions, 362 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d5e2ec00237..4dfb2f8d3b3 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -83,9 +83,7 @@ UNSPEC_VSIBADDR ;; For AVX512F support - UNSPEC_VPERMI2 UNSPEC_VPERMT2 - UNSPEC_VPERMI2_MASK UNSPEC_UNSIGNED_FIX_NOTRUNC UNSPEC_UNSIGNED_PCMP UNSPEC_TESTM @@ -157,6 +155,9 @@ UNSPEC_VP4FNMADD UNSPEC_VP4DPWSSD UNSPEC_VP4DPWSSDS + + ;; For GFNI support + UNSPEC_GF2P8AFFINEINV ]) (define_c_enum "unspecv" [ @@ -325,6 +326,9 @@ (define_mode_iterator VI1_AVX512 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI]) +(define_mode_iterator VI1_AVX512F + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI]) + (define_mode_iterator VI2_AVX2 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI]) @@ -371,10 +375,17 @@ [V16SF V16SI]) ;; ??? We should probably use TImode instead. -(define_mode_iterator VIMAX_AVX2 +(define_mode_iterator VIMAX_AVX2_AVX512BW [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI]) -;; ??? This should probably be dropped in favor of VIMAX_AVX2. +;; Suppose TARGET_AVX512BW as baseline +(define_mode_iterator VIMAX_AVX512VL + [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")]) + +(define_mode_iterator VIMAX_AVX2 + [(V2TI "TARGET_AVX2") V1TI]) + +;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW. (define_mode_iterator SSESCALARMODE [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI]) @@ -403,11 +414,19 @@ [(V8SI "TARGET_AVX2") V4SI (V4DI "TARGET_AVX2") V2DI]) +(define_mode_iterator VI248_AVX2 + [(V16HI "TARGET_AVX2") V8HI + (V8SI "TARGET_AVX2") V4SI + (V4DI "TARGET_AVX2") V2DI]) + (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) +(define_mode_iterator VI248_AVX512BW + [(V32HI "TARGET_AVX512BW") V16SI V8DI]) + (define_mode_iterator VI248_AVX512BW_AVX512VL [(V32HI "TARGET_AVX512BW") (V4DI "TARGET_AVX512VL") V16SI V8DI]) @@ -418,6 +437,11 @@ V8SI V4SI V2DI]) +(define_mode_iterator VI248_AVX512BW_2 + [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW") + V8SI V4SI + V4DI V2DI]) + (define_mode_iterator VI48_AVX512F [(V16SI "TARGET_AVX512F") V8SI V4SI (V8DI "TARGET_AVX512F") V4DI V2DI]) @@ -2522,7 +2546,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "reduces<mode>" +(define_insn "reduces<mode><mask_scalar_name>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 @@ -2533,7 +2557,7 @@ (match_dup 1) (const_int 1)))] "TARGET_AVX512DQ" - "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2, %3}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) @@ -2737,7 +2761,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<sse>_comi<round_saeonly_name>" +(define_insn "<sse>_<unord>comi<round_saeonly_name>" [(set (reg:CCFP FLAGS_REG) (compare:CCFP (vec_select:MODEF @@ -2747,27 +2771,7 @@ (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>") (parallel [(const_int 0)]))))] "SSE_FLOAT_MODE_P (<MODE>mode)" - "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}" - [(set_attr "type" "ssecomi") - (set_attr "prefix" "maybe_vex") - (set_attr "prefix_rep" "0") - (set (attr "prefix_data16") - (if_then_else (eq_attr "mode" "DF") - (const_string "1") - (const_string "0"))) - (set_attr "mode" "<MODE>")]) - -(define_insn "<sse>_ucomi<round_saeonly_name>" - [(set (reg:CCFPU FLAGS_REG) - (compare:CCFPU - (vec_select:MODEF - (match_operand:<ssevecmode> 0 "register_operand" "v") - (parallel [(const_int 0)])) - (vec_select:MODEF - (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>") - (parallel [(const_int 0)]))))] - "SSE_FLOAT_MODE_P (<MODE>mode)" - "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}" + "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}" [(set_attr "type" "ssecomi") (set_attr "prefix" "maybe_vex") (set_attr "prefix_rep" "0") @@ -3700,8 +3704,7 @@ "@ vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" - [(set_attr "isa" "fma_avx512f,fma_avx512f") - (set_attr "type" "ssemuladd") + [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>" @@ -3715,8 +3718,7 @@ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX512F" "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" - [(set_attr "isa" "fma_avx512f") - (set_attr "type" "ssemuladd") + [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fma_fmsub_<mode>" @@ -3766,8 +3768,7 @@ "@ vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" - [(set_attr "isa" "fma_avx512f,fma_avx512f") - (set_attr "type" "ssemuladd") + [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>" @@ -3782,8 +3783,7 @@ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX512F && <round_mode512bit_condition>" "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" - [(set_attr "isa" "fma_avx512f") - (set_attr "type" "ssemuladd") + [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fma_fnmadd_<mode>" @@ -3833,8 +3833,7 @@ "@ vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" - [(set_attr "isa" "fma_avx512f,fma_avx512f") - (set_attr "type" "ssemuladd") + [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>" @@ -3849,8 +3848,7 @@ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX512F && <round_mode512bit_condition>" "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" - [(set_attr "isa" "fma_avx512f") - (set_attr "type" "ssemuladd") + [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fma_fnmsub_<mode>" @@ -3903,8 +3901,7 @@ "@ vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" - [(set_attr "isa" "fma_avx512f,fma_avx512f") - (set_attr "type" "ssemuladd") + [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>" @@ -3920,8 +3917,7 @@ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX512F" "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" - [(set_attr "isa" "fma_avx512f") - (set_attr "type" "ssemuladd") + [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) ;; FMA parallel floating point multiply addsub and subadd operations. @@ -4005,8 +4001,7 @@ "@ vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" - [(set_attr "isa" "fma_avx512f,fma_avx512f") - (set_attr "type" "ssemuladd") + [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>" @@ -4021,8 +4016,7 @@ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX512F" "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" - [(set_attr "isa" "fma_avx512f") - (set_attr "type" "ssemuladd") + [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fma_fmsubadd_<mode>" @@ -4075,8 +4069,7 @@ "@ vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" - [(set_attr "isa" "fma_avx512f,fma_avx512f") - (set_attr "type" "ssemuladd") + [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>" @@ -4092,8 +4085,7 @@ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX512F" "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" - [(set_attr "isa" "fma_avx512f") - (set_attr "type" "ssemuladd") + [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) ;; FMA3 floating point scalar intrinsics. These merge result with @@ -10168,8 +10160,7 @@ (const_int 12) (const_int 14)])))))] "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)" "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "avx512f") - (set_attr "type" "sseimul") + [(set_attr "type" "sseimul") (set_attr "prefix_extra" "1") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -10285,8 +10276,7 @@ (const_int 12) (const_int 14)])))))] "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)" "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "avx512f") - (set_attr "type" "sseimul") + [(set_attr "type" "sseimul") (set_attr "prefix_extra" "1") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -10731,65 +10721,57 @@ (const_string "0"))) (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<shift_insn><mode>3<mask_name>" - [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v") - (any_lshift:VI2_AVX2_AVX512BW - (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v") - (match_operand:DI 2 "nonmemory_operand" "xN,vN")))] - "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" - "@ - p<vshift><ssemodesuffix>\t{%2, %0|%0, %2} - vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseishft") +(define_insn "<mask_codefor><shift_insn><mode>3<mask_name>" + [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v") + (any_lshift:VI248_AVX512BW_2 + (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm") + (match_operand:DI 2 "nonmemory_operand" "v,N")))] + "TARGET_AVX512VL" + "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + [(set_attr "type" "sseishft") (set (attr "length_immediate") (if_then_else (match_operand 2 "const_int_operand") (const_string "1") (const_string "0"))) - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<shift_insn><mode>3<mask_name>" - [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,x,v") - (any_lshift:VI48_AVX2 - (match_operand:VI48_AVX2 1 "register_operand" "0,x,v") - (match_operand:DI 2 "nonmemory_operand" "xN,xN,vN")))] - "TARGET_SSE2 && <mask_mode512bit_condition>" +(define_insn "<shift_insn><mode>3" + [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x") + (any_lshift:VI248_AVX2 + (match_operand:VI248_AVX2 1 "register_operand" "0,x") + (match_operand:DI 2 "nonmemory_operand" "xN,xN")))] + "TARGET_SSE2" "@ p<vshift><ssemodesuffix>\t{%2, %0|%0, %2} - vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} - vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseishft") (set (attr "length_immediate") (if_then_else (match_operand 2 "const_int_operand") (const_string "1") (const_string "0"))) - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<shift_insn><mode>3<mask_name>" - [(set (match_operand:VI48_512 0 "register_operand" "=v,v") - (any_lshift:VI48_512 - (match_operand:VI48_512 1 "nonimmediate_operand" "v,m") + [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v") + (any_lshift:VI248_AVX512BW + (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m") (match_operand:DI 2 "nonmemory_operand" "vN,N")))] - "TARGET_AVX512F && <mask_mode512bit_condition>" + "TARGET_AVX512F" "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "avx512f") - (set_attr "type" "sseishft") + [(set_attr "type" "sseishft") (set (attr "length_immediate") (if_then_else (match_operand 2 "const_int_operand") (const_string "1") (const_string "0"))) - (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_expand "vec_shl_<mode>" +(define_expand "vec_shr_<mode>" [(set (match_dup 3) - (ashift:V1TI + (lshiftrt:V1TI (match_operand:VI_128 1 "register_operand") (match_operand:SI 2 "const_0_to_255_mul_8_operand"))) (set (match_operand:VI_128 0 "register_operand") (match_dup 4))] @@ -10800,48 +10782,24 @@ operands[4] = gen_lowpart (<MODE>mode, operands[3]); }) -(define_insn "<sse2_avx2>_ashl<mode>3" - [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v") - (ashift:VIMAX_AVX2 - (match_operand:VIMAX_AVX2 1 "register_operand" "0,v") - (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] - "TARGET_SSE2" +(define_insn "avx512bw_<shift_insn><mode>3" + [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v") + (any_lshift:VIMAX_AVX512VL + (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_AVX512BW" { operands[2] = GEN_INT (INTVAL (operands[2]) / 8); - - switch (which_alternative) - { - case 0: - return "pslldq\t{%2, %0|%0, %2}"; - case 1: - return "vpslldq\t{%2, %1, %0|%0, %1, %2}"; - default: - gcc_unreachable (); - } + return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}"; } - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseishft") + [(set_attr "type" "sseishft") (set_attr "length_immediate" "1") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "maybe_evex") (set_attr "mode" "<sseinsnmode>")]) -(define_expand "vec_shr_<mode>" - [(set (match_dup 3) - (lshiftrt:V1TI - (match_operand:VI_128 1 "register_operand") - (match_operand:SI 2 "const_0_to_255_mul_8_operand"))) - (set (match_operand:VI_128 0 "register_operand") (match_dup 4))] - "TARGET_SSE2" -{ - operands[1] = gen_lowpart (V1TImode, operands[1]); - operands[3] = gen_reg_rtx (V1TImode); - operands[4] = gen_lowpart (<MODE>mode, operands[3]); -}) - -(define_insn "<sse2_avx2>_lshr<mode>3" +(define_insn "<sse2_avx2>_<shift_insn><mode>3" [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v") - (lshiftrt:VIMAX_AVX2 + (any_lshift:VIMAX_AVX2 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v") (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] "TARGET_SSE2" @@ -10851,9 +10809,9 @@ switch (which_alternative) { case 0: - return "psrldq\t{%2, %0|%0, %2}"; + return "p<vshift>dq\t{%2, %0|%0, %2}"; case 1: - return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; + return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}"; default: gcc_unreachable (); } @@ -11562,10 +11520,10 @@ "TARGET_AVX512BW") (define_insn "*andnot<mode>3" - [(set (match_operand:VI 0 "register_operand" "=x,v") + [(set (match_operand:VI 0 "register_operand" "=x,x,v") (and:VI - (not:VI (match_operand:VI 1 "register_operand" "0,v")) - (match_operand:VI 2 "vector_operand" "xBm,vm")))] + (not:VI (match_operand:VI 1 "register_operand" "0,x,v")) + (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))] "TARGET_SSE" { static char buf[64]; @@ -11600,10 +11558,11 @@ case E_V4DImode: case E_V4SImode: case E_V2DImode: - ssesuffix = TARGET_AVX512VL ? "<ssemodesuffix>" : ""; + ssesuffix = (TARGET_AVX512VL && which_alternative == 2 + ? "<ssemodesuffix>" : ""); break; default: - ssesuffix = TARGET_AVX512VL ? "q" : ""; + ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : ""; } break; @@ -11629,6 +11588,7 @@ ops = "%s%s\t{%%2, %%0|%%0, %%2}"; break; case 1: + case 2: ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; default: @@ -11638,7 +11598,7 @@ snprintf (buf, sizeof (buf), ops, tmp, ssesuffix); return buf; } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx") (set_attr "type" "sselog") (set (attr "prefix_data16") (if_then_else @@ -11646,7 +11606,7 @@ (eq_attr "mode" "TI")) (const_string "1") (const_string "*"))) - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,vex,evex") (set (attr "mode") (cond [(and (match_test "<MODE_SIZE> == 16") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) @@ -11691,10 +11651,10 @@ }) (define_insn "<mask_codefor><code><mode>3<mask_name>" - [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,v") + [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v") (any_logic:VI48_AVX_AVX512F - (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,v") - (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,vm")))] + (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v") + (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))] "TARGET_SSE && <mask_mode512bit_condition> && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" { @@ -11724,7 +11684,9 @@ case E_V4DImode: case E_V4SImode: case E_V2DImode: - ssesuffix = TARGET_AVX512VL ? "<ssemodesuffix>" : ""; + ssesuffix = (TARGET_AVX512VL + && (<mask_applied> || which_alternative == 2) + ? "<ssemodesuffix>" : ""); break; default: gcc_unreachable (); @@ -11753,6 +11715,7 @@ ops = "%s%s\t{%%2, %%0|%%0, %%2}"; break; case 1: + case 2: ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; break; default: @@ -11762,7 +11725,7 @@ snprintf (buf, sizeof (buf), ops, tmp, ssesuffix); return buf; } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx") (set_attr "type" "sselog") (set (attr "prefix_data16") (if_then_else @@ -11770,7 +11733,7 @@ (eq_attr "mode" "TI")) (const_string "1") (const_string "*"))) - (set_attr "prefix" "<mask_prefix3>") + (set_attr "prefix" "<mask_prefix3>,evex") (set (attr "mode") (cond [(and (match_test "<MODE_SIZE> == 16") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) @@ -11789,10 +11752,10 @@ (const_string "<sseinsnmode>")))]) (define_insn "*<code><mode>3" - [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,v") + [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v") (any_logic: VI12_AVX_AVX512F - (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,v") - (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,vm")))] + (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v") + (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))] "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" { static char buf[64]; @@ -11821,7 +11784,7 @@ case E_V16HImode: case E_V16QImode: case E_V8HImode: - ssesuffix = TARGET_AVX512VL ? "q" : ""; + ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : ""; break; default: gcc_unreachable (); @@ -11847,6 +11810,7 @@ ops = "%s%s\t{%%2, %%0|%%0, %%2}"; break; case 1: + case 2: ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; default: @@ -11856,7 +11820,7 @@ snprintf (buf, sizeof (buf), ops, tmp, ssesuffix); return buf; } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx") (set_attr "type" "sselog") (set (attr "prefix_data16") (if_then_else @@ -11864,7 +11828,7 @@ (eq_attr "mode" "TI")) (const_string "1") (const_string "*"))) - (set_attr "prefix" "<mask_prefix3>") + (set_attr "prefix" "<mask_prefix3>,evex") (set (attr "mode") (cond [(and (match_test "<MODE_SIZE> == 16") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) @@ -18099,96 +18063,48 @@ (set_attr "prefix" "<mask_prefix>") (set_attr "mode" "<sseinsnmode>")]) -(define_expand "<avx512>_vpermi2var<mode>3_maskz" - [(match_operand:VI48F 0 "register_operand") - (match_operand:VI48F 1 "register_operand") - (match_operand:<sseintvecmode> 2 "register_operand") - (match_operand:VI48F 3 "nonimmediate_operand") - (match_operand:<avx512fmaskmode> 4 "register_operand")] - "TARGET_AVX512F" -{ - emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (<MODE>mode), operands[4])); - DONE; -}) - -(define_expand "<avx512>_vpermi2var<mode>3_maskz" - [(match_operand:VI1_AVX512VL 0 "register_operand") - (match_operand:VI1_AVX512VL 1 "register_operand") - (match_operand:<sseintvecmode> 2 "register_operand") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand") - (match_operand:<avx512fmaskmode> 4 "register_operand")] - "TARGET_AVX512VBMI" -{ - emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (<MODE>mode), operands[4])); - DONE; -}) - -(define_expand "<avx512>_vpermi2var<mode>3_maskz" - [(match_operand:VI2_AVX512VL 0 "register_operand") - (match_operand:VI2_AVX512VL 1 "register_operand") - (match_operand:<sseintvecmode> 2 "register_operand") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand") - (match_operand:<avx512fmaskmode> 4 "register_operand")] - "TARGET_AVX512BW" -{ - emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (<MODE>mode), operands[4])); - DONE; -}) - -(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>" - [(set (match_operand:VI48F 0 "register_operand" "=v") - (unspec:VI48F - [(match_operand:VI48F 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2))] +(define_mode_iterator VPERMI2 + [V16SI V16SF V8DI V8DF + (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") + (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") + (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL") + (V8HI "TARGET_AVX512BW && TARGET_AVX512VL") + (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL") + (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")]) + +(define_mode_iterator VPERMI2I + [V16SI V8DI + (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL") + (V8HI "TARGET_AVX512BW && TARGET_AVX512VL") + (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL") + (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")]) + +(define_expand "<avx512>_vpermi2var<mode>3_mask" + [(set (match_operand:VPERMI2 0 "register_operand") + (vec_merge:VPERMI2 + (unspec:VPERMI2 + [(match_operand:<sseintvecmode> 2 "register_operand") + (match_operand:VPERMI2 1 "register_operand") + (match_operand:VPERMI2 3 "nonimmediate_operand")] + UNSPEC_VPERMT2) + (match_dup 5) + (match_operand:<avx512fmaskmode> 4 "register_operand")))] "TARGET_AVX512F" - "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>" - [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") - (unspec:VI1_AVX512VL - [(match_operand:VI1_AVX512VL 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2))] - "TARGET_AVX512VBMI" - "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>" - [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (unspec:VI2_AVX512VL - [(match_operand:VI2_AVX512VL 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2))] - "TARGET_AVX512BW" - "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermi2var<mode>3_mask" - [(set (match_operand:VI48F 0 "register_operand" "=v") - (vec_merge:VI48F - (unspec:VI48F - [(match_operand:VI48F 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2_MASK) - (match_dup 0) + "operands[5] = gen_lowpart (<MODE>mode, operands[2]);") + +(define_insn "*<avx512>_vpermi2var<mode>3_mask" + [(set (match_operand:VPERMI2I 0 "register_operand" "=v") + (vec_merge:VPERMI2I + (unspec:VPERMI2I + [(match_operand:<sseintvecmode> 2 "register_operand" "0") + (match_operand:VPERMI2I 1 "register_operand" "v") + (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")] + UNSPEC_VPERMT2) + (match_dup 2) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX512F" "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" @@ -18196,43 +18112,27 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<avx512>_vpermi2var<mode>3_mask" - [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI1_AVX512VL - (unspec:VI1_AVX512VL - [(match_operand:VI1_AVX512VL 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2_MASK) - (match_dup 0) - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] - "TARGET_AVX512VBMI" - "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermi2var<mode>3_mask" - [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI2_AVX512VL - (unspec:VI2_AVX512VL - [(match_operand:VI2_AVX512VL 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMI2_MASK) - (match_dup 0) +(define_insn "*<avx512>_vpermi2var<mode>3_mask" + [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") + (vec_merge:VF_AVX512VL + (unspec:VF_AVX512VL + [(match_operand:<sseintvecmode> 2 "register_operand" "0") + (match_operand:VF_AVX512VL 1 "register_operand" "v") + (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")] + UNSPEC_VPERMT2) + (subreg:VF_AVX512VL (match_dup 2) 0) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] - "TARGET_AVX512BW" + "TARGET_AVX512F" "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) (define_expand "<avx512>_vpermt2var<mode>3_maskz" - [(match_operand:VI48F 0 "register_operand") + [(match_operand:VPERMI2 0 "register_operand") (match_operand:<sseintvecmode> 1 "register_operand") - (match_operand:VI48F 2 "register_operand") - (match_operand:VI48F 3 "nonimmediate_operand") + (match_operand:VPERMI2 2 "register_operand") + (match_operand:VPERMI2 3 "nonimmediate_operand") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512F" { @@ -18242,80 +18142,28 @@ DONE; }) -(define_expand "<avx512>_vpermt2var<mode>3_maskz" - [(match_operand:VI1_AVX512VL 0 "register_operand") - (match_operand:<sseintvecmode> 1 "register_operand") - (match_operand:VI1_AVX512VL 2 "register_operand") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand") - (match_operand:<avx512fmaskmode> 4 "register_operand")] - "TARGET_AVX512VBMI" -{ - emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (<MODE>mode), operands[4])); - DONE; -}) - -(define_expand "<avx512>_vpermt2var<mode>3_maskz" - [(match_operand:VI2_AVX512VL 0 "register_operand") - (match_operand:<sseintvecmode> 1 "register_operand") - (match_operand:VI2_AVX512VL 2 "register_operand") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand") - (match_operand:<avx512fmaskmode> 4 "register_operand")] - "TARGET_AVX512BW" -{ - emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( - operands[0], operands[1], operands[2], operands[3], - CONST0_RTX (<MODE>mode), operands[4])); - DONE; -}) - (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>" - [(set (match_operand:VI48F 0 "register_operand" "=v") - (unspec:VI48F - [(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI48F 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VPERMI2 0 "register_operand" "=v,v") + (unspec:VPERMI2 + [(match_operand:<sseintvecmode> 1 "register_operand" "v,0") + (match_operand:VPERMI2 2 "register_operand" "0,v") + (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")] UNSPEC_VPERMT2))] "TARGET_AVX512F" - "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>" - [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") - (unspec:VI1_AVX512VL - [(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI1_AVX512VL 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMT2))] - "TARGET_AVX512VBMI" - "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>" - [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (unspec:VI2_AVX512VL - [(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI2_AVX512VL 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMT2))] - "TARGET_AVX512BW" - "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" + "@ + vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3} + vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<avx512>_vpermt2var<mode>3_mask" - [(set (match_operand:VI48F 0 "register_operand" "=v") - (vec_merge:VI48F - (unspec:VI48F + [(set (match_operand:VPERMI2 0 "register_operand" "=v") + (vec_merge:VPERMI2 + (unspec:VPERMI2 [(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI48F 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm")] + (match_operand:VPERMI2 2 "register_operand" "0") + (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")] UNSPEC_VPERMT2) (match_dup 2) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] @@ -18325,38 +18173,6 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<avx512>_vpermt2var<mode>3_mask" - [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI1_AVX512VL - (unspec:VI1_AVX512VL - [(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI1_AVX512VL 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMT2) - (match_dup 2) - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] - "TARGET_AVX512VBMI" - "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<avx512>_vpermt2var<mode>3_mask" - [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI2_AVX512VL - (unspec:VI2_AVX512VL - [(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI2_AVX512VL 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPERMT2) - (match_dup 2) - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] - "TARGET_AVX512BW" - "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - (define_expand "avx_vperm2f128<mode>3" [(set (match_operand:AVX256MODE2P 0 "register_operand") (unspec:AVX256MODE2P @@ -19613,8 +19429,7 @@ UNSPEC_DBPSADBW))] "TARGET_AVX512BW" "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}" - [(set_attr "isa" "avx") - (set_attr "type" "sselog1") + [(set_attr "type" "sselog1") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) @@ -20159,3 +19974,20 @@ ])] "TARGET_SSE && TARGET_64BIT" "jmp\t%P1") + +(define_insn "vgf2p8affineinvqb_<mode><mask_name>" + [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v") + (unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v") + (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm") + (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")] + UNSPEC_GF2P8AFFINEINV))] + "TARGET_GFNI" + "@ + gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3} + vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3} + vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}" + [(set_attr "isa" "noavx,avx,avx512bw") + (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,maybe_evex,evex") + (set_attr "mode" "<sseinsnmode>")]) |