author     krebbel <krebbel@138bc75d-0d04-0410-961f-82ee72b054a4>  2017-04-25 07:37:50 +0000
committer  krebbel <krebbel@138bc75d-0d04-0410-961f-82ee72b054a4>  2017-04-25 07:37:50 +0000
commit     d90d26d883c38333e6780661d87c50d97e05c07f (patch)
tree       28e95dc2ae7b20ece9816a634395a3f623677a51 /gcc/config
parent     35f0c8ba25302315fd803f3ed1087750bdd4841a (diff)
download   gcc-d90d26d883c38333e6780661d87c50d97e05c07f.tar.gz
S/390: PR80080: Optimize atomic patterns.
The attached patch optimizes the atomic_exchange and atomic_compare patterns
on s390 and s390x (mostly limited to SImode and DImode).  Besides general
optimization, the changes fix most of the problems reported in PR 80080:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80080

gcc/ChangeLog:

2017-04-25  Dominik Vogt  <vogt@linux.vnet.ibm.com>

	PR target/80080
	* s390-protos.h (s390_expand_cs_hqi): Removed.
	(s390_expand_cs, s390_expand_atomic_exchange_tdsi): New prototypes.
	* config/s390/s390.c (s390_emit_compare_and_swap): Handle all
	integer modes as well as CCZ1mode and CCZmode.
	(s390_expand_atomic_exchange_tdsi, s390_expand_atomic): Adapt to
	the new signature of s390_emit_compare_and_swap.
	(s390_expand_cs_hqi): Likewise, make static.
	(s390_expand_cs_tdsi): Generate an explicit compare before trying
	compare-and-swap, in some cases.
	(s390_expand_cs): Wrapper function.
	(s390_expand_atomic_exchange_tdsi): New backend specific expander
	for atomic_exchange.
	(s390_match_ccmode_set): Allow CCZmode <-> CCZ1 mode.
	* config/s390/s390.md ("atomic_compare_and_swap<mode>"): Merge the
	patterns for small and large integers.  Forbid symref memory
	operands.  Move expander to s390.c.  Require cc register.
	("atomic_compare_and_swap<DGPR:mode><CCZZ1:mode>_internal")
	("*atomic_compare_and_swap<TDI:mode><CCZZ1:mode>_1")
	("*atomic_compare_and_swapdi<CCZZ1:mode>_2")
	("*atomic_compare_and_swapsi<CCZZ1:mode>_3"): Use s_operand to
	forbid symref memory operands.  Remove CC mode and call
	s390_match_ccmode instead.
	("atomic_exchange<mode>"): Allow and implement all integer modes.

gcc/testsuite/ChangeLog:

2017-04-25  Dominik Vogt  <vogt@linux.vnet.ibm.com>

	PR target/80080
	* gcc.target/s390/md/atomic_compare_exchange-1.c: New test.
	* gcc.target/s390/md/atomic_compare_exchange-1.inc: New test.
	* gcc.target/s390/md/atomic_exchange-1.inc: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@247132 138bc75d-0d04-0410-961f-82ee72b054a4
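For readers skimming the patch, the following minimal C example (hypothetical, not taken from PR 80080 or the new tests) shows the kind of weak compare-and-swap against a constant expected value that the new expansion path targets on z196 and newer:

#include <stdint.h>

/* Illustrative only: a weak compare-and-swap against the constant 0.
   With this patch, on z196 and newer the memory word is compared with
   the constant first and the expensive CS instruction is skipped when
   that compare already fails.  */
int
try_lock (uint64_t *lock)
{
  uint64_t expected = 0;
  return __atomic_compare_exchange_n (lock, &expected, 1, /* weak */ 1,
                                      __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}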
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/s390/s390-protos.h |   4
-rw-r--r--  gcc/config/s390/s390.c        | 184
-rw-r--r--  gcc/config/s390/s390.md       | 110
3 files changed, 225 insertions, 73 deletions
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 7f06a208eeb..3fdb32059cd 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -112,8 +112,8 @@ extern void s390_expand_vec_strlen (rtx, rtx, rtx);
extern void s390_expand_vec_movstr (rtx, rtx, rtx);
extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
-extern void s390_expand_cs_hqi (machine_mode, rtx, rtx, rtx,
- rtx, rtx, bool);
+extern void s390_expand_cs (machine_mode, rtx, rtx, rtx, rtx, rtx, bool);
+extern void s390_expand_atomic_exchange_tdsi (rtx, rtx, rtx);
extern void s390_expand_atomic (machine_mode, enum rtx_code,
rtx, rtx, rtx, bool);
extern void s390_expand_tbegin (rtx, rtx, rtx, bool);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 2cb8947e5a6..c16391aa8d5 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -1306,6 +1306,7 @@ s390_match_ccmode_set (rtx set, machine_mode req_mode)
set_mode = GET_MODE (SET_DEST (set));
switch (set_mode)
{
+ case CCZ1mode:
case CCSmode:
case CCSRmode:
case CCUmode:
@@ -1328,7 +1329,8 @@ s390_match_ccmode_set (rtx set, machine_mode req_mode)
case CCZmode:
if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
- && req_mode != CCSRmode && req_mode != CCURmode)
+ && req_mode != CCSRmode && req_mode != CCURmode
+ && req_mode != CCZ1mode)
return 0;
break;
@@ -1762,11 +1764,31 @@ s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
- rtx cmp, rtx new_rtx)
+ rtx cmp, rtx new_rtx, machine_mode ccmode)
{
- emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
- return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
- const0_rtx);
+ rtx cc;
+
+ cc = gen_rtx_REG (ccmode, CC_REGNUM);
+ switch (GET_MODE (mem))
+ {
+ case SImode:
+ emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
+ new_rtx, cc));
+ break;
+ case DImode:
+ emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
+ new_rtx, cc));
+ break;
+ case TImode:
+ emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
+ new_rtx, cc));
+ break;
+ case QImode:
+ case HImode:
+ default:
+ gcc_unreachable ();
+ }
+ return s390_emit_compare (code, cc, const0_rtx);
}
/* Emit a jump instruction to TARGET and return it. If COND is
@@ -6723,7 +6745,7 @@ s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
the memory location, CMP the old value to compare MEM with and NEW_RTX the
value to set if CMP == MEM. */
-void
+static void
s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
rtx cmp, rtx new_rtx, bool is_weak)
{
@@ -6770,7 +6792,7 @@ s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
emit_insn (seq2);
emit_insn (seq3);
- cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
+ cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
if (is_weak)
emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
else
@@ -6799,6 +6821,151 @@ s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
NULL_RTX, 1, OPTAB_DIRECT), 1);
}
+/* Variant of s390_expand_cs for SI, DI and TI modes. */
+static void
+s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
+ rtx cmp, rtx new_rtx, bool is_weak)
+{
+ rtx output = vtarget;
+ rtx_code_label *skip_cs_label = NULL;
+ bool do_const_opt = false;
+
+ if (!register_operand (output, mode))
+ output = gen_reg_rtx (mode);
+
+ /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
+ with the constant first and skip the compare_and_swap because it is very
+ expensive and likely to fail anyway.
+ Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
+ cause spurious failures in that case.
+ Note 2: It may be useful to do this also for non-constant INPUT.
+ Note 3: Currently only targets with "load on condition" are supported
+ (z196 and newer). */
+
+ if (TARGET_Z196
+ && (mode == SImode || mode == DImode))
+ do_const_opt = (is_weak && CONST_INT_P (cmp));
+
+ if (do_const_opt)
+ {
+ const int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
+ rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
+
+ skip_cs_label = gen_label_rtx ();
+ emit_move_insn (btarget, const0_rtx);
+ if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
+ {
+ rtvec lt = rtvec_alloc (2);
+
+ /* Load-and-test + conditional jump. */
+ RTVEC_ELT (lt, 0)
+ = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
+ RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
+ }
+ else
+ {
+ emit_move_insn (output, mem);
+ emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
+ }
+ s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
+ add_int_reg_note (get_last_insn (), REG_BR_PROB, very_unlikely);
+ /* If the jump is not taken, OUTPUT is the expected value. */
+ cmp = output;
+ /* Reload newval to a register manually, *after* the compare and jump
+ above. Otherwise Reload might place it before the jump. */
+ }
+ else
+ cmp = force_reg (mode, cmp);
+ new_rtx = force_reg (mode, new_rtx);
+ s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
+ (do_const_opt) ? CCZmode : CCZ1mode);
+ if (skip_cs_label != NULL)
+ emit_label (skip_cs_label);
+
+ /* We deliberately accept non-register operands in the predicate
+ to ensure the write back to the output operand happens *before*
+ the store-flags code below. This makes it easier for combine
+ to merge the store-flags code with a potential test-and-branch
+ pattern following (immediately!) afterwards. */
+ if (output != vtarget)
+ emit_move_insn (vtarget, output);
+
+ if (do_const_opt)
+ {
+ rtx cc, cond, ite;
+
+ /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
+ btarget has already been initialized with 0 above. */
+ cc = gen_rtx_REG (CCZmode, CC_REGNUM);
+ cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
+ ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
+ emit_insn (gen_rtx_SET (btarget, ite));
+ }
+ else
+ {
+ rtx cc, cond;
+
+ cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
+ cond = gen_rtx_EQ (SImode, cc, const0_rtx);
+ emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
+ }
+}
+
+/* Expand an atomic compare and swap operation. MEM is the memory location,
+ CMP the old value to compare MEM with and NEW_RTX the value to set if
+ CMP == MEM. */
+
+void
+s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
+ rtx cmp, rtx new_rtx, bool is_weak)
+{
+ switch (mode)
+ {
+ case TImode:
+ case DImode:
+ case SImode:
+ s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
+ break;
+ case HImode:
+ case QImode:
+ s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
+ The memory location MEM is set to INPUT. OUTPUT is set to the previous value
+ of MEM. */
+
+void
+s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
+{
+ machine_mode mode = GET_MODE (mem);
+ rtx_code_label *csloop;
+
+ if (TARGET_Z196
+ && (mode == DImode || mode == SImode)
+ && CONST_INT_P (input) && INTVAL (input) == 0)
+ {
+ emit_move_insn (output, const0_rtx);
+ if (mode == DImode)
+ emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
+ else
+ emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
+ return;
+ }
+
+ input = force_reg (mode, input);
+ emit_move_insn (output, mem);
+ csloop = gen_label_rtx ();
+ emit_label (csloop);
+ s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
+ input, CCZ1mode));
+}
+
/* Expand an atomic operation CODE of mode MODE. MEM is the memory location
and VAL the value to play with. If AFTER is true then store the value
MEM holds after the operation, if AFTER is false then store the value MEM
@@ -6878,7 +7045,8 @@ s390_expand_atomic (machine_mode mode, enum rtx_code code,
}
s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
- ac.memsi, cmp, new_rtx));
+ ac.memsi, cmp, new_rtx,
+ CCZ1mode));
/* Return the correct part of the bitfield. */
if (target)
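As a rough reading aid for the s390.c changes above — a sketch only, since the backend emits RTL rather than C — the sequence produced by s390_expand_cs_tdsi for the weak, constant-expected case corresponds to something like the following (the helper name and its exact shape are hypothetical):

#include <stdint.h>

/* Rough C equivalent of what s390_expand_cs_tdsi arranges for a weak
   compare-and-swap with a constant expected value: load the memory word
   (load-and-test when the constant is 0), branch around the costly CS
   on mismatch, and write back both the old value and the success flag.  */
static int
cs_weak_const (uint64_t *mem, uint64_t expected, uint64_t newval,
               uint64_t *oldval)
{
  int success = 0;
  uint64_t cur = *mem;               /* explicit load + compare            */
  if (cur == expected)               /* otherwise the CS is skipped        */
    success = __atomic_compare_exchange_n (mem, &cur, newval, /* weak */ 1,
                                           __ATOMIC_SEQ_CST,
                                           __ATOMIC_SEQ_CST);
  *oldval = cur;                     /* oldval output is written either way */
  return success;
}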
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 9baafccdf5a..8a700edab94 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -10230,83 +10230,56 @@
(define_expand "atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "register_operand") ;; bool success output
- (match_operand:DGPR 1 "nonimmediate_operand");; oldval output
- (match_operand:DGPR 2 "memory_operand") ;; memory
- (match_operand:DGPR 3 "register_operand") ;; expected intput
- (match_operand:DGPR 4 "register_operand") ;; newval intput
+ (match_operand:DINT 1 "nonimmediate_operand");; oldval output
+ (match_operand:DINT 2 "s_operand") ;; memory
+ (match_operand:DINT 3 "general_operand") ;; expected input
+ (match_operand:DINT 4 "general_operand") ;; newval input
(match_operand:SI 5 "const_int_operand") ;; is_weak
(match_operand:SI 6 "const_int_operand") ;; success model
(match_operand:SI 7 "const_int_operand")] ;; failure model
""
{
- rtx cc, cmp, output = operands[1];
-
- if (!register_operand (output, <MODE>mode))
- output = gen_reg_rtx (<MODE>mode);
-
- if (MEM_ALIGN (operands[2]) < GET_MODE_BITSIZE (GET_MODE (operands[2])))
+ if (GET_MODE_BITSIZE (<MODE>mode) >= 16
+ && GET_MODE_BITSIZE (<MODE>mode) > MEM_ALIGN (operands[2]))
FAIL;
- emit_insn (gen_atomic_compare_and_swap<mode>_internal
- (output, operands[2], operands[3], operands[4]));
-
- /* We deliberately accept non-register operands in the predicate
- to ensure the write back to the output operand happens *before*
- the store-flags code below. This makes it easier for combine
- to merge the store-flags code with a potential test-and-branch
- pattern following (immediately!) afterwards. */
- if (output != operands[1])
- emit_move_insn (operands[1], output);
-
- cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
- cmp = gen_rtx_EQ (SImode, cc, const0_rtx);
- emit_insn (gen_cstorecc4 (operands[0], cmp, cc, const0_rtx));
- DONE;
-})
-
-(define_expand "atomic_compare_and_swap<mode>"
- [(match_operand:SI 0 "register_operand") ;; bool success output
- (match_operand:HQI 1 "nonimmediate_operand") ;; oldval output
- (match_operand:HQI 2 "memory_operand") ;; memory
- (match_operand:HQI 3 "general_operand") ;; expected intput
- (match_operand:HQI 4 "general_operand") ;; newval intput
- (match_operand:SI 5 "const_int_operand") ;; is_weak
- (match_operand:SI 6 "const_int_operand") ;; success model
- (match_operand:SI 7 "const_int_operand")] ;; failure model
- ""
-{
- s390_expand_cs_hqi (<MODE>mode, operands[0], operands[1], operands[2],
- operands[3], operands[4], INTVAL (operands[5]));
- DONE;
-})
+ s390_expand_cs (<MODE>mode, operands[0], operands[1], operands[2],
+ operands[3], operands[4], INTVAL (operands[5]));
+ DONE;
+})
(define_expand "atomic_compare_and_swap<mode>_internal"
[(parallel
[(set (match_operand:DGPR 0 "register_operand")
- (match_operand:DGPR 1 "memory_operand"))
+ (match_operand:DGPR 1 "s_operand"))
(set (match_dup 1)
(unspec_volatile:DGPR
[(match_dup 1)
(match_operand:DGPR 2 "register_operand")
(match_operand:DGPR 3 "register_operand")]
UNSPECV_CAS))
- (set (reg:CCZ1 CC_REGNUM)
- (compare:CCZ1 (match_dup 1) (match_dup 2)))])]
- "")
+ (set (match_operand 4 "cc_reg_operand")
+ (match_dup 5))])]
+ "GET_MODE (operands[4]) == CCZmode
+ || GET_MODE (operands[4]) == CCZ1mode"
+{
+ operands[5]
+ = gen_rtx_COMPARE (GET_MODE (operands[4]), operands[1], operands[2]);
+})
; cdsg, csg
(define_insn "*atomic_compare_and_swap<mode>_1"
[(set (match_operand:TDI 0 "register_operand" "=r")
- (match_operand:TDI 1 "memory_operand" "+S"))
+ (match_operand:TDI 1 "s_operand" "+S"))
(set (match_dup 1)
(unspec_volatile:TDI
[(match_dup 1)
(match_operand:TDI 2 "register_operand" "0")
(match_operand:TDI 3 "register_operand" "r")]
UNSPECV_CAS))
- (set (reg:CCZ1 CC_REGNUM)
- (compare:CCZ1 (match_dup 1) (match_dup 2)))]
- "TARGET_ZARCH"
+ (set (reg CC_REGNUM)
+ (compare (match_dup 1) (match_dup 2)))]
+ "TARGET_ZARCH
+ && s390_match_ccmode (insn, CCZ1mode)"
"c<td>sg\t%0,%3,%S1"
[(set_attr "op_type" "RSY")
(set_attr "type" "sem")])
@@ -10314,16 +10287,17 @@
; cds, cdsy
(define_insn "*atomic_compare_and_swapdi_2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
- (match_operand:DI 1 "memory_operand" "+Q,S"))
+ (match_operand:DI 1 "s_operand" "+Q,S"))
(set (match_dup 1)
(unspec_volatile:DI
[(match_dup 1)
(match_operand:DI 2 "register_operand" "0,0")
(match_operand:DI 3 "register_operand" "r,r")]
UNSPECV_CAS))
- (set (reg:CCZ1 CC_REGNUM)
- (compare:CCZ1 (match_dup 1) (match_dup 2)))]
- "!TARGET_ZARCH"
+ (set (reg CC_REGNUM)
+ (compare (match_dup 1) (match_dup 2)))]
+ "!TARGET_ZARCH
+ && s390_match_ccmode (insn, CCZ1mode)"
"@
cds\t%0,%3,%S1
cdsy\t%0,%3,%S1"
@@ -10334,16 +10308,16 @@
; cs, csy
(define_insn "*atomic_compare_and_swapsi_3"
[(set (match_operand:SI 0 "register_operand" "=r,r")
- (match_operand:SI 1 "memory_operand" "+Q,S"))
+ (match_operand:SI 1 "s_operand" "+Q,S"))
(set (match_dup 1)
(unspec_volatile:SI
[(match_dup 1)
(match_operand:SI 2 "register_operand" "0,0")
(match_operand:SI 3 "register_operand" "r,r")]
UNSPECV_CAS))
- (set (reg:CCZ1 CC_REGNUM)
- (compare:CCZ1 (match_dup 1) (match_dup 2)))]
- ""
+ (set (reg CC_REGNUM)
+ (compare (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCZ1mode)"
"@
cs\t%0,%3,%S1
csy\t%0,%3,%S1"
@@ -10430,15 +10404,25 @@
DONE;
})
+;; Pattern to implement atomic_exchange with a compare-and-swap loop. The code
+;; generated by the middleend is not good.
(define_expand "atomic_exchange<mode>"
- [(match_operand:HQI 0 "register_operand") ;; val out
- (match_operand:HQI 1 "memory_operand") ;; memory
- (match_operand:HQI 2 "general_operand") ;; val in
+ [(match_operand:DINT 0 "register_operand") ;; val out
+ (match_operand:DINT 1 "s_operand") ;; memory
+ (match_operand:DINT 2 "general_operand") ;; val in
(match_operand:SI 3 "const_int_operand")] ;; model
""
{
- s390_expand_atomic (<MODE>mode, SET, operands[0], operands[1],
- operands[2], false);
+ if (<MODE>mode != QImode
+ && MEM_ALIGN (operands[1]) < GET_MODE_BITSIZE (<MODE>mode))
+ FAIL;
+ if (<MODE>mode == HImode || <MODE>mode == QImode)
+ s390_expand_atomic (<MODE>mode, SET, operands[0], operands[1], operands[2],
+ false);
+ else if (<MODE>mode == SImode || TARGET_ZARCH)
+ s390_expand_atomic_exchange_tdsi (operands[0], operands[1], operands[2]);
+ else
+ FAIL;
DONE;
})
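Finally, as a hedged illustration of the fallback path used by the new atomic_exchange<mode> expander (the compare-and-swap loop in s390_expand_atomic_exchange_tdsi, taken when the exchange-with-zero shortcut does not apply), an equivalent C loop might look like this; the function name is made up for the example:

#include <stdint.h>

/* Illustrative C equivalent of the compare-and-swap loop emitted by
   s390_expand_atomic_exchange_tdsi: retry the CS until it succeeds, and
   the memory value observed by the successful attempt is the result.  */
static uint64_t
exchange_via_cs_loop (uint64_t *mem, uint64_t input)
{
  uint64_t old = *mem;
  while (!__atomic_compare_exchange_n (mem, &old, input, /* weak */ 0,
                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
    ;  /* on failure, OLD has been reloaded with the current contents */
  return old;
}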