-rw-r--r--   gcc/ChangeLog                               |  26
-rw-r--r--   gcc/builtins.c                              |  84
-rw-r--r--   gcc/builtins.h                              |   1
-rw-r--r--   gcc/config/i386/sync.md                     | 111
-rw-r--r--   gcc/doc/md.texi                             |  27
-rw-r--r--   gcc/internal-fn.c                           |  25
-rw-r--r--   gcc/internal-fn.def                         |   5
-rw-r--r--   gcc/optabs.def                              |   3
-rw-r--r--   gcc/testsuite/ChangeLog                     |   6
-rw-r--r--   gcc/testsuite/gcc.target/i386/pr49244-1.c   | 188
-rw-r--r--   gcc/testsuite/gcc.target/i386/pr49244-2.c   | 108
-rw-r--r--   gcc/tree-ssa-ccp.c                          | 292
12 files changed, 876 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index cc520e895ef..7122b6c2ba9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,29 @@
+2016-05-03 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/49244
+ * tree-ssa-ccp.c: Include stor-layout.h and optabs-query.h.
+ (optimize_atomic_bit_test_and): New function.
+ (pass_fold_builtins::execute): Use it.
+ * optabs.def (atomic_bit_test_and_set_optab,
+ atomic_bit_test_and_complement_optab,
+ atomic_bit_test_and_reset_optab): New optabs.
+ * internal-fn.def (ATOMIC_BIT_TEST_AND_SET,
+ ATOMIC_BIT_TEST_AND_COMPLEMENT, ATOMIC_BIT_TEST_AND_RESET): New ifns.
+ * builtins.h (expand_ifn_atomic_bit_test_and): New prototype.
+ * builtins.c (expand_ifn_atomic_bit_test_and): New function.
+ * internal-fn.c (expand_ATOMIC_BIT_TEST_AND_SET,
+ expand_ATOMIC_BIT_TEST_AND_COMPLEMENT,
+ expand_ATOMIC_BIT_TEST_AND_RESET): New functions.
+ * doc/md.texi (atomic_bit_test_and_set@var{mode},
+ atomic_bit_test_and_complement@var{mode},
+ atomic_bit_test_and_reset@var{mode}): Document.
+ * config/i386/sync.md (atomic_bit_test_and_set<mode>,
+ atomic_bit_test_and_complement<mode>,
+ atomic_bit_test_and_reset<mode>): New expanders.
+ (atomic_bit_test_and_set<mode>_1,
+ atomic_bit_test_and_complement<mode>_1,
+ atomic_bit_test_and_reset<mode>_1): New insns.
+
2016-05-03 Richard Sandiford <richard.sandiford@arm.com>
PR rtl-optimization/70687
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 3d89bafe34a..7d876199bca 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5310,6 +5310,90 @@ expand_builtin_atomic_fetch_op (machine_mode mode, tree exp, rtx target,
return ret;
}
+/* Expand IFN_ATOMIC_BIT_TEST_AND_* internal function. */
+
+void
+expand_ifn_atomic_bit_test_and (gcall *call)
+{
+ tree ptr = gimple_call_arg (call, 0);
+ tree bit = gimple_call_arg (call, 1);
+ tree flag = gimple_call_arg (call, 2);
+ tree lhs = gimple_call_lhs (call);
+ enum memmodel model = MEMMODEL_SYNC_SEQ_CST;
+ machine_mode mode = TYPE_MODE (TREE_TYPE (flag));
+ enum rtx_code code;
+ optab optab;
+ struct expand_operand ops[5];
+
+ gcc_assert (flag_inline_atomics);
+
+ if (gimple_call_num_args (call) == 4)
+ model = get_memmodel (gimple_call_arg (call, 3));
+
+ rtx mem = get_builtin_sync_mem (ptr, mode);
+ rtx val = expand_expr_force_mode (bit, mode);
+
+ switch (gimple_call_internal_fn (call))
+ {
+ case IFN_ATOMIC_BIT_TEST_AND_SET:
+ code = IOR;
+ optab = atomic_bit_test_and_set_optab;
+ break;
+ case IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT:
+ code = XOR;
+ optab = atomic_bit_test_and_complement_optab;
+ break;
+ case IFN_ATOMIC_BIT_TEST_AND_RESET:
+ code = AND;
+ optab = atomic_bit_test_and_reset_optab;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (lhs == NULL_TREE)
+ {
+ val = expand_simple_binop (mode, ASHIFT, const1_rtx,
+ val, NULL_RTX, true, OPTAB_DIRECT);
+ if (code == AND)
+ val = expand_simple_unop (mode, NOT, val, NULL_RTX, true);
+ expand_atomic_fetch_op (const0_rtx, mem, val, code, model, false);
+ return;
+ }
+
+ rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+ enum insn_code icode = direct_optab_handler (optab, mode);
+ gcc_assert (icode != CODE_FOR_nothing);
+ create_output_operand (&ops[0], target, mode);
+ create_fixed_operand (&ops[1], mem);
+ create_convert_operand_to (&ops[2], val, mode, true);
+ create_integer_operand (&ops[3], model);
+ create_integer_operand (&ops[4], integer_onep (flag));
+ if (maybe_expand_insn (icode, 5, ops))
+ return;
+
+ rtx bitval = val;
+ val = expand_simple_binop (mode, ASHIFT, const1_rtx,
+ val, NULL_RTX, true, OPTAB_DIRECT);
+ rtx maskval = val;
+ if (code == AND)
+ val = expand_simple_unop (mode, NOT, val, NULL_RTX, true);
+ rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, val,
+ code, model, false);
+ if (integer_onep (flag))
+ {
+ result = expand_simple_binop (mode, ASHIFTRT, result, bitval,
+ NULL_RTX, true, OPTAB_DIRECT);
+ result = expand_simple_binop (mode, AND, result, const1_rtx, target,
+ true, OPTAB_DIRECT);
+ }
+ else
+ result = expand_simple_binop (mode, AND, result, maskval, target, true,
+ OPTAB_DIRECT);
+ if (result != target)
+ emit_move_insn (target, result);
+}
+
/* Expand an atomic clear operation.
void _atomic_clear (BOOL *obj, enum memmodel)
EXP is the call expression. */
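
The new expand_ifn_atomic_bit_test_and added in this hunk first tries the target's atomic_bit_test_and_* pattern and otherwise falls back to a plain atomic fetch-op followed by bit extraction.  A minimal C sketch of that fallback, for the set/IOR case (function and parameter names are illustrative, the memory model is fixed to seq_cst for brevity, and the reset/AND case uses the complemented mask instead):

    /* Model of the generic fallback: build the mask, do the ordinary
       atomic fetch-or, then extract the old bit either as a 0/1 value
       (flag != 0) or left in its original position (flag == 0).  */
    static unsigned int
    bit_test_and_set_fallback (unsigned int *mem, unsigned int bit, int flag)
    {
      unsigned int mask = 1u << bit;
      unsigned int old = __atomic_fetch_or (mem, mask, __ATOMIC_SEQ_CST);
      return flag ? (old >> bit) & 1u : old & mask;
    }
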
diff --git a/gcc/builtins.h b/gcc/builtins.h
index b49def349d0..51e298cb76b 100644
--- a/gcc/builtins.h
+++ b/gcc/builtins.h
@@ -71,6 +71,7 @@ extern tree std_fn_abi_va_list (tree);
extern tree std_canonical_va_list_type (tree);
extern void std_expand_builtin_va_start (tree, rtx);
extern void expand_builtin_trap (void);
+extern void expand_ifn_atomic_bit_test_and (gcall *);
extern rtx expand_builtin (tree, rtx, rtx, machine_mode, int);
extern rtx expand_builtin_with_bounds (tree, rtx, rtx, machine_mode, int);
extern enum built_in_function builtin_mathfn_code (const_tree);
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index bc4fd34e6d5..8322676a7b0 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -605,3 +605,114 @@
(clobber (reg:CC FLAGS_REG))]
""
"lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
+
+(define_expand "atomic_bit_test_and_set<mode>"
+ [(match_operand:SWI248 0 "register_operand")
+ (match_operand:SWI248 1 "memory_operand")
+ (match_operand:SWI248 2 "nonmemory_operand")
+ (match_operand:SI 3 "const_int_operand") ;; model
+ (match_operand:SI 4 "const_int_operand")]
+ ""
+{
+ emit_insn (gen_atomic_bit_test_and_set<mode>_1 (operands[1], operands[2],
+ operands[3]));
+ rtx tem = gen_reg_rtx (QImode);
+ ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
+ rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
+ if (operands[4] == const0_rtx)
+ result = expand_simple_binop (<MODE>mode, ASHIFT, result,
+ operands[2], operands[0], 0, OPTAB_DIRECT);
+ if (result != operands[0])
+ emit_move_insn (operands[0], result);
+ DONE;
+})
+
+(define_insn "atomic_bit_test_and_set<mode>_1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (unspec_volatile:SWI248
+ [(match_operand:SWI248 0 "memory_operand" "+m")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPECV_XCHG)
+ (const_int 0)))
+ (set (zero_extract:SWI248 (match_dup 0)
+ (const_int 1)
+ (match_operand:SWI248 1 "nonmemory_operand" "rN"))
+ (const_int 1))]
+ ""
+ "lock{%;} %K2bts{<imodesuffix>}\t{%1, %0|%0, %1}")
+
+(define_expand "atomic_bit_test_and_complement<mode>"
+ [(match_operand:SWI248 0 "register_operand")
+ (match_operand:SWI248 1 "memory_operand")
+ (match_operand:SWI248 2 "nonmemory_operand")
+ (match_operand:SI 3 "const_int_operand") ;; model
+ (match_operand:SI 4 "const_int_operand")]
+ ""
+{
+ emit_insn (gen_atomic_bit_test_and_complement<mode>_1 (operands[1],
+ operands[2],
+ operands[3]));
+ rtx tem = gen_reg_rtx (QImode);
+ ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
+ rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
+ if (operands[4] == const0_rtx)
+ result = expand_simple_binop (<MODE>mode, ASHIFT, result,
+ operands[2], operands[0], 0, OPTAB_DIRECT);
+ if (result != operands[0])
+ emit_move_insn (operands[0], result);
+ DONE;
+})
+
+(define_insn "atomic_bit_test_and_complement<mode>_1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (unspec_volatile:SWI248
+ [(match_operand:SWI248 0 "memory_operand" "+m")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPECV_XCHG)
+ (const_int 0)))
+ (set (zero_extract:SWI248 (match_dup 0)
+ (const_int 1)
+ (match_operand:SWI248 1 "nonmemory_operand" "rN"))
+ (not:SWI248 (zero_extract:SWI248 (match_dup 0)
+ (const_int 1)
+ (match_dup 1))))]
+ ""
+ "lock{%;} %K2btc{<imodesuffix>}\t{%1, %0|%0, %1}")
+
+(define_expand "atomic_bit_test_and_reset<mode>"
+ [(match_operand:SWI248 0 "register_operand")
+ (match_operand:SWI248 1 "memory_operand")
+ (match_operand:SWI248 2 "nonmemory_operand")
+ (match_operand:SI 3 "const_int_operand") ;; model
+ (match_operand:SI 4 "const_int_operand")]
+ ""
+{
+ emit_insn (gen_atomic_bit_test_and_reset<mode>_1 (operands[1], operands[2],
+ operands[3]));
+ rtx tem = gen_reg_rtx (QImode);
+ ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
+ rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
+ if (operands[4] == const0_rtx)
+ result = expand_simple_binop (<MODE>mode, ASHIFT, result,
+ operands[2], operands[0], 0, OPTAB_DIRECT);
+ if (result != operands[0])
+ emit_move_insn (operands[0], result);
+ DONE;
+})
+
+(define_insn "atomic_bit_test_and_reset<mode>_1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (unspec_volatile:SWI248
+ [(match_operand:SWI248 0 "memory_operand" "+m")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPECV_XCHG)
+ (const_int 0)))
+ (set (zero_extract:SWI248 (match_dup 0)
+ (const_int 1)
+ (match_operand:SWI248 1 "nonmemory_operand" "rN"))
+ (const_int 0))]
+ ""
+ "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}")
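
Operand 4 of the three expanders added above selects between the two result shapes the GIMPLE pass recognizes.  The two functions below (adapted from f1 and f21 in the new pr49244-1.c test) illustrate the distinction; the comments describe the intended behaviour, not a guarantee about generated code:

    /* Only a 0/1 answer is needed: every use of the masked value is a
       comparison against zero, so flag 1 is passed and the expander can
       use the setcc result directly (operand 4 == const1_rtx).  */
    int
    returns_bool (int *a, int bit)
    {
      unsigned int mask = 1u << bit;
      return (__sync_fetch_and_or (a, mask) & mask) != 0;
    }

    /* The masked value itself is returned, so flag 0 is passed and the
       expander shifts the flag back into the original bit position
       (operand 4 == const0_rtx).  */
    int
    returns_masked_bit (int *a, int bit)
    {
      unsigned int mask = 1u << bit;
      return __sync_fetch_and_or (a, mask) & mask;
    }
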
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 4c83719588a..afaecef4e54 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6909,6 +6909,33 @@ The specific value that defines "set" is implementation defined, and
is normally based on what is performed by the native atomic test and set
instruction.
+@cindex @code{atomic_bit_test_and_set@var{mode}} instruction pattern
+@cindex @code{atomic_bit_test_and_complement@var{mode}} instruction pattern
+@cindex @code{atomic_bit_test_and_reset@var{mode}} instruction pattern
+@item @samp{atomic_bit_test_and_set@var{mode}}
+@itemx @samp{atomic_bit_test_and_complement@var{mode}}
+@itemx @samp{atomic_bit_test_and_reset@var{mode}}
+These patterns emit code for an atomic bitwise operation on memory with memory
+model semantics, and return the original value of the specified bit.
+Operand 0 is an output operand which contains the value of the specified bit
+from the memory location before the operation was performed. Operand 1 is the
+memory on which the atomic operation is performed. Operand 2 is the bit within
+the operand, starting with least significant bit. Operand 3 is the memory model
+to be used by the operation. Operand 4 is a flag - it is @code{const1_rtx}
+if operand 0 should contain the original value of the specified bit in the
+least significant bit of the operand, and @code{const0_rtx} if the bit should
+be in its original position in the operand.
+@code{atomic_bit_test_and_set@var{mode}} atomically sets the specified bit after
+remembering its original value, @code{atomic_bit_test_and_complement@var{mode}}
+inverts the specified bit and @code{atomic_bit_test_and_reset@var{mode}} clears
+the specified bit.
+
+If these patterns are not defined, attempts will be made to use
+@code{atomic_fetch_or@var{mode}}, @code{atomic_fetch_xor@var{mode}} or
+@code{atomic_fetch_and@var{mode}} instruction patterns, or their @code{sync}
+counterparts. If none of these are available a compare-and-swap
+loop will be used.
+
@cindex @code{mem_thread_fence@var{mode}} instruction pattern
@item @samp{mem_thread_fence@var{mode}}
This pattern emits code required to implement a thread fence with
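
The documentation added above notes that, without these patterns, the atomic_fetch_* or sync patterns are tried and finally a compare-and-swap loop is used.  As a rough illustration of that last-resort loop for the complement case (purely a sketch, not code from the patch):

    /* Last-resort CAS loop: retry the xor until the exchange succeeds and
       report the bit's value from before the operation.  */
    static unsigned int
    bit_test_and_complement_cas (unsigned int *mem, unsigned int bit)
    {
      unsigned int mask = 1u << bit;
      unsigned int old = __atomic_load_n (mem, __ATOMIC_RELAXED);
      while (!__atomic_compare_exchange_n (mem, &old, old ^ mask, 0,
                                           __ATOMIC_SEQ_CST, __ATOMIC_RELAXED))
        ;
      return old & mask;
    }
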
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index e70c73aba8a..c867ddc0ef7 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see
#include "expr.h"
#include "ubsan.h"
#include "recog.h"
+#include "builtins.h"
/* The names of each internal function, indexed by function number. */
const char *const internal_fn_name_array[] = {
@@ -2118,6 +2119,30 @@ expand_SET_EDOM (internal_fn, gcall *)
#endif
}
+/* Expand atomic bit test and set. */
+
+static void
+expand_ATOMIC_BIT_TEST_AND_SET (internal_fn, gcall *call)
+{
+ expand_ifn_atomic_bit_test_and (call);
+}
+
+/* Expand atomic bit test and complement. */
+
+static void
+expand_ATOMIC_BIT_TEST_AND_COMPLEMENT (internal_fn, gcall *call)
+{
+ expand_ifn_atomic_bit_test_and (call);
+}
+
+/* Expand atomic bit test and reset. */
+
+static void
+expand_ATOMIC_BIT_TEST_AND_RESET (internal_fn, gcall *call)
+{
+ expand_ifn_atomic_bit_test_and (call);
+}
+
/* Expand a call to FN using the operands in STMT. FN has a single
output operand and NARGS input operands. */
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index a62f3e8034e..e729d852a13 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -189,6 +189,11 @@ DEF_INTERNAL_FN (GOACC_REDUCTION, ECF_NOTHROW | ECF_LEAF, NULL)
current target. */
DEF_INTERNAL_FN (SET_EDOM, ECF_LEAF | ECF_NOTHROW, NULL)
+/* Atomic functions. */
+DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL)
+
#undef DEF_INTERNAL_INT_FN
#undef DEF_INTERNAL_FLT_FN
#undef DEF_INTERNAL_OPTAB_FN
diff --git a/gcc/optabs.def b/gcc/optabs.def
index c938b42ea4e..8875e30d416 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -337,6 +337,9 @@ OPTAB_D (atomic_add_fetch_optab, "atomic_add_fetch$I$a")
OPTAB_D (atomic_add_optab, "atomic_add$I$a")
OPTAB_D (atomic_and_fetch_optab, "atomic_and_fetch$I$a")
OPTAB_D (atomic_and_optab, "atomic_and$I$a")
+OPTAB_D (atomic_bit_test_and_set_optab, "atomic_bit_test_and_set$I$a")
+OPTAB_D (atomic_bit_test_and_complement_optab, "atomic_bit_test_and_complement$I$a")
+OPTAB_D (atomic_bit_test_and_reset_optab, "atomic_bit_test_and_reset$I$a")
OPTAB_D (atomic_compare_and_swap_optab, "atomic_compare_and_swap$I$a")
OPTAB_D (atomic_exchange_optab, "atomic_exchange$I$a")
OPTAB_D (atomic_fetch_add_optab, "atomic_fetch_add$I$a")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index bc7c93f5a5d..b29f76334fe 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-05-03 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/49244
+ * gcc.target/i386/pr49244-1.c: New test.
+ * gcc.target/i386/pr49244-2.c: New test.
+
2016-05-03 Bernd Schmidt <bschmidt@redhat.com>
PR rtl-optimization/44281
diff --git a/gcc/testsuite/gcc.target/i386/pr49244-1.c b/gcc/testsuite/gcc.target/i386/pr49244-1.c
new file mode 100644
index 00000000000..70ccf6e935a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr49244-1.c
@@ -0,0 +1,188 @@
+/* PR target/49244 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void bar (void);
+
+__attribute__((noinline, noclone)) int
+f1 (int *a, int bit)
+{
+ unsigned int mask = (1u << bit);
+ return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f2 (int *a, int bit)
+{
+ unsigned int mask = (1u << bit);
+ unsigned int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
+ unsigned int t2 = t1 & mask;
+ return t2 != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f3 (long int *a, int bit)
+{
+ unsigned long int mask = (1ul << bit);
+ return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f4 (int *a)
+{
+ unsigned int mask = (1u << 7);
+ return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f5 (int *a)
+{
+ unsigned int mask = (1u << 13);
+ return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f6 (int *a)
+{
+ unsigned int mask = (1u << 0);
+ return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int *a, int bit)
+{
+ unsigned int mask = (1u << bit);
+ if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
+ bar ();
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int *a, int bit)
+{
+ unsigned int mask = (1u << bit);
+ if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
+ bar ();
+}
+
+__attribute__((noinline, noclone)) int
+f9 (int *a, int bit)
+{
+ unsigned int mask = (1u << bit);
+ return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f10 (int *a)
+{
+ unsigned int mask = (1u << 7);
+ return (__sync_fetch_and_xor (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f11 (int *a)
+{
+ unsigned int mask = (1u << 13);
+ return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f12 (int *a)
+{
+ unsigned int mask = (1u << 0);
+ return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f13 (int *a, int bit)
+{
+ unsigned int mask = (1u << bit);
+ return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f14 (int *a, int bit)
+{
+ unsigned int mask = (1u << bit);
+ return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f15 (int *a, int bit)
+{
+ unsigned int mask = (1u << bit);
+ return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f16 (int *a)
+{
+ unsigned int mask = (1u << 7);
+ return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f17 (int *a)
+{
+ unsigned int mask = (1u << 13);
+ return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f18 (int *a)
+{
+ unsigned int mask = (1u << 0);
+ return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) unsigned long int
+f19 (unsigned long int *a, int bit)
+{
+ unsigned long int mask = (1ul << bit);
+ return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) unsigned long int
+f20 (unsigned long int *a)
+{
+ unsigned long int mask = (1ul << 7);
+ return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f21 (int *a, int bit)
+{
+ unsigned int mask = (1u << bit);
+ return (__sync_fetch_and_or (a, mask) & mask);
+}
+
+__attribute__((noinline, noclone)) unsigned long int
+f22 (unsigned long int *a)
+{
+ unsigned long int mask = (1ul << 7);
+ return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) unsigned long int
+f23 (unsigned long int *a)
+{
+ unsigned long int mask = (1ul << 7);
+ return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) unsigned short int
+f24 (unsigned short int *a)
+{
+ unsigned short int mask = (1u << 7);
+ return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) unsigned short int
+f25 (unsigned short int *a)
+{
+ unsigned short int mask = (1u << 7);
+ return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr49244-2.c b/gcc/testsuite/gcc.target/i386/pr49244-2.c
new file mode 100644
index 00000000000..847408e1a3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr49244-2.c
@@ -0,0 +1,108 @@
+/* PR target/49244 */
+/* { dg-do run } */
+/* { dg-options "-O2 -g" } */
+
+int cnt;
+
+__attribute__((noinline, noclone)) void
+bar (void)
+{
+ cnt++;
+}
+
+#include "pr49244-1.c"
+
+int a;
+long int b;
+unsigned long int c;
+unsigned short int d;
+
+int
+main ()
+{
+ __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
+ if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
+ || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
+ __builtin_abort ();
+ if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
+ || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
+ __builtin_abort ();
+ __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
+ if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
+ || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
+ __builtin_abort ();
+ __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
+ if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
+ || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
+ __builtin_abort ();
+ if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+ || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
+ __builtin_abort ();
+ if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
+ || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if (cnt != 0
+ || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+ || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+ || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+ || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+ || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+ || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+ || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+ || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+ __builtin_abort ();
+ if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+ || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+ __builtin_abort ();
+ if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+ || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+ __builtin_abort ();
+ __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
+ if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+ || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+ __builtin_abort ();
+ if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+ || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+ __builtin_abort ();
+ if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+ || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+ __builtin_abort ();
+ if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+ || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+ __builtin_abort ();
+ if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+ || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+ __builtin_abort ();
+ __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
+ if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
+ || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
+ __builtin_abort ();
+ __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
+ if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+ || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+ __builtin_abort ();
+ if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+ || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+ __builtin_abort ();
+ if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
+ || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
+ __builtin_abort ();
+ __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
+ if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+ || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+ || cnt != 2)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index baae03f8042..c4e27f1cfe9 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -140,6 +140,8 @@ along with GCC; see the file COPYING3. If not see
#include "builtins.h"
#include "tree-chkp.h"
#include "cfgloop.h"
+#include "stor-layout.h"
+#include "optabs-query.h"
/* Possible lattice values. */
@@ -2697,6 +2699,224 @@ optimize_unreachable (gimple_stmt_iterator i)
return ret;
}
+/* Optimize
+ mask_2 = 1 << cnt_1;
+ _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
+ _5 = _4 & mask_2;
+ to
+ _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
+ _5 = _4;
+ If _5 is only used in _5 != 0 or _5 == 0 comparisons, 1
+ is passed instead of 0, and the builtin just returns a zero
+ or 1 value instead of the actual bit.
+ Similarly for __sync_fetch_and_or_* (without the ", _3" part
+ in there), and/or if mask_2 is a power of 2 constant.
+ Similarly for xor instead of or, use ATOMIC_BIT_TEST_AND_COMPLEMENT
+ in that case. And similarly for and instead of or, except that
+ the second argument to the builtin needs to be one's complement
+ of the mask instead of mask. */
+
+static void
+optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
+ enum internal_fn fn, bool has_model_arg,
+ bool after)
+{
+ gimple *call = gsi_stmt (*gsip);
+ tree lhs = gimple_call_lhs (call);
+ use_operand_p use_p;
+ gimple *use_stmt;
+ tree mask, bit;
+ optab optab;
+
+ if (!flag_inline_atomics
+ || optimize_debug
+ || !gimple_call_builtin_p (call, BUILT_IN_NORMAL)
+ || !lhs
+ || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
+ || !single_imm_use (lhs, &use_p, &use_stmt)
+ || !is_gimple_assign (use_stmt)
+ || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
+ || !gimple_vdef (call))
+ return;
+
+ switch (fn)
+ {
+ case IFN_ATOMIC_BIT_TEST_AND_SET:
+ optab = atomic_bit_test_and_set_optab;
+ break;
+ case IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT:
+ optab = atomic_bit_test_and_complement_optab;
+ break;
+ case IFN_ATOMIC_BIT_TEST_AND_RESET:
+ optab = atomic_bit_test_and_reset_optab;
+ break;
+ default:
+ return;
+ }
+
+ if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
+ return;
+
+ mask = gimple_call_arg (call, 1);
+ tree use_lhs = gimple_assign_lhs (use_stmt);
+ if (!use_lhs)
+ return;
+
+ if (TREE_CODE (mask) == INTEGER_CST)
+ {
+ if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+ mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
+ mask = fold_convert (TREE_TYPE (lhs), mask);
+ int ibit = tree_log2 (mask);
+ if (ibit < 0)
+ return;
+ bit = build_int_cst (TREE_TYPE (lhs), ibit);
+ }
+ else if (TREE_CODE (mask) == SSA_NAME)
+ {
+ gimple *g = SSA_NAME_DEF_STMT (mask);
+ if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+ {
+ if (!is_gimple_assign (g)
+ || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+ return;
+ mask = gimple_assign_rhs1 (g);
+ if (TREE_CODE (mask) != SSA_NAME)
+ return;
+ g = SSA_NAME_DEF_STMT (mask);
+ }
+ if (!is_gimple_assign (g)
+ || gimple_assign_rhs_code (g) != LSHIFT_EXPR
+ || !integer_onep (gimple_assign_rhs1 (g)))
+ return;
+ bit = gimple_assign_rhs2 (g);
+ }
+ else
+ return;
+
+ if (gimple_assign_rhs1 (use_stmt) == lhs)
+ {
+ if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+ return;
+ }
+ else if (gimple_assign_rhs2 (use_stmt) != lhs
+ || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
+ return;
+
+ bool use_bool = true;
+ bool has_debug_uses = false;
+ imm_use_iterator iter;
+ gimple *g;
+
+ if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
+ use_bool = false;
+ FOR_EACH_IMM_USE_STMT (g, iter, use_lhs)
+ {
+ enum tree_code code = ERROR_MARK;
+ tree op0, op1;
+ if (is_gimple_debug (g))
+ {
+ has_debug_uses = true;
+ continue;
+ }
+ else if (is_gimple_assign (g))
+ switch (gimple_assign_rhs_code (g))
+ {
+ case COND_EXPR:
+ op1 = gimple_assign_rhs1 (g);
+ code = TREE_CODE (op1);
+ op0 = TREE_OPERAND (op1, 0);
+ op1 = TREE_OPERAND (op1, 1);
+ break;
+ case EQ_EXPR:
+ case NE_EXPR:
+ code = gimple_assign_rhs_code (g);
+ op0 = gimple_assign_rhs1 (g);
+ op1 = gimple_assign_rhs2 (g);
+ break;
+ default:
+ break;
+ }
+ else if (gimple_code (g) == GIMPLE_COND)
+ {
+ code = gimple_cond_code (g);
+ op0 = gimple_cond_lhs (g);
+ op1 = gimple_cond_rhs (g);
+ }
+
+ if ((code == EQ_EXPR || code == NE_EXPR)
+ && op0 == use_lhs
+ && integer_zerop (op1))
+ {
+ use_operand_p use_p;
+ int n = 0;
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+ n++;
+ if (n == 1)
+ continue;
+ }
+
+ use_bool = false;
+ BREAK_FROM_IMM_USE_STMT (iter);
+ }
+
+ tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
+ tree flag = build_int_cst (TREE_TYPE (lhs), use_bool);
+ if (has_model_arg)
+ g = gimple_build_call_internal (fn, 4, gimple_call_arg (call, 0),
+ bit, flag, gimple_call_arg (call, 2));
+ else
+ g = gimple_build_call_internal (fn, 3, gimple_call_arg (call, 0),
+ bit, flag);
+ gimple_call_set_lhs (g, new_lhs);
+ gimple_set_location (g, gimple_location (call));
+ gimple_set_vuse (g, gimple_vuse (call));
+ gimple_set_vdef (g, gimple_vdef (call));
+ SSA_NAME_DEF_STMT (gimple_vdef (call)) = g;
+ gimple_stmt_iterator gsi = *gsip;
+ gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+ if (after)
+ {
+ /* The internal function returns the value of the specified bit
+ before the atomic operation. If we are interested in the value
+ of the specified bit after the atomic operation (makes only sense
+ for xor, otherwise the bit content is compile time known),
+ we need to invert the bit. */
+ g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
+ BIT_XOR_EXPR, new_lhs,
+ use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
+ : mask);
+ new_lhs = gimple_assign_lhs (g);
+ gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+ }
+ if (use_bool && has_debug_uses)
+ {
+ tree temp = make_node (DEBUG_EXPR_DECL);
+ DECL_ARTIFICIAL (temp) = 1;
+ TREE_TYPE (temp) = TREE_TYPE (lhs);
+ DECL_MODE (temp) = TYPE_MODE (TREE_TYPE (lhs));
+ tree t = build2 (LSHIFT_EXPR, TREE_TYPE (lhs), new_lhs, bit);
+ g = gimple_build_debug_bind (temp, t, g);
+ gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+ FOR_EACH_IMM_USE_STMT (g, iter, use_lhs)
+ if (is_gimple_debug (g))
+ {
+ use_operand_p use_p;
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+ SET_USE (use_p, temp);
+ update_stmt (g);
+ }
+ }
+ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_lhs)
+ = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs);
+ replace_uses_by (use_lhs, new_lhs);
+ gsi = gsi_for_stmt (use_stmt);
+ gsi_remove (&gsi, true);
+ release_defs (use_stmt);
+ gsi_remove (gsip, true);
+ release_ssa_name (lhs);
+}
+
/* A simple pass that attempts to fold all builtin functions. This pass
is run after we've propagated as many constants as we can. */
@@ -2806,6 +3026,78 @@ pass_fold_builtins::execute (function *fun)
cfg_changed = true;
break;
+ case BUILT_IN_ATOMIC_FETCH_OR_1:
+ case BUILT_IN_ATOMIC_FETCH_OR_2:
+ case BUILT_IN_ATOMIC_FETCH_OR_4:
+ case BUILT_IN_ATOMIC_FETCH_OR_8:
+ case BUILT_IN_ATOMIC_FETCH_OR_16:
+ optimize_atomic_bit_test_and (&i,
+ IFN_ATOMIC_BIT_TEST_AND_SET,
+ true, false);
+ break;
+ case BUILT_IN_SYNC_FETCH_AND_OR_1:
+ case BUILT_IN_SYNC_FETCH_AND_OR_2:
+ case BUILT_IN_SYNC_FETCH_AND_OR_4:
+ case BUILT_IN_SYNC_FETCH_AND_OR_8:
+ case BUILT_IN_SYNC_FETCH_AND_OR_16:
+ optimize_atomic_bit_test_and (&i,
+ IFN_ATOMIC_BIT_TEST_AND_SET,
+ false, false);
+ break;
+
+ case BUILT_IN_ATOMIC_FETCH_XOR_1:
+ case BUILT_IN_ATOMIC_FETCH_XOR_2:
+ case BUILT_IN_ATOMIC_FETCH_XOR_4:
+ case BUILT_IN_ATOMIC_FETCH_XOR_8:
+ case BUILT_IN_ATOMIC_FETCH_XOR_16:
+ optimize_atomic_bit_test_and
+ (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, false);
+ break;
+ case BUILT_IN_SYNC_FETCH_AND_XOR_1:
+ case BUILT_IN_SYNC_FETCH_AND_XOR_2:
+ case BUILT_IN_SYNC_FETCH_AND_XOR_4:
+ case BUILT_IN_SYNC_FETCH_AND_XOR_8:
+ case BUILT_IN_SYNC_FETCH_AND_XOR_16:
+ optimize_atomic_bit_test_and
+ (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, false);
+ break;
+
+ case BUILT_IN_ATOMIC_XOR_FETCH_1:
+ case BUILT_IN_ATOMIC_XOR_FETCH_2:
+ case BUILT_IN_ATOMIC_XOR_FETCH_4:
+ case BUILT_IN_ATOMIC_XOR_FETCH_8:
+ case BUILT_IN_ATOMIC_XOR_FETCH_16:
+ optimize_atomic_bit_test_and
+ (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, true);
+ break;
+ case BUILT_IN_SYNC_XOR_AND_FETCH_1:
+ case BUILT_IN_SYNC_XOR_AND_FETCH_2:
+ case BUILT_IN_SYNC_XOR_AND_FETCH_4:
+ case BUILT_IN_SYNC_XOR_AND_FETCH_8:
+ case BUILT_IN_SYNC_XOR_AND_FETCH_16:
+ optimize_atomic_bit_test_and
+ (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, true);
+ break;
+
+ case BUILT_IN_ATOMIC_FETCH_AND_1:
+ case BUILT_IN_ATOMIC_FETCH_AND_2:
+ case BUILT_IN_ATOMIC_FETCH_AND_4:
+ case BUILT_IN_ATOMIC_FETCH_AND_8:
+ case BUILT_IN_ATOMIC_FETCH_AND_16:
+ optimize_atomic_bit_test_and (&i,
+ IFN_ATOMIC_BIT_TEST_AND_RESET,
+ true, false);
+ break;
+ case BUILT_IN_SYNC_FETCH_AND_AND_1:
+ case BUILT_IN_SYNC_FETCH_AND_AND_2:
+ case BUILT_IN_SYNC_FETCH_AND_AND_4:
+ case BUILT_IN_SYNC_FETCH_AND_AND_8:
+ case BUILT_IN_SYNC_FETCH_AND_AND_16:
+ optimize_atomic_bit_test_and (&i,
+ IFN_ATOMIC_BIT_TEST_AND_RESET,
+ false, false);
+ break;
+
case BUILT_IN_VA_START:
case BUILT_IN_VA_END:
case BUILT_IN_VA_COPY:
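
The BUILT_IN_ATOMIC_XOR_FETCH_* and BUILT_IN_SYNC_XOR_AND_FETCH_* cases above are the ones passed with after == true; since the internal function returns the bit value from before the atomic operation, optimize_atomic_bit_test_and emits an extra BIT_XOR (with 1 in the boolean case, with the mask otherwise) to recover the value after the xor.  A source-level example of that shape, in the spirit of f19 from pr49244-1.c:

    /* Bit value *after* the atomic complement; intended to be folded to
       ATOMIC_BIT_TEST_AND_COMPLEMENT followed by a xor of the 0/1 result.  */
    unsigned long int
    complement_and_test_after (unsigned long int *a, int bit)
    {
      unsigned long int mask = 1ul << bit;
      return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
    }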