diff options
-rw-r--r-- | gcc/ChangeLog | 26 | ||||
-rw-r--r-- | gcc/config/arc/arc-protos.h | 4 | ||||
-rw-r--r-- | gcc/config/arc/arc.c | 357 | ||||
-rw-r--r-- | gcc/config/arc/arc.h | 6 | ||||
-rw-r--r-- | gcc/config/arc/arc.md | 9 | ||||
-rw-r--r-- | gcc/config/arc/arc.opt | 3 | ||||
-rw-r--r-- | gcc/config/arc/constraints.md | 6 | ||||
-rw-r--r-- | gcc/config/arc/predicates.md | 4 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 8 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 11 |
11 files changed, 440 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 13f41540643..568381119c1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +2015-12-10 Claudiu Zissulescu <claziss@synopsys.com> + + * config/arc/arc-protos.h (arc_expand_atomic_op): Prototype. + (arc_split_compare_and_swap): Likewise. + (arc_expand_compare_and_swap): Likewise. + * config/arc/arc.c (arc_init): Check usage atomic option. + (arc_pre_atomic_barrier): New function. + (arc_post_atomic_barrier): Likewise. + (emit_unlikely_jump): Likewise. + (arc_expand_compare_and_swap_qh): Likewise. + (arc_expand_compare_and_swap): Likewise. + (arc_split_compare_and_swap): Likewise. + (arc_expand_atomic_op): Likewise. + * config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): New C macro. + (ASM_SPEC): Enable mlock option when matomic is used. + * config/arc/arc.md (UNSPEC_ARC_MEMBAR): Define. + (VUNSPEC_ARC_CAS): Likewise. + (VUNSPEC_ARC_LL): Likewise. + (VUNSPEC_ARC_SC): Likewise. + (VUNSPEC_ARC_EX): Likewise. + * config/arc/arc.opt (matomic): New option. + * config/arc/constraints.md (ATO): New constraint. + * config/arc/predicates.md (mem_noofs_operand): New predicate. + * doc/invoke.texi: Document -matomic. + * config/arc/atomic.md: New file. 
+ 2015-12-10 Richard Biener <rguenther@suse.de> PR tree-optimization/68817 diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h index 6e04351159b..3581bb0ed27 100644 --- a/gcc/config/arc/arc-protos.h +++ b/gcc/config/arc/arc-protos.h @@ -41,6 +41,10 @@ extern int arc_output_commutative_cond_exec (rtx *operands, bool); extern bool arc_expand_movmem (rtx *operands); extern bool prepare_move_operands (rtx *operands, machine_mode mode); extern void emit_shift (enum rtx_code, rtx, rtx, rtx); +extern void arc_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx); +extern void arc_split_compare_and_swap (rtx *); +extern void arc_expand_compare_and_swap (rtx *); + #endif /* RTX_CODE */ #ifdef TREE_CODE diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index 8bb0969f176..5bc2bcebb2a 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -61,6 +61,7 @@ along with GCC; see the file COPYING3. If not see #include "context.h" #include "builtins.h" #include "rtl-iter.h" +#include "alias.h" /* Which cpu we're compiling for (ARC600, ARC601, ARC700). */ static const char *arc_cpu_string = ""; @@ -884,6 +885,9 @@ arc_init (void) flag_pic = 0; } + if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS)) + error ("-matomic is only supported for ARC700 or ARC HS cores"); + arc_init_reg_tables (); /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */ @@ -9650,6 +9654,359 @@ arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, return default_use_by_pieces_infrastructure_p (size, align, op, speed_p); } +/* Emit a (pre) memory barrier around an atomic sequence according to + MODEL. */ + +static void +arc_pre_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, true)) + emit_insn (gen_memory_barrier ()); +} + +/* Emit a (post) memory barrier around an atomic sequence according to + MODEL. 
*/ + +static void +arc_post_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, false)) + emit_insn (gen_memory_barrier ()); +} + +/* Emit the jump INSN and mark it as very unlikely to be taken. */ + +static void +emit_unlikely_jump (rtx insn) +{ + int very_unlikely = REG_BR_PROB_BASE / 100 - 1; + + insn = emit_jump_insn (insn); + add_int_reg_note (insn, REG_BR_PROB, very_unlikely); +} + +/* Expand code to perform an 8 or 16-bit compare and swap by doing + 32-bit compare and swap on the word containing the byte or + half-word. The difference between a weak and a strong CAS is that + the weak version may simply fail. The strong version relies on two + loops, one checks whether the SCOND op is successful or not, the other + checks if the 32 bit accessed location which contains the 8 or 16 + bit datum is not changed by another thread. The first loop is + implemented by the atomic_compare_and_swapsi_1 pattern. The second + loop is implemented by this routine. */ + +static void +arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem, + rtx oldval, rtx newval, rtx weak, + rtx mod_s, rtx mod_f) +{ + rtx addr1 = force_reg (Pmode, XEXP (mem, 0)); + rtx addr = gen_reg_rtx (Pmode); + rtx off = gen_reg_rtx (SImode); + rtx oldv = gen_reg_rtx (SImode); + rtx newv = gen_reg_rtx (SImode); + rtx oldvalue = gen_reg_rtx (SImode); + rtx newvalue = gen_reg_rtx (SImode); + rtx res = gen_reg_rtx (SImode); + rtx resv = gen_reg_rtx (SImode); + rtx memsi, val, mask, end_label, loop_label, cc, x; + machine_mode mode; + bool is_weak = (weak != const0_rtx); + + /* Truncate the address. */ + emit_insn (gen_rtx_SET (addr, + gen_rtx_AND (Pmode, addr1, GEN_INT (-4)))); + + /* Compute the datum offset. */ + emit_insn (gen_rtx_SET (off, + gen_rtx_AND (SImode, addr1, GEN_INT (3)))); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_rtx_SET (off, + gen_rtx_MINUS (SImode, + (GET_MODE (mem) == QImode) ? + GEN_INT (3) : GEN_INT (2), off))); + + /* Normal read from truncated address. 
*/ + memsi = gen_rtx_MEM (SImode, addr); + set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER); + MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem); + + val = copy_to_reg (memsi); + + /* Convert the offset in bits. */ + emit_insn (gen_rtx_SET (off, + gen_rtx_ASHIFT (SImode, off, GEN_INT (3)))); + + /* Get the proper mask. */ + if (GET_MODE (mem) == QImode) + mask = force_reg (SImode, GEN_INT (0xff)); + else + mask = force_reg (SImode, GEN_INT (0xffff)); + + emit_insn (gen_rtx_SET (mask, + gen_rtx_ASHIFT (SImode, mask, off))); + + /* Prepare the old and new values. */ + emit_insn (gen_rtx_SET (val, + gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), + val))); + + oldval = gen_lowpart (SImode, oldval); + emit_insn (gen_rtx_SET (oldv, + gen_rtx_ASHIFT (SImode, oldval, off))); + + newval = gen_lowpart_common (SImode, newval); + emit_insn (gen_rtx_SET (newv, + gen_rtx_ASHIFT (SImode, newval, off))); + + emit_insn (gen_rtx_SET (oldv, + gen_rtx_AND (SImode, oldv, mask))); + + emit_insn (gen_rtx_SET (newv, + gen_rtx_AND (SImode, newv, mask))); + + if (!is_weak) + { + end_label = gen_label_rtx (); + loop_label = gen_label_rtx (); + emit_label (loop_label); + } + + /* Make the old and new values. */ + emit_insn (gen_rtx_SET (oldvalue, + gen_rtx_IOR (SImode, oldv, val))); + + emit_insn (gen_rtx_SET (newvalue, + gen_rtx_IOR (SImode, newv, val))); + + /* Try an 32bit atomic compare and swap. It clobbers the CC + register. */ + emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue, + weak, mod_s, mod_f)); + + /* Regardless of the weakness of the operation, a proper boolean + result needs to be provided. */ + x = gen_rtx_REG (CC_Zmode, CC_REG); + x = gen_rtx_EQ (SImode, x, const0_rtx); + emit_insn (gen_rtx_SET (bool_result, x)); + + if (!is_weak) + { + /* Check the results: if the atomic op is successfully the goto + to end label. 
*/ + x = gen_rtx_REG (CC_Zmode, CC_REG); + x = gen_rtx_EQ (VOIDmode, x, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + + /* Wait for the right moment when the accessed 32-bit location + is stable. */ + emit_insn (gen_rtx_SET (resv, + gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), + res))); + mode = SELECT_CC_MODE (NE, resv, val); + cc = gen_rtx_REG (mode, CC_REG); + emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val))); + + /* Set the new value of the 32 bit location, proper masked. */ + emit_insn (gen_rtx_SET (val, resv)); + + /* Try again if location is unstable. Fall through if only + scond op failed. */ + x = gen_rtx_NE (VOIDmode, cc, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + + emit_label (end_label); + } + + /* End: proper return the result for the given mode. */ + emit_insn (gen_rtx_SET (res, + gen_rtx_AND (SImode, res, mask))); + + emit_insn (gen_rtx_SET (res, + gen_rtx_LSHIFTRT (SImode, res, off))); + + emit_move_insn (result, gen_lowpart (GET_MODE (result), res)); +} + +/* Helper function used by "atomic_compare_and_swap" expand + pattern. 
*/ + +void +arc_expand_compare_and_swap (rtx operands[]) +{ + rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; + machine_mode mode; + + bval = operands[0]; + rval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = operands[5]; + mod_s = operands[6]; + mod_f = operands[7]; + mode = GET_MODE (mem); + + if (reg_overlap_mentioned_p (rval, oldval)) + oldval = copy_to_reg (oldval); + + if (mode == SImode) + { + emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval, + is_weak, mod_s, mod_f)); + x = gen_rtx_REG (CC_Zmode, CC_REG); + x = gen_rtx_EQ (SImode, x, const0_rtx); + emit_insn (gen_rtx_SET (bval, x)); + } + else + { + arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval, + is_weak, mod_s, mod_f); + } +} + +/* Helper function used by the "atomic_compare_and_swapsi_1" + pattern. */ + +void +arc_split_compare_and_swap (rtx operands[]) +{ + rtx rval, mem, oldval, newval; + machine_mode mode; + enum memmodel mod_s, mod_f; + bool is_weak; + rtx label1, label2, x, cond; + + rval = operands[0]; + mem = operands[1]; + oldval = operands[2]; + newval = operands[3]; + is_weak = (operands[4] != const0_rtx); + mod_s = (enum memmodel) INTVAL (operands[5]); + mod_f = (enum memmodel) INTVAL (operands[6]); + mode = GET_MODE (mem); + + /* ARC atomic ops work only with 32-bit aligned memories. */ + gcc_assert (mode == SImode); + + arc_pre_atomic_barrier (mod_s); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_label_rtx (); + emit_label (label1); + } + label2 = gen_label_rtx (); + + /* Load exclusive. */ + emit_insn (gen_arc_load_exclusivesi (rval, mem)); + + /* Check if it is oldval. 
*/ + mode = SELECT_CC_MODE (NE, rval, oldval); + cond = gen_rtx_REG (mode, CC_REG); + emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode, rval, oldval))); + + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + + /* Exclusively store new item. Store clobbers CC reg. */ + emit_insn (gen_arc_store_exclusivesi (mem, newval)); + + if (!is_weak) + { + /* Check the result of the store. */ + cond = gen_rtx_REG (CC_Zmode, CC_REG); + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + } + + if (mod_f != MEMMODEL_RELAXED) + emit_label (label2); + + arc_post_atomic_barrier (mod_s); + + if (mod_f == MEMMODEL_RELAXED) + emit_label (label2); +} + +/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation + to perform. MEM is the memory on which to operate. VAL is the second + operand of the binary operator. BEFORE and AFTER are optional locations to + return the value of MEM either before or after the operation. MODEL_RTX + is a CONST_INT containing the memory model to use. */ + +void +arc_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, + rtx orig_before, rtx orig_after, rtx model_rtx) +{ + enum memmodel model = (enum memmodel) INTVAL (model_rtx); + machine_mode mode = GET_MODE (mem); + rtx label, x, cond; + rtx before = orig_before, after = orig_after; + + /* ARC atomic ops work only with 32-bit aligned memories. */ + gcc_assert (mode == SImode); + + arc_pre_atomic_barrier (model); + + label = gen_label_rtx (); + emit_label (label); + label = gen_rtx_LABEL_REF (VOIDmode, label); + + if (before == NULL_RTX) + before = gen_reg_rtx (mode); + + if (after == NULL_RTX) + after = gen_reg_rtx (mode); + + /* Load exclusive. 
*/ + emit_insn (gen_arc_load_exclusivesi (before, mem)); + + switch (code) + { + case NOT: + x = gen_rtx_AND (mode, before, val); + emit_insn (gen_rtx_SET (after, x)); + x = gen_rtx_NOT (mode, after); + emit_insn (gen_rtx_SET (after, x)); + break; + + case MINUS: + if (CONST_INT_P (val)) + { + val = GEN_INT (-INTVAL (val)); + code = PLUS; + } + + /* FALLTHRU. */ + default: + x = gen_rtx_fmt_ee (code, mode, before, val); + emit_insn (gen_rtx_SET (after, x)); + break; + } + + /* Exclusively store new item. Store clobbers CC reg. */ + emit_insn (gen_arc_store_exclusivesi (mem, after)); + + /* Check the result of the store. */ + cond = gen_rtx_REG (CC_Zmode, CC_REG); + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + label, pc_rtx); + emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + + arc_post_atomic_barrier (model); +} + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-arc.h" diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h index d312f9f14a7..c895725e623 100644 --- a/gcc/config/arc/arc.h +++ b/gcc/config/arc/arc.h @@ -88,6 +88,10 @@ along with GCC; see the file COPYING3. If not see { \ builtin_define ("__HS__"); \ } \ + if (TARGET_ATOMIC) \ + { \ + builtin_define ("__ARC_ATOMIC__"); \ + } \ if (TARGET_NORM) \ { \ builtin_define ("__ARC_NORM__");\ @@ -153,7 +157,7 @@ along with GCC; see the file COPYING3. 
If not see %{mcpu=ARC700|!mcpu=*:%{mrtsc}} \ %{mcpu=ARCHS:-mHS} \ %{mcpu=ARCEM:-mEM} \ -" +%{matomic:-mlock}" #if DEFAULT_LIBC == LIBC_UCLIBC /* Note that the default is to link against dynamic libraries, if they are diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 1d070a30d82..ac181a98895 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -128,6 +128,12 @@ (VUNSPEC_UNIMP_S 28) ; blockage insn for unimp_s generation (VUNSPEC_NOP 29) ; volatile NOP + (UNSPEC_ARC_MEMBAR 30) + (VUNSPEC_ARC_CAS 31) + (VUNSPEC_ARC_LL 32) + (VUNSPEC_ARC_SC 33) + (VUNSPEC_ARC_EX 34) + (R0_REG 0) (R1_REG 1) (R2_REG 2) @@ -5531,3 +5537,6 @@ (include "fpx.md") (include "simdext.md") + +;; include atomic extensions +(include "atomic.md") diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt index 0c10c67c4e7..c4d7306ee98 100644 --- a/gcc/config/arc/arc.opt +++ b/gcc/config/arc/arc.opt @@ -414,3 +414,6 @@ Target Joined mmac_ Target Joined +matomic +Target Report Mask(ATOMIC) +Enable atomic instructions. diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md index 65ea44a9f13..18309cc8073 100644 --- a/gcc/config/arc/constraints.md +++ b/gcc/config/arc/constraints.md @@ -421,3 +421,9 @@ An unsigned 6-bit integer constant, up to 62." (and (match_code "const_int") (match_test "UNSIGNED_INT6 (ival - 1)"))) + +;; Memory constraint used for atomic ops. 
+(define_memory_constraint "ATO" + "A memory with only a base register" + (match_operand 0 "mem_noofs_operand")) + diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md index 43f9474c691..de0735a4071 100644 --- a/gcc/config/arc/predicates.md +++ b/gcc/config/arc/predicates.md @@ -813,3 +813,7 @@ (define_predicate "short_const_int_operand" (and (match_operand 0 "const_int_operand") (match_test "satisfies_constraint_C16 (op)"))) + +(define_predicate "mem_noofs_operand" + (and (match_code "mem") + (match_code "reg" "0"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 6162a609604..3cddf5c4de5 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -538,7 +538,7 @@ Objective-C and Objective-C++ Dialects}. @gccoptlist{-mbarrel-shifter @gol -mcpu=@var{cpu} -mA6 -mARC600 -mA7 -mARC700 @gol -mdpfp -mdpfp-compact -mdpfp-fast -mno-dpfp-lrsr @gol --mea -mno-mpy -mmul32x16 -mmul64 @gol +-mea -mno-mpy -mmul32x16 -mmul64 -matomic @gol -mnorm -mspfp -mspfp-compact -mspfp-fast -msimd -msoft-float -mswap @gol -mcrc -mdsp-packa -mdvbf -mlock -mmac-d16 -mmac-24 -mrtsc -mswape @gol -mtelephony -mxy -misize -mannotate-align -marclinux -marclinux_prof @gol @@ -12970,6 +12970,12 @@ can overridden by FPX options; @samp{mspfp}, @samp{mspfp-compact}, or @opindex mswap Generate swap instructions. +@item -matomic +@opindex matomic +This enables Locked Load/Store Conditional extension to implement +atomic memory built-in functions. Not available for ARC 6xx or ARC +EM cores. + @item -mdiv-rem @opindex mdiv-rem Enable DIV/REM instructions for ARCv2 cores. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 40bfb454567..bb785c689cd 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2015-12-10 Claudiu Zissulescu <claziss@synopsys.com> + + * lib/target-supports.exp (check_effective_target_arc_atomic): New + function. + (check_effective_target_sync_int_long): Add checks for ARC atomic + feature. 
+ (check_effective_target_sync_char_short): Likewise. + 2015-12-10 Richard Biener <rguenther@suse.de> PR tree-optimization/68817 diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 4e349e9832e..8d28b235c27 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2608,6 +2608,15 @@ proc check_effective_target_aarch64_little_endian { } { }] } +# Return 1 if this is a compiler supporting ARC atomic operations +proc check_effective_target_arc_atomic { } { + return [check_no_compiler_messages arc_atomic assembly { + #if !defined(__ARC_ATOMIC__) + #error FOO + #endif + }] +} + # Return 1 if this is an arm target using 32-bit instructions proc check_effective_target_arm32 { } { if { ![istarget arm*-*-*] } { @@ -5581,6 +5590,7 @@ proc check_effective_target_sync_int_long { } { || [istarget crisv32-*-*] || [istarget cris-*-*] || ([istarget sparc*-*-*] && [check_effective_target_sparc_v9]) || [istarget spu-*-*] + || ([istarget arc*-*-*] && [check_effective_target_arc_atomic]) || [check_effective_target_mips_llsc] } { set et_sync_int_long_saved 1 } @@ -5612,6 +5622,7 @@ proc check_effective_target_sync_char_short { } { || [istarget crisv32-*-*] || [istarget cris-*-*] || ([istarget sparc*-*-*] && [check_effective_target_sparc_v9]) || [istarget spu-*-*] + || ([istarget arc*-*-*] && [check_effective_target_arc_atomic]) || [check_effective_target_mips_llsc] } { set et_sync_char_short_saved 1 } |