summaryrefslogtreecommitdiff
path: root/gcc/config/nvptx/nvptx.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/nvptx/nvptx.md')
-rw-r--r--gcc/config/nvptx/nvptx.md191
1 files changed, 163 insertions, 28 deletions
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index d117343c531..91d11290860 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -36,10 +36,16 @@
UNSPEC_ALLOCA
+ UNSPEC_SET_SOFTSTACK
+
UNSPEC_DIM_SIZE
UNSPEC_BIT_CONV
+ UNSPEC_VOTE_BALLOT
+
+ UNSPEC_LANEID
+
UNSPEC_SHUFFLE
UNSPEC_BR_UNIFIED
])
@@ -55,11 +61,16 @@
UNSPECV_FORKED
UNSPECV_JOINING
UNSPECV_JOIN
+
+ UNSPECV_NOUNROLL
])
(define_attr "subregs_ok" "false,true"
(const_string "false"))
+(define_attr "atomic" "false,true"
+ (const_string "false"))
+
;; The nvptx operand predicates, in general, don't permit subregs and
;; only literal constants, which differ from the generic ones, which
;; permit subregs and symbolc constants (as appropriate)
@@ -124,6 +135,17 @@
return true;
})
+(define_attr "predicable" "false,true"
+ (const_string "true"))
+
+(define_cond_exec
+ [(match_operator 0 "predicate_operator"
+ [(match_operand:BI 1 "nvptx_register_operand" "")
+ (match_operand:BI 2 "const0_operand" "")])]
+ ""
+ ""
+ )
+
(define_constraint "P0"
"An integer with the value 0."
(and (match_code "const_int")
@@ -509,7 +531,8 @@
(label_ref (match_operand 1 "" ""))
(pc)))]
""
- "%j0\\tbra\\t%l1;")
+ "%j0\\tbra\\t%l1;"
+ [(set_attr "predicable" "false")])
(define_insn "br_false"
[(set (pc)
@@ -518,7 +541,8 @@
(label_ref (match_operand 1 "" ""))
(pc)))]
""
- "%J0\\tbra\\t%l1;")
+ "%J0\\tbra\\t%l1;"
+ [(set_attr "predicable" "false")])
;; unified conditional branch
(define_insn "br_true_uni"
@@ -527,7 +551,8 @@
UNSPEC_BR_UNIFIED) (const_int 0))
(label_ref (match_operand 1 "" "")) (pc)))]
""
- "%j0\\tbra.uni\\t%l1;")
+ "%j0\\tbra.uni\\t%l1;"
+ [(set_attr "predicable" "false")])
(define_insn "br_false_uni"
[(set (pc) (if_then_else
@@ -535,7 +560,8 @@
UNSPEC_BR_UNIFIED) (const_int 0))
(label_ref (match_operand 1 "" "")) (pc)))]
""
- "%J0\\tbra.uni\\t%l1;")
+ "%J0\\tbra.uni\\t%l1;"
+ [(set_attr "predicable" "false")])
(define_expand "cbranch<mode>4"
[(set (pc)
@@ -938,12 +964,16 @@
""
{
return nvptx_output_return ();
-})
+}
+ [(set_attr "predicable" "false")])
(define_expand "epilogue"
[(clobber (const_int 0))]
""
{
+ if (TARGET_SOFT_STACK)
+ emit_insn (gen_set_softstack_insn (gen_rtx_REG (Pmode,
+ SOFTSTACK_PREV_REGNUM)));
emit_jump_insn (gen_return ());
DONE;
})
@@ -972,31 +1002,40 @@
(match_operand 1 "nvptx_register_operand")]
""
{
+ if (TARGET_SOFT_STACK)
+ {
+ emit_move_insn (stack_pointer_rtx,
+ gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
+ emit_insn (gen_set_softstack_insn (stack_pointer_rtx));
+ emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+ }
/* The ptx documentation specifies an alloca intrinsic (for 32 bit
only) but notes it is not implemented. The assembler emits a
confused error message. Issue a blunt one now instead. */
sorry ("target cannot support alloca.");
emit_insn (gen_nop ());
DONE;
- if (TARGET_ABI64)
- emit_insn (gen_allocate_stack_di (operands[0], operands[1]));
- else
- emit_insn (gen_allocate_stack_si (operands[0], operands[1]));
- DONE;
})
-(define_insn "allocate_stack_<mode>"
- [(set (match_operand:P 0 "nvptx_register_operand" "=R")
- (unspec:P [(match_operand:P 1 "nvptx_register_operand" "R")]
- UNSPEC_ALLOCA))]
- ""
- "%.\\tcall (%0), %%alloca, (%1);")
+(define_insn "set_softstack_insn"
+ [(unspec [(match_operand 0 "nvptx_register_operand" "R")]
+ UNSPEC_SET_SOFTSTACK)]
+ "TARGET_SOFT_STACK"
+{
+ return nvptx_output_set_softstack (REGNO (operands[0]));
+})
(define_expand "restore_stack_block"
[(match_operand 0 "register_operand" "")
(match_operand 1 "register_operand" "")]
""
{
+ if (TARGET_SOFT_STACK)
+ {
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn (gen_set_softstack_insn (operands[0]));
+ }
DONE;
})
@@ -1018,14 +1057,16 @@
(const_int 0))
(const_int 0))]
""
- "%j0 trap;")
+ "%j0 trap;"
+ [(set_attr "predicable" "false")])
(define_insn "trap_if_false"
[(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
(const_int 0))
(const_int 0))]
""
- "%J0 trap;")
+ "%J0 trap;"
+ [(set_attr "predicable" "false")])
(define_expand "ctrap<mode>4"
[(trap_if (match_operator 0 "nvptx_comparison_operator"
@@ -1074,28 +1115,28 @@
UNSPECV_FORK)]
""
"// fork %0;"
-)
+ [(set_attr "predicable" "false")])
(define_insn "nvptx_forked"
[(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
UNSPECV_FORKED)]
""
"// forked %0;"
-)
+ [(set_attr "predicable" "false")])
(define_insn "nvptx_joining"
[(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
UNSPECV_JOINING)]
""
"// joining %0;"
-)
+ [(set_attr "predicable" "false")])
(define_insn "nvptx_join"
[(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
UNSPECV_JOIN)]
""
"// join %0;"
-)
+ [(set_attr "predicable" "false")])
(define_expand "oacc_fork"
[(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
@@ -1134,6 +1175,88 @@
""
"%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
+(define_insn "nvptx_vote_ballot"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+ (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
+ UNSPEC_VOTE_BALLOT))]
+ ""
+ "%.\\tvote.ballot.b32\\t%0, %1;")
+
+;; Patterns for OpenMP SIMD-via-SIMT lowering
+
+;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
+(define_insn "omp_simt_lane"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "")
+ (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
+ ""
+ "%.\\tmov.u32\\t%0, %%laneid;")
+
+;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
+;; place a compiler barrier to disallow unrolling/peeling the containing loop
+(define_expand "omp_simt_ordered"
+ [(match_operand:SI 0 "nvptx_register_operand" "=R")
+ (match_operand:SI 1 "nvptx_register_operand" "R")]
+ ""
+{
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn (gen_nvptx_nounroll ());
+ DONE;
+})
+
+;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
+;; across lanes
+(define_expand "omp_simt_xchg_bfly"
+ [(match_operand 0 "nvptx_register_operand" "=R")
+ (match_operand 1 "nvptx_register_operand" "R")
+ (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
+ ""
+{
+ emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
+ SHUFFLE_BFLY));
+ DONE;
+})
+
+;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
+;; from lane given by index in operand 2 to operand 0 in all lanes
+(define_expand "omp_simt_xchg_idx"
+ [(match_operand 0 "nvptx_register_operand" "=R")
+ (match_operand 1 "nvptx_register_operand" "R")
+ (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
+ ""
+{
+ emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
+ SHUFFLE_IDX));
+ DONE;
+})
+
+;; Implement IFN_GOMP_SIMT_VOTE_ANY:
+;; set operand 0 to zero iff all lanes supply zero in operand 1
+(define_expand "omp_simt_vote_any"
+ [(match_operand:SI 0 "nvptx_register_operand" "=R")
+ (match_operand:SI 1 "nvptx_register_operand" "R")]
+ ""
+{
+ rtx pred = gen_reg_rtx (BImode);
+ emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
+ emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
+ DONE;
+})
+
+;; Implement IFN_GOMP_SIMT_LAST_LANE:
+;; set operand 0 to the lowest lane index that passed non-zero in operand 1
+(define_expand "omp_simt_last_lane"
+ [(match_operand:SI 0 "nvptx_register_operand" "=R")
+ (match_operand:SI 1 "nvptx_register_operand" "R")]
+ ""
+{
+ rtx pred = gen_reg_rtx (BImode);
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
+ emit_insn (gen_nvptx_vote_ballot (tmp, pred));
+ emit_insn (gen_ctzsi2 (operands[0], tmp));
+ DONE;
+})
+
;; extract parts of a 64 bit object into 2 32-bit ints
(define_insn "unpack<mode>si2"
[(set (match_operand:SI 0 "nvptx_register_operand" "=R")
@@ -1186,7 +1309,8 @@
(set (match_dup 1)
(unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
""
- "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;")
+ "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"
+ [(set_attr "atomic" "true")])
(define_insn "atomic_exchange<mode>"
[(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
@@ -1197,7 +1321,8 @@
(set (match_dup 1)
(match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
""
- "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;")
+ "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;"
+ [(set_attr "atomic" "true")])
(define_insn "atomic_fetch_add<mode>"
[(set (match_operand:SDIM 1 "memory_operand" "+m")
@@ -1209,7 +1334,8 @@
(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
(match_dup 1))]
""
- "%.\\tatom%A1.add%t0\\t%0, %1, %2;")
+ "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
+ [(set_attr "atomic" "true")])
(define_insn "atomic_fetch_addsf"
[(set (match_operand:SF 1 "memory_operand" "+m")
@@ -1221,7 +1347,8 @@
(set (match_operand:SF 0 "nvptx_register_operand" "=R")
(match_dup 1))]
""
- "%.\\tatom%A1.add%t0\\t%0, %1, %2;")
+ "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
+ [(set_attr "atomic" "true")])
(define_code_iterator any_logic [and ior xor])
(define_code_attr logic [(and "and") (ior "or") (xor "xor")])
@@ -1237,10 +1364,18 @@
(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
(match_dup 1))]
"0"
- "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;")
+ "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"
+ [(set_attr "atomic" "true")])
(define_insn "nvptx_barsync"
[(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
UNSPECV_BARSYNC)]
""
- "\\tbar.sync\\t%0;")
+ "\\tbar.sync\\t%0;"
+ [(set_attr "predicable" "false")])
+
+(define_insn "nvptx_nounroll"
+ [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
+ ""
+ "\\t.pragma \\\"nounroll\\\";"
+ [(set_attr "predicable" "false")])