1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX678,HAS-ATOMICS %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,HAS-ATOMICS %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX678,NO-ATOMICS %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX678,NO-ATOMICS %s
; GCN-LABEL: {{^}}lds_atomic_fadd_ret_f32:
; GFX678-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 4.0
; HAS-ATOMICS: ds_add_rtn_f32 v0, v0, [[K]]
; NO-ATOMICS: ds_read_b32
; NO-ATOMICS: v_add_f32
; NO-ATOMICS: ds_cmpst_rtn_b32
; NO-ATOMICS: s_cbranch_execnz
define float @lds_atomic_fadd_ret_f32(ptr addrspace(3) %ptr) nounwind {
%result = atomicrmw fadd ptr addrspace(3) %ptr, float 4.0 seq_cst
ret float %result
}
; GCN-LABEL: {{^}}lds_atomic_fadd_noret_f32:
; GFX678-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 4.0
; HAS-ATOMICS: ds_add_f32 v0, [[K]]
define void @lds_atomic_fadd_noret_f32(ptr addrspace(3) %ptr) nounwind {
%result = atomicrmw fadd ptr addrspace(3) %ptr, float 4.0 seq_cst
ret void
}
; GCN-LABEL: {{^}}lds_ds_fadd:
; VI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; HAS-ATOMICS: s_waitcnt lgkmcnt(0)
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) %ptrf, i32 %idx) {
%idx.add = add nuw i32 %idx, 4
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
%ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
%ptr1 = inttoptr i32 %shl1 to ptr addrspace(3)
%a1 = atomicrmw fadd ptr addrspace(3) %ptr0, float 4.2e+1 seq_cst
%a2 = atomicrmw fadd ptr addrspace(3) %ptr1, float 4.2e+1 seq_cst
%a3 = atomicrmw fadd ptr addrspace(3) %ptrf, float %a1 seq_cst
store float %a3, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}lds_ds_fadd_one_as:
; VI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; HAS-ATOMICS: s_waitcnt lgkmcnt(1)
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrspace(3) %ptrf, i32 %idx) {
%idx.add = add nuw i32 %idx, 4
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
%ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
%ptr1 = inttoptr i32 %shl1 to ptr addrspace(3)
%a1 = atomicrmw fadd ptr addrspace(3) %ptr0, float 4.2e+1 syncscope("one-as") seq_cst
%a2 = atomicrmw fadd ptr addrspace(3) %ptr1, float 4.2e+1 syncscope("one-as") seq_cst
%a3 = atomicrmw fadd ptr addrspace(3) %ptrf, float %a1 syncscope("one-as") seq_cst
store float %a3, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}lds_atomic_fadd_ret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64
; GCN: ds_cmpst_rtn_b64
; GCN: s_cbranch_execnz
define double @lds_atomic_fadd_ret_f64(ptr addrspace(3) %ptr) nounwind {
%result = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
ret double %result
}
; GCN-LABEL: {{^}}lds_atomic_fadd_noret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64
; GCN: ds_cmpst_rtn_b64
; GCN: s_cbranch_execnz
define void @lds_atomic_fadd_noret_f64(ptr addrspace(3) %ptr) nounwind {
%result = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
ret void
}
; GCN-LABEL: {{^}}lds_atomic_fsub_ret_f32:
; GCN: ds_read_b32
; GCN: v_sub_f32
; GCN: ds_cmpst_rtn_b32
; GCN: s_cbranch_execnz
define float @lds_atomic_fsub_ret_f32(ptr addrspace(3) %ptr, float %val) nounwind {
%result = atomicrmw fsub ptr addrspace(3) %ptr, float %val seq_cst
ret float %result
}
; GCN-LABEL: {{^}}lds_atomic_fsub_noret_f32:
; GCN: ds_read_b32
; GCN: v_sub_f32
; GCN: ds_cmpst_rtn_b32
define void @lds_atomic_fsub_noret_f32(ptr addrspace(3) %ptr, float %val) nounwind {
%result = atomicrmw fsub ptr addrspace(3) %ptr, float %val seq_cst
ret void
}
; GCN-LABEL: {{^}}lds_atomic_fsub_ret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
; GCN: ds_cmpst_rtn_b64
define double @lds_atomic_fsub_ret_f64(ptr addrspace(3) %ptr, double %val) nounwind {
%result = atomicrmw fsub ptr addrspace(3) %ptr, double %val seq_cst
ret double %result
}
; GCN-LABEL: {{^}}lds_atomic_fsub_noret_f64:
; GCN: ds_read_b64
; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
; GCN: ds_cmpst_rtn_b64
; GCN: s_cbranch_execnz
define void @lds_atomic_fsub_noret_f64(ptr addrspace(3) %ptr, double %val) nounwind {
%result = atomicrmw fsub ptr addrspace(3) %ptr, double %val seq_cst
ret void
}
|