1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,NOMALLOC %s
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
// RUN: -include cmath \
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
// RUN: -D__HIPCC_RTC__ | FileCheck %s -check-prefixes=AMD_BOOL_RETURN
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
// RUN: -include cmath \
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
// RUN: -D__HIPCC_RTC__ -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s -check-prefixes=AMD_INT_RETURN
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
// RUN: -D__HIPCC_RTC__ -std=c++14 | FileCheck -check-prefixes=CHECK,CXX14 %s
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,MALLOC %s
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
// RUN: -fsanitize=address -disable-llvm-passes -D__HIPCC_RTC__ \
// RUN: | FileCheck -check-prefixes=MALLOC-ASAN %s
// expected-no-diagnostics
// Check support for pure and deleted virtual functions
struct base {
__host__
__device__
virtual void pv() = 0;
__host__
__device__
virtual void dv() = delete;
};
struct derived:base {
__host__
__device__
virtual void pv() override {};
};
__device__ void test_vf() {
derived d;
}
// CHECK: @_ZTV7derived = linkonce_odr unnamed_addr addrspace(1) constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN7derived2pvEv, ptr @__cxa_deleted_virtual] }, comdat, align 8
// CHECK: @_ZTV4base = linkonce_odr unnamed_addr addrspace(1) constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @__cxa_pure_virtual, ptr @__cxa_deleted_virtual] }, comdat, align 8
// CHECK: define{{.*}}void @__cxa_pure_virtual()
// CHECK: define{{.*}}void @__cxa_deleted_virtual()
struct Number {
__device__ Number(float _x) : x(_x) {}
float x;
};
#if __cplusplus >= 201103L
// Check __hip::__numeric_type can be used with a class without default ctor.
__device__ void test_numeric_type() {
int x = __hip::__numeric_type<Number>::value;
}
// ToDo: Fix __clang_hip_cmake.h to specialize __hip::is_arithmetic<_Float16>
// to resolve fma(_Float16, _Float16, int) to fma(double, double, double)
// instead of fma(_Float16, _Float16, _Float16).
// CXX14-LABEL: define{{.*}}@_Z8test_fma
// CXX14: call {{.*}}@__ocml_fma_f16
__device__ double test_fma(_Float16 h, int i) {
return fma(h, h, i);
}
#endif
// CHECK-LABEL: amdgpu_kernel void @_Z4kernPff
__global__ void kern(float *x, float y) {
*x = sin(y);
}
// CHECK-LABEL: define{{.*}} i64 @_Z11test_size_tv
// CHECK: ret i64 8
__device__ size_t test_size_t() {
return sizeof(size_t);
}
// Check there is no ambiguity when calling overloaded math functions.
// CHECK-LABEL: define{{.*}}@_Z10test_floorv
// CHECK: call {{.*}}double @__ocml_floor_f64(double
__device__ float test_floor() {
return floor(5);
}
// CHECK-LABEL: define{{.*}}@_Z8test_maxv
// CHECK: call {{.*}}double @__ocml_fmax_f64(double {{.*}}, double
__device__ float test_max() {
return max(5, 6.0);
}
// CHECK-LABEL: define{{.*}}@_Z10test_isnanv
__device__ double test_isnan() {
double r = 0;
double d = 5.0;
float f = 5.0;
// AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
// AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
r += isnan(f);
// AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
// AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
r += isnan(d);
return r ;
}
// Check that device malloc and free do not conflict with std headers.
#include <cstdlib>
// CHECK-LABEL: define{{.*}}@_Z11test_malloc
// CHECK: call {{.*}}ptr @malloc(i64
// CHECK-LABEL: define weak {{.*}}ptr @malloc(i64
// MALLOC: call i64 @__ockl_dm_alloc
// NOMALLOC: call void @llvm.trap
// MALLOC-ASAN-LABEL: define weak {{.*}}ptr @malloc(i64
// MALLOC-ASAN: call ptr @llvm.returnaddress(i32 0)
// MALLOC-ASAN: call i64 @__asan_malloc_impl(i64 {{.*}}, i64 {{.*}})
__device__ void test_malloc(void *a) {
a = malloc(42);
}
// CHECK-LABEL: define{{.*}}@_Z9test_free
// CHECK: call {{.*}}void @free(ptr
// CHECK-LABEL: define weak {{.*}}void @free(ptr
// MALLOC: call void @__ockl_dm_dealloc
// NOMALLOC: call void @llvm.trap
// MALLOC-ASAN-LABEL: define weak {{.*}}void @free(ptr
// MALLOC-ASAN: call ptr @llvm.returnaddress(i32 0)
// MALLOC-ASAN: call void @__asan_free_impl(i64 {{.*}}, i64 {{.*}})
__device__ void test_free(void *a) {
free(a);
}
|