diff options
author | Artem Belevich <tra@google.com> | 2015-09-22 17:22:59 +0000 |
---|---|---|
committer | Artem Belevich <tra@google.com> | 2015-09-22 17:22:59 +0000 |
commit | 72de1e381c176b8c130e589d9dcd9ed78bb4c4bc (patch) | |
tree | 3a15848a031d2bdf782fe0395f0338a3d165328c /test/CodeGenCUDA/function-overload.cu | |
parent | 2ead4d5eabb25334b92746a6eaf51b2285c73709 (diff) | |
download | clang-72de1e381c176b8c130e589d9dcd9ed78bb4c4bc.tar.gz |
[CUDA] Allow function overloads in CUDA based on host/device attributes.
The patch makes it possible to parse CUDA files that contain host/device
functions with identical signatures, but different attributes without
having to physically split source into host-only and device-only parts.
This change is needed in order to parse CUDA header files that have
a lot of name clashes with standard include files.
Gory details are in design doc here: https://goo.gl/EXnymm
Feel free to leave comments there or in this review thread.
This feature is controlled with CC1 option -fcuda-target-overloads
and is disabled by default.
Differential Revision: http://reviews.llvm.org/D12453
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@248295 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGenCUDA/function-overload.cu')
-rw-r--r-- | test/CodeGenCUDA/function-overload.cu | 214 |
1 files changed, 214 insertions, 0 deletions
diff --git a/test/CodeGenCUDA/function-overload.cu b/test/CodeGenCUDA/function-overload.cu new file mode 100644 index 0000000000..a12ef82773 --- /dev/null +++ b/test/CodeGenCUDA/function-overload.cu @@ -0,0 +1,214 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// Make sure we handle target overloads correctly. +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -fcuda-target-overloads -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \ +// RUN: -fcuda-target-overloads -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s + +// Check target overloads handling with disabled call target checks. +// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -emit-llvm \ +// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST \ +// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-HOST-NC %s +// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -emit-llvm \ +// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \ +// RUN: -fcuda-is-device -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \ +// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-DEVICE-NC %s + +#include "Inputs/cuda.h" + +typedef int (*fp_t)(void); +typedef void (*gp_t)(void); + +// CHECK-HOST: @hp = global i32 ()* @_Z1hv +// CHECK-HOST: @chp = global i32 ()* @ch +// CHECK-HOST: @dhp = global i32 ()* @_Z2dhv +// CHECK-HOST: @cdhp = global i32 ()* @cdh +// CHECK-HOST: @gp = global void ()* @_Z1gv + +// CHECK-BOTH-LABEL: define i32 @_Z2dhv() +__device__ int dh(void) { return 1; } +// CHECK-DEVICE: ret i32 1 +__host__ int dh(void) { return 2; } +// CHECK-HOST: ret i32 2 + +// CHECK-BOTH-LABEL: define i32 @_Z2hdv() +__host__ __device__ int hd(void) { return 3; } +// CHECK-BOTH: ret i32 3 + +// CHECK-DEVICE-LABEL: define i32 @_Z1dv() +__device__ int d(void) { return 8; } +// CHECK-DEVICE: ret i32 8 + +// CHECK-HOST-LABEL: define i32 @_Z1hv() +__host__ int h(void) { return 9; } +// CHECK-HOST: ret i32 9 + +// CHECK-BOTH-LABEL: define void @_Z1gv() +__global__ void g(void) {} +// CHECK-BOTH: ret void + +// mangled names of extern "C" __host__ __device__ functions clash +// with those of their __host__/__device__ counterparts, so +// overloading of extern "C" functions can only happen for __host__ +// and __device__ functions -- we never codegen them in the same +// compilation and therefore mangled name conflict is not a problem. + +// CHECK-BOTH-LABEL: define i32 @cdh() +extern "C" __device__ int cdh(void) {return 10;} +// CHECK-DEVICE: ret i32 10 +extern "C" __host__ int cdh(void) {return 11;} +// CHECK-HOST: ret i32 11 + +// CHECK-DEVICE-LABEL: define i32 @cd() +extern "C" __device__ int cd(void) {return 12;} +// CHECK-DEVICE: ret i32 12 + +// CHECK-HOST-LABEL: define i32 @ch() +extern "C" __host__ int ch(void) {return 13;} +// CHECK-HOST: ret i32 13 + +// CHECK-BOTH-LABEL: define i32 @chd() +extern "C" __host__ __device__ int chd(void) {return 14;} +// CHECK-BOTH: ret i32 14 + +// CHECK-HOST-LABEL: define void @_Z5hostfv() +__host__ void hostf(void) { +#if defined (NOCHECKS) + fp_t dp = d; // CHECK-HOST-NC: store {{.*}} @_Z1dv, {{.*}} %dp, + fp_t cdp = cd; // CHECK-HOST-NC: store {{.*}} @cd, {{.*}} %cdp, +#endif + fp_t hp = h; // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp, + fp_t chp = ch; // CHECK-HOST: store {{.*}} @ch, {{.*}} %chp, + fp_t dhp = dh; // CHECK-HOST: store {{.*}} @_Z2dhv, {{.*}} %dhp, + fp_t cdhp = cdh; // CHECK-HOST: store {{.*}} @cdh, {{.*}} %cdhp, + fp_t hdp = hd; // CHECK-HOST: store {{.*}} @_Z2hdv, {{.*}} %hdp, + fp_t chdp = chd; // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp, + gp_t gp = g; // CHECK-HOST: store {{.*}} @_Z1gv, {{.*}} %gp, + +#if defined (NOCHECKS) + d(); // CHECK-HOST-NC: call i32 @_Z1dv() + cd(); // CHECK-HOST-NC: call i32 @cd() +#endif + h(); // CHECK-HOST: call i32 @_Z1hv() + ch(); // CHECK-HOST: call i32 @ch() + dh(); // CHECK-HOST: call i32 @_Z2dhv() + cdh(); // CHECK-HOST: call i32 @cdh() + g<<<0,0>>>(); // CHECK-HOST: call void @_Z1gv() +} + +// CHECK-DEVICE-LABEL: define void @_Z7devicefv() +__device__ void devicef(void) { + fp_t dp = d; // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp, + fp_t cdp = cd; // CHECK-DEVICE: store {{.*}} @cd, {{.*}} %cdp, +#if defined (NOCHECKS) + fp_t hp = h; // CHECK-DEVICE-NC: store {{.*}} @_Z1hv, {{.*}} %hp, + fp_t chp = ch; // CHECK-DEVICE-NC: store {{.*}} @ch, {{.*}} %chp, +#endif + fp_t dhp = dh; // CHECK-DEVICE: store {{.*}} @_Z2dhv, {{.*}} %dhp, + fp_t cdhp = cdh; // CHECK-DEVICE: store {{.*}} @cdh, {{.*}} %cdhp, + fp_t hdp = hd; // CHECK-DEVICE: store {{.*}} @_Z2hdv, {{.*}} %hdp, + fp_t chdp = chd; // CHECK-DEVICE: store {{.*}} @chd, {{.*}} %chdp, + + d(); // CHECK-DEVICE: call i32 @_Z1dv() + cd(); // CHECK-DEVICE: call i32 @cd() +#if defined (NOCHECKS) + h(); // CHECK-DEVICE-NC: call i32 @_Z1hv() + ch(); // CHECK-DEVICE-NC: call i32 @ch() +#endif + dh(); // CHECK-DEVICE: call i32 @_Z2dhv() + cdh(); // CHECK-DEVICE: call i32 @cdh() +} + +// CHECK-BOTH-LABEL: define void @_Z11hostdevicefv() +__host__ __device__ void hostdevicef(void) { +#if defined (NOCHECKS) + fp_t dp = d; // CHECK-BOTH-NC: store {{.*}} @_Z1dv, {{.*}} %dp, + fp_t cdp = cd; // CHECK-BOTH-NC: store {{.*}} @cd, {{.*}} %cdp, + fp_t hp = h; // CHECK-BOTH-NC: store {{.*}} @_Z1hv, {{.*}} %hp, + fp_t chp = ch; // CHECK-BOTH-NC: store {{.*}} @ch, {{.*}} %chp, +#endif + fp_t dhp = dh; // CHECK-BOTH: store {{.*}} @_Z2dhv, {{.*}} %dhp, + fp_t cdhp = cdh; // CHECK-BOTH: store {{.*}} @cdh, {{.*}} %cdhp, + fp_t hdp = hd; // CHECK-BOTH: store {{.*}} @_Z2hdv, {{.*}} %hdp, + fp_t chdp = chd; // CHECK-BOTH: store {{.*}} @chd, {{.*}} %chdp, +#if defined (NOCHECKS) && !defined(__CUDA_ARCH__) + gp_t gp = g; // CHECK-HOST-NC: store {{.*}} @_Z1gv, {{.*}} %gp, +#endif + +#if defined (NOCHECKS) + d(); // CHECK-BOTH-NC: call i32 @_Z1dv() + cd(); // CHECK-BOTH-NC: call i32 @cd() + h(); // CHECK-BOTH-NC: call i32 @_Z1hv() + ch(); // CHECK-BOTH-NC: call i32 @ch() +#endif + dh(); // CHECK-BOTH: call i32 @_Z2dhv() + cdh(); // CHECK-BOTH: call i32 @cdh() +#if defined (NOCHECKS) && !defined(__CUDA_ARCH__) + g<<<0,0>>>(); // CHECK-HOST-NC: call void @_Z1gv() +#endif +} + +// Test for address of overloaded function resolution in the global context. +fp_t hp = h; +fp_t chp = ch; +fp_t dhp = dh; +fp_t cdhp = cdh; +gp_t gp = g; + +int x; +// Check constructors/destructors for D/H functions +struct s_cd_dh { + __host__ s_cd_dh() { x = 11; } + __device__ s_cd_dh() { x = 12; } + __host__ ~s_cd_dh() { x = 21; } + __device__ ~s_cd_dh() { x = 22; } +}; + +struct s_cd_hd { + __host__ __device__ s_cd_hd() { x = 31; } + __host__ __device__ ~s_cd_hd() { x = 32; } +}; + +// CHECK-BOTH: define void @_Z7wrapperv +#if defined(__CUDA_ARCH__) +__device__ +#else +__host__ +#endif +void wrapper() { + s_cd_dh scddh; + // CHECK-BOTH: call void @_ZN7s_cd_dhC1Ev( + s_cd_hd scdhd; + // CHECK-BOTH: call void @_ZN7s_cd_hdC1Ev + + // CHECK-BOTH: call void @_ZN7s_cd_hdD1Ev( + // CHECK-BOTH: call void @_ZN7s_cd_dhD1Ev( +} +// CHECK-BOTH: ret void + +// Now it's time to check what's been generated for the methods we used. + +// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhC2Ev( +// CHECK-HOST: store i32 11, +// CHECK-DEVICE: store i32 12, +// CHECK-BOTH: ret void + +// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2Ev( +// CHECK-BOTH: store i32 31, +// CHECK-BOTH: ret void + +// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2Ev( +// CHECK-BOTH: store i32 32, +// CHECK-BOTH: ret void + +// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhD2Ev( +// CHECK-HOST: store i32 21, +// CHECK-DEVICE: store i32 22, +// CHECK-BOTH: ret void + |