From 88700afba7f6176b9c437a6b66d81fd484ad845d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 16 Oct 2019 16:59:01 +0000 Subject: [OPENMP]Use different addresses for zeroed thread_id/bound_id. When the parallel region is called directly in the sequential region, the zeroed tid/bound id are used. But they must point to the different memory locations as the parameters are marked as noalias. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@375017 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGOpenMPRuntime.cpp | 10 +++++--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 35 +++++++++++++++------------ test/OpenMP/nvptx_target_codegen.cpp | 4 ++- test/OpenMP/nvptx_teams_reduction_codegen.cpp | 1 + test/OpenMP/parallel_if_codegen.cpp | 15 ++++++++---- 5 files changed, 40 insertions(+), 25 deletions(-) diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 42d18b4098..7ff36c79f6 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3075,14 +3075,18 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); - // OutlinedFn(>id, &zero, CapturedStruct); + // OutlinedFn(&zero, &zero_bound, CapturedStruct); Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, - /*Name*/ ".zero.addr"); + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + Address ZeroAddrBound = + CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".bound.zero.addr"); + CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector OutlinedFnArgs; // ThreadId for serialized parallels is 0. OutlinedFnArgs.push_back(ZeroAddr.getPointer()); - OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 63093e7325..291052109e 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -2459,9 +2459,8 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; - Address ZeroAddr = CGF.CreateMemTemp( - CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), - /*Name*/ ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector OutlinedFnArgs; OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); @@ -2490,16 +2489,19 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( // Force inline this outlined function at its call site. Fn->setLinkage(llvm::GlobalValue::InternalLinkage); - Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth( - /*DestWidth=*/32, /*Signed=*/1), - ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); // ThreadId for serialized parallels is 0. Address ThreadIDAddr = ZeroAddr; - auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr]( + auto &&CodeGen = [this, Fn, CapturedVars, Loc, &ThreadIDAddr]( CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); + Address ZeroAddr = + CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".bound.zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector OutlinedFnArgs; OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); @@ -2656,17 +2658,19 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( // llvm::SmallVector OutlinedFnArgs; - Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth( - /*DestWidth=*/32, /*Signed=*/1), - ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); // ThreadId for serialized parallels is 0. Address ThreadIDAddr = ZeroAddr; - auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr, - &ThreadIDAddr](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, &ThreadIDAddr]( + CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); + Address ZeroAddr = + CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".bound.zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector OutlinedFnArgs; OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); @@ -4567,9 +4571,8 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( const auto *RD = CS.getCapturedRecordDecl(); auto CurField = RD->field_begin(); - Address ZeroAddr = CGF.CreateMemTemp( - CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), - /*Name*/ ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); // Get the array of arguments. SmallVector Args; diff --git a/test/OpenMP/nvptx_target_codegen.cpp b/test/OpenMP/nvptx_target_codegen.cpp index 84ff991006..333f9166d0 100644 --- a/test/OpenMP/nvptx_target_codegen.cpp +++ b/test/OpenMP/nvptx_target_codegen.cpp @@ -577,6 +577,8 @@ int baz(int f, double &a) { // CHECK: alloca i32, // CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32, // CHECK: [[ZERO_ADDR:%.+]] = alloca i32, + // CHECK: [[BND_ZERO_ADDR:%.+]] = alloca i32, + // CHECK: store i32 0, i32* [[BND_ZERO_ADDR]] // CHECK: store i32 0, i32* [[ZERO_ADDR]] // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[UNKNOWN]] // CHECK: [[PAR_LEVEL:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]]) @@ -609,7 +611,7 @@ int baz(int f, double &a) { // CHECK: br i1 // CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]]) - // CHECK: call void [[OUTLINED:@.+]](i32* [[ZERO_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}}) + // CHECK: call void [[OUTLINED:@.+]](i32* [[ZERO_ADDR]], i32* [[BND_ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}}) // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]]) // CHECK: br label diff --git a/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/test/OpenMP/nvptx_teams_reduction_codegen.cpp index 96f4de8455..d17eee24ab 100644 --- a/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -712,6 +712,7 @@ int bar(int n){ // CHECK-NOT: call void @__kmpc_get_team_static_memory // CHECK: store i32 0, + // CHECK: store i32 0, // CHECK: store i32 0, i32* [[A_ADDR:%.+]], align // CHECK: store i16 -32768, i16* [[B_ADDR:%.+]], align // CHECK: call void [[OUTLINED:@.+]](i32* {{.+}}, i32* {{.+}}, i32* [[A_ADDR]], i16* [[B_ADDR]]) diff --git a/test/OpenMP/parallel_if_codegen.cpp b/test/OpenMP/parallel_if_codegen.cpp index ec9fc01561..d261b5b21c 100644 --- a/test/OpenMP/parallel_if_codegen.cpp +++ b/test/OpenMP/parallel_if_codegen.cpp @@ -29,12 +29,13 @@ void gtid_test() { } // CHECK: define internal {{.*}}void [[GTID_TEST_REGION1]](i{{.+}}* noalias [[GTID_PARAM:%.+]], i32* noalias +// CHECK: store i32 0, i32* [[BND_ZERO_ADDR:%.+]], // CHECK: store i32 0, i32* [[ZERO_ADDR:%.+]], // CHECK: store i{{[0-9]+}}* [[GTID_PARAM]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]], // CHECK: [[GTID_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]] // CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_ADDR]] // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]]) -// CHECK: call void [[GTID_TEST_REGION2:@.+]](i{{[0-9]+}}* [[ZERO_ADDR]] +// CHECK: call void [[GTID_TEST_REGION2:@.+]](i{{[0-9]+}}* [[ZERO_ADDR]], i{{[0-9]+}}* [[BND_ZERO_ADDR]] // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]]) // CHECK: ret void @@ -55,14 +56,16 @@ int tmain(T Arg) { // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main() int main() { +// CHECK: store i32 0, i32* [[BND_ZERO_ADDR2:%.+]], // CHECK: store i32 0, i32* [[ZERO_ADDR2:%.+]], +// CHECK: store i32 0, i32* [[BND_ZERO_ADDR1:%.+]], // CHECK: store i32 0, i32* [[ZERO_ADDR1:%.+]], // CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num( // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN4:@.+]] to void #pragma omp parallel if (true) fn4(); // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) -// CHECK: call void [[CAP_FN5:@.+]](i32* [[ZERO_ADDR1]], i32* [[ZERO_ADDR1]]) +// CHECK: call void [[CAP_FN5:@.+]](i32* [[ZERO_ADDR1]], i32* [[BND_ZERO_ADDR1]]) // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) #pragma omp parallel if (false) fn5(); @@ -73,7 +76,7 @@ int main() { // CHECK: br label %[[OMP_END:.+]] // CHECK: [[OMP_ELSE]] // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) -// CHECK: call void [[CAP_FN6]](i32* [[ZERO_ADDR2]], i32* [[ZERO_ADDR2]]) +// CHECK: call void [[CAP_FN6]](i32* [[ZERO_ADDR2]], i32* [[BND_ZERO_ADDR2]]) // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) // CHECK: br label %[[OMP_END]] // CHECK: [[OMP_END]] @@ -96,12 +99,14 @@ int main() { // CHECK: ret void // CHECK-LABEL: define {{.+}} @{{.+}}tmain +// CHECK: store i32 0, i32* [[BND_ZERO_ADDR2:%.+]], // CHECK: store i32 0, i32* [[ZERO_ADDR2:%.+]], +// CHECK: store i32 0, i32* [[BND_ZERO_ADDR1:%.+]], // CHECK: store i32 0, i32* [[ZERO_ADDR1:%.+]], // CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num( // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN1:@.+]] to void // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) -// CHECK: call void [[CAP_FN2:@.+]](i32* [[ZERO_ADDR1]], i32* [[ZERO_ADDR1]]) +// CHECK: call void [[CAP_FN2:@.+]](i32* [[ZERO_ADDR1]], i32* [[BND_ZERO_ADDR1]]) // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) // CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]] // CHECK: [[OMP_THEN]] @@ -109,7 +114,7 @@ int main() { // CHECK: br label %[[OMP_END:.+]] // CHECK: [[OMP_ELSE]] // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) -// CHECK: call void [[CAP_FN3]](i32* [[ZERO_ADDR2]], i32* [[ZERO_ADDR2]]) +// CHECK: call void [[CAP_FN3]](i32* [[ZERO_ADDR2]], i32* [[BND_ZERO_ADDR2]]) // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) // CHECK: br label %[[OMP_END]] // CHECK: [[OMP_END]] -- cgit v1.2.1