diff options
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.cpp | 60 | ||||
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.h | 44 | ||||
-rw-r--r-- | lib/CodeGen/CGStmtOpenMP.cpp | 25 | ||||
-rw-r--r-- | test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp | 7 | ||||
-rw-r--r-- | test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp | 8 | ||||
-rw-r--r-- | test/OpenMP/teams_distribute_codegen.cpp | 7 | ||||
-rw-r--r-- | test/OpenMP/teams_distribute_parallel_for_codegen.cpp | 5 | ||||
-rw-r--r-- | test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp | 5 | ||||
-rw-r--r-- | test/OpenMP/teams_distribute_simd_codegen.cpp | 5 |
9 files changed, 80 insertions, 86 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 45833e1139..46a9ec3ac2 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9176,9 +9176,11 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( } void CGOpenMPRuntime::emitTargetNumIterationsCall( - CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, - const llvm::function_ref<llvm::Value *( - CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Value *DeviceID, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { OpenMPDirectiveKind Kind = D.getDirectiveKind(); const OMPExecutableDirective *TD = &D; // Get nested teams distribute kind directive, if any. @@ -9187,30 +9189,24 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( if (!TD) return; const auto *LD = cast<OMPLoopDirective>(TD); - auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF, + auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, PrePostActionTy &) { - llvm::Value *NumIterations = SizeEmitter(CGF, *LD); - - // Emit device ID if any. - llvm::Value *DeviceID; - if (Device) - DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int64Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); - - llvm::Value *Args[] = {DeviceID, NumIterations}; - CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); + if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { + llvm::Value *Args[] = {DeviceID, NumIterations}; + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); + } }; emitInlinedDirective(CGF, OMPD_unknown, CodeGen); } -void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, - llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device) { +void CGOpenMPRuntime::emitTargetCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, + const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { if (!CGF.HaveInsertPoint()) return; @@ -9229,8 +9225,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *MapTypesArray = nullptr; // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, - &MapTypesArray, &CS, RequiresOuterTask, - &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { + &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, + SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. The host // pointer is used by the runtime library to identify the current target @@ -9262,6 +9258,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); + // Emit tripcount for the target loop-based directive. + emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); + bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime // via calls __tgt_target() or __tgt_target_teams(). @@ -11285,12 +11284,13 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( llvm_unreachable("Not supported in SIMD-only mode"); } -void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, - llvm::Value *OutlinedFnID, - const Expr *IfCond, - const Expr *Device) { +void CGOpenMPSIMDRuntime::emitTargetCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, + const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { llvm_unreachable("Not supported in SIMD-only mode"); } diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 92e946c670..afec628feb 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -793,6 +793,17 @@ private: /// default. virtual unsigned getDefaultFirstprivateAddressSpace() const { return 0; } + /// Emit code that pushes the trip count of loops associated with constructs + /// 'target teams distribute' and 'teams distribute parallel for'. + /// \param SizeEmitter Emits the int64 value for the number of iterations of + /// the associated loop. + void emitTargetNumIterationsCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Value *DeviceID, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter); + public: explicit CGOpenMPRuntime(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, ".", ".") {} @@ -1414,15 +1425,6 @@ public: bool IsOffloadEntry, const RegionCodeGenTy &CodeGen); - /// Emit code that pushes the trip count of loops associated with constructs - /// 'target teams distribute' and 'teams distribute parallel for'. - /// \param SizeEmitter Emits the int64 value for the number of iterations of - /// the associated loop. - virtual void emitTargetNumIterationsCall( - CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, - const llvm::function_ref<llvm::Value *( - CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter); - /// Emit the target offloading code associated with \a D. The emitted /// code attempts offloading the execution to the device, an the event of /// a failure it executes the host version outlined in \a OutlinedFn. @@ -1433,11 +1435,15 @@ public: /// directive, or null if no if clause is used. /// \param Device Expression evaluated in device clause associated with the /// target directive, or null if no device clause is used. - virtual void emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, - llvm::Value *OutlinedFnID, const Expr *IfCond, - const Expr *Device); + /// \param SizeEmitter Callback to emit number of iterations for loop-based + /// directives. + virtual void + emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, + const Expr *IfCond, const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter); /// Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if @@ -2117,9 +2123,13 @@ public: /// directive, or null if no if clause is used. /// \param Device Expression evaluated in device clause associated with the /// target directive, or null if no device clause is used. - void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device) override; + void + emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, + const Expr *IfCond, const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) override; /// Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index e34b820625..b41ad55322 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -4137,18 +4137,21 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, IsOffloadEntry, CodeGen); OMPLexicalScope Scope(CGF, S, OMPD_task); - auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) { - OMPLoopScope(CGF, D); - // Emit calculation of the iterations count. - llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); - NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, - /*isSigned=*/false); - return NumIterations; + auto &&SizeEmitter = + [IsOffloadEntry](CodeGenFunction &CGF, + const OMPLoopDirective &D) -> llvm::Value * { + if (IsOffloadEntry) { + OMPLoopScope(CGF, D); + // Emit calculation of the iterations count. + llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); + NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, + /*isSigned=*/false); + return NumIterations; + } + return nullptr; }; - if (IsOffloadEntry) - CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device, - SizeEmitter); - CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device); + CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, + SizeEmitter); } static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, diff --git a/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp index 4c7ac2441c..1617f8510a 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp @@ -54,17 +54,14 @@ int target_teams_fun(int *g){ // discard capture expressions for te and th // HCK1: = alloca i32, // HCK1: = alloca i32, - // HCK1: = alloca i32, - // HCK1: = alloca i32, - // HCK1: = alloca i32, // HCK1: [[N_CAST:%.+]] = alloca i{{32|64}}, // HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}}, // HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, - // HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]], // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], - // HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, + // HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) + // HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]]) #pragma omp target teams distribute parallel for num_teams(te), thread_limit(th) diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp index 6846aaf434..2c74b18fbd 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp @@ -52,18 +52,15 @@ int target_teams_fun(int *g){ // discard capture expressions for te and th // HCK1: = alloca i32, // HCK1: = alloca i32, -// HCK1: = alloca i32, -// HCK1: = alloca i32, -// HCK1: = alloca i32, // HCK1: [[I_CAST:%.+]] = alloca i{{32|64}}, // HCK1: [[N_CAST:%.+]] = alloca i{{32|64}}, // HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}}, // HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, -// HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) // HCK1: [[I_PAR:%.+]] = load{{.+}}, {{.+}} [[I_CAST]], // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]], // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], +// HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) // HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[I_PAR]], i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]]) @@ -77,8 +74,7 @@ int target_teams_fun(int *g){ // HCK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}}) {{{ #pragma omp target teams distribute parallel for simd is_device_ptr(g) simdlen(8) - for( - int i = 0; i < n; i++) { + for(int i = 0; i < n; i++) { a[i] = g[0]; } }}} diff --git a/test/OpenMP/teams_distribute_codegen.cpp b/test/OpenMP/teams_distribute_codegen.cpp index ea299bc346..0065c6cbd5 100644 --- a/test/OpenMP/teams_distribute_codegen.cpp +++ b/test/OpenMP/teams_distribute_codegen.cpp @@ -21,22 +21,19 @@ int a[100]; // CK1: define {{.*}}i32 @{{.+}}teams_argument_globali( -int teams_argument_global(int n){ +int teams_argument_global(int n) { int te = n / 128; int th = 128; // discard n_addr // CK1: alloca i32, // CK1: [[TE:%.+]] = alloca i32, // CK1: [[TH:%.+]] = alloca i32, - // CK1: alloca i32, - // CK1: alloca i32, - // CK1: alloca i32, // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}}, // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, - // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], + // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}}) // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], diff --git a/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_codegen.cpp index fa425b3eca..bc42f740ad 100644 --- a/test/OpenMP/teams_distribute_parallel_for_codegen.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_codegen.cpp @@ -28,14 +28,11 @@ int teams_argument_global(int n){ // CK1: alloca i32, // CK1: [[TE:%.+]] = alloca i32, // CK1: [[TH:%.+]] = alloca i32, - // CK1: alloca i32, - // CK1: alloca i32, - // CK1: alloca i32, // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}}, // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, - // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], + // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}}) // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp index 45793419d7..fd2b2da533 100644 --- a/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp @@ -28,15 +28,12 @@ int teams_argument_global(int n){ // CK1: alloca i32, // CK1: [[TE:%.+]] = alloca i32, // CK1: [[TH:%.+]] = alloca i32, - // CK1: alloca i32, - // CK1: alloca i32, - // CK1: alloca i32, // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}}, // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, - // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], + // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], diff --git a/test/OpenMP/teams_distribute_simd_codegen.cpp b/test/OpenMP/teams_distribute_simd_codegen.cpp index ab1482855e..ff2a752cfa 100644 --- a/test/OpenMP/teams_distribute_simd_codegen.cpp +++ b/test/OpenMP/teams_distribute_simd_codegen.cpp @@ -30,15 +30,12 @@ int teams_argument_global(int n) { // CK1: alloca i32, // CK1: [[TE:%.+]] = alloca i32, // CK1: [[TH:%.+]] = alloca i32, - // CK1: alloca i32, - // CK1: alloca i32, - // CK1: alloca i32, // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}}, // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, - // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], + // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 1) // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], |