summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/CGOpenMPRuntime.cpp60
-rw-r--r--lib/CodeGen/CGOpenMPRuntime.h44
-rw-r--r--lib/CodeGen/CGStmtOpenMP.cpp25
-rw-r--r--test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp7
-rw-r--r--test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp8
-rw-r--r--test/OpenMP/teams_distribute_codegen.cpp7
-rw-r--r--test/OpenMP/teams_distribute_parallel_for_codegen.cpp5
-rw-r--r--test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp5
-rw-r--r--test/OpenMP/teams_distribute_simd_codegen.cpp5
9 files changed, 80 insertions, 86 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 45833e1139..46a9ec3ac2 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9176,9 +9176,11 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
}
void CGOpenMPRuntime::emitTargetNumIterationsCall(
- CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
- const llvm::function_ref<llvm::Value *(
- CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Value *DeviceID,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) {
OpenMPDirectiveKind Kind = D.getDirectiveKind();
const OMPExecutableDirective *TD = &D;
// Get nested teams distribute kind directive, if any.
@@ -9187,30 +9189,24 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
if (!TD)
return;
const auto *LD = cast<OMPLoopDirective>(TD);
- auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
+ auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
PrePostActionTy &) {
- llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
-
- // Emit device ID if any.
- llvm::Value *DeviceID;
- if (Device)
- DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
- CGF.Int64Ty, /*isSigned=*/true);
- else
- DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
-
- llvm::Value *Args[] = {DeviceID, NumIterations};
- CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
+ llvm::Value *Args[] = {DeviceID, NumIterations};
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ }
};
emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
-void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn,
- llvm::Value *OutlinedFnID,
- const Expr *IfCond, const Expr *Device) {
+void CGOpenMPRuntime::emitTargetCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+ const Expr *Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) {
if (!CGF.HaveInsertPoint())
return;
@@ -9229,8 +9225,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
llvm::Value *MapTypesArray = nullptr;
// Fill up the pointer arrays and transfer execution to the device.
auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
- &MapTypesArray, &CS, RequiresOuterTask,
- &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
+ &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
+ SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
// On top of the arrays that were filled up, the target offloading call
// takes as arguments the device id as well as the host pointer. The host
// pointer is used by the runtime library to identify the current target
@@ -9262,6 +9258,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
+ // Emit tripcount for the target loop-based directive.
+ emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
+
bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
// The target region is an outlined function launched by the runtime
// via calls __tgt_target() or __tgt_target_teams().
@@ -11285,12 +11284,13 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
llvm_unreachable("Not supported in SIMD-only mode");
}
-void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn,
- llvm::Value *OutlinedFnID,
- const Expr *IfCond,
- const Expr *Device) {
+void CGOpenMPSIMDRuntime::emitTargetCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+ const Expr *Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) {
llvm_unreachable("Not supported in SIMD-only mode");
}
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 92e946c670..afec628feb 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -793,6 +793,17 @@ private:
/// default.
virtual unsigned getDefaultFirstprivateAddressSpace() const { return 0; }
+ /// Emit code that pushes the trip count of loops associated with constructs
+ /// 'target teams distribute' and 'teams distribute parallel for'.
+ /// \param SizeEmitter Emits the int64 value for the number of iterations of
+ /// the associated loop.
+ void emitTargetNumIterationsCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Value *DeviceID,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter);
+
public:
explicit CGOpenMPRuntime(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM, ".", ".") {}
@@ -1414,15 +1425,6 @@ public:
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
- /// Emit code that pushes the trip count of loops associated with constructs
- /// 'target teams distribute' and 'teams distribute parallel for'.
- /// \param SizeEmitter Emits the int64 value for the number of iterations of
- /// the associated loop.
- virtual void emitTargetNumIterationsCall(
- CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
- const llvm::function_ref<llvm::Value *(
- CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter);
-
/// Emit the target offloading code associated with \a D. The emitted
/// code attempts offloading the execution to the device, an the event of
/// a failure it executes the host version outlined in \a OutlinedFn.
@@ -1433,11 +1435,15 @@ public:
/// directive, or null if no if clause is used.
/// \param Device Expression evaluated in device clause associated with the
/// target directive, or null if no device clause is used.
- virtual void emitTargetCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn,
- llvm::Value *OutlinedFnID, const Expr *IfCond,
- const Expr *Device);
+ /// \param SizeEmitter Callback to emit number of iterations for loop-based
+ /// directives.
+ virtual void
+ emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID,
+ const Expr *IfCond, const Expr *Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter);
/// Emit the target regions enclosed in \a GD function definition or
/// the function itself in case it is a valid device function. Returns true if
@@ -2117,9 +2123,13 @@ public:
/// directive, or null if no if clause is used.
/// \param Device Expression evaluated in device clause associated with the
/// target directive, or null if no device clause is used.
- void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID,
- const Expr *IfCond, const Expr *Device) override;
+ void
+ emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID,
+ const Expr *IfCond, const Expr *Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) override;
/// Emit the target regions enclosed in \a GD function definition or
/// the function itself in case it is a valid device function. Returns true if
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index e34b820625..b41ad55322 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -4137,18 +4137,21 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
IsOffloadEntry, CodeGen);
OMPLexicalScope Scope(CGF, S, OMPD_task);
- auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) {
- OMPLoopScope(CGF, D);
- // Emit calculation of the iterations count.
- llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
- NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
- /*isSigned=*/false);
- return NumIterations;
+ auto &&SizeEmitter =
+ [IsOffloadEntry](CodeGenFunction &CGF,
+ const OMPLoopDirective &D) -> llvm::Value * {
+ if (IsOffloadEntry) {
+ OMPLoopScope(CGF, D);
+ // Emit calculation of the iterations count.
+ llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
+ NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
+ /*isSigned=*/false);
+ return NumIterations;
+ }
+ return nullptr;
};
- if (IsOffloadEntry)
- CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device,
- SizeEmitter);
- CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
+ CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
+ SizeEmitter);
}
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
index 4c7ac2441c..1617f8510a 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
@@ -54,17 +54,14 @@ int target_teams_fun(int *g){
// discard capture expressions for te and th
// HCK1: = alloca i32,
// HCK1: = alloca i32,
- // HCK1: = alloca i32,
- // HCK1: = alloca i32,
- // HCK1: = alloca i32,
// HCK1: [[N_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
- // HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
// HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
- // HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}},
+ // HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
+ // HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}},
// HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]])
#pragma omp target teams distribute parallel for num_teams(te), thread_limit(th)
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
index 6846aaf434..2c74b18fbd 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -52,18 +52,15 @@ int target_teams_fun(int *g){
// discard capture expressions for te and th
// HCK1: = alloca i32,
// HCK1: = alloca i32,
-// HCK1: = alloca i32,
-// HCK1: = alloca i32,
-// HCK1: = alloca i32,
// HCK1: [[I_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[N_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
-// HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// HCK1: [[I_PAR:%.+]] = load{{.+}}, {{.+}} [[I_CAST]],
// HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
// HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
+// HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}},
// HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[I_PAR]], i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]])
@@ -77,8 +74,7 @@ int target_teams_fun(int *g){
// HCK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
{{{
#pragma omp target teams distribute parallel for simd is_device_ptr(g) simdlen(8)
- for(
- int i = 0; i < n; i++) {
+ for(int i = 0; i < n; i++) {
a[i] = g[0];
}
}}}
diff --git a/test/OpenMP/teams_distribute_codegen.cpp b/test/OpenMP/teams_distribute_codegen.cpp
index ea299bc346..0065c6cbd5 100644
--- a/test/OpenMP/teams_distribute_codegen.cpp
+++ b/test/OpenMP/teams_distribute_codegen.cpp
@@ -21,22 +21,19 @@
int a[100];
// CK1: define {{.*}}i32 @{{.+}}teams_argument_globali(
-int teams_argument_global(int n){
+int teams_argument_global(int n) {
int te = n / 128;
int th = 128;
// discard n_addr
// CK1: alloca i32,
// CK1: [[TE:%.+]] = alloca i32,
// CK1: [[TH:%.+]] = alloca i32,
- // CK1: alloca i32,
- // CK1: alloca i32,
- // CK1: alloca i32,
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
- // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
+ // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
// CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
diff --git a/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
index fa425b3eca..bc42f740ad 100644
--- a/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
@@ -28,14 +28,11 @@ int teams_argument_global(int n){
// CK1: alloca i32,
// CK1: [[TE:%.+]] = alloca i32,
// CK1: [[TH:%.+]] = alloca i32,
- // CK1: alloca i32,
- // CK1: alloca i32,
- // CK1: alloca i32,
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
- // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
+ // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
// CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
index 45793419d7..fd2b2da533 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
@@ -28,15 +28,12 @@ int teams_argument_global(int n){
// CK1: alloca i32,
// CK1: [[TE:%.+]] = alloca i32,
// CK1: [[TH:%.+]] = alloca i32,
- // CK1: alloca i32,
- // CK1: alloca i32,
- // CK1: alloca i32,
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
- // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
+ // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
// CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
diff --git a/test/OpenMP/teams_distribute_simd_codegen.cpp b/test/OpenMP/teams_distribute_simd_codegen.cpp
index ab1482855e..ff2a752cfa 100644
--- a/test/OpenMP/teams_distribute_simd_codegen.cpp
+++ b/test/OpenMP/teams_distribute_simd_codegen.cpp
@@ -30,15 +30,12 @@ int teams_argument_global(int n) {
// CK1: alloca i32,
// CK1: [[TE:%.+]] = alloca i32,
// CK1: [[TH:%.+]] = alloca i32,
- // CK1: alloca i32,
- // CK1: alloca i32,
- // CK1: alloca i32,
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
- // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
+ // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 1)
// CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],