summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2019-01-07 21:30:43 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2019-01-07 21:30:43 +0000
commit95d3faa7e80d75dfaf71f63aca109c43b63a4c82 (patch)
tree50997d7772f60d36eb4044edbd5013c7a300cd9c
parent7a9019d1530f9e93dcd04078c404194168339d87 (diff)
downloadclang-95d3faa7e80d75dfaf71f63aca109c43b63a4c82.tar.gz
[OPENMP]Add call to __kmpc_push_target_tripcount() function.
Each we create the target regions with the teams distribute inner region, we can better estimate number of the teams required to execute the target region. Function __kmpc_push_target_tripcount() is used for purpose, which accepts device_id and the number of the iterations, performed by the associated loop. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@350571 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/CGOpenMPRuntime.cpp189
-rw-r--r--lib/CodeGen/CGOpenMPRuntime.h9
-rw-r--r--lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp4
-rw-r--r--lib/CodeGen/CGStmtOpenMP.cpp10
-rw-r--r--test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp4
-rw-r--r--test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp5
-rw-r--r--test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp7
-rw-r--r--test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp5
-rw-r--r--test/OpenMP/teams_distribute_codegen.cpp6
-rw-r--r--test/OpenMP/teams_distribute_parallel_for_codegen.cpp4
-rw-r--r--test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp4
-rw-r--r--test/OpenMP/teams_distribute_simd_codegen.cpp4
12 files changed, 247 insertions, 4 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 8176093840..20eb0b29f4 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -673,6 +673,9 @@ enum OpenMPRTLFunction {
//
// Offloading related calls
//
+ // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
+ // size);
+ OMPRTL__kmpc_push_target_tripcount,
// Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
// *arg_types);
@@ -2163,6 +2166,15 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
break;
}
+ case OMPRTL__kmpc_push_target_tripcount: {
+ // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
+ // size);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
+ break;
+ }
case OMPRTL__tgt_target: {
// Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
@@ -8053,6 +8065,183 @@ static void emitOffloadingArraysArgument(
}
}
+/// Checks if the expression is constant or does not have non-trivial function
+/// calls.
+static bool isTrivial(ASTContext &Ctx, const Expr * E) {
+ // We can skip constant expressions.
+ // We can skip expressions with trivial calls or simple expressions.
+ return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
+ !E->hasNonTrivialCall(Ctx)) &&
+ !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
+}
+
+/// Checks if the \p Body is the \a CompoundStmt and returns its child statement
+/// iff there is only one that is not evaluatable at the compile time.
+static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
+ if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
+ const Stmt *Child = nullptr;
+ for (const Stmt *S : C->body()) {
+ if (const auto *E = dyn_cast<Expr>(S)) {
+ if (isTrivial(Ctx, E))
+ continue;
+ }
+ // Some of the statements can be ignored.
+ if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
+ isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
+ continue;
+ // Analyze declarations.
+ if (const auto *DS = dyn_cast<DeclStmt>(S)) {
+ if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
+ if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
+ isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
+ isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
+ isa<UsingDirectiveDecl>(D) ||
+ isa<OMPDeclareReductionDecl>(D) ||
+ isa<OMPThreadPrivateDecl>(D))
+ return true;
+ const auto *VD = dyn_cast<VarDecl>(D);
+ if (!VD)
+ return false;
+ return VD->isConstexpr() ||
+ ((VD->getType().isTrivialType(Ctx) ||
+ VD->getType()->isReferenceType()) &&
+ (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
+ }))
+ continue;
+ }
+ // Found multiple children - cannot get the one child only.
+ if (Child)
+ return Body;
+ Child = S;
+ }
+ if (Child)
+ return Child;
+ }
+ return Body;
+}
+
+/// Check for inner distribute directive.
+static const OMPExecutableDirective *
+getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
+ const auto *CS = D.getInnermostCapturedStmt();
+ const auto *Body =
+ CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+ const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
+
+ if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
+ switch (D.getDirectiveKind()) {
+ case OMPD_target:
+ if (isOpenMPDistributeDirective(DKind))
+ return NestedDir;
+ if (DKind == OMPD_teams) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
+ if (!Body)
+ return nullptr;
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ if (isOpenMPDistributeDirective(DKind))
+ return NND;
+ }
+ }
+ return nullptr;
+ case OMPD_target_teams:
+ if (isOpenMPDistributeDirective(DKind))
+ return NestedDir;
+ return nullptr;
+ case OMPD_target_parallel:
+ case OMPD_target_simd:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ return nullptr;
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_requires:
+ case OMPD_unknown:
+ llvm_unreachable("Unexpected directive.");
+ }
+ }
+
+ return nullptr;
+}
+
+void CGOpenMPRuntime::emitTargetNumIterationsCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
+ const llvm::function_ref<llvm::Value *(
+ CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
+ OpenMPDirectiveKind Kind = D.getDirectiveKind();
+ const OMPExecutableDirective *TD = &D;
+ // Get nested teams distribute kind directive, if any.
+ if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
+ TD = getNestedDistributeDirective(CGM.getContext(), D);
+ if (!TD)
+ return;
+ const auto *LD = cast<OMPLoopDirective>(TD);
+ auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
+
+ // Emit device ID if any.
+ llvm::Value *DeviceID;
+ if (Device)
+ DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
+ CGF.Int64Ty, /*isSigned=*/true);
+ else
+ DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+
+ llvm::Value *Args[] = {DeviceID, NumIterations};
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ };
+ emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
+}
+
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
llvm::Value *OutlinedFn,
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index d9ac5df36b..933c8af116 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -1367,6 +1367,15 @@ public:
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
+ /// Emit code that pushes the trip count of loops associated with constructs
+ /// 'target teams distribute' and 'teams distribute parallel for'.
+ /// \param SizeEmitter Emits the int64 value for the number of iterations of
+ /// the associated loop.
+ virtual void emitTargetNumIterationsCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
+ const llvm::function_ref<llvm::Value *(
+ CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter);
+
/// Emit the target offloading code associated with \a D. The emitted
/// code attempts offloading the execution to the device, an the event of
/// a failure it executes the host version outlined in \a OutlinedFn.
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 403aefbef9..7046ab3aa3 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -705,8 +705,8 @@ getDataSharingMode(CodeGenModule &CGM) {
: CGOpenMPRuntimeNVPTX::Generic;
}
-// Checks if the expression is constant or does not have non-trivial function
-// calls.
+/// Checks if the expression is constant or does not have non-trivial function
+/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
// We can skip constant expressions.
// We can skip expressions with trivial calls or simple expressions.
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index ee38593084..650f4e8b91 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -4071,6 +4071,16 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
IsOffloadEntry, CodeGen);
OMPLexicalScope Scope(CGF, S, OMPD_task);
+ auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) {
+ OMPLoopScope(CGF, D);
+ // Emit calculation of the iterations count.
+ llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
+ NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
+ /*IsSigned=*/false);
+ return NumIterations;
+ };
+ CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device,
+ SizeEmitter);
CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
}
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
index 4d263cf69f..e439431a05 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
@@ -51,9 +51,13 @@ int target_teams_fun(int *g){
// discard capture expressions for te and th
// HCK1: = alloca i32,
// HCK1: = alloca i32,
+ // HCK1: = alloca i32,
+ // HCK1: = alloca i32,
+ // HCK1: = alloca i32,
// HCK1: [[N_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+ // HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
// HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
index 8cd90a610b..af6ed9744a 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
@@ -22,8 +22,10 @@ int Arg;
// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
void gtid_test() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
#pragma omp target teams distribute parallel for
@@ -78,9 +80,12 @@ int tmain(T Arg) {
// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
int main() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
index 0329d7e278..7a1ea81091 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -52,10 +52,14 @@ int target_teams_fun(int *g){
// discard capture expressions for te and th
// HCK1: = alloca i32,
// HCK1: = alloca i32,
+// HCK1: = alloca i32,
+// HCK1: = alloca i32,
+// HCK1: = alloca i32,
// HCK1: [[I_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[N_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+// HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// HCK1: [[I_PAR:%.+]] = load{{.+}}, {{.+}} [[I_CAST]],
// HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
// HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
@@ -73,7 +77,8 @@ int target_teams_fun(int *g){
// HCK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
{{{
#pragma omp target teams distribute parallel for simd is_device_ptr(g) simdlen(8)
- for(int i = 0; i < n; i++) {
+ for(
+ int i = 0; i < n; i++) {
a[i] = g[0];
}
}}}
diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
index a3a14fbf1e..b6778417be 100644
--- a/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
+++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -22,8 +22,10 @@ int Arg;
// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
void gtid_test() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
#pragma omp target teams distribute parallel for simd
@@ -78,9 +80,12 @@ int tmain(T Arg) {
// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
int main() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
diff --git a/test/OpenMP/teams_distribute_codegen.cpp b/test/OpenMP/teams_distribute_codegen.cpp
index 0f6b5f2f25..ea299bc346 100644
--- a/test/OpenMP/teams_distribute_codegen.cpp
+++ b/test/OpenMP/teams_distribute_codegen.cpp
@@ -21,15 +21,19 @@
int a[100];
// CK1: define {{.*}}i32 @{{.+}}teams_argument_globali(
-int teams_argument_global(int n){
+int teams_argument_global(int n){
int te = n / 128;
int th = 128;
// discard n_addr
// CK1: alloca i32,
// CK1: [[TE:%.+]] = alloca i32,
// CK1: [[TH:%.+]] = alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+ // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
diff --git a/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
index 865ea129f9..fa425b3eca 100644
--- a/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
@@ -28,8 +28,12 @@ int teams_argument_global(int n){
// CK1: alloca i32,
// CK1: [[TE:%.+]] = alloca i32,
// CK1: [[TH:%.+]] = alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+ // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
// CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
index 784deeef7c..45793419d7 100644
--- a/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
+++ b/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
@@ -28,8 +28,12 @@ int teams_argument_global(int n){
// CK1: alloca i32,
// CK1: [[TE:%.+]] = alloca i32,
// CK1: [[TH:%.+]] = alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+ // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
diff --git a/test/OpenMP/teams_distribute_simd_codegen.cpp b/test/OpenMP/teams_distribute_simd_codegen.cpp
index 813d3912c3..c89a936b64 100644
--- a/test/OpenMP/teams_distribute_simd_codegen.cpp
+++ b/test/OpenMP/teams_distribute_simd_codegen.cpp
@@ -30,8 +30,12 @@ int teams_argument_global(int n) {
// CK1: alloca i32,
// CK1: [[TE:%.+]] = alloca i32,
// CK1: [[TH:%.+]] = alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+ // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],