diff options
-rw-r--r-- | include/clang/Sema/Sema.h | 4 | ||||
-rw-r--r-- | lib/Sema/SemaExpr.cpp | 20 | ||||
-rw-r--r-- | lib/Sema/SemaOpenMP.cpp | 4 | ||||
-rw-r--r-- | test/OpenMP/target_teams_codegen.cpp | 38 |
4 files changed, 48 insertions, 18 deletions
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index 0b8270dc23..212098e02b 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -9067,6 +9067,10 @@ private: void adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex, unsigned Level) const; + /// Returns the number of scopes associated with the construct on the given + /// OpenMP level. + int getNumberOfConstructScopes(unsigned Level) const; + /// Push new OpenMP function region for non-capturing function. void pushOpenMPFunctionRegion(); diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp index 8261ede582..3b6389ab7e 100644 --- a/lib/Sema/SemaExpr.cpp +++ b/lib/Sema/SemaExpr.cpp @@ -16092,7 +16092,25 @@ bool Sema::tryCaptureVariable( // target region should not be captured outside the scope of the region. if (RSI->CapRegionKind == CR_OpenMP) { bool IsOpenMPPrivateDecl = isOpenMPPrivateDecl(Var, RSI->OpenMPLevel); - auto IsTargetCap = !IsOpenMPPrivateDecl && + // If the variable is private (i.e. not captured) and has variably + // modified type, we still need to capture the type for correct + // codegen in all regions, associated with the construct. Currently, + // it is captured in the innermost captured region only. + if (IsOpenMPPrivateDecl && Var->getType()->isVariablyModifiedType()) { + QualType QTy = Var->getType(); + if (ParmVarDecl *PVD = dyn_cast_or_null<ParmVarDecl>(Var)) + QTy = PVD->getOriginalType(); + for (int I = 1, E = getNumberOfConstructScopes(RSI->OpenMPLevel); + I < E; ++I) { + auto *OuterRSI = cast<CapturedRegionScopeInfo>( + FunctionScopes[FunctionScopesIndex - I]); + assert(RSI->OpenMPLevel == OuterRSI->OpenMPLevel && + "Wrong number of captured regions associated with the " + "OpenMP construct."); + captureVariablyModifiedType(Context, QTy, OuterRSI); + } + } + bool IsTargetCap = !IsOpenMPPrivateDecl && isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel); // When we detect target captures we are looking from inside the // target region, therefore we need to propagate the capture from the diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index b736c36a29..fe69b70759 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -3435,6 +3435,10 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { } } +int Sema::getNumberOfConstructScopes(unsigned Level) const { + return getOpenMPCaptureLevels(DSAStack->getDirective(Level)); +} + int Sema::getOpenMPCaptureLevels(OpenMPDirectiveKind DKind) { SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; getOpenMPCaptureRegions(CaptureRegions, DKind); diff --git a/test/OpenMP/target_teams_codegen.cpp b/test/OpenMP/target_teams_codegen.cpp index c788a20966..83643cc2c2 100644 --- a/test/OpenMP/target_teams_codegen.cpp +++ b/test/OpenMP/target_teams_codegen.cpp @@ -86,6 +86,7 @@ // TCHECK: @{{.+}} = weak constant [[ENTTY]] // TCHECK: @{{.+}} = weak constant [[ENTTY]] // TCHECK: @{{.+}} = weak constant [[ENTTY]] +// TCHECK: @{{.+}} = weak constant [[ENTTY]] // TCHECK-NOT: @{{.+}} = weak constant [[ENTTY]] // Check if offloading descriptor is created. @@ -341,6 +342,13 @@ int foo(int n) { d.Y += 1; } + const int nn = 0; + #pragma omp target teams shared(nn) + #pragma omp parallel firstprivate(nn) + (void)nn; + #pragma omp target teams firstprivate(nn) + #pragma omp parallel shared(nn) + (void)nn; return a; } @@ -481,6 +489,19 @@ int foo(int n) { // CHECK: define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. +// CHECK: define {{.*}}void @__omp_offloading_{{.*}}foo{{.*}}_l346(i[[SZ]] %{{.+}}) +// CHECK: define internal void {{@.+}}(i32* {{.+}}, i32* {{.+}}, i[[SZ]] %{{.+}}) +// CHECK: define {{.*}}void @__omp_offloading_{{.*}}foo{{.*}}_l349(i[[SZ]] %{{.+}}) +// CHECK: define internal void {{@.+}}(i32* {{.+}}, i32* {{.+}}, i32* dereferenceable{{.+}}) + +void bazzzz(int n, int f[n]) { +// CHECK: define internal void @__omp_offloading_{{.+}}bazzzz{{.+}}_l501(i[[SZ]] %{{[^,]+}}) +// CHECK: [[VLA:%.+]] = load i[[SZ]], i[[SZ]]* % +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @{{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* @{{.+}} to void (i32*, i32*, ...)*), i[[SZ]] [[VLA]]) +#pragma omp target teams private(f) + ; +} + template<typename tx> tx ftemplate(int n) { tx a = 0; @@ -846,21 +867,4 @@ int bar(int n){ // CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. -void foo1() { - const int n = 0; - #pragma omp target teams shared(n) - #pragma omp parallel firstprivate(n) - (void)n; -} -void foo() { - const int n = 0; - #pragma omp target teams firstprivate(n) - #pragma omp parallel shared(n) - (void)n; -} - -// define {{.*}}void @__omp_offloading_{{.*}}foo1{{.*}}_l841(i[[SZ]] %{{.+}}) -// define internal void {{@.+}}(i32* {{.+}}, i32* {{.+}}, i[[SZ]] %{{.+}}) -// define {{.*}}void @__omp_offloading_{{.*}}foo1{{.*}}_l847(i[[SZ]] %{{.+}}) -// define internal void {{@.+}}(i32* {{.+}}, i32* {{.+}}, i32* dereferenceable{{.+}}) #endif |