summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- lib/Driver/ToolChains/Cuda.cpp 10
-rw-r--r-- lib/Driver/ToolChains/Cuda.h 11
-rw-r--r-- test/Driver/openmp-offload.c 65
3 files changed, 18 insertions, 68 deletions
diff --git a/lib/Driver/ToolChains/Cuda.cpp b/lib/Driver/ToolChains/Cuda.cpp
index 643c3404d1..00715a7a24 100644
--- a/lib/Driver/ToolChains/Cuda.cpp
+++ b/lib/Driver/ToolChains/Cuda.cpp
@@ -527,10 +527,14 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
}
StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
- if (Arch.empty())
- // Default compute capability for CUDA toolchain is sm_20.
+ if (Arch.empty()) {
+ // Default compute capability for CUDA toolchain is the
+ // lowest compute capability supported by the installed
+ // CUDA version.
DAL->AddJoinedArg(nullptr,
- Opts.getOption(options::OPT_march_EQ), "sm_20");
+ Opts.getOption(options::OPT_march_EQ),
+ CudaInstallation.getLowestExistingArch());
+ }
return DAL;
}
diff --git a/lib/Driver/ToolChains/Cuda.h b/lib/Driver/ToolChains/Cuda.h
index 1e30aa7270..2ea76da3cb 100644
--- a/lib/Driver/ToolChains/Cuda.h
+++ b/lib/Driver/ToolChains/Cuda.h
@@ -76,6 +76,17 @@ public:
std::string getLibDeviceFile(StringRef Gpu) const {
return LibDeviceMap.lookup(Gpu);
}
+ /// \brief Get the lowest compute capability for which a libdevice
+ /// library exists.
+ ///
+ /// Considers every key of LibDeviceMap that maps to a non-empty libdevice
+ /// path and returns the smallest one. The choice does not depend on the
+ /// map's iteration order (LibDeviceMap appears to be an llvm::StringMap,
+ /// whose order is hash-based -- confirm against its declaration), so the
+ /// result really is the lowest entry rather than whichever key is visited
+ /// first.
+ ///
+ /// \returns the lowest such key, or "sm_35" when no key has a libdevice.
+ std::string getLowestExistingArch() const {
+ bool Found = false;
+ StringRef Lowest;
+ for (StringRef Key : LibDeviceMap.keys()) {
+ if (LibDeviceMap.lookup(Key).empty())
+ continue;
+ // Order by length first, then lexicographically, so that names sharing
+ // a prefix compare by their numeric suffix ("sm_70" before "sm_100").
+ if (!Found || Key.size() < Lowest.size() ||
+ (Key.size() == Lowest.size() && Key < Lowest)) {
+ Lowest = Key;
+ Found = true;
+ }
+ }
+ if (!Found)
+ return "sm_35";
+ return Lowest.str();
+ }
};
namespace tools {
diff --git a/test/Driver/openmp-offload.c b/test/Driver/openmp-offload.c
index cd2ca3e3d6..7187a9ebec 100644
--- a/test/Driver/openmp-offload.c
+++ b/test/Driver/openmp-offload.c
@@ -647,68 +647,3 @@
// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_35"
// CHK-FOPENMP-TARGET-COMPILATION: nvlink{{.*}}" "-arch" "sm_35"
-
-/// ###########################################################################
-
-/// Check cubin file generation and usage by nvlink
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-CUBIN %s
-
-// CHK-CUBIN: clang{{.*}}" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda.s"
-// CHK-CUBIN-NEXT: ptxas{{.*}}" "--output-file" "{{.*}}-openmp-nvptx64-nvidia-cuda.cubin" "{{.*}}-openmp-nvptx64-nvidia-cuda.s"
-// CHK-CUBIN-NEXT: nvlink" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda" {{.*}} "openmp-offload-openmp-nvptx64-nvidia-cuda.cubin"
-
-/// ###########################################################################
-
-/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
-// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-CUBIN-DARWIN %s
-
-// CHK-CUBIN-DARWIN: clang{{.*}}" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda.s"
-// CHK-CUBIN-DARWIN-NEXT: ptxas{{.*}}" "--output-file" "{{.*}}-openmp-nvptx64-nvidia-cuda.cubin" "{{.*}}-openmp-nvptx64-nvidia-cuda.s"
-// CHK-CUBIN-DARWIN-NEXT: nvlink" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda" {{.*}} "openmp-offload-openmp-nvptx64-nvidia-cuda.cubin"
-
-/// ###########################################################################
-
-/// Check cubin file generation and usage by nvlink
-// RUN: touch %t1.o
-// RUN: touch %t2.o
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
-
-// CHK-TWOCUBIN: nvlink"{{.*}}"openmp-offload-{{.*}}.cubin" "openmp-offload-{{.*}}.cubin"
-
-/// ###########################################################################
-
-/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
-// RUN: touch %t1.o
-// RUN: touch %t2.o
-// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN-DARWIN %s
-
-// CHK-TWOCUBIN-DARWIN: nvlink"{{.*}}"openmp-offload-{{.*}}.cubin" "openmp-offload-{{.*}}.cubin"
-
-/// ###########################################################################
-
-/// Check PTXAS is passed -c flag when offloading to an NVIDIA device using OpenMP.
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -no-canonical-prefixes %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-PTXAS-DEFAULT %s
-
-// CHK-PTXAS-DEFAULT: ptxas{{.*}}" "-c"
-
-/// ###########################################################################
-
-/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it.
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-PTXAS-NORELO %s
-
-// CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c"
-
-/// ###########################################################################
-
-/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP
-/// Check that the flag is passed when -fopenmp-relocatable-target is used.
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-PTXAS-RELO %s
-
-// CHK-PTXAS-RELO: ptxas{{.*}}" "-c"