diff options
-rw-r--r-- | lib/Driver/ToolChains/Cuda.cpp | 10 | ||||
-rw-r--r-- | lib/Driver/ToolChains/Cuda.h | 11 | ||||
-rw-r--r-- | test/Driver/openmp-offload.c | 65 |
3 files changed, 18 insertions, 68 deletions
diff --git a/lib/Driver/ToolChains/Cuda.cpp b/lib/Driver/ToolChains/Cuda.cpp index 643c3404d1..00715a7a24 100644 --- a/lib/Driver/ToolChains/Cuda.cpp +++ b/lib/Driver/ToolChains/Cuda.cpp @@ -527,10 +527,14 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, } StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ); - if (Arch.empty()) - // Default compute capability for CUDA toolchain is sm_20. + if (Arch.empty()) { + // Default compute capability for CUDA toolchain is the + // lowest compute capability supported by the installed + // CUDA version. DAL->AddJoinedArg(nullptr, - Opts.getOption(options::OPT_march_EQ), "sm_20"); + Opts.getOption(options::OPT_march_EQ), + CudaInstallation.getLowestExistingArch()); + } return DAL; } diff --git a/lib/Driver/ToolChains/Cuda.h b/lib/Driver/ToolChains/Cuda.h index 1e30aa7270..2ea76da3cb 100644 --- a/lib/Driver/ToolChains/Cuda.h +++ b/lib/Driver/ToolChains/Cuda.h @@ -76,6 +76,17 @@ public: std::string getLibDeviceFile(StringRef Gpu) const { return LibDeviceMap.lookup(Gpu); } + /// \brief Get the first arch in LibDeviceMap that has a libdevice library, + /// else "sm_35". Lowest only if keys() is sorted -- TODO confirm. 
+ std::string getLowestExistingArch() const { + std::string LibDeviceFile; + for (auto key : LibDeviceMap.keys()) { + LibDeviceFile = LibDeviceMap.lookup(key); + if (!LibDeviceFile.empty()) + return key; + } + return "sm_35"; + } }; namespace tools { diff --git a/test/Driver/openmp-offload.c b/test/Driver/openmp-offload.c index cd2ca3e3d6..7187a9ebec 100644 --- a/test/Driver/openmp-offload.c +++ b/test/Driver/openmp-offload.c @@ -647,68 +647,3 @@ // CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_35" // CHK-FOPENMP-TARGET-COMPILATION: nvlink{{.*}}" "-arch" "sm_35" - -/// ########################################################################### - -/// Check cubin file generation and usage by nvlink -// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-CUBIN %s - -// CHK-CUBIN: clang{{.*}}" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda.s" -// CHK-CUBIN-NEXT: ptxas{{.*}}" "--output-file" "{{.*}}-openmp-nvptx64-nvidia-cuda.cubin" "{{.*}}-openmp-nvptx64-nvidia-cuda.s" -// CHK-CUBIN-NEXT: nvlink" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda" {{.*}} "openmp-offload-openmp-nvptx64-nvidia-cuda.cubin" - -/// ########################################################################### - -/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction -// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-CUBIN-DARWIN %s - -// CHK-CUBIN-DARWIN: clang{{.*}}" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda.s" -// CHK-CUBIN-DARWIN-NEXT: ptxas{{.*}}" "--output-file" "{{.*}}-openmp-nvptx64-nvidia-cuda.cubin" "{{.*}}-openmp-nvptx64-nvidia-cuda.s" -// CHK-CUBIN-DARWIN-NEXT: nvlink" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda" {{.*}} "openmp-offload-openmp-nvptx64-nvidia-cuda.cubin" - -/// 
########################################################################### - -/// Check cubin file generation and usage by nvlink -// RUN: touch %t1.o -// RUN: touch %t2.o -// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s - -// CHK-TWOCUBIN: nvlink"{{.*}}"openmp-offload-{{.*}}.cubin" "openmp-offload-{{.*}}.cubin" - -/// ########################################################################### - -/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction -// RUN: touch %t1.o -// RUN: touch %t2.o -// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN-DARWIN %s - -// CHK-TWOCUBIN-DARWIN: nvlink"{{.*}}"openmp-offload-{{.*}}.cubin" "openmp-offload-{{.*}}.cubin" - -/// ########################################################################### - -/// Check PTXAS is passed -c flag when offloading to an NVIDIA device using OpenMP. -// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -no-canonical-prefixes %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-PTXAS-DEFAULT %s - -// CHK-PTXAS-DEFAULT: ptxas{{.*}}" "-c" - -/// ########################################################################### - -/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it. 
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-PTXAS-NORELO %s - -// CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c" - -/// ########################################################################### - -/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP -/// Check that the flag is passed when -fopenmp-relocatable-target is used. -// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-PTXAS-RELO %s - -// CHK-PTXAS-RELO: ptxas{{.*}}" "-c" |