8 files changed, 18 insertions, 328 deletions
diff --git a/lib/Driver/Action.cpp b/lib/Driver/Action.cpp
index 3219dc1cc0..360dbeecab 100644
--- a/lib/Driver/Action.cpp
+++ b/lib/Driver/Action.cpp
@@ -24,8 +24,6 @@ const char *Action::getClassName(ActionClass AC) {
   switch (AC) {
   case InputClass: return "input";
   case BindArchClass: return "bind-arch";
-  case CudaDeviceClass: return "cuda-device";
-  case CudaHostClass: return "cuda-host";
   case PreprocessJobClass: return "preprocessor";
   case PrecompileJobClass: return "precompiler";
   case AnalyzeJobClass: return "analyzer";
@@ -55,25 +53,6 @@ BindArchAction::BindArchAction(std::unique_ptr<Action> Input,
                                const char *_ArchName)
     : Action(BindArchClass, std::move(Input)), ArchName(_ArchName) {}
 
-void CudaDeviceAction::anchor() {}
-
-CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input,
-                                   const char *ArchName, bool AtTopLevel)
-    : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName),
-      AtTopLevel(AtTopLevel) {}
-
-void CudaHostAction::anchor() {}
-
-CudaHostAction::CudaHostAction(std::unique_ptr<Action> Input,
-                               const ActionList &_DeviceActions)
-    : Action(CudaHostClass, std::move(Input)), DeviceActions(_DeviceActions) {}
-
-CudaHostAction::~CudaHostAction() {
-  for (iterator it = DeviceActions.begin(), ie = DeviceActions.end(); it != ie;
-       ++it)
-    delete *it;
-}
-
 void JobAction::anchor() {}
 
 JobAction::JobAction(ActionClass Kind, std::unique_ptr<Action> Input,
diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp
index 180c412bd7..f3ec151ec9 100644
--- a/lib/Driver/Driver.cpp
+++ b/lib/Driver/Driver.cpp
@@ -174,10 +174,8 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
   } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) {
     FinalPhase = phases::Backend;
 
-    // -c and partial CUDA compilations only run up to the assembler.
-  } else if ((PhaseArg = DAL.getLastArg(options::OPT_c)) ||
-             (PhaseArg = DAL.getLastArg(options::OPT_cuda_device_only)) ||
-             (PhaseArg = DAL.getLastArg(options::OPT_cuda_host_only))) {
+    // -c only runs up to the assembler.
+  } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
     FinalPhase = phases::Assemble;
 
     // Otherwise do everything.
@@ -902,20 +900,9 @@ static unsigned PrintActions1(const Compilation &C, Action *A,
   } else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {
     os << '"' << BIA->getArchName() << '"' << ", {"
        << PrintActions1(C, *BIA->begin(), Ids) << "}";
-  } else if (CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
-    os << '"' << CDA->getGpuArchName() << '"' << ", {"
-       << PrintActions1(C, *CDA->begin(), Ids) << "}";
   } else {
-    ActionList *AL;
-    if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
-      os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}"
-         << ", gpu binaries ";
-      AL = &CHA->getDeviceActions();
-    } else
-      AL = &A->getInputs();
-
     const char *Prefix = "{";
-    for (Action *PreRequisite : *AL) {
+    for (Action *PreRequisite : *A) {
       os << Prefix << PrintActions1(C, PreRequisite, Ids);
       Prefix = ", ";
     }
@@ -1228,93 +1215,6 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
   }
 }
 
-// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE input
-// action and then wraps each in CudaDeviceAction paired with appropriate GPU
-// arch name. If we're only building device-side code, each action remains
-// independent. Otherwise we pass device-side actions as inputs to a new
-// CudaHostAction which combines both host and device side actions.
-static std::unique_ptr<Action>
-buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
-                 const Arg *InputArg, const types::ID InputType,
-                 std::unique_ptr<Action> Current, ActionList &Actions) {
-
-  assert(InputType == types::TY_CUDA &&
-         "CUDA Actions only apply to CUDA inputs.");
-
-  // Collect all cuda_gpu_arch parameters, removing duplicates.
-  SmallVector<const char *, 4> GpuArchList;
-  llvm::StringSet<> GpuArchNames;
-  for (Arg *A : Args) {
-    if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) {
-      A->claim();
-      if (GpuArchNames.insert(A->getValue()).second)
-        GpuArchList.push_back(A->getValue());
-    }
-  }
-
-  // Default to sm_20 which is the lowest common denominator for supported GPUs.
-  // sm_20 code should work correctly, if suboptimally, on all newer GPUs.
-  if (GpuArchList.empty())
-    GpuArchList.push_back("sm_20");
-
-  // Replicate inputs for each GPU architecture.
-  Driver::InputList CudaDeviceInputs;
-  for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i)
-    CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
-
-  // Build actions for all device inputs.
-  ActionList CudaDeviceActions;
-  D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions);
-  assert(GpuArchList.size() == CudaDeviceActions.size() &&
-         "Failed to create actions for all devices");
-
-  // Check whether any of device actions stopped before they could generate PTX.
-  bool PartialCompilation = false;
-  bool DeviceOnlyCompilation = Args.hasArg(options::OPT_cuda_device_only);
-  for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) {
-    if (CudaDeviceActions[i]->getKind() != Action::BackendJobClass) {
-      PartialCompilation = true;
-      break;
-    }
-  }
-
-  // Figure out what to do with device actions -- pass them as inputs to the
-  // host action or run each of them independently.
-  if (PartialCompilation || DeviceOnlyCompilation) {
-    // In case of partial or device-only compilation results of device actions
-    // are not consumed by the host action device actions have to be added to
-    // top-level actions list with AtTopLevel=true and run independently.
-
-    // -o is ambiguous if we have more than one top-level action.
-    if (Args.hasArg(options::OPT_o) &&
-        (!DeviceOnlyCompilation || GpuArchList.size() > 1)) {
-      D.Diag(clang::diag::err_drv_output_argument_with_multiple_files);
-      return nullptr;
-    }
-
-    for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i)
-      Actions.push_back(
-          new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]),
-                               GpuArchList[i], /* AtTopLevel */ true));
-    // Kill host action in case of device-only compilation.
-    if (DeviceOnlyCompilation)
-      Current.reset(nullptr);
-    return Current;
-  } else {
-    // Outputs of device actions during complete CUDA compilation get created
-    // with AtTopLevel=false and become inputs for the host action.
-    ActionList DeviceActions;
-    for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i)
-      DeviceActions.push_back(
-          new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]),
-                               GpuArchList[i], /* AtTopLevel */ false));
-    // Return a new host action that incorporates original host action and all
-    // device actions.
-    return std::unique_ptr<Action>(
-        new CudaHostAction(std::move(Current), DeviceActions));
-  }
-}
-
 void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
                           const InputList &Inputs, ActionList &Actions) const {
   llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
@@ -1412,25 +1312,6 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
       continue;
     }
 
-    phases::ID CudaInjectionPhase;
-    if (isSaveTempsEnabled()) {
-      // All phases are done independently, inject GPU blobs during compilation
-      // phase as that's where we generate glue code to init them.
-      CudaInjectionPhase = phases::Compile;
-    } else {
-      // Assumes that clang does everything up until linking phase, so we inject
-      // cuda device actions at the last step before linking. Otherwise CUDA
-      // host action forces preprocessor into a separate invocation.
-      if (FinalPhase == phases::Link) {
-        for (auto i = PL.begin(), e = PL.end(); i != e; ++i) {
-          auto next = i + 1;
-          if (next != e && *next == phases::Link)
-            CudaInjectionPhase = *i;
-        }
-      } else
-        CudaInjectionPhase = FinalPhase;
-    }
-
     // Build the pipeline for this file.
     std::unique_ptr<Action> Current(new InputAction(*InputArg, InputType));
     for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();
@@ -1456,15 +1337,6 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
 
       // Otherwise construct the appropriate action.
       Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current));
-
-      if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase &&
-          !Args.hasArg(options::OPT_cuda_host_only)) {
-        Current = buildCudaActions(*this, TC, Args, InputArg, InputType,
-                                   std::move(Current), Actions);
-        if (!Current)
-          break;
-      }
-
       if (Current->getType() == types::TY_Nothing)
         break;
     }
@@ -1704,13 +1576,7 @@ static const Tool *SelectToolForJob(Compilation &C, bool SaveTemps,
   if (isa<BackendJobAction>(JA)) {
     // Check if the compiler supports emitting LLVM IR.
     assert(Inputs->size() == 1);
-    JobAction *CompileJA;
-    // Extract real host action, if it's a CudaHostAction.
-    if (CudaHostAction *CudaHA = dyn_cast<CudaHostAction>(*Inputs->begin()))
-      CompileJA = cast<CompileJobAction>(*CudaHA->begin());
-    else
-      CompileJA = cast<CompileJobAction>(*Inputs->begin());
-
+    JobAction *CompileJA = cast<CompileJobAction>(*Inputs->begin());
     const Tool *Compiler = TC->SelectTool(*CompileJA);
     if (!Compiler)
       return nullptr;
@@ -1744,20 +1610,6 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A,
                                 InputInfo &Result) const {
   llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
 
-  InputInfoList CudaDeviceInputInfos;
-  if (const CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
-    InputInfo II;
-    // Append outputs of device jobs to the input list.
-    for (const Action *DA : CHA->getDeviceActions()) {
-      BuildJobsForAction(C, DA, TC, "", AtTopLevel,
-                         /*MultipleArchs*/ false, LinkingOutput, II);
-      CudaDeviceInputInfos.push_back(II);
-    }
-    // Override current action with a real host compile action and continue
-    // processing it.
-    A = *CHA->begin();
-  }
-
   if (const InputAction *IA = dyn_cast<InputAction>(A)) {
     // FIXME: It would be nice to not claim this here; maybe the old scheme of
     // just using Args was better?
@@ -1783,24 +1635,11 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A,
     else
       TC = &C.getDefaultToolChain();
 
-    BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel,
+    BuildJobsForAction(C, *BAA->begin(), TC, BAA->getArchName(), AtTopLevel,
                        MultipleArchs, LinkingOutput, Result);
     return;
   }
 
-  if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
-    // Figure out which NVPTX triple to use for device-side compilation based on
-    // whether host is 64-bit.
-    llvm::Triple DeviceTriple(C.getDefaultToolChain().getTriple().isArch64Bit()
-                                  ? "nvptx64-nvidia-cuda"
-                                  : "nvptx-nvidia-cuda");
-    BuildJobsForAction(C, *CDA->begin(),
-                       &getToolChain(C.getArgs(), DeviceTriple),
-                       CDA->getGpuArchName(), CDA->isAtTopLevel(),
-                       /*MultipleArchs*/ true, LinkingOutput, Result);
-    return;
-  }
-
   const ActionList *Inputs = &A->getInputs();
 
   const JobAction *JA = cast<JobAction>(A);
@@ -1832,10 +1671,6 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A,
   if (JA->getType() == types::TY_dSYM)
     BaseInput = InputInfos[0].getFilename();
 
-  // Append outputs of cuda device jobs to the input list
-  if (CudaDeviceInputInfos.size())
-    InputInfos.append(CudaDeviceInputInfos.begin(), CudaDeviceInputInfos.end());
-
   // Determine the place to write output to, if any.
   if (JA->getType() == types::TY_Nothing)
     Result = InputInfo(A->getType(), BaseInput);
@@ -2217,9 +2052,6 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
         break;
       }
       break;
-    case llvm::Triple::CUDA:
-      TC = new toolchains::CudaToolChain(*this, Target, Args);
-      break;
     default:
       // Of these targets, Hexagon is the only one that might have
       // an OS of Linux, in which case it got handled above already.
diff --git a/lib/Driver/ToolChain.cpp b/lib/Driver/ToolChain.cpp
index e6a1bc9685..da020a2b8c 100644
--- a/lib/Driver/ToolChain.cpp
+++ b/lib/Driver/ToolChain.cpp
@@ -151,8 +151,6 @@ Tool *ToolChain::getTool(Action::ActionClass AC) const {
 
   case Action::InputClass:
   case Action::BindArchClass:
-  case Action::CudaDeviceClass:
-  case Action::CudaHostClass:
   case Action::LipoJobClass:
   case Action::DsymutilJobClass:
   case Action::VerifyDebugInfoJobClass:
diff --git a/lib/Driver/ToolChains.cpp b/lib/Driver/ToolChains.cpp
index 15e36a1e6c..eafc72b6b1 100644
--- a/lib/Driver/ToolChains.cpp
+++ b/lib/Driver/ToolChains.cpp
@@ -3652,65 +3652,6 @@ Tool *DragonFly::buildLinker() const {
   return new tools::dragonfly::Linker(*this);
 }
 
-/// Stub for CUDA toolchain. At the moment we don't have assembler or
-/// linker and need toolchain mainly to propagate device-side options
-/// to CC1.
-
-CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
-                             const ArgList &Args)
-    : Linux(D, Triple, Args) {}
-
-void
-CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                                     llvm::opt::ArgStringList &CC1Args) const {
-  Linux::addClangTargetOptions(DriverArgs, CC1Args);
-  CC1Args.push_back("-fcuda-is-device");
-}
-
-llvm::opt::DerivedArgList *
-CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
-                             const char *BoundArch) const {
-  DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
-  const OptTable &Opts = getDriver().getOpts();
-
-  for (Arg *A : Args) {
-    if (A->getOption().matches(options::OPT_Xarch__)) {
-      // Skip this argument unless the architecture matches BoundArch
-      if (A->getValue(0) != StringRef(BoundArch))
-        continue;
-
-      unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
-      unsigned Prev = Index;
-      std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
-
-      // If the argument parsing failed or more than one argument was
-      // consumed, the -Xarch_ argument's parameter tried to consume
-      // extra arguments. Emit an error and ignore.
-      //
-      // We also want to disallow any options which would alter the
-      // driver behavior; that isn't going to work in our model. We
-      // use isDriverOption() as an approximation, although things
-      // like -O4 are going to slip through.
-      if (!XarchArg || Index > Prev + 1) {
-        getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
-            << A->getAsString(Args);
-        continue;
-      } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
-        getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
-            << A->getAsString(Args);
-        continue;
-      }
-      XarchArg->setBaseArg(A);
-      A = XarchArg.release();
-      DAL->AddSynthesizedArg(A);
-    }
-    DAL->append(A);
-  }
-
-  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
-  return DAL;
-}
-
 /// XCore tool chain
 XCore::XCore(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
     : ToolChain(D, Triple, Args) {
diff --git a/lib/Driver/ToolChains.h b/lib/Driver/ToolChains.h
index 327ff9b13b..3689682044 100644
--- a/lib/Driver/ToolChains.h
+++ b/lib/Driver/ToolChains.h
@@ -699,18 +699,6 @@ private:
   std::string computeSysRoot() const;
 };
 
-class LLVM_LIBRARY_VISIBILITY CudaToolChain : public Linux {
-public:
-  CudaToolChain(const Driver &D, const llvm::Triple &Triple,
-                const llvm::opt::ArgList &Args);
-
-  llvm::opt::DerivedArgList *
-  TranslateArgs(const llvm::opt::DerivedArgList &Args,
-                const char *BoundArch) const override;
-  void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
-};
-
 class LLVM_LIBRARY_VISIBILITY Hexagon_TC : public Linux {
 protected:
   GCCVersion GCCLibAndIncVersion;
diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp
index 0757355958..180533396c 100644
--- a/lib/Driver/Tools.cpp
+++ b/lib/Driver/Tools.cpp
@@ -1488,12 +1488,6 @@ static std::string getCPUName(const ArgList &Args, const llvm::Triple &T) {
     return CPUName;
   }
 
-  case llvm::Triple::nvptx:
-  case llvm::Triple::nvptx64:
-    if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
-      return A->getValue();
-    return "";
-
   case llvm::Triple::ppc:
   case llvm::Triple::ppc64:
   case llvm::Triple::ppc64le: {
@@ -2832,14 +2826,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       getToolChain().getTriple().isWindowsCygwinEnvironment();
   bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment();
 
-  // Check number of inputs for sanity. We need at least one input.
-  assert(Inputs.size() >= 1 && "Must have at least one input.");
+  assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
   const InputInfo &Input = Inputs[0];
-  // CUDA compilation may have multiple inputs (source file + results of
-  // device-side compilations). All other jobs are expected to have exactly one
-  // input.
-  bool IsCuda = types::isCuda(Input.getType());
-  assert((IsCuda || Inputs.size() == 1) && "Unable to handle multiple inputs.");
 
   // Invoke ourselves in -cc1 mode.
   //
@@ -4814,12 +4802,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     assert(Output.isNothing() && "Invalid output.");
   }
 
-  addDashXForInput(Args, Input, CmdArgs);
+  for (const auto &II : Inputs) {
+    addDashXForInput(Args, II, CmdArgs);
 
-  if (Input.isFilename())
-    CmdArgs.push_back(Input.getFilename());
-  else
-    Input.getInputArg().renderAsInput(Args, CmdArgs);
+    if (II.isFilename())
+      CmdArgs.push_back(II.getFilename());
+    else
+      II.getInputArg().renderAsInput(Args, CmdArgs);
+  }
 
   Args.AddAllArgs(CmdArgs, options::OPT_undef);
 
@@ -4857,16 +4847,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back(SplitDwarfOut);
   }
 
-  // Host-side cuda compilation receives device-side outputs as Inputs[1...].
-  // Include them with -fcuda-include-gpubinary.
-  if (IsCuda && Inputs.size() > 1)
-    for (InputInfoList::const_iterator it = std::next(Inputs.begin()),
-                                       ie = Inputs.end();
-         it != ie; ++it) {
-      CmdArgs.push_back("-fcuda-include-gpubinary");
-      CmdArgs.push_back(it->getFilename());
-    }
-
   // Finally add the compile command to the compilation.
   if (Args.hasArg(options::OPT__SLASH_fallback) &&
       Output.getType() == types::TY_Object &&
diff --git a/lib/Driver/Types.cpp b/lib/Driver/Types.cpp
index 2085b01242..7b28145755 100644
--- a/lib/Driver/Types.cpp
+++ b/lib/Driver/Types.cpp
@@ -86,7 +86,6 @@ bool types::isAcceptedByClang(ID Id) {
   case TY_C: case TY_PP_C:
   case TY_CL:
   case TY_CUDA: case TY_PP_CUDA:
-  case TY_CUDA_DEVICE:
   case TY_ObjC: case TY_PP_ObjC: case TY_PP_ObjC_Alias:
   case TY_CXX: case TY_PP_CXX:
   case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias:
@@ -123,19 +122,7 @@ bool types::isCXX(ID Id) {
   case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias:
   case TY_CXXHeader: case TY_PP_CXXHeader:
   case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader:
-  case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE:
-    return true;
-  }
-}
-
-bool types::isCuda(ID Id) {
-  switch (Id) {
-  default:
-    return false;
-
-  case TY_CUDA:
-  case TY_PP_CUDA:
-  case TY_CUDA_DEVICE:
+  case TY_CUDA: case TY_PP_CUDA:
     return true;
   }
 }
@@ -219,12 +206,10 @@ void types::getCompilationPhases(ID Id, llvm::SmallVectorImpl<phases::ID> &P) {
         P.push_back(phases::Compile);
         P.push_back(phases::Backend);
       }
-      if (Id != TY_CUDA_DEVICE)
-        P.push_back(phases::Assemble);
+      P.push_back(phases::Assemble);
     }
   }
-
-  if (!onlyPrecompileType(Id) && Id != TY_CUDA_DEVICE) {
+  if (!onlyPrecompileType(Id)) {
     P.push_back(phases::Link);
   }
   assert(0 < P.size() && "Not enough phases in list");
diff --git a/lib/Frontend/CreateInvocationFromCommandLine.cpp b/lib/Frontend/CreateInvocationFromCommandLine.cpp
index 66df1a5ba1..4a8a8a029e 100644
--- a/lib/Frontend/CreateInvocationFromCommandLine.cpp
+++ b/lib/Frontend/CreateInvocationFromCommandLine.cpp
@@ -15,7 +15,6 @@
 #include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
-#include "clang/Driver/Action.h"
 #include "clang/Driver/Options.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Frontend/CompilerInstance.h"
@@ -62,21 +61,9 @@ clang::createInvocationFromCommandLine(ArrayRef<const char *> ArgList,
   }
 
   // We expect to get back exactly one command job, if we didn't something
-  // failed. CUDA compilation is an exception as it creates multiple jobs. If
-  // that's the case, we proceed with the first job. If caller needs particular
-  // CUDA job, it should be controlled via --cuda-{host|device}-only option
-  // passed to the driver.
+  // failed.
   const driver::JobList &Jobs = C->getJobs();
-  bool CudaCompilation = false;
-  if (Jobs.size() > 1) {
-    for (auto &A : C->getActions())
-      if (isa<driver::CudaDeviceAction>(A)) {
-        CudaCompilation = true;
-        break;
-      }
-  }
-  if (Jobs.size() == 0 || !isa<driver::Command>(*Jobs.begin()) ||
-      (Jobs.size() > 1 && !CudaCompilation)) {
+  if (Jobs.size() != 1 || !isa<driver::Command>(*Jobs.begin())) {
     SmallString<256> Msg;
     llvm::raw_svector_ostream OS(Msg);
     Jobs.Print(OS, "; ", true);