5 files changed, 58 insertions, 6 deletions
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index bf02a379fd..e764bbb7f0 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -129,7 +129,10 @@ OpenCL C Language Changes in Clang
 ABI Changes in Clang
 --------------------
 
-- ...
+- gcc passes vectors of __int128 in memory on X86-64. Clang historically
+  broke the vectors into multiple scalars using two 64-bit values for each
+  element. Clang now matches the gcc behavior on Linux and NetBSD. You can
+  switch back to old API behavior with flag: -fclang-abi-compat=9.0.
 
 OpenMP Support in Clang
 -----------------------
diff --git a/include/clang/Basic/LangOptions.h b/include/clang/Basic/LangOptions.h
index 8099eed28c..e3b7737fa3 100644
--- a/include/clang/Basic/LangOptions.h
+++ b/include/clang/Basic/LangOptions.h
@@ -138,6 +138,12 @@ public:
     /// rather than returning the required alignment.
     Ver7,
 
+    /// Attempt to be ABI-compatible with code generated by Clang 9.0.x
+    /// (SVN r351319). This causes vectors of __int128 to be passed in memory
+    /// instead of passing in multiple scalar registers on x86_64 on Linux and
+    /// NetBSD.
+    Ver9,
+
     /// Conform to the underlying platform's C and C++ ABIs as closely
     /// as we can.
     Latest
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index ffd5586337..f2696a33cf 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -2180,6 +2180,17 @@ class X86_64ABIInfo : public SwiftABIInfo {
     return true;
   }
 
+  // GCC classifies vectors of __int128 as memory.
+  bool passInt128VectorsInMem() const {
+    // Clang <= 9.0 did not do this.
+    if (getContext().getLangOpts().getClangABICompat() <=
+        LangOptions::ClangABI::Ver9)
+      return false;
+
+    const llvm::Triple &T = getTarget().getTriple();
+    return T.isOSLinux() || T.isOSNetBSD();
+  }
+
   X86AVXABILevel AVXLevel;
   // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
   // 64-bit hardware.
@@ -2660,6 +2671,14 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
         Hi = Lo;
     } else if (Size == 128 ||
                (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
+      QualType ElementType = VT->getElementType();
+
+      // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
+      if (passInt128VectorsInMem() && Size != 128 &&
+          (ElementType->isSpecificBuiltinType(BuiltinType::Int128) ||
+           ElementType->isSpecificBuiltinType(BuiltinType::UInt128)))
+        return;
+
       // Arguments of 256-bits are split into four eightbyte chunks. The
       // least significant one belongs to class SSE and all the others to class
       // SSEUP. The original Lo and Hi design considers that types can't be
@@ -2902,6 +2921,11 @@ bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
     unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
     if (Size <= 64 || Size > LargestVector)
       return true;
+    QualType EltTy = VecTy->getElementType();
+    if (passInt128VectorsInMem() &&
+        (EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
+         EltTy->isSpecificBuiltinType(BuiltinType::UInt128)))
+      return true;
   }
 
   return false;
@@ -2976,14 +3000,28 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
     Ty = QualType(InnerTy, 0);
 
   llvm::Type *IRType = CGT.ConvertType(Ty);
-  if (isa<llvm::VectorType>(IRType) ||
-      IRType->getTypeID() == llvm::Type::FP128TyID)
+  if (isa<llvm::VectorType>(IRType)) {
+    // Don't pass vXi128 vectors in their native type, the backend can't
+    // legalize them.
+    if (passInt128VectorsInMem() &&
+        IRType->getVectorElementType()->isIntegerTy(128)) {
+      // Use a vXi64 vector.
+      uint64_t Size = getContext().getTypeSize(Ty);
+      return llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()),
+                                   Size / 64);
+    }
+
+    return IRType;
+  }
+
+  if (IRType->getTypeID() == llvm::Type::FP128TyID)
     return IRType;
 
   // We couldn't find the preferred IR vector type for 'Ty'.
   uint64_t Size = getContext().getTypeSize(Ty);
   assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
 
+
   // Return a LLVM IR vector type based on the size of 'Ty'.
   return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()),
                                Size / 64);
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index 711f7dfa65..f1624532f8 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -3164,6 +3164,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
         Opts.setClangABICompat(LangOptions::ClangABI::Ver6);
       else if (Major <= 7)
         Opts.setClangABICompat(LangOptions::ClangABI::Ver7);
+      else if (Major <= 9)
+        Opts.setClangABICompat(LangOptions::ClangABI::Ver9);
     } else if (Ver != "latest") {
       Diags.Report(diag::err_drv_invalid_value)
           << A->getAsString(Args) << A->getValue();
diff --git a/test/CodeGen/x86-vec-i128.c b/test/CodeGen/x86-vec-i128.c
index d2c05c0784..aee3b9efe7 100644
--- a/test/CodeGen/x86-vec-i128.c
+++ b/test/CodeGen/x86-vec-i128.c
@@ -3,24 +3,27 @@
 // RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN16,MEM512ALIGN16
 // RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64
 // RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64
 
 // RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,MEM512ALIGN64
 // RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,MEM512ALIGN64
 // RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN32
 // RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64
 // RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64
 
 // RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,CLANG10ABI512
 // RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,CLANG10ABI512
 // RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512
 // RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512
 // RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512
 
 typedef unsigned long long v16u64 __attribute__((vector_size(16)));
 typedef unsigned __int128 v16u128 __attribute__((vector_size(16)));
 
 v16u64 test_v16u128(v16u64 a, v16u128 b) {
-// CLANG10ABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <1 x i128> %{{.*}})
+// CLANG10ABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
 // CLANG9ABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <1 x i128> %{{.*}})
   return a + (v16u64)b;
 }
@@ -31,7 +34,7 @@ typedef unsigned __int128 v32u128 __attribute__((vector_size(32)));
 v32u64 test_v32u128(v32u64 a, v32u128 b) {
 // MEM256ALIGN16: define <4 x i64> @test_v32u128(<4 x i64>* byval(<4 x i64>) align 16 %{{.*}}, <2 x i128>* byval(<2 x i128>) align 16 %{{.*}})
 // MEM256ALIGN32: define <4 x i64> @test_v32u128(<4 x i64>* byval(<4 x i64>) align 32 %{{.*}}, <2 x i128>* byval(<2 x i128>) align 32 %{{.*}})
-// CLANG10ABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128> %{{.*}})
+// CLANG10ABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128>* byval(<2 x i128>) align 32 %{{.*}})
 // CLANG9ABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128> %{{.*}})
   return a + (v32u64)b;
 }
@@ -43,7 +46,7 @@ v64u64 test_v64u128(v64u64 a, v64u128 b) {
 // MEM512ALIGN16: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 16 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 16 %{{.*}})
 // MEM512ALIGN32: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 32 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 32 %{{.*}})
 // MEM512ALIGN64: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 64 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 64 %{{.*}})
-// CLANG10ABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128> %{{.*}})
+// CLANG10ABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128>* byval(<4 x i128>) align 64 %{{.*}})
 // CLANG9ABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128> %{{.*}})
   return a + (v64u64)b;
 }