By default, only pass 128-bit SIMD vectors in registers on X86-64.

LLVM's GHC calling convention only allows 128-bit SIMD vectors to be passed in machine registers on X86-64. This may change in LLVM 3.4; the hidden flag -fllvm-pass-vectors-in-regs causes all SIMD vector widths to be passed in registers on both X86-64 and on X86-32.
author: Geoffrey Mainland <gmainlan@microsoft.com> 2013-09-15 23:43:29 -0400
committer: Geoffrey Mainland <gmainlan@microsoft.com> 2013-09-22 22:34:00 -0400
commit: d2b95264c97b3d7786b359fbf04fb297a160daa3 (patch)
tree: 1be2774ee1a0ee75419adb7876fdb945e86debd7
parent: 7dda67b9820f043774b2c7d21c7f8ac0dcd60b6e (diff)
download: haskell-d2b95264c97b3d7786b359fbf04fb297a160daa3.tar.gz
2 files changed, 22 insertions, 3 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index 6a931668bb..60e2c8c8f7 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -66,9 +66,12 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
                                     | isFloatType ty = float
                                     | otherwise      = int
         where vec = case (w, regs) of
-                      (W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
-                      (W256, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
-                      (W512, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
+                      (W128, (vs, fs, ds, ls, s:ss))
+                          | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
+                      (W256, (vs, fs, ds, ls, s:ss))
+                          | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
+                      (W512, (vs, fs, ds, ls, s:ss))
+                          | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
                       _ -> (assts, (r:rs))
               float = case (w, regs) of
                         (W32, (vs, fs, ds, ls, s:ss))
@@ -100,6 +103,20 @@ passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of
                               ArchX86_64 -> True
                               _          -> False
 
+-- On X86_64, we always pass 128-bit-wide vectors in registers. On 32-bit X86
+-- and for all larger vector sizes on X86_64, LLVM's GHC calling convention
+-- doesn't currently passing vectors in registers. The patch to update the GHC
+-- calling convention to support passing SIMD vectors in registers is small and
+-- well-contained, so it may make it into LLVM 3.4. The hidden
+-- -fllvm-pass-vectors-in-regs flag will generate LLVM code that attempts to
+-- pass vectors in registers, but it must only be used with a version of LLVM
+-- that has an updated GHC calling convention.
+passVectorInReg :: Width -> DynFlags -> Bool
+passVectorInReg W128 dflags = case platformArch (targetPlatform dflags) of
+                                ArchX86_64 -> True
+                                _          -> gopt Opt_LlvmPassVectorsInRegisters dflags
+passVectorInReg _    dflags = gopt Opt_LlvmPassVectorsInRegisters dflags
+
 assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a]
             -> (
                  ByteOff              -- bytes of stack args
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 74241ba67f..37f35e61d6 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -309,6 +309,7 @@ data GeneralFlag
    | Opt_RegsIterative                  -- do iterative coalescing graph coloring register allocation
    | Opt_PedanticBottoms                -- Be picky about how we treat bottom
    | Opt_LlvmTBAA                       -- Use LLVM TBAA infastructure for improving AA (hidden flag)
+   | Opt_LlvmPassVectorsInRegisters     -- Pass SIMD vectors in registers (requires a patched LLVM) (hidden flag)
    | Opt_IrrefutableTuples
    | Opt_CmmSink
    | Opt_CmmElimCommonBlocks
@@ -2611,6 +2612,7 @@ fFlags = [
   ( "regs-graph",                       Opt_RegsGraph, nop ),
   ( "regs-iterative",                   Opt_RegsIterative, nop ),
   ( "llvm-tbaa",                        Opt_LlvmTBAA, nop), -- hidden flag
+  ( "llvm-pass-vectors-in-regs",        Opt_LlvmPassVectorsInRegisters, nop), -- hidden flag
   ( "irrefutable-tuples",               Opt_IrrefutableTuples, nop ),
   ( "cmm-sink",                         Opt_CmmSink, nop ),
   ( "cmm-elim-common-blocks",           Opt_CmmElimCommonBlocks, nop ),
author	Geoffrey Mainland <gmainlan@microsoft.com>	2013-09-15 23:43:29 -0400
committer	Geoffrey Mainland <gmainlan@microsoft.com>	2013-09-22 22:34:00 -0400
commit	d2b95264c97b3d7786b359fbf04fb297a160daa3 (patch)
tree	1be2774ee1a0ee75419adb7876fdb945e86debd7
parent	7dda67b9820f043774b2c7d21c7f8ac0dcd60b6e (diff)
download	haskell-d2b95264c97b3d7786b359fbf04fb297a160daa3.tar.gz