Change foldl' to inline when partially applied (#19534)

And though partially applied foldl' is now again inlined, #4301 has not resurfaced, and appears to be resolved.
author: James Foster <jf16688@my.bristol.ac.uk> 2021-03-14 18:38:23 -0400
committer: Viktor Dukhovni <ietf-dane@dukhovni.org> 2021-04-07 14:17:31 -0400
commit: 88d8a0ed387cef8ee45aa3c1b3bc22d5d9d5e51a (patch)
tree: 6ea387160e547a510ee6bd96016dd546398e6f97
parent: d014ab0db0c167ab5a0f9cb15280aee6fd8f3621 (diff)
download: haskell-88d8a0ed387cef8ee45aa3c1b3bc22d5d9d5e51a.tar.gz
3 files changed, 67 insertions, 18 deletions
diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs
index 7b22c7881d..e66c88ac7a 100644
--- a/compiler/GHC/Core/Opt/Simplify/Utils.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs
@@ -1757,7 +1757,7 @@ and can lead to a massive blow-up in code size, exhibited by #9020.
 Suppose we have a PAP
     foo :: IO ()
     foo = returnIO ()
-Then we can eta-expand do
+Then we can eta-expand to
     foo = (\eta. (returnIO () |> sym g) eta) |> g
 where
     g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
diff --git a/libraries/base/Data/Foldable.hs b/libraries/base/Data/Foldable.hs
index 94bfa096a1..bee564ee07 100644
--- a/libraries/base/Data/Foldable.hs
+++ b/libraries/base/Data/Foldable.hs
@@ -316,8 +316,9 @@ class Foldable t where
     --
     -- @since 4.6.0.0
     foldr' :: (a -> b -> b) -> b -> t a -> b
-    foldr' f z0 xs = foldl f' id xs z0
-      where f' k x z = k $! f x z
+    foldr' f z0 = \ xs ->
+        foldl (\ k x -> oneShot (\ z -> z `seq` k (f x z))) id xs z0
+    -- Mirror image of 'foldl''.
 
     -- | Left-associative fold of a structure, lazy in the accumulator.  This
     -- is rarely what you want, but can work well for structures with efficient
@@ -388,8 +389,16 @@ class Foldable t where
     --
     -- @since 4.6.0.0
     foldl' :: (b -> a -> b) -> b -> t a -> b
-    foldl' f z0 xs = foldr f' id xs z0
-      where f' x k z = k $! f z x
+    {-# INLINE foldl' #-}
+    foldl' f z0 = \ xs ->
+        foldr (\ (x::a) (k::b->b) -> oneShot (\ (z::b) -> z `seq` k (f z x)))
+              (id::b->b) xs z0
+    --
+    -- We now force the accumulator `z` rather than the value computed by the
+    -- operator `k`, this matches the documented strictness.
+    --
+    -- For the rationale for the arity reduction from 3 to 2, inlining, etc.
+    -- see Note [Definition of foldl'] in GHC.List.
 
     -- | A variant of 'foldr' that has no base case,
     -- and thus may only be applied to non-empty structures.
@@ -2071,25 +2080,25 @@ https://gitlab.haskell.org/ghc/ghc/-/issues/17867 for more context.
 --    where g = flip f
 -- @
 --
--- In which to maintain the expected strictness we need to perform function
--- application eagerly, and composition lazily.  To that end we introduce a new
--- function @f'@ which maps each element @x@ to an eager application of @g x@
--- to its argument, followed by an application of a lazily computed composition
--- (@k@) of the @g e@ functions for the remaining elements @e@:
+-- In which to ensure the correct strictness we must evaluate the accumulator
+-- before applying the next function.  We therefore introduce a helper function
+-- @f'@ which maps each element @x@ to an application of @g x@ to its eagerly
+-- evaluated argument, followed by similar application of the @g e@ functions
+-- for the remaining elements @e@:
 --
--- > f' x k z = k $! (g x) z = k $! f z x
+-- > f' x k = \ z -> z `seq` k (f z x)
 --
 -- We see that a lazy 'foldr' of the @g e@ endomorphisms, with @f'@ as as the
 -- operator, in fact yields a strict left fold, that avoids building a
 -- deep chain of intermediate thunks:
 --
 -- > foldl' f z0 xs = foldr f' id xs z0
--- >   where f' x k z = k $! f z x
+-- >   where f' x k = \ z -> z `seq` k (f z x)
 --
 -- The function applied to @z0@ is built corecursively, and its terms are
--- applied eagerly to the accumulator before further terms are applied to
--- the result.  So, as promised, this will run in constant space, and GHC
--- is able to optimise this to an efficient loop.
+-- applied to an eagerly evaluated accumulator before further terms are applied
+-- to the result.  As promised, this runs in constant space, and can be
+-- optimised to an efficient loop.
 
 --------------
 
diff --git a/libraries/base/GHC/List.hs b/libraries/base/GHC/List.hs
index 07f65fc987..d13749923c 100644
--- a/libraries/base/GHC/List.hs
+++ b/libraries/base/GHC/List.hs
@@ -304,11 +304,51 @@ allocation-free. Also see #13001.
 -- ----------------------------------------------------------------------------
 
 -- | A strict version of 'foldl'.
-foldl'           :: forall a b . (b -> a -> b) -> b -> [a] -> b
+foldl' :: forall a b . (b -> a -> b) -> b -> [a] -> b
 {-# INLINE foldl' #-}
-foldl' k z0 xs =
+foldl' k z0 = \xs ->
   foldr (\(v::a) (fn::b->b) -> oneShot (\(z::b) -> z `seq` fn (k z v))) (id :: b -> b) xs z0
-  -- See Note [Left folds via right fold]
+{-
+Note [Definition of foldl']
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We want foldl' to be a good consumer, so:
+
+* We define it (rather cunningly) with `foldr`.  That way, the `fold/build`
+  rule might fire.  See Note [Left folds via right fold]
+
+* We give it an INLINE pragma, so that it'll inline at its call sites, again
+  giving the `fold/build` rule a chance to fire.
+
+* We eta-reduce it so that it has arity 2, not 3.  Reason: consider
+
+     sumlen :: [Float] -> (Float, Int)
+     sumlen = foldl' f (0, 0)
+        where
+          f (!s, !n) !x = (s + x, n + 1)
+
+The RHS of `sumlen` is a partial application of foldl', and is not
+eta-expanded (and it isn't, because we don't eta-expand PAPs.  See Note
+[Do not eta-expand PAPs] in GHC.Core.Opt.Simplify.Utils)
+
+So foldl' is partially applied to two arguments, /and it won't inline/
+if its defn is:
+
+      {-# INLINE foldl' #-}
+      foldl' k z xs = ...
+
+because INLINE functions only inline when saturated.
+
+Conclusion: move the `xs` parameter to the RHS, and define it thus
+
+  fold' k z = \xs -> ...
+
+See !5259 for additional discussion.  This may result in partial applications
+of 'foldl'' inlining in some functions where they previously did not.  Absent
+an INLINE pragam for the calling function, it may become too expensive to
+automatically inline, resulting in a loss of previously accidental list
+fusion.  Such call sites may now need explicit INLINE or INLINABLE pragmas
+to make the desired list fusion robust.
+-}
 
 -- | 'foldl1' is a variant of 'foldl' that has no starting value argument,
 -- and thus must be applied to non-empty lists. Note that unlike 'foldl', the accumulated value must be of the same type as the list elements.
author	James Foster <jf16688@my.bristol.ac.uk>	2021-03-14 18:38:23 -0400
committer	Viktor Dukhovni <ietf-dane@dukhovni.org>	2021-04-07 14:17:31 -0400
commit	88d8a0ed387cef8ee45aa3c1b3bc22d5d9d5e51a (patch)
tree	6ea387160e547a510ee6bd96016dd546398e6f97
parent	d014ab0db0c167ab5a0f9cb15280aee6fd8f3621 (diff)
download	haskell-88d8a0ed387cef8ee45aa3c1b3bc22d5d9d5e51a.tar.gz