summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Foster <jf16688@my.bristol.ac.uk>2021-03-14 18:38:23 -0400
committerViktor Dukhovni <ietf-dane@dukhovni.org>2021-04-07 14:17:31 -0400
commit88d8a0ed387cef8ee45aa3c1b3bc22d5d9d5e51a (patch)
tree6ea387160e547a510ee6bd96016dd546398e6f97
parentd014ab0db0c167ab5a0f9cb15280aee6fd8f3621 (diff)
downloadhaskell-88d8a0ed387cef8ee45aa3c1b3bc22d5d9d5e51a.tar.gz
Change foldl' to inline when partially applied (#19534)
And though partially applied foldl' is now again inlined, #4301 has not resurfaced, and appears to be resolved.
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Utils.hs2
-rw-r--r--libraries/base/Data/Foldable.hs37
-rw-r--r--libraries/base/GHC/List.hs46
3 files changed, 67 insertions, 18 deletions
diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs
index 7b22c7881d..e66c88ac7a 100644
--- a/compiler/GHC/Core/Opt/Simplify/Utils.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs
@@ -1757,7 +1757,7 @@ and can lead to a massive blow-up in code size, exhibited by #9020.
Suppose we have a PAP
foo :: IO ()
foo = returnIO ()
-Then we can eta-expand do
+Then we can eta-expand to
foo = (\eta. (returnIO () |> sym g) eta) |> g
where
g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
diff --git a/libraries/base/Data/Foldable.hs b/libraries/base/Data/Foldable.hs
index 94bfa096a1..bee564ee07 100644
--- a/libraries/base/Data/Foldable.hs
+++ b/libraries/base/Data/Foldable.hs
@@ -316,8 +316,9 @@ class Foldable t where
--
-- @since 4.6.0.0
foldr' :: (a -> b -> b) -> b -> t a -> b
- foldr' f z0 xs = foldl f' id xs z0
- where f' k x z = k $! f x z
+ foldr' f z0 = \ xs ->
+ foldl (\ k x -> oneShot (\ z -> z `seq` k (f x z))) id xs z0
+ -- Mirror image of 'foldl''.
-- | Left-associative fold of a structure, lazy in the accumulator. This
-- is rarely what you want, but can work well for structures with efficient
@@ -388,8 +389,16 @@ class Foldable t where
--
-- @since 4.6.0.0
foldl' :: (b -> a -> b) -> b -> t a -> b
- foldl' f z0 xs = foldr f' id xs z0
- where f' x k z = k $! f z x
+ {-# INLINE foldl' #-}
+ foldl' f z0 = \ xs ->
+ foldr (\ (x::a) (k::b->b) -> oneShot (\ (z::b) -> z `seq` k (f z x)))
+ (id::b->b) xs z0
+ --
+ -- We now force the accumulator `z` rather than the value computed by the
+ -- operator `k`, this matches the documented strictness.
+ --
+ -- For the rationale for the arity reduction from 3 to 2, inlining, etc.
+ -- see Note [Definition of foldl'] in GHC.List.
-- | A variant of 'foldr' that has no base case,
-- and thus may only be applied to non-empty structures.
@@ -2071,25 +2080,25 @@ https://gitlab.haskell.org/ghc/ghc/-/issues/17867 for more context.
-- where g = flip f
-- @
--
--- In which to maintain the expected strictness we need to perform function
--- application eagerly, and composition lazily. To that end we introduce a new
--- function @f'@ which maps each element @x@ to an eager application of @g x@
--- to its argument, followed by an application of a lazily computed composition
--- (@k@) of the @g e@ functions for the remaining elements @e@:
+-- In which to ensure the correct strictness we must evaluate the accumulator
+-- before applying the next function. We therefore introduce a helper function
+-- @f'@ which maps each element @x@ to an application of @g x@ to its eagerly
+-- evaluated argument, followed by similar application of the @g e@ functions
+-- for the remaining elements @e@:
--
--- > f' x k z = k $! (g x) z = k $! f z x
+-- > f' x k = \ z -> z `seq` k (f z x)
--
-- We see that a lazy 'foldr' of the @g e@ endomorphisms, with @f'@ as as the
-- operator, in fact yields a strict left fold, that avoids building a
-- deep chain of intermediate thunks:
--
-- > foldl' f z0 xs = foldr f' id xs z0
--- > where f' x k z = k $! f z x
+-- > where f' x k = \ z -> z `seq` k (f z x)
--
-- The function applied to @z0@ is built corecursively, and its terms are
--- applied eagerly to the accumulator before further terms are applied to
--- the result. So, as promised, this will run in constant space, and GHC
--- is able to optimise this to an efficient loop.
+-- applied to an eagerly evaluated accumulator before further terms are applied
+-- to the result. As promised, this runs in constant space, and can be
+-- optimised to an efficient loop.
--------------
diff --git a/libraries/base/GHC/List.hs b/libraries/base/GHC/List.hs
index 07f65fc987..d13749923c 100644
--- a/libraries/base/GHC/List.hs
+++ b/libraries/base/GHC/List.hs
@@ -304,11 +304,51 @@ allocation-free. Also see #13001.
-- ----------------------------------------------------------------------------
-- | A strict version of 'foldl'.
-foldl' :: forall a b . (b -> a -> b) -> b -> [a] -> b
+foldl' :: forall a b . (b -> a -> b) -> b -> [a] -> b
{-# INLINE foldl' #-}
-foldl' k z0 xs =
+foldl' k z0 = \xs ->
foldr (\(v::a) (fn::b->b) -> oneShot (\(z::b) -> z `seq` fn (k z v))) (id :: b -> b) xs z0
- -- See Note [Left folds via right fold]
+{-
+Note [Definition of foldl']
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We want foldl' to be a good consumer, so:
+
+* We define it (rather cunningly) with `foldr`. That way, the `fold/build`
+ rule might fire. See Note [Left folds via right fold]
+
+* We give it an INLINE pragma, so that it'll inline at its call sites, again
+ giving the `fold/build` rule a chance to fire.
+
+* We eta-reduce it so that it has arity 2, not 3. Reason: consider
+
+ sumlen :: [Float] -> (Float, Int)
+ sumlen = foldl' f (0, 0)
+ where
+ f (!s, !n) !x = (s + x, n + 1)
+
+The RHS of `sumlen` is a partial application of foldl', and is not
+eta-expanded (and it isn't, because we don't eta-expand PAPs. See Note
+[Do not eta-expand PAPs] in GHC.Core.Opt.Simplify.Utils)
+
+So foldl' is partially applied to two arguments, /and it won't inline/
+if its defn is:
+
+ {-# INLINE foldl' #-}
+ foldl' k z xs = ...
+
+because INLINE functions only inline when saturated.
+
+Conclusion: move the `xs` parameter to the RHS, and define it thus
+
+ fold' k z = \xs -> ...
+
+See !5259 for additional discussion. This may result in partial applications
+of 'foldl'' inlining in some functions where they previously did not. Absent
+an INLINE pragam for the calling function, it may become too expensive to
+automatically inline, resulting in a loss of previously accidental list
+fusion. Such call sites may now need explicit INLINE or INLINABLE pragmas
+to make the desired list fusion robust.
+-}
-- | 'foldl1' is a variant of 'foldl' that has no starting value argument,
-- and thus must be applied to non-empty lists. Note that unlike 'foldl', the accumulated value must be of the same type as the list elements.