summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKoz Ross <koz.ross@retro-freedom.nz>2021-01-25 15:31:10 +1300
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-04-10 05:29:59 -0400
commit6b1d0b9cb5b984e7d4ada5626a675fe2d4e49a5d (patch)
treeecfe83e0dcc0b0b462529b34ffd002cbfbf3d255
parent3c98dda6550b5323a00f4c8cde916a636901db82 (diff)
downloadhaskell-6b1d0b9cb5b984e7d4ada5626a675fe2d4e49a5d.tar.gz
Implement list `fold` and `foldMap` via mconcat
- This allows specialized mconcat implementations an opportunity to combine elements efficiently in a single pass. - Inline the default implementation of `mconcat`, this may result in list fusion. - In Monoids with strict `mappend`, implement `mconcat` as a strict left fold: * And (FiniteBits) * Ior (FiniteBits) * Xor (FiniteBits) * Iff (FiniteBits) * Max (Ord) * Min (Ord) * Sum (Num) * Product (Num) * (a -> m) (Monoid m) - Delegate mconcat for WrappedMonoid to the underlying monoid. Resolves: #17123 Per the discussion in !4890, we expect some stat changes: * T17123(normal) run/alloc 403143160.0 4954736.0 -98.8% GOOD This is the expected improvement in `fold` for a long list of `Text` elements. * T13056(optasm) ghc/alloc 381013328.0 447700520.0 +17.5% BAD Here there's an extra simplifier run as a result of the new methods of the Foldable instance for List. It looks benign. The test is a micro benchmark that compiles just the derived foldable instances for a pair of structures, a cost of this magnitude is not expected to extend to more realistic programs. * T9198(normal) ghc/alloc 504661992.0 541334168.0 +7.3% BAD This test regressed from 8.10 and 9.0 back to exponential blowup. This metric also fluctuates, for reasons not yet clear. The issue here is the exponetial blowup, not this MR. Metric Decrease: T17123 Metric Increase: T9198 T13056
-rw-r--r--libraries/base/Data/Bits.hs17
-rw-r--r--libraries/base/Data/Foldable.hs16
-rw-r--r--libraries/base/Data/Functor/Utils.hs9
-rw-r--r--libraries/base/Data/Semigroup.hs12
-rw-r--r--libraries/base/Data/Semigroup/Internal.hs9
-rw-r--r--libraries/base/GHC/Base.hs7
6 files changed, 70 insertions, 0 deletions
diff --git a/libraries/base/Data/Bits.hs b/libraries/base/Data/Bits.hs
index 73ebfbc03c..92e94dfd39 100644
--- a/libraries/base/Data/Bits.hs
+++ b/libraries/base/Data/Bits.hs
@@ -60,6 +60,7 @@ module Data.Bits (
import GHC.Base
import GHC.Bits
import GHC.Enum
+import qualified GHC.List as List
import GHC.Read
import GHC.Show
@@ -116,6 +117,10 @@ instance (Bits a) => Semigroup (And a) where
-- @since 4.16
instance (FiniteBits a) => Monoid (And a) where
mempty = And oneBits
+ -- By default, we would get a lazy right fold. This forces the use of a strict
+ -- left fold instead.
+ mconcat = List.foldl' (<>) mempty
+ {-# INLINE mconcat #-}
-- | Monoid under bitwise inclusive OR.
--
@@ -143,6 +148,10 @@ instance (Bits a) => Semigroup (Ior a) where
-- | @since 4.16
instance (Bits a) => Monoid (Ior a) where
mempty = Ior zeroBits
+ -- By default, we would get a lazy right fold. This forces the use of a strict
+ -- left fold instead.
+ mconcat = List.foldl' (<>) mempty
+ {-# INLINE mconcat #-}
-- | Monoid under bitwise XOR.
--
@@ -170,6 +179,10 @@ instance (Bits a) => Semigroup (Xor a) where
-- | @since 4.16
instance (Bits a) => Monoid (Xor a) where
mempty = Xor zeroBits
+ -- By default, we would get a lazy right fold. This forces the use of a strict
+ -- left fold instead.
+ mconcat = List.foldl' (<>) mempty
+ {-# INLINE mconcat #-}
-- | Monoid under bitwise \'equality\'; defined as @1@ if the corresponding
-- bits match, and @0@ otherwise.
@@ -206,3 +219,7 @@ instance (FiniteBits a) => Semigroup (Iff a) where
-- @since 4.16
instance (FiniteBits a) => Monoid (Iff a) where
mempty = Iff oneBits
+ -- By default, we would get a lazy right fold. This forces the use of a strict
+ -- left fold instead.
+ mconcat = List.foldl' (<>) mempty
+ {-# INLINE mconcat #-}
diff --git a/libraries/base/Data/Foldable.hs b/libraries/base/Data/Foldable.hs
index bee564ee07..fe5ea6f45f 100644
--- a/libraries/base/Data/Foldable.hs
+++ b/libraries/base/Data/Foldable.hs
@@ -184,6 +184,7 @@ class Foldable t where
-- 2666668666666
--
fold :: Monoid m => t m -> m
+ {-# INLINE fold #-}
fold = foldMap id
-- | Map each element of the structure into a monoid, and combine the
@@ -712,6 +713,8 @@ instance Foldable [] where
foldl' = List.foldl'
foldl1 = List.foldl1
foldr = List.foldr
+ foldMap = (mconcat .) . map -- See Note [Monoidal list folds]
+ fold = mconcat -- See Note [Monoidal list folds]
foldr1 = List.foldr1
length = List.length
maximum = List.maximum
@@ -1460,6 +1463,19 @@ switched to employing foldl' over foldl1, not relying on GHC's optimiser. See
https://gitlab.haskell.org/ghc/ghc/-/issues/17867 for more context.
-}
+{-
+Note [Monoidal list folds]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+Folds of lists of monoid elements should generally use 'mconcat', rather than
+@foldr mappend mempty@. This allows specialized mconcat implementations an
+opportunity to combine elements efficiently. For example, `mappend` of strict
+`Text` and `ByteString` values typically needs to reallocate and copy the
+existing data, making incremental construction expensive (likely quadratic in
+the number of elements combined). The `mconcat` implementations for `Text` and
+`ByteString` preallocate the required storage, and then combine all the list
+elements in a single pass.
+-}
+
--------------
-- $overview
diff --git a/libraries/base/Data/Functor/Utils.hs b/libraries/base/Data/Functor/Utils.hs
index 57e75424da..5cf96d994c 100644
--- a/libraries/base/Data/Functor/Utils.hs
+++ b/libraries/base/Data/Functor/Utils.hs
@@ -12,6 +12,7 @@ module Data.Functor.Utils where
import Data.Coerce (Coercible, coerce)
import GHC.Base ( Applicative(..), Functor(..), Maybe(..), Monoid(..), Ord(..)
, Semigroup(..), ($), otherwise )
+import qualified GHC.List as List
-- We don't expose Max and Min because, as Edward Kmett pointed out to me,
-- there are two reasonable ways to define them. One way is to use Maybe, as we
@@ -34,6 +35,10 @@ instance Ord a => Semigroup (Max a) where
-- | @since 4.8.0.0
instance Ord a => Monoid (Max a) where
mempty = Max Nothing
+ -- By default, we would get a lazy right fold. This forces the use of a strict
+ -- left fold instead.
+ mconcat = List.foldl' (<>) mempty
+ {-# INLINE mconcat #-}
-- | @since 4.11.0.0
instance Ord a => Semigroup (Min a) where
@@ -47,6 +52,10 @@ instance Ord a => Semigroup (Min a) where
-- | @since 4.8.0.0
instance Ord a => Monoid (Min a) where
mempty = Min Nothing
+ -- By default, we would get a lazy right fold. This forces the use of a strict
+ -- left fold instead.
+ mconcat = List.foldl' (<>) mempty
+ {-# INLINE mconcat #-}
-- left-to-right state-transforming monad
newtype StateL s a = StateL { runStateL :: s -> (s, a) }
diff --git a/libraries/base/Data/Semigroup.hs b/libraries/base/Data/Semigroup.hs
index 13b1e0e77a..d31e8ce073 100644
--- a/libraries/base/Data/Semigroup.hs
+++ b/libraries/base/Data/Semigroup.hs
@@ -112,6 +112,7 @@ import Data.Bitraversable
import Data.Coerce
import Data.Data
import GHC.Generics
+import qualified GHC.List as List
-- $setup
-- >>> import Prelude
@@ -172,6 +173,10 @@ instance Ord a => Semigroup (Min a) where
-- | @since 4.9.0.0
instance (Ord a, Bounded a) => Monoid (Min a) where
mempty = maxBound
+ -- By default, we would get a lazy right fold. This forces the use of a strict
+ -- left fold instead.
+ mconcat = List.foldl' (<>) mempty
+ {-# INLINE mconcat #-}
-- | @since 4.9.0.0
instance Functor Min where
@@ -242,6 +247,10 @@ instance Ord a => Semigroup (Max a) where
-- | @since 4.9.0.0
instance (Ord a, Bounded a) => Monoid (Max a) where
mempty = minBound
+ -- By default, we would get a lazy right fold. This forces the use of a strict
+ -- left fold instead.
+ mconcat = List.foldl' (<>) mempty
+ {-# INLINE mconcat #-}
-- | @since 4.9.0.0
instance Functor Max where
@@ -483,6 +492,9 @@ instance Monoid m => Semigroup (WrappedMonoid m) where
-- | @since 4.9.0.0
instance Monoid m => Monoid (WrappedMonoid m) where
mempty = WrapMonoid mempty
+ -- This ensures that we use whatever mconcat is defined for the wrapped
+ -- Monoid.
+ mconcat = coerce (mconcat :: [m] -> m)
-- | @since 4.9.0.0
instance Enum a => Enum (WrappedMonoid a) where
diff --git a/libraries/base/Data/Semigroup/Internal.hs b/libraries/base/Data/Semigroup/Internal.hs
index 4ce73d0c05..a9e0079d0b 100644
--- a/libraries/base/Data/Semigroup/Internal.hs
+++ b/libraries/base/Data/Semigroup/Internal.hs
@@ -22,6 +22,7 @@ module Data.Semigroup.Internal where
import GHC.Base hiding (Any)
import GHC.Enum
+import qualified GHC.List as List
import GHC.Num
import GHC.Read
import GHC.Show
@@ -230,6 +231,10 @@ instance Num a => Semigroup (Sum a) where
-- | @since 2.01
instance Num a => Monoid (Sum a) where
mempty = Sum 0
+ -- By default, we would get a lazy right fold. This forces the use of a strict
+ -- left fold instead.
+ mconcat = List.foldl' (<>) mempty
+ {-# INLINE mconcat #-}
-- | @since 4.8.0.0
instance Functor Sum where
@@ -268,6 +273,10 @@ instance Num a => Semigroup (Product a) where
-- | @since 2.01
instance Num a => Monoid (Product a) where
mempty = Product 1
+ -- By default, we would get a lazy right fold. This forces the use of a strict
+ -- left fold instead.
+ mconcat = List.foldl' (<>) mempty
+ {-# INLINE mconcat #-}
-- | @since 4.8.0.0
instance Functor Product where
diff --git a/libraries/base/GHC/Base.hs b/libraries/base/GHC/Base.hs
index f4fd326fb2..205fee906b 100644
--- a/libraries/base/GHC/Base.hs
+++ b/libraries/base/GHC/Base.hs
@@ -291,6 +291,8 @@ class Semigroup a => Monoid a where
-- "Hello Haskell!"
mconcat :: [a] -> a
mconcat = foldr mappend mempty
+ {-# INLINE mconcat #-}
+ -- INLINE in the hope of fusion with mconcat's argument (see !4890)
-- | @since 4.9.0.0
instance Semigroup [a] where
@@ -338,6 +340,11 @@ instance Semigroup b => Semigroup (a -> b) where
-- | @since 2.01
instance Monoid b => Monoid (a -> b) where
mempty _ = mempty
+ -- If `b` has a specialised mconcat, use that, rather than the default
+ -- mconcat, which can be much less efficient. Inline in the hope that
+ -- it may result in list fusion.
+ mconcat = \fs x -> mconcat $ map (\f -> f x) fs
+ {-# INLINE mconcat #-}
-- | @since 4.9.0.0
instance Semigroup () where