diff options
author | Koz Ross <koz.ross@retro-freedom.nz> | 2021-01-25 15:31:10 +1300 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2021-04-10 05:29:59 -0400 |
commit | 6b1d0b9cb5b984e7d4ada5626a675fe2d4e49a5d (patch) | |
tree | ecfe83e0dcc0b0b462529b34ffd002cbfbf3d255 | |
parent | 3c98dda6550b5323a00f4c8cde916a636901db82 (diff) | |
download | haskell-6b1d0b9cb5b984e7d4ada5626a675fe2d4e49a5d.tar.gz |
Implement list `fold` and `foldMap` via mconcat
- This allows specialized mconcat implementations an opportunity to combine
elements efficiently in a single pass.
- Inline the default implementation of `mconcat`, this
may result in list fusion.
- In Monoids with strict `mappend`, implement `mconcat` as a strict left fold:
* And (FiniteBits)
* Ior (FiniteBits)
* Xor (FiniteBits)
* Iff (FiniteBits)
* Max (Ord)
* Min (Ord)
* Sum (Num)
* Product (Num)
* (a -> m) (Monoid m)
- Delegate mconcat for WrappedMonoid to the underlying monoid.
Resolves: #17123
Per the discussion in !4890, we expect some stat changes:
* T17123(normal) run/alloc 403143160.0 4954736.0 -98.8% GOOD
This is the expected improvement in `fold` for a long list of
`Text` elements.
* T13056(optasm) ghc/alloc 381013328.0 447700520.0 +17.5% BAD
Here there's an extra simplifier run as a result of the new methods
of the Foldable instance for List. It looks benign. The test is
a micro benchmark that compiles just the derived foldable instances
for a pair of structures, a cost of this magnitude is not expected
to extend to more realistic programs.
* T9198(normal) ghc/alloc 504661992.0 541334168.0 +7.3% BAD
This test regressed from 8.10 and 9.0 back to exponential blowup.
This metric also fluctuates, for reasons not yet clear. The issue
here is the exponetial blowup, not this MR.
Metric Decrease:
T17123
Metric Increase:
T9198
T13056
-rw-r--r-- | libraries/base/Data/Bits.hs | 17 | ||||
-rw-r--r-- | libraries/base/Data/Foldable.hs | 16 | ||||
-rw-r--r-- | libraries/base/Data/Functor/Utils.hs | 9 | ||||
-rw-r--r-- | libraries/base/Data/Semigroup.hs | 12 | ||||
-rw-r--r-- | libraries/base/Data/Semigroup/Internal.hs | 9 | ||||
-rw-r--r-- | libraries/base/GHC/Base.hs | 7 |
6 files changed, 70 insertions, 0 deletions
diff --git a/libraries/base/Data/Bits.hs b/libraries/base/Data/Bits.hs index 73ebfbc03c..92e94dfd39 100644 --- a/libraries/base/Data/Bits.hs +++ b/libraries/base/Data/Bits.hs @@ -60,6 +60,7 @@ module Data.Bits ( import GHC.Base import GHC.Bits import GHC.Enum +import qualified GHC.List as List import GHC.Read import GHC.Show @@ -116,6 +117,10 @@ instance (Bits a) => Semigroup (And a) where -- @since 4.16 instance (FiniteBits a) => Monoid (And a) where mempty = And oneBits + -- By default, we would get a lazy right fold. This forces the use of a strict + -- left fold instead. + mconcat = List.foldl' (<>) mempty + {-# INLINE mconcat #-} -- | Monoid under bitwise inclusive OR. -- @@ -143,6 +148,10 @@ instance (Bits a) => Semigroup (Ior a) where -- | @since 4.16 instance (Bits a) => Monoid (Ior a) where mempty = Ior zeroBits + -- By default, we would get a lazy right fold. This forces the use of a strict + -- left fold instead. + mconcat = List.foldl' (<>) mempty + {-# INLINE mconcat #-} -- | Monoid under bitwise XOR. -- @@ -170,6 +179,10 @@ instance (Bits a) => Semigroup (Xor a) where -- | @since 4.16 instance (Bits a) => Monoid (Xor a) where mempty = Xor zeroBits + -- By default, we would get a lazy right fold. This forces the use of a strict + -- left fold instead. + mconcat = List.foldl' (<>) mempty + {-# INLINE mconcat #-} -- | Monoid under bitwise \'equality\'; defined as @1@ if the corresponding -- bits match, and @0@ otherwise. @@ -206,3 +219,7 @@ instance (FiniteBits a) => Semigroup (Iff a) where -- @since 4.16 instance (FiniteBits a) => Monoid (Iff a) where mempty = Iff oneBits + -- By default, we would get a lazy right fold. This forces the use of a strict + -- left fold instead. + mconcat = List.foldl' (<>) mempty + {-# INLINE mconcat #-} diff --git a/libraries/base/Data/Foldable.hs b/libraries/base/Data/Foldable.hs index bee564ee07..fe5ea6f45f 100644 --- a/libraries/base/Data/Foldable.hs +++ b/libraries/base/Data/Foldable.hs @@ -184,6 +184,7 @@ class Foldable t where -- 2666668666666 -- fold :: Monoid m => t m -> m + {-# INLINE fold #-} fold = foldMap id -- | Map each element of the structure into a monoid, and combine the @@ -712,6 +713,8 @@ instance Foldable [] where foldl' = List.foldl' foldl1 = List.foldl1 foldr = List.foldr + foldMap = (mconcat .) . map -- See Note [Monoidal list folds] + fold = mconcat -- See Note [Monoidal list folds] foldr1 = List.foldr1 length = List.length maximum = List.maximum @@ -1460,6 +1463,19 @@ switched to employing foldl' over foldl1, not relying on GHC's optimiser. See https://gitlab.haskell.org/ghc/ghc/-/issues/17867 for more context. -} +{- +Note [Monoidal list folds] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Folds of lists of monoid elements should generally use 'mconcat', rather than +@foldr mappend mempty@. This allows specialized mconcat implementations an +opportunity to combine elements efficiently. For example, `mappend` of strict +`Text` and `ByteString` values typically needs to reallocate and copy the +existing data, making incremental construction expensive (likely quadratic in +the number of elements combined). The `mconcat` implementations for `Text` and +`ByteString` preallocate the required storage, and then combine all the list +elements in a single pass. +-} + -------------- -- $overview diff --git a/libraries/base/Data/Functor/Utils.hs b/libraries/base/Data/Functor/Utils.hs index 57e75424da..5cf96d994c 100644 --- a/libraries/base/Data/Functor/Utils.hs +++ b/libraries/base/Data/Functor/Utils.hs @@ -12,6 +12,7 @@ module Data.Functor.Utils where import Data.Coerce (Coercible, coerce) import GHC.Base ( Applicative(..), Functor(..), Maybe(..), Monoid(..), Ord(..) , Semigroup(..), ($), otherwise ) +import qualified GHC.List as List -- We don't expose Max and Min because, as Edward Kmett pointed out to me, -- there are two reasonable ways to define them. One way is to use Maybe, as we @@ -34,6 +35,10 @@ instance Ord a => Semigroup (Max a) where -- | @since 4.8.0.0 instance Ord a => Monoid (Max a) where mempty = Max Nothing + -- By default, we would get a lazy right fold. This forces the use of a strict + -- left fold instead. + mconcat = List.foldl' (<>) mempty + {-# INLINE mconcat #-} -- | @since 4.11.0.0 instance Ord a => Semigroup (Min a) where @@ -47,6 +52,10 @@ instance Ord a => Semigroup (Min a) where -- | @since 4.8.0.0 instance Ord a => Monoid (Min a) where mempty = Min Nothing + -- By default, we would get a lazy right fold. This forces the use of a strict + -- left fold instead. + mconcat = List.foldl' (<>) mempty + {-# INLINE mconcat #-} -- left-to-right state-transforming monad newtype StateL s a = StateL { runStateL :: s -> (s, a) } diff --git a/libraries/base/Data/Semigroup.hs b/libraries/base/Data/Semigroup.hs index 13b1e0e77a..d31e8ce073 100644 --- a/libraries/base/Data/Semigroup.hs +++ b/libraries/base/Data/Semigroup.hs @@ -112,6 +112,7 @@ import Data.Bitraversable import Data.Coerce import Data.Data import GHC.Generics +import qualified GHC.List as List -- $setup -- >>> import Prelude @@ -172,6 +173,10 @@ instance Ord a => Semigroup (Min a) where -- | @since 4.9.0.0 instance (Ord a, Bounded a) => Monoid (Min a) where mempty = maxBound + -- By default, we would get a lazy right fold. This forces the use of a strict + -- left fold instead. + mconcat = List.foldl' (<>) mempty + {-# INLINE mconcat #-} -- | @since 4.9.0.0 instance Functor Min where @@ -242,6 +247,10 @@ instance Ord a => Semigroup (Max a) where -- | @since 4.9.0.0 instance (Ord a, Bounded a) => Monoid (Max a) where mempty = minBound + -- By default, we would get a lazy right fold. This forces the use of a strict + -- left fold instead. + mconcat = List.foldl' (<>) mempty + {-# INLINE mconcat #-} -- | @since 4.9.0.0 instance Functor Max where @@ -483,6 +492,9 @@ instance Monoid m => Semigroup (WrappedMonoid m) where -- | @since 4.9.0.0 instance Monoid m => Monoid (WrappedMonoid m) where mempty = WrapMonoid mempty + -- This ensures that we use whatever mconcat is defined for the wrapped + -- Monoid. + mconcat = coerce (mconcat :: [m] -> m) -- | @since 4.9.0.0 instance Enum a => Enum (WrappedMonoid a) where diff --git a/libraries/base/Data/Semigroup/Internal.hs b/libraries/base/Data/Semigroup/Internal.hs index 4ce73d0c05..a9e0079d0b 100644 --- a/libraries/base/Data/Semigroup/Internal.hs +++ b/libraries/base/Data/Semigroup/Internal.hs @@ -22,6 +22,7 @@ module Data.Semigroup.Internal where import GHC.Base hiding (Any) import GHC.Enum +import qualified GHC.List as List import GHC.Num import GHC.Read import GHC.Show @@ -230,6 +231,10 @@ instance Num a => Semigroup (Sum a) where -- | @since 2.01 instance Num a => Monoid (Sum a) where mempty = Sum 0 + -- By default, we would get a lazy right fold. This forces the use of a strict + -- left fold instead. + mconcat = List.foldl' (<>) mempty + {-# INLINE mconcat #-} -- | @since 4.8.0.0 instance Functor Sum where @@ -268,6 +273,10 @@ instance Num a => Semigroup (Product a) where -- | @since 2.01 instance Num a => Monoid (Product a) where mempty = Product 1 + -- By default, we would get a lazy right fold. This forces the use of a strict + -- left fold instead. + mconcat = List.foldl' (<>) mempty + {-# INLINE mconcat #-} -- | @since 4.8.0.0 instance Functor Product where diff --git a/libraries/base/GHC/Base.hs b/libraries/base/GHC/Base.hs index f4fd326fb2..205fee906b 100644 --- a/libraries/base/GHC/Base.hs +++ b/libraries/base/GHC/Base.hs @@ -291,6 +291,8 @@ class Semigroup a => Monoid a where -- "Hello Haskell!" mconcat :: [a] -> a mconcat = foldr mappend mempty + {-# INLINE mconcat #-} + -- INLINE in the hope of fusion with mconcat's argument (see !4890) -- | @since 4.9.0.0 instance Semigroup [a] where @@ -338,6 +340,11 @@ instance Semigroup b => Semigroup (a -> b) where -- | @since 2.01 instance Monoid b => Monoid (a -> b) where mempty _ = mempty + -- If `b` has a specialised mconcat, use that, rather than the default + -- mconcat, which can be much less efficient. Inline in the hope that + -- it may result in list fusion. + mconcat = \fs x -> mconcat $ map (\f -> f x) fs + {-# INLINE mconcat #-} -- | @since 4.9.0.0 instance Semigroup () where |