From 0c48e172836d6a1e281aed63e42d60063700e6d8 Mon Sep 17 00:00:00 2001 From: Austin Seipp Date: Wed, 3 Dec 2014 12:44:03 -0600 Subject: compiler: de-lhs utils/ Signed-off-by: Austin Seipp --- compiler/utils/Bag.hs | 266 +++++++++ compiler/utils/Bag.lhs | 273 --------- compiler/utils/Digraph.hs | 652 +++++++++++++++++++++ compiler/utils/Digraph.lhs | 668 --------------------- compiler/utils/FastBool.hs | 70 +++ compiler/utils/FastBool.lhs | 72 --- compiler/utils/FastFunctions.hs | 46 ++ compiler/utils/FastFunctions.lhs | 47 -- compiler/utils/FastMutInt.hs | 63 ++ compiler/utils/FastMutInt.lhs | 68 --- compiler/utils/FastString.hs | 640 ++++++++++++++++++++ compiler/utils/FastString.lhs | 643 -------------------- compiler/utils/FastTypes.hs | 138 +++++ compiler/utils/FastTypes.lhs | 140 ----- compiler/utils/FiniteMap.hs | 29 + compiler/utils/FiniteMap.lhs | 32 - compiler/utils/ListSetOps.hs | 187 ++++++ compiler/utils/ListSetOps.lhs | 196 ------- compiler/utils/Maybes.hs | 106 ++++ compiler/utils/Maybes.lhs | 111 ---- compiler/utils/OrdList.hs | 98 ++++ compiler/utils/OrdList.lhs | 99 ---- compiler/utils/Outputable.hs | 1027 ++++++++++++++++++++++++++++++++ compiler/utils/Outputable.hs-boot | 3 + compiler/utils/Outputable.lhs | 1047 --------------------------------- compiler/utils/Outputable.lhs-boot | 7 - compiler/utils/Pair.hs | 50 ++ compiler/utils/Pair.lhs | 51 -- compiler/utils/Panic.hs | 307 ++++++++++ compiler/utils/Panic.lhs | 309 ---------- compiler/utils/Pretty.hs | 1024 ++++++++++++++++++++++++++++++++ compiler/utils/Pretty.lhs | 1057 --------------------------------- compiler/utils/StringBuffer.hs | 257 ++++++++ compiler/utils/StringBuffer.lhs | 259 -------- compiler/utils/UniqFM.hs | 311 ++++++++++ compiler/utils/UniqFM.lhs | 314 ---------- compiler/utils/UniqSet.hs | 115 ++++ compiler/utils/UniqSet.lhs | 119 ---- compiler/utils/Util.hs | 1097 ++++++++++++++++++++++++++++++++++ compiler/utils/Util.lhs | 1135 ------------------------------------ 40 files changed, 6486 insertions(+), 6647 deletions(-) create mode 100644 compiler/utils/Bag.hs delete mode 100644 compiler/utils/Bag.lhs create mode 100644 compiler/utils/Digraph.hs delete mode 100644 compiler/utils/Digraph.lhs create mode 100644 compiler/utils/FastBool.hs delete mode 100644 compiler/utils/FastBool.lhs create mode 100644 compiler/utils/FastFunctions.hs delete mode 100644 compiler/utils/FastFunctions.lhs create mode 100644 compiler/utils/FastMutInt.hs delete mode 100644 compiler/utils/FastMutInt.lhs create mode 100644 compiler/utils/FastString.hs delete mode 100644 compiler/utils/FastString.lhs create mode 100644 compiler/utils/FastTypes.hs delete mode 100644 compiler/utils/FastTypes.lhs create mode 100644 compiler/utils/FiniteMap.hs delete mode 100644 compiler/utils/FiniteMap.lhs create mode 100644 compiler/utils/ListSetOps.hs delete mode 100644 compiler/utils/ListSetOps.lhs create mode 100644 compiler/utils/Maybes.hs delete mode 100644 compiler/utils/Maybes.lhs create mode 100644 compiler/utils/OrdList.hs delete mode 100644 compiler/utils/OrdList.lhs create mode 100644 compiler/utils/Outputable.hs create mode 100644 compiler/utils/Outputable.hs-boot delete mode 100644 compiler/utils/Outputable.lhs delete mode 100644 compiler/utils/Outputable.lhs-boot create mode 100644 compiler/utils/Pair.hs delete mode 100644 compiler/utils/Pair.lhs create mode 100644 compiler/utils/Panic.hs delete mode 100644 compiler/utils/Panic.lhs create mode 100644 compiler/utils/Pretty.hs delete mode 100644 compiler/utils/Pretty.lhs create mode 100644 compiler/utils/StringBuffer.hs delete mode 100644 compiler/utils/StringBuffer.lhs create mode 100644 compiler/utils/UniqFM.hs delete mode 100644 compiler/utils/UniqFM.lhs create mode 100644 compiler/utils/UniqSet.hs delete mode 100644 compiler/utils/UniqSet.lhs create mode 100644 compiler/utils/Util.hs delete mode 100644 compiler/utils/Util.lhs (limited to 'compiler') diff --git a/compiler/utils/Bag.hs b/compiler/utils/Bag.hs new file mode 100644 index 0000000000..95feaed9f8 --- /dev/null +++ b/compiler/utils/Bag.hs @@ -0,0 +1,266 @@ +{- +(c) The University of Glasgow 2006 +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 + + +Bag: an unordered collection with duplicates +-} + +{-# LANGUAGE DeriveDataTypeable, ScopedTypeVariables #-} + +module Bag ( + Bag, -- abstract type + + emptyBag, unitBag, unionBags, unionManyBags, + mapBag, + elemBag, lengthBag, + filterBag, partitionBag, partitionBagWith, + concatBag, foldBag, foldrBag, foldlBag, + isEmptyBag, isSingletonBag, consBag, snocBag, anyBag, + listToBag, bagToList, + foldrBagM, foldlBagM, mapBagM, mapBagM_, + flatMapBagM, flatMapBagPairM, + mapAndUnzipBagM, mapAccumBagLM + ) where + +import Outputable +import Util + +import MonadUtils +import Data.Data +import Data.List ( partition ) + +infixr 3 `consBag` +infixl 3 `snocBag` + +data Bag a + = EmptyBag + | UnitBag a + | TwoBags (Bag a) (Bag a) -- INVARIANT: neither branch is empty + | ListBag [a] -- INVARIANT: the list is non-empty + deriving Typeable + +emptyBag :: Bag a +emptyBag = EmptyBag + +unitBag :: a -> Bag a +unitBag = UnitBag + +lengthBag :: Bag a -> Int +lengthBag EmptyBag = 0 +lengthBag (UnitBag {}) = 1 +lengthBag (TwoBags b1 b2) = lengthBag b1 + lengthBag b2 +lengthBag (ListBag xs) = length xs + +elemBag :: Eq a => a -> Bag a -> Bool +elemBag _ EmptyBag = False +elemBag x (UnitBag y) = x == y +elemBag x (TwoBags b1 b2) = x `elemBag` b1 || x `elemBag` b2 +elemBag x (ListBag ys) = any (x ==) ys + +unionManyBags :: [Bag a] -> Bag a +unionManyBags xs = foldr unionBags EmptyBag xs + +-- This one is a bit stricter! The bag will get completely evaluated. + +unionBags :: Bag a -> Bag a -> Bag a +unionBags EmptyBag b = b +unionBags b EmptyBag = b +unionBags b1 b2 = TwoBags b1 b2 + +consBag :: a -> Bag a -> Bag a +snocBag :: Bag a -> a -> Bag a + +consBag elt bag = (unitBag elt) `unionBags` bag +snocBag bag elt = bag `unionBags` (unitBag elt) + +isEmptyBag :: Bag a -> Bool +isEmptyBag EmptyBag = True +isEmptyBag _ = False -- NB invariants + +isSingletonBag :: Bag a -> Bool +isSingletonBag EmptyBag = False +isSingletonBag (UnitBag _) = True +isSingletonBag (TwoBags _ _) = False -- Neither is empty +isSingletonBag (ListBag xs) = isSingleton xs + +filterBag :: (a -> Bool) -> Bag a -> Bag a +filterBag _ EmptyBag = EmptyBag +filterBag pred b@(UnitBag val) = if pred val then b else EmptyBag +filterBag pred (TwoBags b1 b2) = sat1 `unionBags` sat2 + where sat1 = filterBag pred b1 + sat2 = filterBag pred b2 +filterBag pred (ListBag vs) = listToBag (filter pred vs) + +anyBag :: (a -> Bool) -> Bag a -> Bool +anyBag _ EmptyBag = False +anyBag p (UnitBag v) = p v +anyBag p (TwoBags b1 b2) = anyBag p b1 || anyBag p b2 +anyBag p (ListBag xs) = any p xs + +concatBag :: Bag (Bag a) -> Bag a +concatBag EmptyBag = EmptyBag +concatBag (UnitBag b) = b +concatBag (TwoBags b1 b2) = concatBag b1 `unionBags` concatBag b2 +concatBag (ListBag bs) = unionManyBags bs + +partitionBag :: (a -> Bool) -> Bag a -> (Bag a {- Satisfy predictate -}, + Bag a {- Don't -}) +partitionBag _ EmptyBag = (EmptyBag, EmptyBag) +partitionBag pred b@(UnitBag val) + = if pred val then (b, EmptyBag) else (EmptyBag, b) +partitionBag pred (TwoBags b1 b2) + = (sat1 `unionBags` sat2, fail1 `unionBags` fail2) + where (sat1, fail1) = partitionBag pred b1 + (sat2, fail2) = partitionBag pred b2 +partitionBag pred (ListBag vs) = (listToBag sats, listToBag fails) + where (sats, fails) = partition pred vs + + +partitionBagWith :: (a -> Either b c) -> Bag a + -> (Bag b {- Left -}, + Bag c {- Right -}) +partitionBagWith _ EmptyBag = (EmptyBag, EmptyBag) +partitionBagWith pred (UnitBag val) + = case pred val of + Left a -> (UnitBag a, EmptyBag) + Right b -> (EmptyBag, UnitBag b) +partitionBagWith pred (TwoBags b1 b2) + = (sat1 `unionBags` sat2, fail1 `unionBags` fail2) + where (sat1, fail1) = partitionBagWith pred b1 + (sat2, fail2) = partitionBagWith pred b2 +partitionBagWith pred (ListBag vs) = (listToBag sats, listToBag fails) + where (sats, fails) = partitionWith pred vs + +foldBag :: (r -> r -> r) -- Replace TwoBags with this; should be associative + -> (a -> r) -- Replace UnitBag with this + -> r -- Replace EmptyBag with this + -> Bag a + -> r + +{- Standard definition +foldBag t u e EmptyBag = e +foldBag t u e (UnitBag x) = u x +foldBag t u e (TwoBags b1 b2) = (foldBag t u e b1) `t` (foldBag t u e b2) +foldBag t u e (ListBag xs) = foldr (t.u) e xs +-} + +-- More tail-recursive definition, exploiting associativity of "t" +foldBag _ _ e EmptyBag = e +foldBag t u e (UnitBag x) = u x `t` e +foldBag t u e (TwoBags b1 b2) = foldBag t u (foldBag t u e b2) b1 +foldBag t u e (ListBag xs) = foldr (t.u) e xs + +foldrBag :: (a -> r -> r) -> r + -> Bag a + -> r + +foldrBag _ z EmptyBag = z +foldrBag k z (UnitBag x) = k x z +foldrBag k z (TwoBags b1 b2) = foldrBag k (foldrBag k z b2) b1 +foldrBag k z (ListBag xs) = foldr k z xs + +foldlBag :: (r -> a -> r) -> r + -> Bag a + -> r + +foldlBag _ z EmptyBag = z +foldlBag k z (UnitBag x) = k z x +foldlBag k z (TwoBags b1 b2) = foldlBag k (foldlBag k z b1) b2 +foldlBag k z (ListBag xs) = foldl k z xs + +foldrBagM :: (Monad m) => (a -> b -> m b) -> b -> Bag a -> m b +foldrBagM _ z EmptyBag = return z +foldrBagM k z (UnitBag x) = k x z +foldrBagM k z (TwoBags b1 b2) = do { z' <- foldrBagM k z b2; foldrBagM k z' b1 } +foldrBagM k z (ListBag xs) = foldrM k z xs + +foldlBagM :: (Monad m) => (b -> a -> m b) -> b -> Bag a -> m b +foldlBagM _ z EmptyBag = return z +foldlBagM k z (UnitBag x) = k z x +foldlBagM k z (TwoBags b1 b2) = do { z' <- foldlBagM k z b1; foldlBagM k z' b2 } +foldlBagM k z (ListBag xs) = foldlM k z xs + +mapBag :: (a -> b) -> Bag a -> Bag b +mapBag _ EmptyBag = EmptyBag +mapBag f (UnitBag x) = UnitBag (f x) +mapBag f (TwoBags b1 b2) = TwoBags (mapBag f b1) (mapBag f b2) +mapBag f (ListBag xs) = ListBag (map f xs) + +mapBagM :: Monad m => (a -> m b) -> Bag a -> m (Bag b) +mapBagM _ EmptyBag = return EmptyBag +mapBagM f (UnitBag x) = do r <- f x + return (UnitBag r) +mapBagM f (TwoBags b1 b2) = do r1 <- mapBagM f b1 + r2 <- mapBagM f b2 + return (TwoBags r1 r2) +mapBagM f (ListBag xs) = do rs <- mapM f xs + return (ListBag rs) + +mapBagM_ :: Monad m => (a -> m b) -> Bag a -> m () +mapBagM_ _ EmptyBag = return () +mapBagM_ f (UnitBag x) = f x >> return () +mapBagM_ f (TwoBags b1 b2) = mapBagM_ f b1 >> mapBagM_ f b2 +mapBagM_ f (ListBag xs) = mapM_ f xs + +flatMapBagM :: Monad m => (a -> m (Bag b)) -> Bag a -> m (Bag b) +flatMapBagM _ EmptyBag = return EmptyBag +flatMapBagM f (UnitBag x) = f x +flatMapBagM f (TwoBags b1 b2) = do r1 <- flatMapBagM f b1 + r2 <- flatMapBagM f b2 + return (r1 `unionBags` r2) +flatMapBagM f (ListBag xs) = foldrM k EmptyBag xs + where + k x b2 = do { b1 <- f x; return (b1 `unionBags` b2) } + +flatMapBagPairM :: Monad m => (a -> m (Bag b, Bag c)) -> Bag a -> m (Bag b, Bag c) +flatMapBagPairM _ EmptyBag = return (EmptyBag, EmptyBag) +flatMapBagPairM f (UnitBag x) = f x +flatMapBagPairM f (TwoBags b1 b2) = do (r1,s1) <- flatMapBagPairM f b1 + (r2,s2) <- flatMapBagPairM f b2 + return (r1 `unionBags` r2, s1 `unionBags` s2) +flatMapBagPairM f (ListBag xs) = foldrM k (EmptyBag, EmptyBag) xs + where + k x (r2,s2) = do { (r1,s1) <- f x + ; return (r1 `unionBags` r2, s1 `unionBags` s2) } + +mapAndUnzipBagM :: Monad m => (a -> m (b,c)) -> Bag a -> m (Bag b, Bag c) +mapAndUnzipBagM _ EmptyBag = return (EmptyBag, EmptyBag) +mapAndUnzipBagM f (UnitBag x) = do (r,s) <- f x + return (UnitBag r, UnitBag s) +mapAndUnzipBagM f (TwoBags b1 b2) = do (r1,s1) <- mapAndUnzipBagM f b1 + (r2,s2) <- mapAndUnzipBagM f b2 + return (TwoBags r1 r2, TwoBags s1 s2) +mapAndUnzipBagM f (ListBag xs) = do ts <- mapM f xs + let (rs,ss) = unzip ts + return (ListBag rs, ListBag ss) + +mapAccumBagLM :: Monad m + => (acc -> x -> m (acc, y)) -- ^ combining funcction + -> acc -- ^ initial state + -> Bag x -- ^ inputs + -> m (acc, Bag y) -- ^ final state, outputs +mapAccumBagLM _ s EmptyBag = return (s, EmptyBag) +mapAccumBagLM f s (UnitBag x) = do { (s1, x1) <- f s x; return (s1, UnitBag x1) } +mapAccumBagLM f s (TwoBags b1 b2) = do { (s1, b1') <- mapAccumBagLM f s b1 + ; (s2, b2') <- mapAccumBagLM f s1 b2 + ; return (s2, TwoBags b1' b2') } +mapAccumBagLM f s (ListBag xs) = do { (s', xs') <- mapAccumLM f s xs + ; return (s', ListBag xs') } + +listToBag :: [a] -> Bag a +listToBag [] = EmptyBag +listToBag vs = ListBag vs + +bagToList :: Bag a -> [a] +bagToList b = foldrBag (:) [] b + +instance (Outputable a) => Outputable (Bag a) where + ppr bag = braces (pprWithCommas ppr (bagToList bag)) + +instance Data a => Data (Bag a) where + gfoldl k z b = z listToBag `k` bagToList b -- traverse abstract type abstractly + toConstr _ = abstractConstr $ "Bag("++show (typeOf (undefined::a))++")" + gunfold _ _ = error "gunfold" + dataTypeOf _ = mkNoRepType "Bag" + dataCast1 x = gcast1 x diff --git a/compiler/utils/Bag.lhs b/compiler/utils/Bag.lhs deleted file mode 100644 index 65c5b39df1..0000000000 --- a/compiler/utils/Bag.lhs +++ /dev/null @@ -1,273 +0,0 @@ -% -% (c) The University of Glasgow 2006 -% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 -% - -Bag: an unordered collection with duplicates - -\begin{code} -{-# LANGUAGE DeriveDataTypeable, ScopedTypeVariables #-} - -module Bag ( - Bag, -- abstract type - - emptyBag, unitBag, unionBags, unionManyBags, - mapBag, - elemBag, lengthBag, - filterBag, partitionBag, partitionBagWith, - concatBag, foldBag, foldrBag, foldlBag, - isEmptyBag, isSingletonBag, consBag, snocBag, anyBag, - listToBag, bagToList, - foldrBagM, foldlBagM, mapBagM, mapBagM_, - flatMapBagM, flatMapBagPairM, - mapAndUnzipBagM, mapAccumBagLM - ) where - -import Outputable -import Util - -import MonadUtils -import Data.Data -import Data.List ( partition ) - -infixr 3 `consBag` -infixl 3 `snocBag` -\end{code} - - -\begin{code} -data Bag a - = EmptyBag - | UnitBag a - | TwoBags (Bag a) (Bag a) -- INVARIANT: neither branch is empty - | ListBag [a] -- INVARIANT: the list is non-empty - deriving Typeable - -emptyBag :: Bag a -emptyBag = EmptyBag - -unitBag :: a -> Bag a -unitBag = UnitBag - -lengthBag :: Bag a -> Int -lengthBag EmptyBag = 0 -lengthBag (UnitBag {}) = 1 -lengthBag (TwoBags b1 b2) = lengthBag b1 + lengthBag b2 -lengthBag (ListBag xs) = length xs - -elemBag :: Eq a => a -> Bag a -> Bool -elemBag _ EmptyBag = False -elemBag x (UnitBag y) = x == y -elemBag x (TwoBags b1 b2) = x `elemBag` b1 || x `elemBag` b2 -elemBag x (ListBag ys) = any (x ==) ys - -unionManyBags :: [Bag a] -> Bag a -unionManyBags xs = foldr unionBags EmptyBag xs - --- This one is a bit stricter! The bag will get completely evaluated. - -unionBags :: Bag a -> Bag a -> Bag a -unionBags EmptyBag b = b -unionBags b EmptyBag = b -unionBags b1 b2 = TwoBags b1 b2 - -consBag :: a -> Bag a -> Bag a -snocBag :: Bag a -> a -> Bag a - -consBag elt bag = (unitBag elt) `unionBags` bag -snocBag bag elt = bag `unionBags` (unitBag elt) - -isEmptyBag :: Bag a -> Bool -isEmptyBag EmptyBag = True -isEmptyBag _ = False -- NB invariants - -isSingletonBag :: Bag a -> Bool -isSingletonBag EmptyBag = False -isSingletonBag (UnitBag _) = True -isSingletonBag (TwoBags _ _) = False -- Neither is empty -isSingletonBag (ListBag xs) = isSingleton xs - -filterBag :: (a -> Bool) -> Bag a -> Bag a -filterBag _ EmptyBag = EmptyBag -filterBag pred b@(UnitBag val) = if pred val then b else EmptyBag -filterBag pred (TwoBags b1 b2) = sat1 `unionBags` sat2 - where sat1 = filterBag pred b1 - sat2 = filterBag pred b2 -filterBag pred (ListBag vs) = listToBag (filter pred vs) - -anyBag :: (a -> Bool) -> Bag a -> Bool -anyBag _ EmptyBag = False -anyBag p (UnitBag v) = p v -anyBag p (TwoBags b1 b2) = anyBag p b1 || anyBag p b2 -anyBag p (ListBag xs) = any p xs - -concatBag :: Bag (Bag a) -> Bag a -concatBag EmptyBag = EmptyBag -concatBag (UnitBag b) = b -concatBag (TwoBags b1 b2) = concatBag b1 `unionBags` concatBag b2 -concatBag (ListBag bs) = unionManyBags bs - -partitionBag :: (a -> Bool) -> Bag a -> (Bag a {- Satisfy predictate -}, - Bag a {- Don't -}) -partitionBag _ EmptyBag = (EmptyBag, EmptyBag) -partitionBag pred b@(UnitBag val) - = if pred val then (b, EmptyBag) else (EmptyBag, b) -partitionBag pred (TwoBags b1 b2) - = (sat1 `unionBags` sat2, fail1 `unionBags` fail2) - where (sat1, fail1) = partitionBag pred b1 - (sat2, fail2) = partitionBag pred b2 -partitionBag pred (ListBag vs) = (listToBag sats, listToBag fails) - where (sats, fails) = partition pred vs - - -partitionBagWith :: (a -> Either b c) -> Bag a - -> (Bag b {- Left -}, - Bag c {- Right -}) -partitionBagWith _ EmptyBag = (EmptyBag, EmptyBag) -partitionBagWith pred (UnitBag val) - = case pred val of - Left a -> (UnitBag a, EmptyBag) - Right b -> (EmptyBag, UnitBag b) -partitionBagWith pred (TwoBags b1 b2) - = (sat1 `unionBags` sat2, fail1 `unionBags` fail2) - where (sat1, fail1) = partitionBagWith pred b1 - (sat2, fail2) = partitionBagWith pred b2 -partitionBagWith pred (ListBag vs) = (listToBag sats, listToBag fails) - where (sats, fails) = partitionWith pred vs - -foldBag :: (r -> r -> r) -- Replace TwoBags with this; should be associative - -> (a -> r) -- Replace UnitBag with this - -> r -- Replace EmptyBag with this - -> Bag a - -> r - -{- Standard definition -foldBag t u e EmptyBag = e -foldBag t u e (UnitBag x) = u x -foldBag t u e (TwoBags b1 b2) = (foldBag t u e b1) `t` (foldBag t u e b2) -foldBag t u e (ListBag xs) = foldr (t.u) e xs --} - --- More tail-recursive definition, exploiting associativity of "t" -foldBag _ _ e EmptyBag = e -foldBag t u e (UnitBag x) = u x `t` e -foldBag t u e (TwoBags b1 b2) = foldBag t u (foldBag t u e b2) b1 -foldBag t u e (ListBag xs) = foldr (t.u) e xs - -foldrBag :: (a -> r -> r) -> r - -> Bag a - -> r - -foldrBag _ z EmptyBag = z -foldrBag k z (UnitBag x) = k x z -foldrBag k z (TwoBags b1 b2) = foldrBag k (foldrBag k z b2) b1 -foldrBag k z (ListBag xs) = foldr k z xs - -foldlBag :: (r -> a -> r) -> r - -> Bag a - -> r - -foldlBag _ z EmptyBag = z -foldlBag k z (UnitBag x) = k z x -foldlBag k z (TwoBags b1 b2) = foldlBag k (foldlBag k z b1) b2 -foldlBag k z (ListBag xs) = foldl k z xs - -foldrBagM :: (Monad m) => (a -> b -> m b) -> b -> Bag a -> m b -foldrBagM _ z EmptyBag = return z -foldrBagM k z (UnitBag x) = k x z -foldrBagM k z (TwoBags b1 b2) = do { z' <- foldrBagM k z b2; foldrBagM k z' b1 } -foldrBagM k z (ListBag xs) = foldrM k z xs - -foldlBagM :: (Monad m) => (b -> a -> m b) -> b -> Bag a -> m b -foldlBagM _ z EmptyBag = return z -foldlBagM k z (UnitBag x) = k z x -foldlBagM k z (TwoBags b1 b2) = do { z' <- foldlBagM k z b1; foldlBagM k z' b2 } -foldlBagM k z (ListBag xs) = foldlM k z xs - -mapBag :: (a -> b) -> Bag a -> Bag b -mapBag _ EmptyBag = EmptyBag -mapBag f (UnitBag x) = UnitBag (f x) -mapBag f (TwoBags b1 b2) = TwoBags (mapBag f b1) (mapBag f b2) -mapBag f (ListBag xs) = ListBag (map f xs) - -mapBagM :: Monad m => (a -> m b) -> Bag a -> m (Bag b) -mapBagM _ EmptyBag = return EmptyBag -mapBagM f (UnitBag x) = do r <- f x - return (UnitBag r) -mapBagM f (TwoBags b1 b2) = do r1 <- mapBagM f b1 - r2 <- mapBagM f b2 - return (TwoBags r1 r2) -mapBagM f (ListBag xs) = do rs <- mapM f xs - return (ListBag rs) - -mapBagM_ :: Monad m => (a -> m b) -> Bag a -> m () -mapBagM_ _ EmptyBag = return () -mapBagM_ f (UnitBag x) = f x >> return () -mapBagM_ f (TwoBags b1 b2) = mapBagM_ f b1 >> mapBagM_ f b2 -mapBagM_ f (ListBag xs) = mapM_ f xs - -flatMapBagM :: Monad m => (a -> m (Bag b)) -> Bag a -> m (Bag b) -flatMapBagM _ EmptyBag = return EmptyBag -flatMapBagM f (UnitBag x) = f x -flatMapBagM f (TwoBags b1 b2) = do r1 <- flatMapBagM f b1 - r2 <- flatMapBagM f b2 - return (r1 `unionBags` r2) -flatMapBagM f (ListBag xs) = foldrM k EmptyBag xs - where - k x b2 = do { b1 <- f x; return (b1 `unionBags` b2) } - -flatMapBagPairM :: Monad m => (a -> m (Bag b, Bag c)) -> Bag a -> m (Bag b, Bag c) -flatMapBagPairM _ EmptyBag = return (EmptyBag, EmptyBag) -flatMapBagPairM f (UnitBag x) = f x -flatMapBagPairM f (TwoBags b1 b2) = do (r1,s1) <- flatMapBagPairM f b1 - (r2,s2) <- flatMapBagPairM f b2 - return (r1 `unionBags` r2, s1 `unionBags` s2) -flatMapBagPairM f (ListBag xs) = foldrM k (EmptyBag, EmptyBag) xs - where - k x (r2,s2) = do { (r1,s1) <- f x - ; return (r1 `unionBags` r2, s1 `unionBags` s2) } - -mapAndUnzipBagM :: Monad m => (a -> m (b,c)) -> Bag a -> m (Bag b, Bag c) -mapAndUnzipBagM _ EmptyBag = return (EmptyBag, EmptyBag) -mapAndUnzipBagM f (UnitBag x) = do (r,s) <- f x - return (UnitBag r, UnitBag s) -mapAndUnzipBagM f (TwoBags b1 b2) = do (r1,s1) <- mapAndUnzipBagM f b1 - (r2,s2) <- mapAndUnzipBagM f b2 - return (TwoBags r1 r2, TwoBags s1 s2) -mapAndUnzipBagM f (ListBag xs) = do ts <- mapM f xs - let (rs,ss) = unzip ts - return (ListBag rs, ListBag ss) - -mapAccumBagLM :: Monad m - => (acc -> x -> m (acc, y)) -- ^ combining funcction - -> acc -- ^ initial state - -> Bag x -- ^ inputs - -> m (acc, Bag y) -- ^ final state, outputs -mapAccumBagLM _ s EmptyBag = return (s, EmptyBag) -mapAccumBagLM f s (UnitBag x) = do { (s1, x1) <- f s x; return (s1, UnitBag x1) } -mapAccumBagLM f s (TwoBags b1 b2) = do { (s1, b1') <- mapAccumBagLM f s b1 - ; (s2, b2') <- mapAccumBagLM f s1 b2 - ; return (s2, TwoBags b1' b2') } -mapAccumBagLM f s (ListBag xs) = do { (s', xs') <- mapAccumLM f s xs - ; return (s', ListBag xs') } - -listToBag :: [a] -> Bag a -listToBag [] = EmptyBag -listToBag vs = ListBag vs - -bagToList :: Bag a -> [a] -bagToList b = foldrBag (:) [] b -\end{code} - -\begin{code} -instance (Outputable a) => Outputable (Bag a) where - ppr bag = braces (pprWithCommas ppr (bagToList bag)) - -instance Data a => Data (Bag a) where - gfoldl k z b = z listToBag `k` bagToList b -- traverse abstract type abstractly - toConstr _ = abstractConstr $ "Bag("++show (typeOf (undefined::a))++")" - gunfold _ _ = error "gunfold" - dataTypeOf _ = mkNoRepType "Bag" - dataCast1 x = gcast1 x -\end{code} - diff --git a/compiler/utils/Digraph.hs b/compiler/utils/Digraph.hs new file mode 100644 index 0000000000..8f5df0ce05 --- /dev/null +++ b/compiler/utils/Digraph.hs @@ -0,0 +1,652 @@ +-- (c) The University of Glasgow 2006 + +{-# LANGUAGE CPP, ScopedTypeVariables #-} +module Digraph( + Graph, graphFromVerticesAndAdjacency, graphFromEdgedVertices, + + SCC(..), Node, flattenSCC, flattenSCCs, + stronglyConnCompG, + topologicalSortG, dfsTopSortG, + verticesG, edgesG, hasVertexG, + reachableG, reachablesG, transposeG, + outdegreeG, indegreeG, + vertexGroupsG, emptyG, + componentsG, + + findCycle, + + -- For backwards compatability with the simpler version of Digraph + stronglyConnCompFromEdgedVertices, stronglyConnCompFromEdgedVerticesR, + + -- No friendly interface yet, not used but exported to avoid warnings + tabulate, preArr, + components, undirected, + back, cross, forward, + path, + bcc, do_label, bicomps, collect + ) where + +#include "HsVersions.h" + +------------------------------------------------------------------------------ +-- A version of the graph algorithms described in: +-- +-- ``Lazy Depth-First Search and Linear IntGraph Algorithms in Haskell'' +-- by David King and John Launchbury +-- +-- Also included is some additional code for printing tree structures ... +------------------------------------------------------------------------------ + + +import Util ( minWith, count ) +import Outputable +import Maybes ( expectJust ) +import MonadUtils ( allM ) + +-- Extensions +import Control.Monad ( filterM, liftM, liftM2 ) +import Control.Monad.ST + +-- std interfaces +import Data.Maybe +import Data.Array +import Data.List hiding (transpose) +import Data.Ord +import Data.Array.ST +import qualified Data.Map as Map +import qualified Data.Set as Set + +{- +************************************************************************ +* * +* Graphs and Graph Construction +* * +************************************************************************ + +Note [Nodes, keys, vertices] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * A 'node' is a big blob of client-stuff + + * Each 'node' has a unique (client) 'key', but the latter + is in Ord and has fast comparison + + * Digraph then maps each 'key' to a Vertex (Int) which is + arranged densely in 0.n +-} + +data Graph node = Graph { + gr_int_graph :: IntGraph, + gr_vertex_to_node :: Vertex -> node, + gr_node_to_vertex :: node -> Maybe Vertex + } + +data Edge node = Edge node node + +type Node key payload = (payload, key, [key]) + -- The payload is user data, just carried around in this module + -- The keys are ordered + -- The [key] are the dependencies of the node; + -- it's ok to have extra keys in the dependencies that + -- are not the key of any Node in the graph + +emptyGraph :: Graph a +emptyGraph = Graph (array (1, 0) []) (error "emptyGraph") (const Nothing) + +graphFromVerticesAndAdjacency + :: Ord key + => [(node, key)] + -> [(key, key)] -- First component is source vertex key, + -- second is target vertex key (thing depended on) + -- Unlike the other interface I insist they correspond to + -- actual vertices because the alternative hides bugs. I can't + -- do the same thing for the other one for backcompat reasons. + -> Graph (node, key) +graphFromVerticesAndAdjacency [] _ = emptyGraph +graphFromVerticesAndAdjacency vertices edges = Graph graph vertex_node (key_vertex . key_extractor) + where key_extractor = snd + (bounds, vertex_node, key_vertex, _) = reduceNodesIntoVertices vertices key_extractor + key_vertex_pair (a, b) = (expectJust "graphFromVerticesAndAdjacency" $ key_vertex a, + expectJust "graphFromVerticesAndAdjacency" $ key_vertex b) + reduced_edges = map key_vertex_pair edges + graph = buildG bounds reduced_edges + +graphFromEdgedVertices + :: Ord key + => [Node key payload] -- The graph; its ok for the + -- out-list to contain keys which arent + -- a vertex key, they are ignored + -> Graph (Node key payload) +graphFromEdgedVertices [] = emptyGraph +graphFromEdgedVertices edged_vertices = Graph graph vertex_fn (key_vertex . key_extractor) + where key_extractor (_, k, _) = k + (bounds, vertex_fn, key_vertex, numbered_nodes) = reduceNodesIntoVertices edged_vertices key_extractor + graph = array bounds [(v, mapMaybe key_vertex ks) | (v, (_, _, ks)) <- numbered_nodes] + +reduceNodesIntoVertices + :: Ord key + => [node] + -> (node -> key) + -> (Bounds, Vertex -> node, key -> Maybe Vertex, [(Int, node)]) +reduceNodesIntoVertices nodes key_extractor = (bounds, (!) vertex_map, key_vertex, numbered_nodes) + where + max_v = length nodes - 1 + bounds = (0, max_v) :: (Vertex, Vertex) + + sorted_nodes = sortBy (comparing key_extractor) nodes + numbered_nodes = zipWith (,) [0..] sorted_nodes + + key_map = array bounds [(i, key_extractor node) | (i, node) <- numbered_nodes] + vertex_map = array bounds numbered_nodes + + --key_vertex :: key -> Maybe Vertex + -- returns Nothing for non-interesting vertices + key_vertex k = find 0 max_v + where + find a b | a > b = Nothing + | otherwise = let mid = (a + b) `div` 2 + in case compare k (key_map ! mid) of + LT -> find a (mid - 1) + EQ -> Just mid + GT -> find (mid + 1) b + +{- +************************************************************************ +* * +* SCC +* * +************************************************************************ +-} + +type WorkItem key payload + = (Node key payload, -- Tip of the path + [payload]) -- Rest of the path; + -- [a,b,c] means c depends on b, b depends on a + +-- | Find a reasonably short cycle a->b->c->a, in a strongly +-- connected component. The input nodes are presumed to be +-- a SCC, so you can start anywhere. +findCycle :: forall payload key. Ord key + => [Node key payload] -- The nodes. The dependencies can + -- contain extra keys, which are ignored + -> Maybe [payload] -- A cycle, starting with node + -- so each depends on the next +findCycle graph + = go Set.empty (new_work root_deps []) [] + where + env :: Map.Map key (Node key payload) + env = Map.fromList [ (key, node) | node@(_, key, _) <- graph ] + + -- Find the node with fewest dependencies among the SCC modules + -- This is just a heuristic to find some plausible root module + root :: Node key payload + root = fst (minWith snd [ (node, count (`Map.member` env) deps) + | node@(_,_,deps) <- graph ]) + (root_payload,root_key,root_deps) = root + + + -- 'go' implements Dijkstra's algorithm, more or less + go :: Set.Set key -- Visited + -> [WorkItem key payload] -- Work list, items length n + -> [WorkItem key payload] -- Work list, items length n+1 + -> Maybe [payload] -- Returned cycle + -- Invariant: in a call (go visited ps qs), + -- visited = union (map tail (ps ++ qs)) + + go _ [] [] = Nothing -- No cycles + go visited [] qs = go visited qs [] + go visited (((payload,key,deps), path) : ps) qs + | key == root_key = Just (root_payload : reverse path) + | key `Set.member` visited = go visited ps qs + | key `Map.notMember` env = go visited ps qs + | otherwise = go (Set.insert key visited) + ps (new_qs ++ qs) + where + new_qs = new_work deps (payload : path) + + new_work :: [key] -> [payload] -> [WorkItem key payload] + new_work deps path = [ (n, path) | Just n <- map (`Map.lookup` env) deps ] + +{- +************************************************************************ +* * +* SCC +* * +************************************************************************ +-} + +data SCC vertex = AcyclicSCC vertex + | CyclicSCC [vertex] + +instance Functor SCC where + fmap f (AcyclicSCC v) = AcyclicSCC (f v) + fmap f (CyclicSCC vs) = CyclicSCC (fmap f vs) + +flattenSCCs :: [SCC a] -> [a] +flattenSCCs = concatMap flattenSCC + +flattenSCC :: SCC a -> [a] +flattenSCC (AcyclicSCC v) = [v] +flattenSCC (CyclicSCC vs) = vs + +instance Outputable a => Outputable (SCC a) where + ppr (AcyclicSCC v) = text "NONREC" $$ (nest 3 (ppr v)) + ppr (CyclicSCC vs) = text "REC" $$ (nest 3 (vcat (map ppr vs))) + +{- +************************************************************************ +* * +* Strongly Connected Component wrappers for Graph +* * +************************************************************************ + +Note: the components are returned topologically sorted: later components +depend on earlier ones, but not vice versa i.e. later components only have +edges going from them to earlier ones. +-} + +stronglyConnCompG :: Graph node -> [SCC node] +stronglyConnCompG graph = decodeSccs graph forest + where forest = {-# SCC "Digraph.scc" #-} scc (gr_int_graph graph) + +decodeSccs :: Graph node -> Forest Vertex -> [SCC node] +decodeSccs Graph { gr_int_graph = graph, gr_vertex_to_node = vertex_fn } forest + = map decode forest + where + decode (Node v []) | mentions_itself v = CyclicSCC [vertex_fn v] + | otherwise = AcyclicSCC (vertex_fn v) + decode other = CyclicSCC (dec other []) + where dec (Node v ts) vs = vertex_fn v : foldr dec vs ts + mentions_itself v = v `elem` (graph ! v) + + +-- The following two versions are provided for backwards compatability: +stronglyConnCompFromEdgedVertices + :: Ord key + => [Node key payload] + -> [SCC payload] +stronglyConnCompFromEdgedVertices + = map (fmap get_node) . stronglyConnCompFromEdgedVerticesR + where get_node (n, _, _) = n + +-- The "R" interface is used when you expect to apply SCC to +-- (some of) the result of SCC, so you dont want to lose the dependency info +stronglyConnCompFromEdgedVerticesR + :: Ord key + => [Node key payload] + -> [SCC (Node key payload)] +stronglyConnCompFromEdgedVerticesR = stronglyConnCompG . graphFromEdgedVertices + +{- +************************************************************************ +* * +* Misc wrappers for Graph +* * +************************************************************************ +-} + +topologicalSortG :: Graph node -> [node] +topologicalSortG graph = map (gr_vertex_to_node graph) result + where result = {-# SCC "Digraph.topSort" #-} topSort (gr_int_graph graph) + +dfsTopSortG :: Graph node -> [[node]] +dfsTopSortG graph = + map (map (gr_vertex_to_node graph) . flattenTree) $ dfs g (topSort g) + where + g = gr_int_graph graph + +reachableG :: Graph node -> node -> [node] +reachableG graph from = map (gr_vertex_to_node graph) result + where from_vertex = expectJust "reachableG" (gr_node_to_vertex graph from) + result = {-# SCC "Digraph.reachable" #-} reachable (gr_int_graph graph) [from_vertex] + +reachablesG :: Graph node -> [node] -> [node] +reachablesG graph froms = map (gr_vertex_to_node graph) result + where result = {-# SCC "Digraph.reachable" #-} + reachable (gr_int_graph graph) vs + vs = [ v | Just v <- map (gr_node_to_vertex graph) froms ] + +hasVertexG :: Graph node -> node -> Bool +hasVertexG graph node = isJust $ gr_node_to_vertex graph node + +verticesG :: Graph node -> [node] +verticesG graph = map (gr_vertex_to_node graph) $ vertices (gr_int_graph graph) + +edgesG :: Graph node -> [Edge node] +edgesG graph = map (\(v1, v2) -> Edge (v2n v1) (v2n v2)) $ edges (gr_int_graph graph) + where v2n = gr_vertex_to_node graph + +transposeG :: Graph node -> Graph node +transposeG graph = Graph (transpose (gr_int_graph graph)) (gr_vertex_to_node graph) (gr_node_to_vertex graph) + +outdegreeG :: Graph node -> node -> Maybe Int +outdegreeG = degreeG outdegree + +indegreeG :: Graph node -> node -> Maybe Int +indegreeG = degreeG indegree + +degreeG :: (IntGraph -> Table Int) -> Graph node -> node -> Maybe Int +degreeG degree graph node = let table = degree (gr_int_graph graph) + in fmap ((!) table) $ gr_node_to_vertex graph node + +vertexGroupsG :: Graph node -> [[node]] +vertexGroupsG graph = map (map (gr_vertex_to_node graph)) result + where result = vertexGroups (gr_int_graph graph) + +emptyG :: Graph node -> Bool +emptyG g = graphEmpty (gr_int_graph g) + +componentsG :: Graph node -> [[node]] +componentsG graph = map (map (gr_vertex_to_node graph) . flattenTree) $ components (gr_int_graph graph) + +{- +************************************************************************ +* * +* Showing Graphs +* * +************************************************************************ +-} + +instance Outputable node => Outputable (Graph node) where + ppr graph = vcat [ + hang (text "Vertices:") 2 (vcat (map ppr $ verticesG graph)), + hang (text "Edges:") 2 (vcat (map ppr $ edgesG graph)) + ] + +instance Outputable node => Outputable (Edge node) where + ppr (Edge from to) = ppr from <+> text "->" <+> ppr to + +{- +************************************************************************ +* * +* IntGraphs +* * +************************************************************************ +-} + +type Vertex = Int +type Table a = Array Vertex a +type IntGraph = Table [Vertex] +type Bounds = (Vertex, Vertex) +type IntEdge = (Vertex, Vertex) + +vertices :: IntGraph -> [Vertex] +vertices = indices + +edges :: IntGraph -> [IntEdge] +edges g = [ (v, w) | v <- vertices g, w <- g!v ] + +mapT :: (Vertex -> a -> b) -> Table a -> Table b +mapT f t = array (bounds t) [ (v, f v (t ! v)) | v <- indices t ] + +buildG :: Bounds -> [IntEdge] -> IntGraph +buildG bounds edges = accumArray (flip (:)) [] bounds edges + +transpose :: IntGraph -> IntGraph +transpose g = buildG (bounds g) (reverseE g) + +reverseE :: IntGraph -> [IntEdge] +reverseE g = [ (w, v) | (v, w) <- edges g ] + +outdegree :: IntGraph -> Table Int +outdegree = mapT numEdges + where numEdges _ ws = length ws + +indegree :: IntGraph -> Table Int +indegree = outdegree . transpose + +graphEmpty :: IntGraph -> Bool +graphEmpty g = lo > hi + where (lo, hi) = bounds g + +{- +************************************************************************ +* * +* Trees and forests +* * +************************************************************************ +-} + +data Tree a = Node a (Forest a) +type Forest a = [Tree a] + +mapTree :: (a -> b) -> (Tree a -> Tree b) +mapTree f (Node x ts) = Node (f x) (map (mapTree f) ts) + +flattenTree :: Tree a -> [a] +flattenTree (Node x ts) = x : concatMap flattenTree ts + +instance Show a => Show (Tree a) where + showsPrec _ t s = showTree t ++ s + +showTree :: Show a => Tree a -> String +showTree = drawTree . mapTree show + +drawTree :: Tree String -> String +drawTree = unlines . draw + +draw :: Tree String -> [String] +draw (Node x ts) = grp this (space (length this)) (stLoop ts) + where this = s1 ++ x ++ " " + + space n = replicate n ' ' + + stLoop [] = [""] + stLoop [t] = grp s2 " " (draw t) + stLoop (t:ts) = grp s3 s4 (draw t) ++ [s4] ++ rsLoop ts + + rsLoop [] = [] + rsLoop [t] = grp s5 " " (draw t) + rsLoop (t:ts) = grp s6 s4 (draw t) ++ [s4] ++ rsLoop ts + + grp fst rst = zipWith (++) (fst:repeat rst) + + [s1,s2,s3,s4,s5,s6] = ["- ", "--", "-+", " |", " `", " +"] + +{- +************************************************************************ +* * +* Depth first search +* * +************************************************************************ +-} + +type Set s = STArray s Vertex Bool + +mkEmpty :: Bounds -> ST s (Set s) +mkEmpty bnds = newArray bnds False + +contains :: Set s -> Vertex -> ST s Bool +contains m v = readArray m v + +include :: Set s -> Vertex -> ST s () +include m v = writeArray m v True + +dff :: IntGraph -> Forest Vertex +dff g = dfs g (vertices g) + +dfs :: IntGraph -> [Vertex] -> Forest Vertex +dfs g vs = prune (bounds g) (map (generate g) vs) + +generate :: IntGraph -> Vertex -> Tree Vertex +generate g v = Node v (map (generate g) (g!v)) + +prune :: Bounds -> Forest Vertex -> Forest Vertex +prune bnds ts = runST (mkEmpty bnds >>= \m -> + chop m ts) + +chop :: Set s -> Forest Vertex -> ST s (Forest Vertex) +chop _ [] = return [] +chop m (Node v ts : us) + = contains m v >>= \visited -> + if visited then + chop m us + else + include m v >>= \_ -> + chop m ts >>= \as -> + chop m us >>= \bs -> + return (Node v as : bs) + +{- +************************************************************************ +* * +* Algorithms +* * +************************************************************************ + +------------------------------------------------------------ +-- Algorithm 1: depth first search numbering +------------------------------------------------------------ +-} + +preorder :: Tree a -> [a] +preorder (Node a ts) = a : preorderF ts + +preorderF :: Forest a -> [a] +preorderF ts = concat (map preorder ts) + +tabulate :: Bounds -> [Vertex] -> Table Int +tabulate bnds vs = array bnds (zip vs [1..]) + +preArr :: Bounds -> Forest Vertex -> Table Int +preArr bnds = tabulate bnds . preorderF + +{- +------------------------------------------------------------ +-- Algorithm 2: topological sorting +------------------------------------------------------------ +-} + +postorder :: Tree a -> [a] -> [a] +postorder (Node a ts) = postorderF ts . (a :) + +postorderF :: Forest a -> [a] -> [a] +postorderF ts = foldr (.) id $ map postorder ts + +postOrd :: IntGraph -> [Vertex] +postOrd g = postorderF (dff g) [] + +topSort :: IntGraph -> [Vertex] +topSort = reverse . postOrd + +{- +------------------------------------------------------------ +-- Algorithm 3: connected components +------------------------------------------------------------ +-} + +components :: IntGraph -> Forest Vertex +components = dff . undirected + +undirected :: IntGraph -> IntGraph +undirected g = buildG (bounds g) (edges g ++ reverseE g) + +{- +------------------------------------------------------------ +-- Algorithm 4: strongly connected components +------------------------------------------------------------ +-} + +scc :: IntGraph -> Forest Vertex +scc g = dfs g (reverse (postOrd (transpose g))) + +{- +------------------------------------------------------------ +-- Algorithm 5: Classifying edges +------------------------------------------------------------ +-} + +back :: IntGraph -> Table Int -> IntGraph +back g post = mapT select g + where select v ws = [ w | w <- ws, post!v < post!w ] + +cross :: IntGraph -> Table Int -> Table Int -> IntGraph +cross g pre post = mapT select g + where select v ws = [ w | w <- ws, post!v > post!w, pre!v > pre!w ] + +forward :: IntGraph -> IntGraph -> Table Int -> IntGraph +forward g tree pre = mapT select g + where select v ws = [ w | w <- ws, pre!v < pre!w ] \\ tree!v + +{- +------------------------------------------------------------ +-- Algorithm 6: Finding reachable vertices +------------------------------------------------------------ +-} + +reachable :: IntGraph -> [Vertex] -> [Vertex] +reachable g vs = preorderF (dfs g vs) + +path :: IntGraph -> Vertex -> Vertex -> Bool +path g v w = w `elem` (reachable g [v]) + +{- +------------------------------------------------------------ +-- Algorithm 7: Biconnected components +------------------------------------------------------------ +-} + +bcc :: IntGraph -> Forest [Vertex] +bcc g = (concat . map bicomps . map (do_label g dnum)) forest + where forest = dff g + dnum = preArr (bounds g) forest + +do_label :: IntGraph -> Table Int -> Tree Vertex -> Tree (Vertex,Int,Int) +do_label g dnum (Node v ts) = Node (v,dnum!v,lv) us + where us = map (do_label g dnum) ts + lv = minimum ([dnum!v] ++ [dnum!w | w <- g!v] + ++ [lu | Node (_,_,lu) _ <- us]) + +bicomps :: Tree (Vertex, Int, Int) -> Forest [Vertex] +bicomps (Node (v,_,_) ts) + = [ Node (v:vs) us | (_,Node vs us) <- map collect ts] + +collect :: Tree (Vertex, Int, Int) -> (Int, Tree [Vertex]) +collect (Node (v,dv,lv) ts) = (lv, Node (v:vs) cs) + where collected = map collect ts + vs = concat [ ws | (lw, Node ws _) <- collected, lw [[Vertex]] +vertexGroups g = runST (mkEmpty (bounds g) >>= \provided -> vertexGroupsS provided g next_vertices) + where next_vertices = noOutEdges g + +noOutEdges :: IntGraph -> [Vertex] +noOutEdges g = [ v | v <- vertices g, null (g!v)] + +vertexGroupsS :: Set s -> IntGraph -> [Vertex] -> ST s [[Vertex]] +vertexGroupsS provided g to_provide + = if null to_provide + then do { + all_provided <- allM (provided `contains`) (vertices g) + ; if all_provided + then return [] + else error "vertexGroup: cyclic graph" + } + else do { + mapM_ (include provided) to_provide + ; to_provide' <- filterM (vertexReady provided g) (vertices g) + ; rest <- vertexGroupsS provided g to_provide' + ; return $ to_provide : rest + } + +vertexReady :: Set s -> IntGraph -> Vertex -> ST s Bool +vertexReady provided g v = liftM2 (&&) (liftM not $ provided `contains` v) (allM (provided `contains`) (g!v)) diff --git a/compiler/utils/Digraph.lhs b/compiler/utils/Digraph.lhs deleted file mode 100644 index 35782bac6e..0000000000 --- a/compiler/utils/Digraph.lhs +++ /dev/null @@ -1,668 +0,0 @@ -% -% (c) The University of Glasgow 2006 -% - -\begin{code} -{-# LANGUAGE CPP, ScopedTypeVariables #-} -module Digraph( - Graph, graphFromVerticesAndAdjacency, graphFromEdgedVertices, - - SCC(..), Node, flattenSCC, flattenSCCs, - stronglyConnCompG, - topologicalSortG, dfsTopSortG, - verticesG, edgesG, hasVertexG, - reachableG, reachablesG, transposeG, - outdegreeG, indegreeG, - vertexGroupsG, emptyG, - componentsG, - - findCycle, - - -- For backwards compatability with the simpler version of Digraph - stronglyConnCompFromEdgedVertices, stronglyConnCompFromEdgedVerticesR, - - -- No friendly interface yet, not used but exported to avoid warnings - tabulate, preArr, - components, undirected, - back, cross, forward, - path, - bcc, do_label, bicomps, collect - ) where - -#include "HsVersions.h" - ------------------------------------------------------------------------------- --- A version of the graph algorithms described in: --- --- ``Lazy Depth-First Search and Linear IntGraph Algorithms in Haskell'' --- by David King and John Launchbury --- --- Also included is some additional code for printing tree structures ... ------------------------------------------------------------------------------- - - -import Util ( minWith, count ) -import Outputable -import Maybes ( expectJust ) -import MonadUtils ( allM ) - --- Extensions -import Control.Monad ( filterM, liftM, liftM2 ) -import Control.Monad.ST - --- std interfaces -import Data.Maybe -import Data.Array -import Data.List hiding (transpose) -import Data.Ord -import Data.Array.ST -import qualified Data.Map as Map -import qualified Data.Set as Set -\end{code} - -%************************************************************************ -%* * -%* Graphs and Graph Construction -%* * -%************************************************************************ - -Note [Nodes, keys, vertices] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * A 'node' is a big blob of client-stuff - - * Each 'node' has a unique (client) 'key', but the latter - is in Ord and has fast comparison - - * Digraph then maps each 'key' to a Vertex (Int) which is - arranged densely in 0.n - -\begin{code} -data Graph node = Graph { - gr_int_graph :: IntGraph, - gr_vertex_to_node :: Vertex -> node, - gr_node_to_vertex :: node -> Maybe Vertex - } - -data Edge node = Edge node node - -type Node key payload = (payload, key, [key]) - -- The payload is user data, just carried around in this module - -- The keys are ordered - -- The [key] are the dependencies of the node; - -- it's ok to have extra keys in the dependencies that - -- are not the key of any Node in the graph - -emptyGraph :: Graph a -emptyGraph = Graph (array (1, 0) []) (error "emptyGraph") (const Nothing) - -graphFromVerticesAndAdjacency - :: Ord key - => [(node, key)] - -> [(key, key)] -- First component is source vertex key, - -- second is target vertex key (thing depended on) - -- Unlike the other interface I insist they correspond to - -- actual vertices because the alternative hides bugs. I can't - -- do the same thing for the other one for backcompat reasons. - -> Graph (node, key) -graphFromVerticesAndAdjacency [] _ = emptyGraph -graphFromVerticesAndAdjacency vertices edges = Graph graph vertex_node (key_vertex . key_extractor) - where key_extractor = snd - (bounds, vertex_node, key_vertex, _) = reduceNodesIntoVertices vertices key_extractor - key_vertex_pair (a, b) = (expectJust "graphFromVerticesAndAdjacency" $ key_vertex a, - expectJust "graphFromVerticesAndAdjacency" $ key_vertex b) - reduced_edges = map key_vertex_pair edges - graph = buildG bounds reduced_edges - -graphFromEdgedVertices - :: Ord key - => [Node key payload] -- The graph; its ok for the - -- out-list to contain keys which arent - -- a vertex key, they are ignored - -> Graph (Node key payload) -graphFromEdgedVertices [] = emptyGraph -graphFromEdgedVertices edged_vertices = Graph graph vertex_fn (key_vertex . key_extractor) - where key_extractor (_, k, _) = k - (bounds, vertex_fn, key_vertex, numbered_nodes) = reduceNodesIntoVertices edged_vertices key_extractor - graph = array bounds [(v, mapMaybe key_vertex ks) | (v, (_, _, ks)) <- numbered_nodes] - -reduceNodesIntoVertices - :: Ord key - => [node] - -> (node -> key) - -> (Bounds, Vertex -> node, key -> Maybe Vertex, [(Int, node)]) -reduceNodesIntoVertices nodes key_extractor = (bounds, (!) vertex_map, key_vertex, numbered_nodes) - where - max_v = length nodes - 1 - bounds = (0, max_v) :: (Vertex, Vertex) - - sorted_nodes = sortBy (comparing key_extractor) nodes - numbered_nodes = zipWith (,) [0..] sorted_nodes - - key_map = array bounds [(i, key_extractor node) | (i, node) <- numbered_nodes] - vertex_map = array bounds numbered_nodes - - --key_vertex :: key -> Maybe Vertex - -- returns Nothing for non-interesting vertices - key_vertex k = find 0 max_v - where - find a b | a > b = Nothing - | otherwise = let mid = (a + b) `div` 2 - in case compare k (key_map ! mid) of - LT -> find a (mid - 1) - EQ -> Just mid - GT -> find (mid + 1) b -\end{code} - -%************************************************************************ -%* * -%* SCC -%* * -%************************************************************************ - -\begin{code} -type WorkItem key payload - = (Node key payload, -- Tip of the path - [payload]) -- Rest of the path; - -- [a,b,c] means c depends on b, b depends on a - --- | Find a reasonably short cycle a->b->c->a, in a strongly --- connected component. The input nodes are presumed to be --- a SCC, so you can start anywhere. -findCycle :: forall payload key. Ord key - => [Node key payload] -- The nodes. The dependencies can - -- contain extra keys, which are ignored - -> Maybe [payload] -- A cycle, starting with node - -- so each depends on the next -findCycle graph - = go Set.empty (new_work root_deps []) [] - where - env :: Map.Map key (Node key payload) - env = Map.fromList [ (key, node) | node@(_, key, _) <- graph ] - - -- Find the node with fewest dependencies among the SCC modules - -- This is just a heuristic to find some plausible root module - root :: Node key payload - root = fst (minWith snd [ (node, count (`Map.member` env) deps) - | node@(_,_,deps) <- graph ]) - (root_payload,root_key,root_deps) = root - - - -- 'go' implements Dijkstra's algorithm, more or less - go :: Set.Set key -- Visited - -> [WorkItem key payload] -- Work list, items length n - -> [WorkItem key payload] -- Work list, items length n+1 - -> Maybe [payload] -- Returned cycle - -- Invariant: in a call (go visited ps qs), - -- visited = union (map tail (ps ++ qs)) - - go _ [] [] = Nothing -- No cycles - go visited [] qs = go visited qs [] - go visited (((payload,key,deps), path) : ps) qs - | key == root_key = Just (root_payload : reverse path) - | key `Set.member` visited = go visited ps qs - | key `Map.notMember` env = go visited ps qs - | otherwise = go (Set.insert key visited) - ps (new_qs ++ qs) - where - new_qs = new_work deps (payload : path) - - new_work :: [key] -> [payload] -> [WorkItem key payload] - new_work deps path = [ (n, path) | Just n <- map (`Map.lookup` env) deps ] -\end{code} - -%************************************************************************ -%* * -%* SCC -%* * -%************************************************************************ - -\begin{code} -data SCC vertex = AcyclicSCC vertex - | CyclicSCC [vertex] - -instance Functor SCC where - fmap f (AcyclicSCC v) = AcyclicSCC (f v) - fmap f (CyclicSCC vs) = CyclicSCC (fmap f vs) - -flattenSCCs :: [SCC a] -> [a] -flattenSCCs = concatMap flattenSCC - -flattenSCC :: SCC a -> [a] -flattenSCC (AcyclicSCC v) = [v] -flattenSCC (CyclicSCC vs) = vs - -instance Outputable a => Outputable (SCC a) where - ppr (AcyclicSCC v) = text "NONREC" $$ (nest 3 (ppr v)) - ppr (CyclicSCC vs) = text "REC" $$ (nest 3 (vcat (map ppr vs))) -\end{code} - -%************************************************************************ -%* * -%* Strongly Connected Component wrappers for Graph -%* * -%************************************************************************ - -Note: the components are returned topologically sorted: later components -depend on earlier ones, but not vice versa i.e. later components only have -edges going from them to earlier ones. - -\begin{code} -stronglyConnCompG :: Graph node -> [SCC node] -stronglyConnCompG graph = decodeSccs graph forest - where forest = {-# SCC "Digraph.scc" #-} scc (gr_int_graph graph) - -decodeSccs :: Graph node -> Forest Vertex -> [SCC node] -decodeSccs Graph { gr_int_graph = graph, gr_vertex_to_node = vertex_fn } forest - = map decode forest - where - decode (Node v []) | mentions_itself v = CyclicSCC [vertex_fn v] - | otherwise = AcyclicSCC (vertex_fn v) - decode other = CyclicSCC (dec other []) - where dec (Node v ts) vs = vertex_fn v : foldr dec vs ts - mentions_itself v = v `elem` (graph ! v) - - --- The following two versions are provided for backwards compatability: -stronglyConnCompFromEdgedVertices - :: Ord key - => [Node key payload] - -> [SCC payload] -stronglyConnCompFromEdgedVertices - = map (fmap get_node) . stronglyConnCompFromEdgedVerticesR - where get_node (n, _, _) = n - --- The "R" interface is used when you expect to apply SCC to --- (some of) the result of SCC, so you dont want to lose the dependency info -stronglyConnCompFromEdgedVerticesR - :: Ord key - => [Node key payload] - -> [SCC (Node key payload)] -stronglyConnCompFromEdgedVerticesR = stronglyConnCompG . graphFromEdgedVertices -\end{code} - -%************************************************************************ -%* * -%* Misc wrappers for Graph -%* * -%************************************************************************ - -\begin{code} -topologicalSortG :: Graph node -> [node] -topologicalSortG graph = map (gr_vertex_to_node graph) result - where result = {-# SCC "Digraph.topSort" #-} topSort (gr_int_graph graph) - -dfsTopSortG :: Graph node -> [[node]] -dfsTopSortG graph = - map (map (gr_vertex_to_node graph) . flattenTree) $ dfs g (topSort g) - where - g = gr_int_graph graph - -reachableG :: Graph node -> node -> [node] -reachableG graph from = map (gr_vertex_to_node graph) result - where from_vertex = expectJust "reachableG" (gr_node_to_vertex graph from) - result = {-# SCC "Digraph.reachable" #-} reachable (gr_int_graph graph) [from_vertex] - -reachablesG :: Graph node -> [node] -> [node] -reachablesG graph froms = map (gr_vertex_to_node graph) result - where result = {-# SCC "Digraph.reachable" #-} - reachable (gr_int_graph graph) vs - vs = [ v | Just v <- map (gr_node_to_vertex graph) froms ] - -hasVertexG :: Graph node -> node -> Bool -hasVertexG graph node = isJust $ gr_node_to_vertex graph node - -verticesG :: Graph node -> [node] -verticesG graph = map (gr_vertex_to_node graph) $ vertices (gr_int_graph graph) - -edgesG :: Graph node -> [Edge node] -edgesG graph = map (\(v1, v2) -> Edge (v2n v1) (v2n v2)) $ edges (gr_int_graph graph) - where v2n = gr_vertex_to_node graph - -transposeG :: Graph node -> Graph node -transposeG graph = Graph (transpose (gr_int_graph graph)) (gr_vertex_to_node graph) (gr_node_to_vertex graph) - -outdegreeG :: Graph node -> node -> Maybe Int -outdegreeG = degreeG outdegree - -indegreeG :: Graph node -> node -> Maybe Int -indegreeG = degreeG indegree - -degreeG :: (IntGraph -> Table Int) -> Graph node -> node -> Maybe Int -degreeG degree graph node = let table = degree (gr_int_graph graph) - in fmap ((!) table) $ gr_node_to_vertex graph node - -vertexGroupsG :: Graph node -> [[node]] -vertexGroupsG graph = map (map (gr_vertex_to_node graph)) result - where result = vertexGroups (gr_int_graph graph) - -emptyG :: Graph node -> Bool -emptyG g = graphEmpty (gr_int_graph g) - -componentsG :: Graph node -> [[node]] -componentsG graph = map (map (gr_vertex_to_node graph) . flattenTree) $ components (gr_int_graph graph) -\end{code} - -%************************************************************************ -%* * -%* Showing Graphs -%* * -%************************************************************************ - -\begin{code} - -instance Outputable node => Outputable (Graph node) where - ppr graph = vcat [ - hang (text "Vertices:") 2 (vcat (map ppr $ verticesG graph)), - hang (text "Edges:") 2 (vcat (map ppr $ edgesG graph)) - ] - -instance Outputable node => Outputable (Edge node) where - ppr (Edge from to) = ppr from <+> text "->" <+> ppr to - -\end{code} - -%************************************************************************ -%* * -%* IntGraphs -%* * -%************************************************************************ - -\begin{code} -type Vertex = Int -type Table a = Array Vertex a -type IntGraph = Table [Vertex] -type Bounds = (Vertex, Vertex) -type IntEdge = (Vertex, Vertex) -\end{code} - -\begin{code} -vertices :: IntGraph -> [Vertex] -vertices = indices - -edges :: IntGraph -> [IntEdge] -edges g = [ (v, w) | v <- vertices g, w <- g!v ] - -mapT :: (Vertex -> a -> b) -> Table a -> Table b -mapT f t = array (bounds t) [ (v, f v (t ! v)) | v <- indices t ] - -buildG :: Bounds -> [IntEdge] -> IntGraph -buildG bounds edges = accumArray (flip (:)) [] bounds edges - -transpose :: IntGraph -> IntGraph -transpose g = buildG (bounds g) (reverseE g) - -reverseE :: IntGraph -> [IntEdge] -reverseE g = [ (w, v) | (v, w) <- edges g ] - -outdegree :: IntGraph -> Table Int -outdegree = mapT numEdges - where numEdges _ ws = length ws - -indegree :: IntGraph -> Table Int -indegree = outdegree . transpose - -graphEmpty :: IntGraph -> Bool -graphEmpty g = lo > hi - where (lo, hi) = bounds g - -\end{code} - -%************************************************************************ -%* * -%* Trees and forests -%* * -%************************************************************************ - -\begin{code} -data Tree a = Node a (Forest a) -type Forest a = [Tree a] - -mapTree :: (a -> b) -> (Tree a -> Tree b) -mapTree f (Node x ts) = Node (f x) (map (mapTree f) ts) - -flattenTree :: Tree a -> [a] -flattenTree (Node x ts) = x : concatMap flattenTree ts -\end{code} - -\begin{code} -instance Show a => Show (Tree a) where - showsPrec _ t s = showTree t ++ s - -showTree :: Show a => Tree a -> String -showTree = drawTree . mapTree show - -drawTree :: Tree String -> String -drawTree = unlines . draw - -draw :: Tree String -> [String] -draw (Node x ts) = grp this (space (length this)) (stLoop ts) - where this = s1 ++ x ++ " " - - space n = replicate n ' ' - - stLoop [] = [""] - stLoop [t] = grp s2 " " (draw t) - stLoop (t:ts) = grp s3 s4 (draw t) ++ [s4] ++ rsLoop ts - - rsLoop [] = [] - rsLoop [t] = grp s5 " " (draw t) - rsLoop (t:ts) = grp s6 s4 (draw t) ++ [s4] ++ rsLoop ts - - grp fst rst = zipWith (++) (fst:repeat rst) - - [s1,s2,s3,s4,s5,s6] = ["- ", "--", "-+", " |", " `", " +"] -\end{code} - - -%************************************************************************ -%* * -%* Depth first search -%* * -%************************************************************************ - -\begin{code} -type Set s = STArray s Vertex Bool - -mkEmpty :: Bounds -> ST s (Set s) -mkEmpty bnds = newArray bnds False - -contains :: Set s -> Vertex -> ST s Bool -contains m v = readArray m v - -include :: Set s -> Vertex -> ST s () -include m v = writeArray m v True -\end{code} - -\begin{code} -dff :: IntGraph -> Forest Vertex -dff g = dfs g (vertices g) - -dfs :: IntGraph -> [Vertex] -> Forest Vertex -dfs g vs = prune (bounds g) (map (generate g) vs) - -generate :: IntGraph -> Vertex -> Tree Vertex -generate g v = Node v (map (generate g) (g!v)) - -prune :: Bounds -> Forest Vertex -> Forest Vertex -prune bnds ts = runST (mkEmpty bnds >>= \m -> - chop m ts) - -chop :: Set s -> Forest Vertex -> ST s (Forest Vertex) -chop _ [] = return [] -chop m (Node v ts : us) - = contains m v >>= \visited -> - if visited then - chop m us - else - include m v >>= \_ -> - chop m ts >>= \as -> - chop m us >>= \bs -> - return (Node v as : bs) -\end{code} - - -%************************************************************************ -%* * -%* Algorithms -%* * -%************************************************************************ - ------------------------------------------------------------- --- Algorithm 1: depth first search numbering ------------------------------------------------------------- - -\begin{code} -preorder :: Tree a -> [a] -preorder (Node a ts) = a : preorderF ts - -preorderF :: Forest a -> [a] -preorderF ts = concat (map preorder ts) - -tabulate :: Bounds -> [Vertex] -> Table Int -tabulate bnds vs = array bnds (zip vs [1..]) - -preArr :: Bounds -> Forest Vertex -> Table Int -preArr bnds = tabulate bnds . preorderF -\end{code} - ------------------------------------------------------------- --- Algorithm 2: topological sorting ------------------------------------------------------------- - -\begin{code} -postorder :: Tree a -> [a] -> [a] -postorder (Node a ts) = postorderF ts . (a :) - -postorderF :: Forest a -> [a] -> [a] -postorderF ts = foldr (.) id $ map postorder ts - -postOrd :: IntGraph -> [Vertex] -postOrd g = postorderF (dff g) [] - -topSort :: IntGraph -> [Vertex] -topSort = reverse . postOrd -\end{code} - ------------------------------------------------------------- --- Algorithm 3: connected components ------------------------------------------------------------- - -\begin{code} -components :: IntGraph -> Forest Vertex -components = dff . undirected - -undirected :: IntGraph -> IntGraph -undirected g = buildG (bounds g) (edges g ++ reverseE g) -\end{code} - ------------------------------------------------------------- --- Algorithm 4: strongly connected components ------------------------------------------------------------- - -\begin{code} -scc :: IntGraph -> Forest Vertex -scc g = dfs g (reverse (postOrd (transpose g))) -\end{code} - ------------------------------------------------------------- --- Algorithm 5: Classifying edges ------------------------------------------------------------- - -\begin{code} -back :: IntGraph -> Table Int -> IntGraph -back g post = mapT select g - where select v ws = [ w | w <- ws, post!v < post!w ] - -cross :: IntGraph -> Table Int -> Table Int -> IntGraph -cross g pre post = mapT select g - where select v ws = [ w | w <- ws, post!v > post!w, pre!v > pre!w ] - -forward :: IntGraph -> IntGraph -> Table Int -> IntGraph -forward g tree pre = mapT select g - where select v ws = [ w | w <- ws, pre!v < pre!w ] \\ tree!v -\end{code} - ------------------------------------------------------------- --- Algorithm 6: Finding reachable vertices ------------------------------------------------------------- - -\begin{code} -reachable :: IntGraph -> [Vertex] -> [Vertex] -reachable g vs = preorderF (dfs g vs) - -path :: IntGraph -> Vertex -> Vertex -> Bool -path g v w = w `elem` (reachable g [v]) -\end{code} - ------------------------------------------------------------- --- Algorithm 7: Biconnected components ------------------------------------------------------------- - -\begin{code} -bcc :: IntGraph -> Forest [Vertex] -bcc g = (concat . map bicomps . map (do_label g dnum)) forest - where forest = dff g - dnum = preArr (bounds g) forest - -do_label :: IntGraph -> Table Int -> Tree Vertex -> Tree (Vertex,Int,Int) -do_label g dnum (Node v ts) = Node (v,dnum!v,lv) us - where us = map (do_label g dnum) ts - lv = minimum ([dnum!v] ++ [dnum!w | w <- g!v] - ++ [lu | Node (_,_,lu) _ <- us]) - -bicomps :: Tree (Vertex, Int, Int) -> Forest [Vertex] -bicomps (Node (v,_,_) ts) - = [ Node (v:vs) us | (_,Node vs us) <- map collect ts] - -collect :: Tree (Vertex, Int, Int) -> (Int, Tree [Vertex]) -collect (Node (v,dv,lv) ts) = (lv, Node (v:vs) cs) - where collected = map collect ts - vs = concat [ ws | (lw, Node ws _) <- collected, lw [[Vertex]] -vertexGroups g = runST (mkEmpty (bounds g) >>= \provided -> vertexGroupsS provided g next_vertices) - where next_vertices = noOutEdges g - -noOutEdges :: IntGraph -> [Vertex] -noOutEdges g = [ v | v <- vertices g, null (g!v)] - -vertexGroupsS :: Set s -> IntGraph -> [Vertex] -> ST s [[Vertex]] -vertexGroupsS provided g to_provide - = if null to_provide - then do { - all_provided <- allM (provided `contains`) (vertices g) - ; if all_provided - then return [] - else error "vertexGroup: cyclic graph" - } - else do { - mapM_ (include provided) to_provide - ; to_provide' <- filterM (vertexReady provided g) (vertices g) - ; rest <- vertexGroupsS provided g to_provide' - ; return $ to_provide : rest - } - -vertexReady :: Set s -> IntGraph -> Vertex -> ST s Bool -vertexReady provided g v = liftM2 (&&) (liftM not $ provided `contains` v) (allM (provided `contains`) (g!v)) -\end{code} diff --git a/compiler/utils/FastBool.hs b/compiler/utils/FastBool.hs new file mode 100644 index 0000000000..9e88376f0a --- /dev/null +++ b/compiler/utils/FastBool.hs @@ -0,0 +1,70 @@ +{- +(c) The University of Glasgow, 2000-2006 + +\section{Fast booleans} +-} + +{-# LANGUAGE CPP, MagicHash #-} + +module FastBool ( + --fastBool could be called bBox; isFastTrue, bUnbox; but they're not + FastBool, fastBool, isFastTrue, fastOr, fastAnd + ) where + +-- Import the beggars +import GHC.Exts +#ifdef DEBUG +import Panic +#endif + +type FastBool = Int# +fastBool True = 1# +fastBool False = 0# + +#ifdef DEBUG +--then waste time deciding whether to panic. FastBool should normally +--be at least as fast as Bool, one would hope... + +isFastTrue 1# = True +isFastTrue 0# = False +isFastTrue _ = panic "FastTypes: isFastTrue" + +-- note that fastOr and fastAnd are strict in both arguments +-- since they are unboxed +fastOr 1# _ = 1# +fastOr 0# x = x +fastOr _ _ = panicFastInt "FastTypes: fastOr" + +fastAnd 0# _ = 0# +fastAnd 1# x = x +fastAnd _ _ = panicFastInt "FastTypes: fastAnd" + +--these "panicFastInt"s (formerly known as "panic#") rely on +--FastInt = FastBool ( = Int# presumably), +--haha, true enough when __GLASGOW_HASKELL__. Why can't we have functions +--that return _|_ be kind-polymorphic ( ?? to be precise ) ? + +#else /* ! DEBUG */ +--Isn't comparison to zero sometimes faster on CPUs than comparison to 1? +-- (since using Int# as _synonym_ fails to guarantee that it will +-- only take on values of 0 and 1) +isFastTrue 0# = False +isFastTrue _ = True + +-- note that fastOr and fastAnd are strict in both arguments +-- since they are unboxed +-- Also, to avoid incomplete-pattern warning +-- (and avoid wasting time with redundant runtime checks), +-- we don't pattern-match on both 0# and 1# . +fastOr 0# x = x +fastOr _ _ = 1# + +fastAnd 0# _ = 0# +fastAnd _ x = x + +#endif /* ! DEBUG */ + +fastBool :: Bool -> FastBool +isFastTrue :: FastBool -> Bool +fastOr :: FastBool -> FastBool -> FastBool +fastAnd :: FastBool -> FastBool -> FastBool diff --git a/compiler/utils/FastBool.lhs b/compiler/utils/FastBool.lhs deleted file mode 100644 index 9aa1a75b37..0000000000 --- a/compiler/utils/FastBool.lhs +++ /dev/null @@ -1,72 +0,0 @@ -% -% (c) The University of Glasgow, 2000-2006 -% -\section{Fast booleans} - -\begin{code} -{-# LANGUAGE CPP, MagicHash #-} - -module FastBool ( - --fastBool could be called bBox; isFastTrue, bUnbox; but they're not - FastBool, fastBool, isFastTrue, fastOr, fastAnd - ) where - --- Import the beggars -import GHC.Exts -#ifdef DEBUG -import Panic -#endif - -type FastBool = Int# -fastBool True = 1# -fastBool False = 0# - -#ifdef DEBUG ---then waste time deciding whether to panic. FastBool should normally ---be at least as fast as Bool, one would hope... - -isFastTrue 1# = True -isFastTrue 0# = False -isFastTrue _ = panic "FastTypes: isFastTrue" - --- note that fastOr and fastAnd are strict in both arguments --- since they are unboxed -fastOr 1# _ = 1# -fastOr 0# x = x -fastOr _ _ = panicFastInt "FastTypes: fastOr" - -fastAnd 0# _ = 0# -fastAnd 1# x = x -fastAnd _ _ = panicFastInt "FastTypes: fastAnd" - ---these "panicFastInt"s (formerly known as "panic#") rely on ---FastInt = FastBool ( = Int# presumably), ---haha, true enough when __GLASGOW_HASKELL__. Why can't we have functions ---that return _|_ be kind-polymorphic ( ?? to be precise ) ? - -#else /* ! DEBUG */ ---Isn't comparison to zero sometimes faster on CPUs than comparison to 1? --- (since using Int# as _synonym_ fails to guarantee that it will --- only take on values of 0 and 1) -isFastTrue 0# = False -isFastTrue _ = True - --- note that fastOr and fastAnd are strict in both arguments --- since they are unboxed --- Also, to avoid incomplete-pattern warning --- (and avoid wasting time with redundant runtime checks), --- we don't pattern-match on both 0# and 1# . -fastOr 0# x = x -fastOr _ _ = 1# - -fastAnd 0# _ = 0# -fastAnd _ x = x - -#endif /* ! DEBUG */ - -fastBool :: Bool -> FastBool -isFastTrue :: FastBool -> Bool -fastOr :: FastBool -> FastBool -> FastBool -fastAnd :: FastBool -> FastBool -> FastBool - -\end{code} diff --git a/compiler/utils/FastFunctions.hs b/compiler/utils/FastFunctions.hs new file mode 100644 index 0000000000..140e42949a --- /dev/null +++ b/compiler/utils/FastFunctions.hs @@ -0,0 +1,46 @@ +{- +Z% +(c) The University of Glasgow, 2000-2006 + +\section{Fast functions} +-} + +{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-} + +module FastFunctions ( + unsafeChr, inlinePerformIO, unsafeDupableInterleaveIO, + indexWord8OffFastPtr, + indexWord8OffFastPtrAsFastChar, indexWord8OffFastPtrAsFastInt, + global, Global + ) where + +#include "HsVersions.h" + +import FastTypes +import Data.IORef +import System.IO.Unsafe + +import GHC.Exts +import GHC.Word +import GHC.Base (unsafeChr) + +import GHC.IO (IO(..), unsafeDupableInterleaveIO) + +-- Just like unsafePerformIO, but we inline it. +{-# INLINE inlinePerformIO #-} +inlinePerformIO :: IO a -> a +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r + +indexWord8OffFastPtr p i = W8# (indexWord8OffAddr# p i) +indexWord8OffFastPtrAsFastChar p i = indexCharOffAddr# p i +indexWord8OffFastPtrAsFastInt p i = word2Int# (indexWord8OffAddr# p i) +-- or ord# (indexCharOffAddr# p i) + +--just so we can refer to the type clearly in a macro +type Global a = IORef a +global :: a -> Global a +global a = unsafePerformIO (newIORef a) + +indexWord8OffFastPtr :: FastPtr Word8 -> FastInt -> Word8 +indexWord8OffFastPtrAsFastChar :: FastPtr Word8 -> FastInt -> FastChar +indexWord8OffFastPtrAsFastInt :: FastPtr Word8 -> FastInt -> FastInt diff --git a/compiler/utils/FastFunctions.lhs b/compiler/utils/FastFunctions.lhs deleted file mode 100644 index 854bd13b11..0000000000 --- a/compiler/utils/FastFunctions.lhs +++ /dev/null @@ -1,47 +0,0 @@ -Z% -% (c) The University of Glasgow, 2000-2006 -% -\section{Fast functions} - -\begin{code} -{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-} - -module FastFunctions ( - unsafeChr, inlinePerformIO, unsafeDupableInterleaveIO, - indexWord8OffFastPtr, - indexWord8OffFastPtrAsFastChar, indexWord8OffFastPtrAsFastInt, - global, Global - ) where - -#include "HsVersions.h" - -import FastTypes -import Data.IORef -import System.IO.Unsafe - -import GHC.Exts -import GHC.Word -import GHC.Base (unsafeChr) - -import GHC.IO (IO(..), unsafeDupableInterleaveIO) - --- Just like unsafePerformIO, but we inline it. -{-# INLINE inlinePerformIO #-} -inlinePerformIO :: IO a -> a -inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r - -indexWord8OffFastPtr p i = W8# (indexWord8OffAddr# p i) -indexWord8OffFastPtrAsFastChar p i = indexCharOffAddr# p i -indexWord8OffFastPtrAsFastInt p i = word2Int# (indexWord8OffAddr# p i) --- or ord# (indexCharOffAddr# p i) - ---just so we can refer to the type clearly in a macro -type Global a = IORef a -global :: a -> Global a -global a = unsafePerformIO (newIORef a) - -indexWord8OffFastPtr :: FastPtr Word8 -> FastInt -> Word8 -indexWord8OffFastPtrAsFastChar :: FastPtr Word8 -> FastInt -> FastChar -indexWord8OffFastPtrAsFastInt :: FastPtr Word8 -> FastInt -> FastInt - -\end{code} diff --git a/compiler/utils/FastMutInt.hs b/compiler/utils/FastMutInt.hs new file mode 100644 index 0000000000..4cde1216ed --- /dev/null +++ b/compiler/utils/FastMutInt.hs @@ -0,0 +1,63 @@ +{-# LANGUAGE CPP, BangPatterns, MagicHash, UnboxedTuples #-} +{-# OPTIONS_GHC -O #-} +-- We always optimise this, otherwise performance of a non-optimised +-- compiler is severely affected +-- +-- (c) The University of Glasgow 2002-2006 +-- +-- Unboxed mutable Ints + +module FastMutInt( + FastMutInt, newFastMutInt, + readFastMutInt, writeFastMutInt, + + FastMutPtr, newFastMutPtr, + readFastMutPtr, writeFastMutPtr + ) where + + +#include "../includes/MachDeps.h" +#ifndef SIZEOF_HSINT +#define SIZEOF_HSINT INT_SIZE_IN_BYTES +#endif + +import GHC.Base +import GHC.Ptr + +newFastMutInt :: IO FastMutInt +readFastMutInt :: FastMutInt -> IO Int +writeFastMutInt :: FastMutInt -> Int -> IO () + +newFastMutPtr :: IO FastMutPtr +readFastMutPtr :: FastMutPtr -> IO (Ptr a) +writeFastMutPtr :: FastMutPtr -> Ptr a -> IO () + +data FastMutInt = FastMutInt (MutableByteArray# RealWorld) + +newFastMutInt = IO $ \s -> + case newByteArray# size s of { (# s, arr #) -> + (# s, FastMutInt arr #) } + where !(I# size) = SIZEOF_HSINT + +readFastMutInt (FastMutInt arr) = IO $ \s -> + case readIntArray# arr 0# s of { (# s, i #) -> + (# s, I# i #) } + +writeFastMutInt (FastMutInt arr) (I# i) = IO $ \s -> + case writeIntArray# arr 0# i s of { s -> + (# s, () #) } + +data FastMutPtr = FastMutPtr (MutableByteArray# RealWorld) + +newFastMutPtr = IO $ \s -> + case newByteArray# size s of { (# s, arr #) -> + (# s, FastMutPtr arr #) } + where !(I# size) = SIZEOF_VOID_P + +readFastMutPtr (FastMutPtr arr) = IO $ \s -> + case readAddrArray# arr 0# s of { (# s, i #) -> + (# s, Ptr i #) } + +writeFastMutPtr (FastMutPtr arr) (Ptr i) = IO $ \s -> + case writeAddrArray# arr 0# i s of { s -> + (# s, () #) } diff --git a/compiler/utils/FastMutInt.lhs b/compiler/utils/FastMutInt.lhs deleted file mode 100644 index e866aa5d38..0000000000 --- a/compiler/utils/FastMutInt.lhs +++ /dev/null @@ -1,68 +0,0 @@ -\begin{code} -{-# LANGUAGE CPP, BangPatterns, MagicHash, UnboxedTuples #-} -{-# OPTIONS_GHC -O #-} --- We always optimise this, otherwise performance of a non-optimised --- compiler is severely affected --- --- (c) The University of Glasgow 2002-2006 --- --- Unboxed mutable Ints - -module FastMutInt( - FastMutInt, newFastMutInt, - readFastMutInt, writeFastMutInt, - - FastMutPtr, newFastMutPtr, - readFastMutPtr, writeFastMutPtr - ) where - - -#include "../includes/MachDeps.h" -#ifndef SIZEOF_HSINT -#define SIZEOF_HSINT INT_SIZE_IN_BYTES -#endif - -import GHC.Base -import GHC.Ptr - -newFastMutInt :: IO FastMutInt -readFastMutInt :: FastMutInt -> IO Int -writeFastMutInt :: FastMutInt -> Int -> IO () - -newFastMutPtr :: IO FastMutPtr -readFastMutPtr :: FastMutPtr -> IO (Ptr a) -writeFastMutPtr :: FastMutPtr -> Ptr a -> IO () -\end{code} - -\begin{code} -data FastMutInt = FastMutInt (MutableByteArray# RealWorld) - -newFastMutInt = IO $ \s -> - case newByteArray# size s of { (# s, arr #) -> - (# s, FastMutInt arr #) } - where !(I# size) = SIZEOF_HSINT - -readFastMutInt (FastMutInt arr) = IO $ \s -> - case readIntArray# arr 0# s of { (# s, i #) -> - (# s, I# i #) } - -writeFastMutInt (FastMutInt arr) (I# i) = IO $ \s -> - case writeIntArray# arr 0# i s of { s -> - (# s, () #) } - -data FastMutPtr = FastMutPtr (MutableByteArray# RealWorld) - -newFastMutPtr = IO $ \s -> - case newByteArray# size s of { (# s, arr #) -> - (# s, FastMutPtr arr #) } - where !(I# size) = SIZEOF_VOID_P - -readFastMutPtr (FastMutPtr arr) = IO $ \s -> - case readAddrArray# arr 0# s of { (# s, i #) -> - (# s, Ptr i #) } - -writeFastMutPtr (FastMutPtr arr) (Ptr i) = IO $ \s -> - case writeAddrArray# arr 0# i s of { s -> - (# s, () #) } -\end{code} - diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs new file mode 100644 index 0000000000..9607d24823 --- /dev/null +++ b/compiler/utils/FastString.hs @@ -0,0 +1,640 @@ +-- (c) The University of Glasgow, 1997-2006 + +{-# LANGUAGE BangPatterns, CPP, DeriveDataTypeable, MagicHash, UnboxedTuples #-} +{-# OPTIONS_GHC -O -funbox-strict-fields #-} +-- We always optimise this, otherwise performance of a non-optimised +-- compiler is severely affected + +-- | +-- There are two principal string types used internally by GHC: +-- +-- ['FastString'] +-- +-- * A compact, hash-consed, representation of character strings. +-- * Comparison is O(1), and you can get a 'Unique.Unique' from them. +-- * Generated by 'fsLit'. +-- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'. +-- +-- ['LitString'] +-- +-- * Just a wrapper for the @Addr#@ of a C string (@Ptr CChar@). +-- * Practically no operations. +-- * Outputing them is fast. +-- * Generated by 'sLit'. +-- * Turn into 'Outputable.SDoc' with 'Outputable.ptext' +-- +-- Use 'LitString' unless you want the facilities of 'FastString'. +module FastString + ( + -- * ByteString + fastStringToByteString, + mkFastStringByteString, + fastZStringToByteString, + unsafeMkByteString, + hashByteString, + + -- * FastZString + FastZString, + hPutFZS, + zString, + lengthFZS, + + -- * FastStrings + FastString(..), -- not abstract, for now. + + -- ** Construction + fsLit, + mkFastString, + mkFastStringBytes, + mkFastStringByteList, + mkFastStringForeignPtr, + mkFastString#, + + -- ** Deconstruction + unpackFS, -- :: FastString -> String + bytesFS, -- :: FastString -> [Word8] + + -- ** Encoding + zEncodeFS, + + -- ** Operations + uniqueOfFS, + lengthFS, + nullFS, + appendFS, + headFS, + tailFS, + concatFS, + consFS, + nilFS, + + -- ** Outputing + hPutFS, + + -- ** Internal + getFastStringTable, + hasZEncoding, + + -- * LitStrings + LitString, + + -- ** Construction + sLit, + mkLitString#, + mkLitString, + + -- ** Deconstruction + unpackLitString, + + -- ** Operations + lengthLS + ) where + +#include "HsVersions.h" + +import Encoding +import FastTypes +import FastFunctions +import Panic +import Util + +import Control.Monad +import Data.ByteString (ByteString) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as BSC +import qualified Data.ByteString.Internal as BS +import qualified Data.ByteString.Unsafe as BS +import Foreign.C +import ExtsCompat46 +import System.IO +import System.IO.Unsafe ( unsafePerformIO ) +import Data.Data +import Data.IORef ( IORef, newIORef, readIORef, atomicModifyIORef ) +import Data.Maybe ( isJust ) +import Data.Char +import Data.List ( elemIndex ) + +import GHC.IO ( IO(..), unsafeDupablePerformIO ) + +#if __GLASGOW_HASKELL__ >= 709 +import Foreign +#else +import Foreign.Safe +#endif + +#if STAGE >= 2 +import GHC.Conc.Sync (sharedCAF) +#endif + +import GHC.Base ( unpackCString# ) + +#define hASH_TBL_SIZE 4091 +#define hASH_TBL_SIZE_UNBOXED 4091# + + +fastStringToByteString :: FastString -> ByteString +fastStringToByteString f = fs_bs f + +fastZStringToByteString :: FastZString -> ByteString +fastZStringToByteString (FastZString bs) = bs + +-- This will drop information if any character > '\xFF' +unsafeMkByteString :: String -> ByteString +unsafeMkByteString = BSC.pack + +hashByteString :: ByteString -> Int +hashByteString bs + = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> + return $ hashStr (castPtr ptr) len + +-- ----------------------------------------------------------------------------- + +newtype FastZString = FastZString ByteString + +hPutFZS :: Handle -> FastZString -> IO () +hPutFZS handle (FastZString bs) = BS.hPut handle bs + +zString :: FastZString -> String +zString (FastZString bs) = + inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen + +lengthFZS :: FastZString -> Int +lengthFZS (FastZString bs) = BS.length bs + +mkFastZStringString :: String -> FastZString +mkFastZStringString str = FastZString (BSC.pack str) + +-- ----------------------------------------------------------------------------- + +{-| +A 'FastString' is an array of bytes, hashed to support fast O(1) +comparison. It is also associated with a character encoding, so that +we know how to convert a 'FastString' to the local encoding, or to the +Z-encoding used by the compiler internally. + +'FastString's support a memoized conversion to the Z-encoding via zEncodeFS. +-} + +data FastString = FastString { + uniq :: {-# UNPACK #-} !Int, -- unique id + n_chars :: {-# UNPACK #-} !Int, -- number of chars + fs_bs :: {-# UNPACK #-} !ByteString, + fs_ref :: {-# UNPACK #-} !(IORef (Maybe FastZString)) + } deriving Typeable + +instance Eq FastString where + f1 == f2 = uniq f1 == uniq f2 + +instance Ord FastString where + -- Compares lexicographically, not by unique + a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False } + a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False } + a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True } + a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True } + max x y | x >= y = x + | otherwise = y + min x y | x <= y = x + | otherwise = y + compare a b = cmpFS a b + +instance Show FastString where + show fs = show (unpackFS fs) + +instance Data FastString where + -- don't traverse? + toConstr _ = abstractConstr "FastString" + gunfold _ _ = error "gunfold" + dataTypeOf _ = mkNoRepType "FastString" + +cmpFS :: FastString -> FastString -> Ordering +cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) = + if u1 == u2 then EQ else + compare (fastStringToByteString f1) (fastStringToByteString f2) + +foreign import ccall unsafe "ghc_memcmp" + memcmp :: Ptr a -> Ptr b -> Int -> IO Int + +-- ----------------------------------------------------------------------------- +-- Construction + +{- +Internally, the compiler will maintain a fast string symbol table, providing +sharing and fast comparison. Creation of new @FastString@s then covertly does a +lookup, re-using the @FastString@ if there was a hit. + +The design of the FastString hash table allows for lockless concurrent reads +and updates to multiple buckets with low synchronization overhead. + +See Note [Updating the FastString table] on how it's updated. +-} +data FastStringTable = + FastStringTable + {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets + (MutableArray# RealWorld (IORef [FastString])) -- the array of mutable buckets + +string_table :: FastStringTable +{-# NOINLINE string_table #-} +string_table = unsafePerformIO $ do + uid <- newIORef 603979776 -- ord '$' * 0x01000000 + tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED (panic "string_table") s1# of + (# s2#, arr# #) -> + (# s2#, FastStringTable uid arr# #) + forM_ [0.. hASH_TBL_SIZE-1] $ \i -> do + bucket <- newIORef [] + updTbl tab i bucket + + -- use the support wired into the RTS to share this CAF among all images of + -- libHSghc +#if STAGE < 2 + return tab +#else + sharedCAF tab getOrSetLibHSghcFastStringTable + +-- from the RTS; thus we cannot use this mechanism when STAGE<2; the previous +-- RTS might not have this symbol +foreign import ccall unsafe "getOrSetLibHSghcFastStringTable" + getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a) +#endif + +{- + +We include the FastString table in the `sharedCAF` mechanism because we'd like +FastStrings created by a Core plugin to have the same uniques as corresponding +strings created by the host compiler itself. For example, this allows plugins +to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or +even re-invoke the parser. + +In particular, the following little sanity test was failing in a plugin +prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not +be looked up /by the plugin/. + + let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT" + putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts + +`mkTcOcc` involves the lookup (or creation) of a FastString. Since the +plugin's FastString.string_table is empty, constructing the RdrName also +allocates new uniques for the FastStrings "GHC.NT.Type" and "NT". These +uniques are almost certainly unequal to the ones that the host compiler +originally assigned to those FastStrings. Thus the lookup fails since the +domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's +unique. + +The old `reinitializeGlobals` mechanism is enough to provide the plugin with +read-access to the table, but it insufficient in the general case where the +plugin may allocate FastStrings. This mutates the supply for the FastStrings' +unique, and that needs to be propagated back to the compiler's instance of the +global variable. Such propagation is beyond the `reinitializeGlobals` +mechanism. + +Maintaining synchronization of the two instances of this global is rather +difficult because of the uses of `unsafePerformIO` in this module. Not +synchronizing them risks breaking the rather major invariant that two +FastStrings with the same unique have the same string. Thus we use the +lower-level `sharedCAF` mechanism that relies on Globals.c. + +-} + +lookupTbl :: FastStringTable -> Int -> IO (IORef [FastString]) +lookupTbl (FastStringTable _ arr#) (I# i#) = + IO $ \ s# -> readArray# arr# i# s# + +updTbl :: FastStringTable -> Int -> IORef [FastString] -> IO () +updTbl (FastStringTable _uid arr#) (I# i#) ls = do + (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) }) + +mkFastString# :: Addr# -> FastString +mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr) + where ptr = Ptr a# + +{- Note [Updating the FastString table] + +The procedure goes like this: + +1. Read the relevant bucket and perform a look up of the string. +2. If it exists, return it. +3. Otherwise grab a unique ID, create a new FastString and atomically attempt + to update the relevant bucket with this FastString: + + * Double check that the string is not in the bucket. Another thread may have + inserted it while we were creating our string. + * Return the existing FastString if it exists. The one we preemptively + created will get GCed. + * Otherwise, insert and return the string we created. +-} + +{- Note [Double-checking the bucket] + +It is not necessary to check the entire bucket the second time. We only have to +check the strings that are new to the bucket since the last time we read it. +-} + +mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString +mkFastStringWith mk_fs !ptr !len = do + let hash = hashStr ptr len + bucket <- lookupTbl string_table hash + ls1 <- readIORef bucket + res <- bucket_match ls1 len ptr + case res of + Just v -> return v + Nothing -> do + n <- get_uid + new_fs <- mk_fs n + + atomicModifyIORef bucket $ \ls2 -> + -- Note [Double-checking the bucket] + let delta_ls = case ls1 of + [] -> ls2 + l:_ -> case l `elemIndex` ls2 of + Nothing -> panic "mkFastStringWith" + Just idx -> take idx ls2 + + -- NB: Might as well use inlinePerformIO, since the call to + -- bucket_match doesn't perform any IO that could be floated + -- out of this closure or erroneously duplicated. + in case inlinePerformIO (bucket_match delta_ls len ptr) of + Nothing -> (new_fs:ls2, new_fs) + Just fs -> (ls2,fs) + where + !(FastStringTable uid _arr) = string_table + + get_uid = atomicModifyIORef uid $ \n -> (n+1,n) + +mkFastStringBytes :: Ptr Word8 -> Int -> FastString +mkFastStringBytes !ptr !len = + -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is + -- idempotent. + unsafeDupablePerformIO $ + mkFastStringWith (copyNewFastString ptr len) ptr len + +-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference +-- between this and 'mkFastStringBytes' is that we don't have to copy +-- the bytes if the string is new to the table. +mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString +mkFastStringForeignPtr ptr !fp len + = mkFastStringWith (mkNewFastString fp ptr len) ptr len + +-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference +-- between this and 'mkFastStringBytes' is that we don't have to copy +-- the bytes if the string is new to the table. +mkFastStringByteString :: ByteString -> FastString +mkFastStringByteString bs = + inlinePerformIO $ + BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do + let ptr' = castPtr ptr + mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len + +-- | Creates a UTF-8 encoded 'FastString' from a 'String' +mkFastString :: String -> FastString +mkFastString str = + inlinePerformIO $ do + let l = utf8EncodedLength str + buf <- mallocForeignPtrBytes l + withForeignPtr buf $ \ptr -> do + utf8EncodeString ptr str + mkFastStringForeignPtr ptr buf l + +-- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@ +mkFastStringByteList :: [Word8] -> FastString +mkFastStringByteList str = + inlinePerformIO $ do + let l = Prelude.length str + buf <- mallocForeignPtrBytes l + withForeignPtr buf $ \ptr -> do + pokeArray (castPtr ptr) str + mkFastStringForeignPtr ptr buf l + +-- | Creates a Z-encoded 'FastString' from a 'String' +mkZFastString :: String -> FastZString +mkZFastString = mkFastZStringString + +bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString) +bucket_match [] _ _ = return Nothing +bucket_match (v@(FastString _ _ bs _):ls) len ptr + | len == BS.length bs = do + b <- BS.unsafeUseAsCString bs $ \buf -> + cmpStringPrefix ptr (castPtr buf) len + if b then return (Just v) + else bucket_match ls len ptr + | otherwise = + bucket_match ls len ptr + +mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int + -> IO FastString +mkNewFastString fp ptr len uid = do + ref <- newIORef Nothing + n_chars <- countUTF8Chars ptr len + return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref) + +mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int + -> IO FastString +mkNewFastStringByteString bs ptr len uid = do + ref <- newIORef Nothing + n_chars <- countUTF8Chars ptr len + return (FastString uid n_chars bs ref) + +copyNewFastString :: Ptr Word8 -> Int -> Int -> IO FastString +copyNewFastString ptr len uid = do + fp <- copyBytesToForeignPtr ptr len + ref <- newIORef Nothing + n_chars <- countUTF8Chars ptr len + return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref) + +copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8) +copyBytesToForeignPtr ptr len = do + fp <- mallocForeignPtrBytes len + withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len + return fp + +cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool +cmpStringPrefix ptr1 ptr2 len = + do r <- memcmp ptr1 ptr2 len + return (r == 0) + + +hashStr :: Ptr Word8 -> Int -> Int + -- use the Addr to produce a hash value between 0 & m (inclusive) +hashStr (Ptr a#) (I# len#) = loop 0# 0# + where + loop h n | n ExtsCompat46.==# len# = I# h + | otherwise = loop h2 (n ExtsCompat46.+# 1#) + where !c = ord# (indexCharOffAddr# a# n) + !h2 = (c ExtsCompat46.+# (h ExtsCompat46.*# 128#)) `remInt#` + hASH_TBL_SIZE# + +-- ----------------------------------------------------------------------------- +-- Operations + +-- | Returns the length of the 'FastString' in characters +lengthFS :: FastString -> Int +lengthFS f = n_chars f + +-- | Returns @True@ if this 'FastString' is not Z-encoded but already has +-- a Z-encoding cached (used in producing stats). +hasZEncoding :: FastString -> Bool +hasZEncoding (FastString _ _ _ ref) = + inlinePerformIO $ do + m <- readIORef ref + return (isJust m) + +-- | Returns @True@ if the 'FastString' is empty +nullFS :: FastString -> Bool +nullFS f = BS.null (fs_bs f) + +-- | Unpacks and decodes the FastString +unpackFS :: FastString -> String +unpackFS (FastString _ _ bs _) = + inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> + utf8DecodeString (castPtr ptr) len + +-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString' +bytesFS :: FastString -> [Word8] +bytesFS fs = BS.unpack $ fastStringToByteString fs + +-- | Returns a Z-encoded version of a 'FastString'. This might be the +-- original, if it was already Z-encoded. The first time this +-- function is applied to a particular 'FastString', the results are +-- memoized. +-- +zEncodeFS :: FastString -> FastZString +zEncodeFS fs@(FastString _ _ _ ref) = + inlinePerformIO $ do + m <- readIORef ref + case m of + Just zfs -> return zfs + Nothing -> do + atomicModifyIORef ref $ \m' -> case m' of + Nothing -> let zfs = mkZFastString (zEncodeString (unpackFS fs)) + in (Just zfs, zfs) + Just zfs -> (m', zfs) + +appendFS :: FastString -> FastString -> FastString +appendFS fs1 fs2 = mkFastStringByteString + $ BS.append (fastStringToByteString fs1) + (fastStringToByteString fs2) + +concatFS :: [FastString] -> FastString +concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better + +headFS :: FastString -> Char +headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString" +headFS (FastString _ _ bs _) = + inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> + return (fst (utf8DecodeChar (castPtr ptr))) + +tailFS :: FastString -> FastString +tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString" +tailFS (FastString _ _ bs _) = + inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> + do let (_, n) = utf8DecodeChar (castPtr ptr) + return $! mkFastStringByteString (BS.drop n bs) + +consFS :: Char -> FastString -> FastString +consFS c fs = mkFastString (c : unpackFS fs) + +uniqueOfFS :: FastString -> FastInt +uniqueOfFS (FastString u _ _ _) = iUnbox u + +nilFS :: FastString +nilFS = mkFastString "" + +-- ----------------------------------------------------------------------------- +-- Stats + +getFastStringTable :: IO [[FastString]] +getFastStringTable = do + buckets <- forM [0.. hASH_TBL_SIZE-1] $ \idx -> do + bucket <- lookupTbl string_table idx + readIORef bucket + return buckets + +-- ----------------------------------------------------------------------------- +-- Outputting 'FastString's + +-- |Outputs a 'FastString' with /no decoding at all/, that is, you +-- get the actual bytes in the 'FastString' written to the 'Handle'. +hPutFS :: Handle -> FastString -> IO () +hPutFS handle fs = BS.hPut handle $ fastStringToByteString fs + +-- ToDo: we'll probably want an hPutFSLocal, or something, to output +-- in the current locale's encoding (for error messages and suchlike). + +-- ----------------------------------------------------------------------------- +-- LitStrings, here for convenience only. + +-- hmm, not unboxed (or rather FastPtr), interesting +--a.k.a. Ptr CChar, Ptr Word8, Ptr (), hmph. We don't +--really care about C types in naming, where we can help it. +type LitString = Ptr Word8 +--Why do we recalculate length every time it's requested? +--If it's commonly needed, we should perhaps have +--data LitString = LitString {-#UNPACK#-}!(FastPtr Word8) {-#UNPACK#-}!FastInt + +mkLitString# :: Addr# -> LitString +mkLitString# a# = Ptr a# +--can/should we use FastTypes here? +--Is this likely to be memory-preserving if only used on constant strings? +--should we inline it? If lucky, that would make a CAF that wouldn't +--be computationally repeated... although admittedly we're not +--really intending to use mkLitString when __GLASGOW_HASKELL__... +--(I wonder, is unicode / multi-byte characters allowed in LitStrings +-- at all?) +{-# INLINE mkLitString #-} +mkLitString :: String -> LitString +mkLitString s = + unsafePerformIO (do + p <- mallocBytes (length s + 1) + let + loop :: Int -> String -> IO () + loop !n [] = pokeByteOff p n (0 :: Word8) + loop n (c:cs) = do + pokeByteOff p n (fromIntegral (ord c) :: Word8) + loop (1+n) cs + loop 0 s + return p + ) + +unpackLitString :: LitString -> String +unpackLitString p_ = case pUnbox p_ of + p -> unpack (_ILIT(0)) + where + unpack n = case indexWord8OffFastPtrAsFastChar p n of + ch -> if ch `eqFastChar` _CLIT('\0') + then [] else cBox ch : unpack (n +# _ILIT(1)) + +lengthLS :: LitString -> Int +lengthLS = ptrStrLength + +-- for now, use a simple String representation +--no, let's not do that right now - it's work in other places +#if 0 +type LitString = String + +mkLitString :: String -> LitString +mkLitString = id + +unpackLitString :: LitString -> String +unpackLitString = id + +lengthLS :: LitString -> Int +lengthLS = length + +#endif + +-- ----------------------------------------------------------------------------- +-- under the carpet + +foreign import ccall unsafe "ghc_strlen" + ptrStrLength :: Ptr Word8 -> Int + +{-# NOINLINE sLit #-} +sLit :: String -> LitString +sLit x = mkLitString x + +{-# NOINLINE fsLit #-} +fsLit :: String -> FastString +fsLit x = mkFastString x + +{-# RULES "slit" + forall x . sLit (unpackCString# x) = mkLitString# x #-} +{-# RULES "fslit" + forall x . fsLit (unpackCString# x) = mkFastString# x #-} diff --git a/compiler/utils/FastString.lhs b/compiler/utils/FastString.lhs deleted file mode 100644 index c1f7973e76..0000000000 --- a/compiler/utils/FastString.lhs +++ /dev/null @@ -1,643 +0,0 @@ -% -% (c) The University of Glasgow, 1997-2006 -% -\begin{code} -{-# LANGUAGE BangPatterns, CPP, DeriveDataTypeable, MagicHash, UnboxedTuples #-} -{-# OPTIONS_GHC -O -funbox-strict-fields #-} --- We always optimise this, otherwise performance of a non-optimised --- compiler is severely affected - --- | --- There are two principal string types used internally by GHC: --- --- ['FastString'] --- --- * A compact, hash-consed, representation of character strings. --- * Comparison is O(1), and you can get a 'Unique.Unique' from them. --- * Generated by 'fsLit'. --- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'. --- --- ['LitString'] --- --- * Just a wrapper for the @Addr#@ of a C string (@Ptr CChar@). --- * Practically no operations. --- * Outputing them is fast. --- * Generated by 'sLit'. --- * Turn into 'Outputable.SDoc' with 'Outputable.ptext' --- --- Use 'LitString' unless you want the facilities of 'FastString'. -module FastString - ( - -- * ByteString - fastStringToByteString, - mkFastStringByteString, - fastZStringToByteString, - unsafeMkByteString, - hashByteString, - - -- * FastZString - FastZString, - hPutFZS, - zString, - lengthFZS, - - -- * FastStrings - FastString(..), -- not abstract, for now. - - -- ** Construction - fsLit, - mkFastString, - mkFastStringBytes, - mkFastStringByteList, - mkFastStringForeignPtr, - mkFastString#, - - -- ** Deconstruction - unpackFS, -- :: FastString -> String - bytesFS, -- :: FastString -> [Word8] - - -- ** Encoding - zEncodeFS, - - -- ** Operations - uniqueOfFS, - lengthFS, - nullFS, - appendFS, - headFS, - tailFS, - concatFS, - consFS, - nilFS, - - -- ** Outputing - hPutFS, - - -- ** Internal - getFastStringTable, - hasZEncoding, - - -- * LitStrings - LitString, - - -- ** Construction - sLit, - mkLitString#, - mkLitString, - - -- ** Deconstruction - unpackLitString, - - -- ** Operations - lengthLS - ) where - -#include "HsVersions.h" - -import Encoding -import FastTypes -import FastFunctions -import Panic -import Util - -import Control.Monad -import Data.ByteString (ByteString) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Char8 as BSC -import qualified Data.ByteString.Internal as BS -import qualified Data.ByteString.Unsafe as BS -import Foreign.C -import ExtsCompat46 -import System.IO -import System.IO.Unsafe ( unsafePerformIO ) -import Data.Data -import Data.IORef ( IORef, newIORef, readIORef, atomicModifyIORef ) -import Data.Maybe ( isJust ) -import Data.Char -import Data.List ( elemIndex ) - -import GHC.IO ( IO(..), unsafeDupablePerformIO ) - -#if __GLASGOW_HASKELL__ >= 709 -import Foreign -#else -import Foreign.Safe -#endif - -#if STAGE >= 2 -import GHC.Conc.Sync (sharedCAF) -#endif - -import GHC.Base ( unpackCString# ) - -#define hASH_TBL_SIZE 4091 -#define hASH_TBL_SIZE_UNBOXED 4091# - - -fastStringToByteString :: FastString -> ByteString -fastStringToByteString f = fs_bs f - -fastZStringToByteString :: FastZString -> ByteString -fastZStringToByteString (FastZString bs) = bs - --- This will drop information if any character > '\xFF' -unsafeMkByteString :: String -> ByteString -unsafeMkByteString = BSC.pack - -hashByteString :: ByteString -> Int -hashByteString bs - = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> - return $ hashStr (castPtr ptr) len - --- ----------------------------------------------------------------------------- - -newtype FastZString = FastZString ByteString - -hPutFZS :: Handle -> FastZString -> IO () -hPutFZS handle (FastZString bs) = BS.hPut handle bs - -zString :: FastZString -> String -zString (FastZString bs) = - inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen - -lengthFZS :: FastZString -> Int -lengthFZS (FastZString bs) = BS.length bs - -mkFastZStringString :: String -> FastZString -mkFastZStringString str = FastZString (BSC.pack str) - --- ----------------------------------------------------------------------------- - -{-| -A 'FastString' is an array of bytes, hashed to support fast O(1) -comparison. It is also associated with a character encoding, so that -we know how to convert a 'FastString' to the local encoding, or to the -Z-encoding used by the compiler internally. - -'FastString's support a memoized conversion to the Z-encoding via zEncodeFS. --} - -data FastString = FastString { - uniq :: {-# UNPACK #-} !Int, -- unique id - n_chars :: {-# UNPACK #-} !Int, -- number of chars - fs_bs :: {-# UNPACK #-} !ByteString, - fs_ref :: {-# UNPACK #-} !(IORef (Maybe FastZString)) - } deriving Typeable - -instance Eq FastString where - f1 == f2 = uniq f1 == uniq f2 - -instance Ord FastString where - -- Compares lexicographically, not by unique - a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False } - a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False } - a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True } - a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True } - max x y | x >= y = x - | otherwise = y - min x y | x <= y = x - | otherwise = y - compare a b = cmpFS a b - -instance Show FastString where - show fs = show (unpackFS fs) - -instance Data FastString where - -- don't traverse? - toConstr _ = abstractConstr "FastString" - gunfold _ _ = error "gunfold" - dataTypeOf _ = mkNoRepType "FastString" - -cmpFS :: FastString -> FastString -> Ordering -cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) = - if u1 == u2 then EQ else - compare (fastStringToByteString f1) (fastStringToByteString f2) - -foreign import ccall unsafe "ghc_memcmp" - memcmp :: Ptr a -> Ptr b -> Int -> IO Int - --- ----------------------------------------------------------------------------- --- Construction - -{- -Internally, the compiler will maintain a fast string symbol table, providing -sharing and fast comparison. Creation of new @FastString@s then covertly does a -lookup, re-using the @FastString@ if there was a hit. - -The design of the FastString hash table allows for lockless concurrent reads -and updates to multiple buckets with low synchronization overhead. - -See Note [Updating the FastString table] on how it's updated. --} -data FastStringTable = - FastStringTable - {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets - (MutableArray# RealWorld (IORef [FastString])) -- the array of mutable buckets - -string_table :: FastStringTable -{-# NOINLINE string_table #-} -string_table = unsafePerformIO $ do - uid <- newIORef 603979776 -- ord '$' * 0x01000000 - tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED (panic "string_table") s1# of - (# s2#, arr# #) -> - (# s2#, FastStringTable uid arr# #) - forM_ [0.. hASH_TBL_SIZE-1] $ \i -> do - bucket <- newIORef [] - updTbl tab i bucket - - -- use the support wired into the RTS to share this CAF among all images of - -- libHSghc -#if STAGE < 2 - return tab -#else - sharedCAF tab getOrSetLibHSghcFastStringTable - --- from the RTS; thus we cannot use this mechanism when STAGE<2; the previous --- RTS might not have this symbol -foreign import ccall unsafe "getOrSetLibHSghcFastStringTable" - getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a) -#endif - -{- - -We include the FastString table in the `sharedCAF` mechanism because we'd like -FastStrings created by a Core plugin to have the same uniques as corresponding -strings created by the host compiler itself. For example, this allows plugins -to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or -even re-invoke the parser. - -In particular, the following little sanity test was failing in a plugin -prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not -be looked up /by the plugin/. - - let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT" - putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts - -`mkTcOcc` involves the lookup (or creation) of a FastString. Since the -plugin's FastString.string_table is empty, constructing the RdrName also -allocates new uniques for the FastStrings "GHC.NT.Type" and "NT". These -uniques are almost certainly unequal to the ones that the host compiler -originally assigned to those FastStrings. Thus the lookup fails since the -domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's -unique. - -The old `reinitializeGlobals` mechanism is enough to provide the plugin with -read-access to the table, but it insufficient in the general case where the -plugin may allocate FastStrings. This mutates the supply for the FastStrings' -unique, and that needs to be propagated back to the compiler's instance of the -global variable. Such propagation is beyond the `reinitializeGlobals` -mechanism. - -Maintaining synchronization of the two instances of this global is rather -difficult because of the uses of `unsafePerformIO` in this module. Not -synchronizing them risks breaking the rather major invariant that two -FastStrings with the same unique have the same string. Thus we use the -lower-level `sharedCAF` mechanism that relies on Globals.c. - --} - -lookupTbl :: FastStringTable -> Int -> IO (IORef [FastString]) -lookupTbl (FastStringTable _ arr#) (I# i#) = - IO $ \ s# -> readArray# arr# i# s# - -updTbl :: FastStringTable -> Int -> IORef [FastString] -> IO () -updTbl (FastStringTable _uid arr#) (I# i#) ls = do - (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) }) - -mkFastString# :: Addr# -> FastString -mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr) - where ptr = Ptr a# - -{- Note [Updating the FastString table] - -The procedure goes like this: - -1. Read the relevant bucket and perform a look up of the string. -2. If it exists, return it. -3. Otherwise grab a unique ID, create a new FastString and atomically attempt - to update the relevant bucket with this FastString: - - * Double check that the string is not in the bucket. Another thread may have - inserted it while we were creating our string. - * Return the existing FastString if it exists. The one we preemptively - created will get GCed. - * Otherwise, insert and return the string we created. --} - -{- Note [Double-checking the bucket] - -It is not necessary to check the entire bucket the second time. We only have to -check the strings that are new to the bucket since the last time we read it. --} - -mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString -mkFastStringWith mk_fs !ptr !len = do - let hash = hashStr ptr len - bucket <- lookupTbl string_table hash - ls1 <- readIORef bucket - res <- bucket_match ls1 len ptr - case res of - Just v -> return v - Nothing -> do - n <- get_uid - new_fs <- mk_fs n - - atomicModifyIORef bucket $ \ls2 -> - -- Note [Double-checking the bucket] - let delta_ls = case ls1 of - [] -> ls2 - l:_ -> case l `elemIndex` ls2 of - Nothing -> panic "mkFastStringWith" - Just idx -> take idx ls2 - - -- NB: Might as well use inlinePerformIO, since the call to - -- bucket_match doesn't perform any IO that could be floated - -- out of this closure or erroneously duplicated. - in case inlinePerformIO (bucket_match delta_ls len ptr) of - Nothing -> (new_fs:ls2, new_fs) - Just fs -> (ls2,fs) - where - !(FastStringTable uid _arr) = string_table - - get_uid = atomicModifyIORef uid $ \n -> (n+1,n) - -mkFastStringBytes :: Ptr Word8 -> Int -> FastString -mkFastStringBytes !ptr !len = - -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is - -- idempotent. - unsafeDupablePerformIO $ - mkFastStringWith (copyNewFastString ptr len) ptr len - --- | Create a 'FastString' from an existing 'ForeignPtr'; the difference --- between this and 'mkFastStringBytes' is that we don't have to copy --- the bytes if the string is new to the table. -mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString -mkFastStringForeignPtr ptr !fp len - = mkFastStringWith (mkNewFastString fp ptr len) ptr len - --- | Create a 'FastString' from an existing 'ForeignPtr'; the difference --- between this and 'mkFastStringBytes' is that we don't have to copy --- the bytes if the string is new to the table. -mkFastStringByteString :: ByteString -> FastString -mkFastStringByteString bs = - inlinePerformIO $ - BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do - let ptr' = castPtr ptr - mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len - --- | Creates a UTF-8 encoded 'FastString' from a 'String' -mkFastString :: String -> FastString -mkFastString str = - inlinePerformIO $ do - let l = utf8EncodedLength str - buf <- mallocForeignPtrBytes l - withForeignPtr buf $ \ptr -> do - utf8EncodeString ptr str - mkFastStringForeignPtr ptr buf l - --- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@ -mkFastStringByteList :: [Word8] -> FastString -mkFastStringByteList str = - inlinePerformIO $ do - let l = Prelude.length str - buf <- mallocForeignPtrBytes l - withForeignPtr buf $ \ptr -> do - pokeArray (castPtr ptr) str - mkFastStringForeignPtr ptr buf l - --- | Creates a Z-encoded 'FastString' from a 'String' -mkZFastString :: String -> FastZString -mkZFastString = mkFastZStringString - -bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString) -bucket_match [] _ _ = return Nothing -bucket_match (v@(FastString _ _ bs _):ls) len ptr - | len == BS.length bs = do - b <- BS.unsafeUseAsCString bs $ \buf -> - cmpStringPrefix ptr (castPtr buf) len - if b then return (Just v) - else bucket_match ls len ptr - | otherwise = - bucket_match ls len ptr - -mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int - -> IO FastString -mkNewFastString fp ptr len uid = do - ref <- newIORef Nothing - n_chars <- countUTF8Chars ptr len - return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref) - -mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int - -> IO FastString -mkNewFastStringByteString bs ptr len uid = do - ref <- newIORef Nothing - n_chars <- countUTF8Chars ptr len - return (FastString uid n_chars bs ref) - -copyNewFastString :: Ptr Word8 -> Int -> Int -> IO FastString -copyNewFastString ptr len uid = do - fp <- copyBytesToForeignPtr ptr len - ref <- newIORef Nothing - n_chars <- countUTF8Chars ptr len - return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref) - -copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8) -copyBytesToForeignPtr ptr len = do - fp <- mallocForeignPtrBytes len - withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len - return fp - -cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool -cmpStringPrefix ptr1 ptr2 len = - do r <- memcmp ptr1 ptr2 len - return (r == 0) - - -hashStr :: Ptr Word8 -> Int -> Int - -- use the Addr to produce a hash value between 0 & m (inclusive) -hashStr (Ptr a#) (I# len#) = loop 0# 0# - where - loop h n | n ExtsCompat46.==# len# = I# h - | otherwise = loop h2 (n ExtsCompat46.+# 1#) - where !c = ord# (indexCharOffAddr# a# n) - !h2 = (c ExtsCompat46.+# (h ExtsCompat46.*# 128#)) `remInt#` - hASH_TBL_SIZE# - --- ----------------------------------------------------------------------------- --- Operations - --- | Returns the length of the 'FastString' in characters -lengthFS :: FastString -> Int -lengthFS f = n_chars f - --- | Returns @True@ if this 'FastString' is not Z-encoded but already has --- a Z-encoding cached (used in producing stats). -hasZEncoding :: FastString -> Bool -hasZEncoding (FastString _ _ _ ref) = - inlinePerformIO $ do - m <- readIORef ref - return (isJust m) - --- | Returns @True@ if the 'FastString' is empty -nullFS :: FastString -> Bool -nullFS f = BS.null (fs_bs f) - --- | Unpacks and decodes the FastString -unpackFS :: FastString -> String -unpackFS (FastString _ _ bs _) = - inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> - utf8DecodeString (castPtr ptr) len - --- | Gives the UTF-8 encoded bytes corresponding to a 'FastString' -bytesFS :: FastString -> [Word8] -bytesFS fs = BS.unpack $ fastStringToByteString fs - --- | Returns a Z-encoded version of a 'FastString'. This might be the --- original, if it was already Z-encoded. The first time this --- function is applied to a particular 'FastString', the results are --- memoized. --- -zEncodeFS :: FastString -> FastZString -zEncodeFS fs@(FastString _ _ _ ref) = - inlinePerformIO $ do - m <- readIORef ref - case m of - Just zfs -> return zfs - Nothing -> do - atomicModifyIORef ref $ \m' -> case m' of - Nothing -> let zfs = mkZFastString (zEncodeString (unpackFS fs)) - in (Just zfs, zfs) - Just zfs -> (m', zfs) - -appendFS :: FastString -> FastString -> FastString -appendFS fs1 fs2 = mkFastStringByteString - $ BS.append (fastStringToByteString fs1) - (fastStringToByteString fs2) - -concatFS :: [FastString] -> FastString -concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better - -headFS :: FastString -> Char -headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString" -headFS (FastString _ _ bs _) = - inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> - return (fst (utf8DecodeChar (castPtr ptr))) - -tailFS :: FastString -> FastString -tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString" -tailFS (FastString _ _ bs _) = - inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> - do let (_, n) = utf8DecodeChar (castPtr ptr) - return $! mkFastStringByteString (BS.drop n bs) - -consFS :: Char -> FastString -> FastString -consFS c fs = mkFastString (c : unpackFS fs) - -uniqueOfFS :: FastString -> FastInt -uniqueOfFS (FastString u _ _ _) = iUnbox u - -nilFS :: FastString -nilFS = mkFastString "" - --- ----------------------------------------------------------------------------- --- Stats - -getFastStringTable :: IO [[FastString]] -getFastStringTable = do - buckets <- forM [0.. hASH_TBL_SIZE-1] $ \idx -> do - bucket <- lookupTbl string_table idx - readIORef bucket - return buckets - --- ----------------------------------------------------------------------------- --- Outputting 'FastString's - --- |Outputs a 'FastString' with /no decoding at all/, that is, you --- get the actual bytes in the 'FastString' written to the 'Handle'. -hPutFS :: Handle -> FastString -> IO () -hPutFS handle fs = BS.hPut handle $ fastStringToByteString fs - --- ToDo: we'll probably want an hPutFSLocal, or something, to output --- in the current locale's encoding (for error messages and suchlike). - --- ----------------------------------------------------------------------------- --- LitStrings, here for convenience only. - --- hmm, not unboxed (or rather FastPtr), interesting ---a.k.a. Ptr CChar, Ptr Word8, Ptr (), hmph. We don't ---really care about C types in naming, where we can help it. -type LitString = Ptr Word8 ---Why do we recalculate length every time it's requested? ---If it's commonly needed, we should perhaps have ---data LitString = LitString {-#UNPACK#-}!(FastPtr Word8) {-#UNPACK#-}!FastInt - -mkLitString# :: Addr# -> LitString -mkLitString# a# = Ptr a# ---can/should we use FastTypes here? ---Is this likely to be memory-preserving if only used on constant strings? ---should we inline it? If lucky, that would make a CAF that wouldn't ---be computationally repeated... although admittedly we're not ---really intending to use mkLitString when __GLASGOW_HASKELL__... ---(I wonder, is unicode / multi-byte characters allowed in LitStrings --- at all?) -{-# INLINE mkLitString #-} -mkLitString :: String -> LitString -mkLitString s = - unsafePerformIO (do - p <- mallocBytes (length s + 1) - let - loop :: Int -> String -> IO () - loop !n [] = pokeByteOff p n (0 :: Word8) - loop n (c:cs) = do - pokeByteOff p n (fromIntegral (ord c) :: Word8) - loop (1+n) cs - loop 0 s - return p - ) - -unpackLitString :: LitString -> String -unpackLitString p_ = case pUnbox p_ of - p -> unpack (_ILIT(0)) - where - unpack n = case indexWord8OffFastPtrAsFastChar p n of - ch -> if ch `eqFastChar` _CLIT('\0') - then [] else cBox ch : unpack (n +# _ILIT(1)) - -lengthLS :: LitString -> Int -lengthLS = ptrStrLength - --- for now, use a simple String representation ---no, let's not do that right now - it's work in other places -#if 0 -type LitString = String - -mkLitString :: String -> LitString -mkLitString = id - -unpackLitString :: LitString -> String -unpackLitString = id - -lengthLS :: LitString -> Int -lengthLS = length - -#endif - --- ----------------------------------------------------------------------------- --- under the carpet - -foreign import ccall unsafe "ghc_strlen" - ptrStrLength :: Ptr Word8 -> Int - -{-# NOINLINE sLit #-} -sLit :: String -> LitString -sLit x = mkLitString x - -{-# NOINLINE fsLit #-} -fsLit :: String -> FastString -fsLit x = mkFastString x - -{-# RULES "slit" - forall x . sLit (unpackCString# x) = mkLitString# x #-} -{-# RULES "fslit" - forall x . fsLit (unpackCString# x) = mkFastString# x #-} -\end{code} diff --git a/compiler/utils/FastTypes.hs b/compiler/utils/FastTypes.hs new file mode 100644 index 0000000000..a5c1aa9637 --- /dev/null +++ b/compiler/utils/FastTypes.hs @@ -0,0 +1,138 @@ +{- +(c) The University of Glasgow, 2000-2006 + +\section{Fast integers, etc... booleans moved to FastBool for using panic} +-} + +{-# LANGUAGE CPP, MagicHash #-} + +--Even if the optimizer could handle boxed arithmetic equally well, +--this helps automatically check the sources to make sure that +--it's only used in an appropriate pattern of efficiency. +--(it also makes `let`s and `case`s stricter...) + +-- | Fast integers, characters and pointer types for use in many parts of GHC +module FastTypes ( + -- * FastInt + FastInt, + + -- ** Getting in and out of FastInt + _ILIT, iBox, iUnbox, + + -- ** Arithmetic on FastInt + (+#), (-#), (*#), quotFastInt, negateFastInt, + --quotRemFastInt is difficult because unboxed values can't + --be tupled, but unboxed tuples aren't portable. Just use + -- nuisance boxed quotRem and rely on optimization. + (==#), (/=#), (<#), (<=#), (>=#), (>#), + minFastInt, maxFastInt, + --prefer to distinguish operations, not types, between + --signed and unsigned. + --left-shift is the same for 'signed' and 'unsigned' numbers + shiftLFastInt, + --right-shift isn't the same for negative numbers (ones with + --the highest-order bit '1'). If you don't care because the + --number you're shifting is always nonnegative, use the '_' version + --which should just be the fastest one. + shiftR_FastInt, + --"L' = logical or unsigned shift; 'A' = arithmetic or signed shift + shiftRLFastInt, shiftRAFastInt, + bitAndFastInt, bitOrFastInt, + --add more operations to this file as you need them + + -- * FastChar + FastChar, + + -- ** Getting in and out of FastChar + _CLIT, cBox, cUnbox, + + -- ** Operations on FastChar + fastOrd, fastChr, eqFastChar, + --note, fastChr is "unsafe"Chr: it doesn't check for + --character values above the range of Unicode + + -- * FastPtr + FastPtr, + + -- ** Getting in and out of FastPtr + pBox, pUnbox, + + -- ** Casting FastPtrs + castFastPtr + ) where + +#include "HsVersions.h" + +-- Import the beggars +import ExtsCompat46 + +type FastInt = Int# + +--in case it's a macro, don't lexically feed an argument! +--e.g. #define _ILIT(x) (x#) , #define _ILIT(x) (x :: FastInt) +_ILIT = \(I# x) -> x +--perhaps for accomodating caseless-leading-underscore treatment, +--something like _iLIT or iLIT would be better? + +iBox x = I# x +iUnbox (I# x) = x +quotFastInt = quotInt# +negateFastInt = negateInt# + +--I think uncheckedIShiftL# and uncheckedIShiftRL# are the same +--as uncheckedShiftL# and uncheckedShiftRL# ... +--should they be used? How new are they? +--They existed as far back as GHC 6.0 at least... +shiftLFastInt x y = uncheckedIShiftL# x y +shiftR_FastInt x y = uncheckedIShiftRL# x y +shiftRLFastInt x y = uncheckedIShiftRL# x y +shiftRAFastInt x y = uncheckedIShiftRA# x y +--{-# INLINE shiftLNonnegativeFastInt #-} +--{-# INLINE shiftRNonnegativeFastInt #-} +--shiftLNonnegativeFastInt n p = word2Int#((int2Word# n) `uncheckedShiftL#` p) +--shiftRNonnegativeFastInt n p = word2Int#((int2Word# n) `uncheckedShiftRL#` p) +bitAndFastInt x y = word2Int# (and# (int2Word# x) (int2Word# y)) +bitOrFastInt x y = word2Int# (or# (int2Word# x) (int2Word# y)) + +type FastChar = Char# +_CLIT = \(C# c) -> c +cBox c = C# c +cUnbox (C# c) = c +fastOrd c = ord# c +fastChr x = chr# x +eqFastChar a b = eqChar# a b + +--note that the type-parameter doesn't provide any safety +--when it's a synonym, but as long as we keep it compiling +--with and without __GLASGOW_HASKELL__ defined, it's fine. +type FastPtr a = Addr# +pBox p = Ptr p +pUnbox (Ptr p) = p +castFastPtr p = p + +minFastInt, maxFastInt :: FastInt -> FastInt -> FastInt +minFastInt x y = if x <# y then x else y +maxFastInt x y = if x <# y then y else x + +-- type-signatures will improve the non-ghc-specific versions +-- and keep things accurate (and ABLE to compile!) +_ILIT :: Int -> FastInt +iBox :: FastInt -> Int +iUnbox :: Int -> FastInt + +quotFastInt :: FastInt -> FastInt -> FastInt +negateFastInt :: FastInt -> FastInt +shiftLFastInt, shiftR_FastInt, shiftRAFastInt, shiftRLFastInt + :: FastInt -> FastInt -> FastInt +bitAndFastInt, bitOrFastInt :: FastInt -> FastInt -> FastInt + +_CLIT :: Char -> FastChar +cBox :: FastChar -> Char +cUnbox :: Char -> FastChar +fastOrd :: FastChar -> FastInt +fastChr :: FastInt -> FastChar +eqFastChar :: FastChar -> FastChar -> Bool + +pBox :: FastPtr a -> Ptr a +pUnbox :: Ptr a -> FastPtr a +castFastPtr :: FastPtr a -> FastPtr b diff --git a/compiler/utils/FastTypes.lhs b/compiler/utils/FastTypes.lhs deleted file mode 100644 index 6b1517c484..0000000000 --- a/compiler/utils/FastTypes.lhs +++ /dev/null @@ -1,140 +0,0 @@ -% -% (c) The University of Glasgow, 2000-2006 -% -\section{Fast integers, etc... booleans moved to FastBool for using panic} - -\begin{code} -{-# LANGUAGE CPP, MagicHash #-} - ---Even if the optimizer could handle boxed arithmetic equally well, ---this helps automatically check the sources to make sure that ---it's only used in an appropriate pattern of efficiency. ---(it also makes `let`s and `case`s stricter...) - --- | Fast integers, characters and pointer types for use in many parts of GHC -module FastTypes ( - -- * FastInt - FastInt, - - -- ** Getting in and out of FastInt - _ILIT, iBox, iUnbox, - - -- ** Arithmetic on FastInt - (+#), (-#), (*#), quotFastInt, negateFastInt, - --quotRemFastInt is difficult because unboxed values can't - --be tupled, but unboxed tuples aren't portable. Just use - -- nuisance boxed quotRem and rely on optimization. - (==#), (/=#), (<#), (<=#), (>=#), (>#), - minFastInt, maxFastInt, - --prefer to distinguish operations, not types, between - --signed and unsigned. - --left-shift is the same for 'signed' and 'unsigned' numbers - shiftLFastInt, - --right-shift isn't the same for negative numbers (ones with - --the highest-order bit '1'). If you don't care because the - --number you're shifting is always nonnegative, use the '_' version - --which should just be the fastest one. - shiftR_FastInt, - --"L' = logical or unsigned shift; 'A' = arithmetic or signed shift - shiftRLFastInt, shiftRAFastInt, - bitAndFastInt, bitOrFastInt, - --add more operations to this file as you need them - - -- * FastChar - FastChar, - - -- ** Getting in and out of FastChar - _CLIT, cBox, cUnbox, - - -- ** Operations on FastChar - fastOrd, fastChr, eqFastChar, - --note, fastChr is "unsafe"Chr: it doesn't check for - --character values above the range of Unicode - - -- * FastPtr - FastPtr, - - -- ** Getting in and out of FastPtr - pBox, pUnbox, - - -- ** Casting FastPtrs - castFastPtr - ) where - -#include "HsVersions.h" - --- Import the beggars -import ExtsCompat46 - -type FastInt = Int# - ---in case it's a macro, don't lexically feed an argument! ---e.g. #define _ILIT(x) (x#) , #define _ILIT(x) (x :: FastInt) -_ILIT = \(I# x) -> x ---perhaps for accomodating caseless-leading-underscore treatment, ---something like _iLIT or iLIT would be better? - -iBox x = I# x -iUnbox (I# x) = x -quotFastInt = quotInt# -negateFastInt = negateInt# - ---I think uncheckedIShiftL# and uncheckedIShiftRL# are the same ---as uncheckedShiftL# and uncheckedShiftRL# ... ---should they be used? How new are they? ---They existed as far back as GHC 6.0 at least... -shiftLFastInt x y = uncheckedIShiftL# x y -shiftR_FastInt x y = uncheckedIShiftRL# x y -shiftRLFastInt x y = uncheckedIShiftRL# x y -shiftRAFastInt x y = uncheckedIShiftRA# x y ---{-# INLINE shiftLNonnegativeFastInt #-} ---{-# INLINE shiftRNonnegativeFastInt #-} ---shiftLNonnegativeFastInt n p = word2Int#((int2Word# n) `uncheckedShiftL#` p) ---shiftRNonnegativeFastInt n p = word2Int#((int2Word# n) `uncheckedShiftRL#` p) -bitAndFastInt x y = word2Int# (and# (int2Word# x) (int2Word# y)) -bitOrFastInt x y = word2Int# (or# (int2Word# x) (int2Word# y)) - -type FastChar = Char# -_CLIT = \(C# c) -> c -cBox c = C# c -cUnbox (C# c) = c -fastOrd c = ord# c -fastChr x = chr# x -eqFastChar a b = eqChar# a b - ---note that the type-parameter doesn't provide any safety ---when it's a synonym, but as long as we keep it compiling ---with and without __GLASGOW_HASKELL__ defined, it's fine. -type FastPtr a = Addr# -pBox p = Ptr p -pUnbox (Ptr p) = p -castFastPtr p = p - -minFastInt, maxFastInt :: FastInt -> FastInt -> FastInt -minFastInt x y = if x <# y then x else y -maxFastInt x y = if x <# y then y else x - --- type-signatures will improve the non-ghc-specific versions --- and keep things accurate (and ABLE to compile!) -_ILIT :: Int -> FastInt -iBox :: FastInt -> Int -iUnbox :: Int -> FastInt - -quotFastInt :: FastInt -> FastInt -> FastInt -negateFastInt :: FastInt -> FastInt -shiftLFastInt, shiftR_FastInt, shiftRAFastInt, shiftRLFastInt - :: FastInt -> FastInt -> FastInt -bitAndFastInt, bitOrFastInt :: FastInt -> FastInt -> FastInt - -_CLIT :: Char -> FastChar -cBox :: FastChar -> Char -cUnbox :: Char -> FastChar -fastOrd :: FastChar -> FastInt -fastChr :: FastInt -> FastChar -eqFastChar :: FastChar -> FastChar -> Bool - -pBox :: FastPtr a -> Ptr a -pUnbox :: Ptr a -> FastPtr a -castFastPtr :: FastPtr a -> FastPtr b - -\end{code} diff --git a/compiler/utils/FiniteMap.hs b/compiler/utils/FiniteMap.hs new file mode 100644 index 0000000000..dccfca10a9 --- /dev/null +++ b/compiler/utils/FiniteMap.hs @@ -0,0 +1,29 @@ +-- Some extra functions to extend Data.Map + +module FiniteMap ( + insertList, + insertListWith, + deleteList, + foldRight, foldRightWithKey + ) where + +import Data.Map (Map) +import qualified Data.Map as Map + +insertList :: Ord key => [(key,elt)] -> Map key elt -> Map key elt +insertList xs m = foldl (\m (k, v) -> Map.insert k v m) m xs + +insertListWith :: Ord key + => (elt -> elt -> elt) + -> [(key,elt)] + -> Map key elt + -> Map key elt +insertListWith f xs m0 = foldl (\m (k, v) -> Map.insertWith f k v m) m0 xs + +deleteList :: Ord key => [key] -> Map key elt -> Map key elt +deleteList ks m = foldl (flip Map.delete) m ks + +foldRight :: (elt -> a -> a) -> a -> Map key elt -> a +foldRight = Map.fold +foldRightWithKey :: (key -> elt -> a -> a) -> a -> Map key elt -> a +foldRightWithKey = Map.foldrWithKey diff --git a/compiler/utils/FiniteMap.lhs b/compiler/utils/FiniteMap.lhs deleted file mode 100644 index b52f28c324..0000000000 --- a/compiler/utils/FiniteMap.lhs +++ /dev/null @@ -1,32 +0,0 @@ -Some extra functions to extend Data.Map - -\begin{code} -module FiniteMap ( - insertList, - insertListWith, - deleteList, - foldRight, foldRightWithKey - ) where - -import Data.Map (Map) -import qualified Data.Map as Map - -insertList :: Ord key => [(key,elt)] -> Map key elt -> Map key elt -insertList xs m = foldl (\m (k, v) -> Map.insert k v m) m xs - -insertListWith :: Ord key - => (elt -> elt -> elt) - -> [(key,elt)] - -> Map key elt - -> Map key elt -insertListWith f xs m0 = foldl (\m (k, v) -> Map.insertWith f k v m) m0 xs - -deleteList :: Ord key => [key] -> Map key elt -> Map key elt -deleteList ks m = foldl (flip Map.delete) m ks - -foldRight :: (elt -> a -> a) -> a -> Map key elt -> a -foldRight = Map.fold -foldRightWithKey :: (key -> elt -> a -> a) -> a -> Map key elt -> a -foldRightWithKey = Map.foldrWithKey -\end{code} - diff --git a/compiler/utils/ListSetOps.hs b/compiler/utils/ListSetOps.hs new file mode 100644 index 0000000000..54faa4f600 --- /dev/null +++ b/compiler/utils/ListSetOps.hs @@ -0,0 +1,187 @@ +{- +(c) The University of Glasgow 2006 +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 + +\section[ListSetOps]{Set-like operations on lists} +-} + +{-# LANGUAGE CPP #-} + +module ListSetOps ( + unionLists, minusList, insertList, + + -- Association lists + Assoc, assoc, assocMaybe, assocUsing, assocDefault, assocDefaultUsing, + + -- Duplicate handling + hasNoDups, runs, removeDups, findDupsEq, + equivClasses, equivClassesByUniq, + + -- Indexing + getNth + ) where + +#include "HsVersions.h" + +import Outputable +import Unique +import UniqFM +import Util + +import Data.List + +{- +--------- +#ifndef DEBUG +getNth :: [a] -> Int -> a +getNth xs n = xs !! n +#else +getNth :: Outputable a => [a] -> Int -> a +getNth xs n = ASSERT2( xs `lengthAtLeast` n, ppr n $$ ppr xs ) + xs !! n +#endif +---------- +-} + +getNth :: Outputable a => [a] -> Int -> a +getNth xs n = ASSERT2( xs `lengthExceeds` n, ppr n $$ ppr xs ) + xs !! n + +{- +************************************************************************ +* * + Treating lists as sets + Assumes the lists contain no duplicates, but are unordered +* * +************************************************************************ +-} + +insertList :: Eq a => a -> [a] -> [a] +-- Assumes the arg list contains no dups; guarantees the result has no dups +insertList x xs | isIn "insert" x xs = xs + | otherwise = x : xs + +unionLists :: (Outputable a, Eq a) => [a] -> [a] -> [a] +-- Assumes that the arguments contain no duplicates +unionLists xs ys + = WARN(length xs > 100 || length ys > 100, ppr xs $$ ppr ys) + [x | x <- xs, isn'tIn "unionLists" x ys] ++ ys + +minusList :: (Eq a) => [a] -> [a] -> [a] +-- Everything in the first list that is not in the second list: +minusList xs ys = [ x | x <- xs, isn'tIn "minusList" x ys] + +{- +************************************************************************ +* * +\subsection[Utils-assoc]{Association lists} +* * +************************************************************************ + +Inefficient finite maps based on association lists and equality. +-} + +-- A finite mapping based on equality and association lists +type Assoc a b = [(a,b)] + +assoc :: (Eq a) => String -> Assoc a b -> a -> b +assocDefault :: (Eq a) => b -> Assoc a b -> a -> b +assocUsing :: (a -> a -> Bool) -> String -> Assoc a b -> a -> b +assocMaybe :: (Eq a) => Assoc a b -> a -> Maybe b +assocDefaultUsing :: (a -> a -> Bool) -> b -> Assoc a b -> a -> b + +assocDefaultUsing _ deflt [] _ = deflt +assocDefaultUsing eq deflt ((k,v) : rest) key + | k `eq` key = v + | otherwise = assocDefaultUsing eq deflt rest key + +assoc crash_msg list key = assocDefaultUsing (==) (panic ("Failed in assoc: " ++ crash_msg)) list key +assocDefault deflt list key = assocDefaultUsing (==) deflt list key +assocUsing eq crash_msg list key = assocDefaultUsing eq (panic ("Failed in assoc: " ++ crash_msg)) list key + +assocMaybe alist key + = lookup alist + where + lookup [] = Nothing + lookup ((tv,ty):rest) = if key == tv then Just ty else lookup rest + +{- +************************************************************************ +* * +\subsection[Utils-dups]{Duplicate-handling} +* * +************************************************************************ +-} + +hasNoDups :: (Eq a) => [a] -> Bool + +hasNoDups xs = f [] xs + where + f _ [] = True + f seen_so_far (x:xs) = if x `is_elem` seen_so_far + then False + else f (x:seen_so_far) xs + + is_elem = isIn "hasNoDups" + +equivClasses :: (a -> a -> Ordering) -- Comparison + -> [a] + -> [[a]] + +equivClasses _ [] = [] +equivClasses _ stuff@[_] = [stuff] +equivClasses cmp items = runs eq (sortBy cmp items) + where + eq a b = case cmp a b of { EQ -> True; _ -> False } + +{- +The first cases in @equivClasses@ above are just to cut to the point +more quickly... + +@runs@ groups a list into a list of lists, each sublist being a run of +identical elements of the input list. It is passed a predicate @p@ which +tells when two elements are equal. +-} + +runs :: (a -> a -> Bool) -- Equality + -> [a] + -> [[a]] + +runs _ [] = [] +runs p (x:xs) = case (span (p x) xs) of + (first, rest) -> (x:first) : (runs p rest) + +removeDups :: (a -> a -> Ordering) -- Comparison function + -> [a] + -> ([a], -- List with no duplicates + [[a]]) -- List of duplicate groups. One representative from + -- each group appears in the first result + +removeDups _ [] = ([], []) +removeDups _ [x] = ([x],[]) +removeDups cmp xs + = case (mapAccumR collect_dups [] (equivClasses cmp xs)) of { (dups, xs') -> + (xs', dups) } + where + collect_dups _ [] = panic "ListSetOps: removeDups" + collect_dups dups_so_far [x] = (dups_so_far, x) + collect_dups dups_so_far dups@(x:_) = (dups:dups_so_far, x) + +findDupsEq :: (a->a->Bool) -> [a] -> [[a]] +findDupsEq _ [] = [] +findDupsEq eq (x:xs) | null eq_xs = findDupsEq eq xs + | otherwise = (x:eq_xs) : findDupsEq eq neq_xs + where (eq_xs, neq_xs) = partition (eq x) xs + +equivClassesByUniq :: (a -> Unique) -> [a] -> [[a]] + -- NB: it's *very* important that if we have the input list [a,b,c], + -- where a,b,c all have the same unique, then we get back the list + -- [a,b,c] + -- not + -- [c,b,a] + -- Hence the use of foldr, plus the reversed-args tack_on below +equivClassesByUniq get_uniq xs + = eltsUFM (foldr add emptyUFM xs) + where + add a ufm = addToUFM_C tack_on ufm (get_uniq a) [a] + tack_on old new = new++old diff --git a/compiler/utils/ListSetOps.lhs b/compiler/utils/ListSetOps.lhs deleted file mode 100644 index 6247dc67f6..0000000000 --- a/compiler/utils/ListSetOps.lhs +++ /dev/null @@ -1,196 +0,0 @@ -% -% (c) The University of Glasgow 2006 -% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 -% -\section[ListSetOps]{Set-like operations on lists} - -\begin{code} -{-# LANGUAGE CPP #-} - -module ListSetOps ( - unionLists, minusList, insertList, - - -- Association lists - Assoc, assoc, assocMaybe, assocUsing, assocDefault, assocDefaultUsing, - - -- Duplicate handling - hasNoDups, runs, removeDups, findDupsEq, - equivClasses, equivClassesByUniq, - - -- Indexing - getNth - ) where - -#include "HsVersions.h" - -import Outputable -import Unique -import UniqFM -import Util - -import Data.List -\end{code} - ---------- -#ifndef DEBUG -getNth :: [a] -> Int -> a -getNth xs n = xs !! n -#else -getNth :: Outputable a => [a] -> Int -> a -getNth xs n = ASSERT2( xs `lengthAtLeast` n, ppr n $$ ppr xs ) - xs !! n -#endif ----------- -\begin{code} -getNth :: Outputable a => [a] -> Int -> a -getNth xs n = ASSERT2( xs `lengthExceeds` n, ppr n $$ ppr xs ) - xs !! n -\end{code} - -%************************************************************************ -%* * - Treating lists as sets - Assumes the lists contain no duplicates, but are unordered -%* * -%************************************************************************ - -\begin{code} -insertList :: Eq a => a -> [a] -> [a] --- Assumes the arg list contains no dups; guarantees the result has no dups -insertList x xs | isIn "insert" x xs = xs - | otherwise = x : xs - -unionLists :: (Outputable a, Eq a) => [a] -> [a] -> [a] --- Assumes that the arguments contain no duplicates -unionLists xs ys - = WARN(length xs > 100 || length ys > 100, ppr xs $$ ppr ys) - [x | x <- xs, isn'tIn "unionLists" x ys] ++ ys - -minusList :: (Eq a) => [a] -> [a] -> [a] --- Everything in the first list that is not in the second list: -minusList xs ys = [ x | x <- xs, isn'tIn "minusList" x ys] -\end{code} - - -%************************************************************************ -%* * -\subsection[Utils-assoc]{Association lists} -%* * -%************************************************************************ - -Inefficient finite maps based on association lists and equality. - -\begin{code} --- A finite mapping based on equality and association lists -type Assoc a b = [(a,b)] - -assoc :: (Eq a) => String -> Assoc a b -> a -> b -assocDefault :: (Eq a) => b -> Assoc a b -> a -> b -assocUsing :: (a -> a -> Bool) -> String -> Assoc a b -> a -> b -assocMaybe :: (Eq a) => Assoc a b -> a -> Maybe b -assocDefaultUsing :: (a -> a -> Bool) -> b -> Assoc a b -> a -> b - -assocDefaultUsing _ deflt [] _ = deflt -assocDefaultUsing eq deflt ((k,v) : rest) key - | k `eq` key = v - | otherwise = assocDefaultUsing eq deflt rest key - -assoc crash_msg list key = assocDefaultUsing (==) (panic ("Failed in assoc: " ++ crash_msg)) list key -assocDefault deflt list key = assocDefaultUsing (==) deflt list key -assocUsing eq crash_msg list key = assocDefaultUsing eq (panic ("Failed in assoc: " ++ crash_msg)) list key - -assocMaybe alist key - = lookup alist - where - lookup [] = Nothing - lookup ((tv,ty):rest) = if key == tv then Just ty else lookup rest -\end{code} - -%************************************************************************ -%* * -\subsection[Utils-dups]{Duplicate-handling} -%* * -%************************************************************************ - -\begin{code} -hasNoDups :: (Eq a) => [a] -> Bool - -hasNoDups xs = f [] xs - where - f _ [] = True - f seen_so_far (x:xs) = if x `is_elem` seen_so_far - then False - else f (x:seen_so_far) xs - - is_elem = isIn "hasNoDups" -\end{code} - -\begin{code} -equivClasses :: (a -> a -> Ordering) -- Comparison - -> [a] - -> [[a]] - -equivClasses _ [] = [] -equivClasses _ stuff@[_] = [stuff] -equivClasses cmp items = runs eq (sortBy cmp items) - where - eq a b = case cmp a b of { EQ -> True; _ -> False } -\end{code} - -The first cases in @equivClasses@ above are just to cut to the point -more quickly... - -@runs@ groups a list into a list of lists, each sublist being a run of -identical elements of the input list. It is passed a predicate @p@ which -tells when two elements are equal. - -\begin{code} -runs :: (a -> a -> Bool) -- Equality - -> [a] - -> [[a]] - -runs _ [] = [] -runs p (x:xs) = case (span (p x) xs) of - (first, rest) -> (x:first) : (runs p rest) -\end{code} - -\begin{code} -removeDups :: (a -> a -> Ordering) -- Comparison function - -> [a] - -> ([a], -- List with no duplicates - [[a]]) -- List of duplicate groups. One representative from - -- each group appears in the first result - -removeDups _ [] = ([], []) -removeDups _ [x] = ([x],[]) -removeDups cmp xs - = case (mapAccumR collect_dups [] (equivClasses cmp xs)) of { (dups, xs') -> - (xs', dups) } - where - collect_dups _ [] = panic "ListSetOps: removeDups" - collect_dups dups_so_far [x] = (dups_so_far, x) - collect_dups dups_so_far dups@(x:_) = (dups:dups_so_far, x) - -findDupsEq :: (a->a->Bool) -> [a] -> [[a]] -findDupsEq _ [] = [] -findDupsEq eq (x:xs) | null eq_xs = findDupsEq eq xs - | otherwise = (x:eq_xs) : findDupsEq eq neq_xs - where (eq_xs, neq_xs) = partition (eq x) xs -\end{code} - - -\begin{code} -equivClassesByUniq :: (a -> Unique) -> [a] -> [[a]] - -- NB: it's *very* important that if we have the input list [a,b,c], - -- where a,b,c all have the same unique, then we get back the list - -- [a,b,c] - -- not - -- [c,b,a] - -- Hence the use of foldr, plus the reversed-args tack_on below -equivClassesByUniq get_uniq xs - = eltsUFM (foldr add emptyUFM xs) - where - add a ufm = addToUFM_C tack_on ufm (get_uniq a) [a] - tack_on old new = new++old -\end{code} - diff --git a/compiler/utils/Maybes.hs b/compiler/utils/Maybes.hs new file mode 100644 index 0000000000..fc8e3199ae --- /dev/null +++ b/compiler/utils/Maybes.hs @@ -0,0 +1,106 @@ +{- +(c) The University of Glasgow 2006 +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +-} + +{-# LANGUAGE CPP #-} +module Maybes ( + module Data.Maybe, + + MaybeErr(..), -- Instance of Monad + failME, isSuccess, + + orElse, + firstJust, firstJusts, + whenIsJust, + expectJust, + + MaybeT(..) + ) where +#if __GLASGOW_HASKELL__ < 709 +import Control.Applicative +#endif +import Control.Monad +import Data.Maybe + +infixr 4 `orElse` + +{- +************************************************************************ +* * +\subsection[Maybe type]{The @Maybe@ type} +* * +************************************************************************ +-} + +firstJust :: Maybe a -> Maybe a -> Maybe a +firstJust a b = firstJusts [a, b] + +-- | Takes a list of @Maybes@ and returns the first @Just@ if there is one, or +-- @Nothing@ otherwise. +firstJusts :: [Maybe a] -> Maybe a +firstJusts = msum + +expectJust :: String -> Maybe a -> a +{-# INLINE expectJust #-} +expectJust _ (Just x) = x +expectJust err Nothing = error ("expectJust " ++ err) + +whenIsJust :: Monad m => Maybe a -> (a -> m ()) -> m () +whenIsJust (Just x) f = f x +whenIsJust Nothing _ = return () + +-- | Flipped version of @fromMaybe@, useful for chaining. +orElse :: Maybe a -> a -> a +orElse = flip fromMaybe + +{- +************************************************************************ +* * +\subsection[MaybeT type]{The @MaybeT@ monad transformer} +* * +************************************************************************ +-} + +newtype MaybeT m a = MaybeT {runMaybeT :: m (Maybe a)} + +instance Functor m => Functor (MaybeT m) where + fmap f x = MaybeT $ fmap (fmap f) $ runMaybeT x + +instance (Monad m, Functor m) => Applicative (MaybeT m) where + pure = return + (<*>) = ap + +instance Monad m => Monad (MaybeT m) where + return = MaybeT . return . Just + x >>= f = MaybeT $ runMaybeT x >>= maybe (return Nothing) (runMaybeT . f) + fail _ = MaybeT $ return Nothing + +{- +************************************************************************ +* * +\subsection[MaybeErr type]{The @MaybeErr@ type} +* * +************************************************************************ +-} + +data MaybeErr err val = Succeeded val | Failed err + +instance Functor (MaybeErr err) where + fmap = liftM + +instance Applicative (MaybeErr err) where + pure = return + (<*>) = ap + +instance Monad (MaybeErr err) where + return v = Succeeded v + Succeeded v >>= k = k v + Failed e >>= _ = Failed e + +isSuccess :: MaybeErr err val -> Bool +isSuccess (Succeeded {}) = True +isSuccess (Failed {}) = False + +failME :: err -> MaybeErr err val +failME e = Failed e diff --git a/compiler/utils/Maybes.lhs b/compiler/utils/Maybes.lhs deleted file mode 100644 index 8052b1d848..0000000000 --- a/compiler/utils/Maybes.lhs +++ /dev/null @@ -1,111 +0,0 @@ -% -% (c) The University of Glasgow 2006 -% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 -% - -\begin{code} -{-# LANGUAGE CPP #-} -module Maybes ( - module Data.Maybe, - - MaybeErr(..), -- Instance of Monad - failME, isSuccess, - - orElse, - firstJust, firstJusts, - whenIsJust, - expectJust, - - MaybeT(..) - ) where -#if __GLASGOW_HASKELL__ < 709 -import Control.Applicative -#endif -import Control.Monad -import Data.Maybe - -infixr 4 `orElse` -\end{code} - -%************************************************************************ -%* * -\subsection[Maybe type]{The @Maybe@ type} -%* * -%************************************************************************ - -\begin{code} -firstJust :: Maybe a -> Maybe a -> Maybe a -firstJust a b = firstJusts [a, b] - --- | Takes a list of @Maybes@ and returns the first @Just@ if there is one, or --- @Nothing@ otherwise. -firstJusts :: [Maybe a] -> Maybe a -firstJusts = msum - -expectJust :: String -> Maybe a -> a -{-# INLINE expectJust #-} -expectJust _ (Just x) = x -expectJust err Nothing = error ("expectJust " ++ err) - -whenIsJust :: Monad m => Maybe a -> (a -> m ()) -> m () -whenIsJust (Just x) f = f x -whenIsJust Nothing _ = return () - --- | Flipped version of @fromMaybe@, useful for chaining. -orElse :: Maybe a -> a -> a -orElse = flip fromMaybe -\end{code} - -%************************************************************************ -%* * -\subsection[MaybeT type]{The @MaybeT@ monad transformer} -%* * -%************************************************************************ - -\begin{code} - -newtype MaybeT m a = MaybeT {runMaybeT :: m (Maybe a)} - -instance Functor m => Functor (MaybeT m) where - fmap f x = MaybeT $ fmap (fmap f) $ runMaybeT x - -instance (Monad m, Functor m) => Applicative (MaybeT m) where - pure = return - (<*>) = ap - -instance Monad m => Monad (MaybeT m) where - return = MaybeT . return . Just - x >>= f = MaybeT $ runMaybeT x >>= maybe (return Nothing) (runMaybeT . f) - fail _ = MaybeT $ return Nothing - -\end{code} - - -%************************************************************************ -%* * -\subsection[MaybeErr type]{The @MaybeErr@ type} -%* * -%************************************************************************ - -\begin{code} -data MaybeErr err val = Succeeded val | Failed err - -instance Functor (MaybeErr err) where - fmap = liftM - -instance Applicative (MaybeErr err) where - pure = return - (<*>) = ap - -instance Monad (MaybeErr err) where - return v = Succeeded v - Succeeded v >>= k = k v - Failed e >>= _ = Failed e - -isSuccess :: MaybeErr err val -> Bool -isSuccess (Succeeded {}) = True -isSuccess (Failed {}) = False - -failME :: err -> MaybeErr err val -failME e = Failed e -\end{code} diff --git a/compiler/utils/OrdList.hs b/compiler/utils/OrdList.hs new file mode 100644 index 0000000000..ad72ca1d45 --- /dev/null +++ b/compiler/utils/OrdList.hs @@ -0,0 +1,98 @@ +{- +(c) The University of Glasgow 2006 +(c) The AQUA Project, Glasgow University, 1993-1998 + + +This is useful, general stuff for the Native Code Generator. + +Provide trees (of instructions), so that lists of instructions +can be appended in linear time. +-} + +module OrdList ( + OrdList, + nilOL, isNilOL, unitOL, appOL, consOL, snocOL, concatOL, + mapOL, fromOL, toOL, foldrOL, foldlOL +) where + +import Outputable + +infixl 5 `appOL` +infixl 5 `snocOL` +infixr 5 `consOL` + +data OrdList a + = None + | One a + | Many [a] -- Invariant: non-empty + | Cons a (OrdList a) + | Snoc (OrdList a) a + | Two (OrdList a) -- Invariant: non-empty + (OrdList a) -- Invariant: non-empty + +instance Outputable a => Outputable (OrdList a) where + ppr ol = ppr (fromOL ol) -- Convert to list and print that + +nilOL :: OrdList a +isNilOL :: OrdList a -> Bool + +unitOL :: a -> OrdList a +snocOL :: OrdList a -> a -> OrdList a +consOL :: a -> OrdList a -> OrdList a +appOL :: OrdList a -> OrdList a -> OrdList a +concatOL :: [OrdList a] -> OrdList a + +nilOL = None +unitOL as = One as +snocOL as b = Snoc as b +consOL a bs = Cons a bs +concatOL aas = foldr appOL None aas + +isNilOL None = True +isNilOL _ = False + +None `appOL` b = b +a `appOL` None = a +One a `appOL` b = Cons a b +a `appOL` One b = Snoc a b +a `appOL` b = Two a b + +fromOL :: OrdList a -> [a] +fromOL a = go a [] + where go None acc = acc + go (One a) acc = a : acc + go (Cons a b) acc = a : go b acc + go (Snoc a b) acc = go a (b:acc) + go (Two a b) acc = go a (go b acc) + go (Many xs) acc = xs ++ acc + +mapOL :: (a -> b) -> OrdList a -> OrdList b +mapOL _ None = None +mapOL f (One x) = One (f x) +mapOL f (Cons x xs) = Cons (f x) (mapOL f xs) +mapOL f (Snoc xs x) = Snoc (mapOL f xs) (f x) +mapOL f (Two x y) = Two (mapOL f x) (mapOL f y) +mapOL f (Many xs) = Many (map f xs) + +instance Functor OrdList where + fmap = mapOL + +foldrOL :: (a->b->b) -> b -> OrdList a -> b +foldrOL _ z None = z +foldrOL k z (One x) = k x z +foldrOL k z (Cons x xs) = k x (foldrOL k z xs) +foldrOL k z (Snoc xs x) = foldrOL k (k x z) xs +foldrOL k z (Two b1 b2) = foldrOL k (foldrOL k z b2) b1 +foldrOL k z (Many xs) = foldr k z xs + +foldlOL :: (b->a->b) -> b -> OrdList a -> b +foldlOL _ z None = z +foldlOL k z (One x) = k z x +foldlOL k z (Cons x xs) = foldlOL k (k z x) xs +foldlOL k z (Snoc xs x) = k (foldlOL k z xs) x +foldlOL k z (Two b1 b2) = foldlOL k (foldlOL k z b1) b2 +foldlOL k z (Many xs) = foldl k z xs + +toOL :: [a] -> OrdList a +toOL [] = None +toOL xs = Many xs diff --git a/compiler/utils/OrdList.lhs b/compiler/utils/OrdList.lhs deleted file mode 100644 index 42abb51696..0000000000 --- a/compiler/utils/OrdList.lhs +++ /dev/null @@ -1,99 +0,0 @@ -% -% (c) The University of Glasgow 2006 -% (c) The AQUA Project, Glasgow University, 1993-1998 -% - -This is useful, general stuff for the Native Code Generator. - -Provide trees (of instructions), so that lists of instructions -can be appended in linear time. - -\begin{code} -module OrdList ( - OrdList, - nilOL, isNilOL, unitOL, appOL, consOL, snocOL, concatOL, - mapOL, fromOL, toOL, foldrOL, foldlOL -) where - -import Outputable - -infixl 5 `appOL` -infixl 5 `snocOL` -infixr 5 `consOL` - -data OrdList a - = None - | One a - | Many [a] -- Invariant: non-empty - | Cons a (OrdList a) - | Snoc (OrdList a) a - | Two (OrdList a) -- Invariant: non-empty - (OrdList a) -- Invariant: non-empty - -instance Outputable a => Outputable (OrdList a) where - ppr ol = ppr (fromOL ol) -- Convert to list and print that - -nilOL :: OrdList a -isNilOL :: OrdList a -> Bool - -unitOL :: a -> OrdList a -snocOL :: OrdList a -> a -> OrdList a -consOL :: a -> OrdList a -> OrdList a -appOL :: OrdList a -> OrdList a -> OrdList a -concatOL :: [OrdList a] -> OrdList a - -nilOL = None -unitOL as = One as -snocOL as b = Snoc as b -consOL a bs = Cons a bs -concatOL aas = foldr appOL None aas - -isNilOL None = True -isNilOL _ = False - -None `appOL` b = b -a `appOL` None = a -One a `appOL` b = Cons a b -a `appOL` One b = Snoc a b -a `appOL` b = Two a b - -fromOL :: OrdList a -> [a] -fromOL a = go a [] - where go None acc = acc - go (One a) acc = a : acc - go (Cons a b) acc = a : go b acc - go (Snoc a b) acc = go a (b:acc) - go (Two a b) acc = go a (go b acc) - go (Many xs) acc = xs ++ acc - -mapOL :: (a -> b) -> OrdList a -> OrdList b -mapOL _ None = None -mapOL f (One x) = One (f x) -mapOL f (Cons x xs) = Cons (f x) (mapOL f xs) -mapOL f (Snoc xs x) = Snoc (mapOL f xs) (f x) -mapOL f (Two x y) = Two (mapOL f x) (mapOL f y) -mapOL f (Many xs) = Many (map f xs) - -instance Functor OrdList where - fmap = mapOL - -foldrOL :: (a->b->b) -> b -> OrdList a -> b -foldrOL _ z None = z -foldrOL k z (One x) = k x z -foldrOL k z (Cons x xs) = k x (foldrOL k z xs) -foldrOL k z (Snoc xs x) = foldrOL k (k x z) xs -foldrOL k z (Two b1 b2) = foldrOL k (foldrOL k z b2) b1 -foldrOL k z (Many xs) = foldr k z xs - -foldlOL :: (b->a->b) -> b -> OrdList a -> b -foldlOL _ z None = z -foldlOL k z (One x) = k z x -foldlOL k z (Cons x xs) = foldlOL k (k z x) xs -foldlOL k z (Snoc xs x) = k (foldlOL k z xs) x -foldlOL k z (Two b1 b2) = foldlOL k (foldlOL k z b1) b2 -foldlOL k z (Many xs) = foldl k z xs - -toOL :: [a] -> OrdList a -toOL [] = None -toOL xs = Many xs -\end{code} diff --git a/compiler/utils/Outputable.hs b/compiler/utils/Outputable.hs new file mode 100644 index 0000000000..488094a498 --- /dev/null +++ b/compiler/utils/Outputable.hs @@ -0,0 +1,1027 @@ +{- +(c) The University of Glasgow 2006-2012 +(c) The GRASP Project, Glasgow University, 1992-1998 +-} + +-- | This module defines classes and functions for pretty-printing. It also +-- exports a number of helpful debugging and other utilities such as 'trace' and 'panic'. +-- +-- The interface to this module is very similar to the standard Hughes-PJ pretty printing +-- module, except that it exports a number of additional functions that are rarely used, +-- and works over the 'SDoc' type. +module Outputable ( + -- * Type classes + Outputable(..), OutputableBndr(..), + + -- * Pretty printing combinators + SDoc, runSDoc, initSDocContext, + docToSDoc, + interppSP, interpp'SP, pprQuotedList, pprWithCommas, quotedListWithOr, + empty, nest, + char, + text, ftext, ptext, ztext, + int, intWithCommas, integer, float, double, rational, + parens, cparen, brackets, braces, quotes, quote, + doubleQuotes, angleBrackets, paBrackets, + semi, comma, colon, dcolon, space, equals, dot, + arrow, larrow, darrow, arrowt, larrowt, arrowtt, larrowtt, + lparen, rparen, lbrack, rbrack, lbrace, rbrace, underscore, + blankLine, forAllLit, + (<>), (<+>), hcat, hsep, + ($$), ($+$), vcat, + sep, cat, + fsep, fcat, + hang, punctuate, ppWhen, ppUnless, + speakNth, speakNTimes, speakN, speakNOf, plural, isOrAre, + + coloured, PprColour, colType, colCoerc, colDataCon, + colBinder, bold, keyword, + + -- * Converting 'SDoc' into strings and outputing it + printForC, printForAsm, printForUser, printForUserPartWay, + pprCode, mkCodeStyle, + showSDoc, showSDocSimple, showSDocOneLine, + showSDocForUser, showSDocDebug, showSDocDump, showSDocDumpOneLine, + showSDocUnqual, showPpr, + renderWithStyle, + + pprInfixVar, pprPrefixVar, + pprHsChar, pprHsString, pprHsBytes, + pprFastFilePath, + + -- * Controlling the style in which output is printed + BindingSite(..), + + PprStyle, CodeStyle(..), PrintUnqualified(..), + QueryQualifyName, QueryQualifyModule, QueryQualifyPackage, + reallyAlwaysQualify, reallyAlwaysQualifyNames, + alwaysQualify, alwaysQualifyNames, alwaysQualifyModules, + neverQualify, neverQualifyNames, neverQualifyModules, + QualifyName(..), queryQual, + sdocWithDynFlags, sdocWithPlatform, + getPprStyle, withPprStyle, withPprStyleDoc, + pprDeeper, pprDeeperList, pprSetDepth, + codeStyle, userStyle, debugStyle, dumpStyle, asmStyle, + ifPprDebug, qualName, qualModule, qualPackage, + mkErrStyle, defaultErrStyle, defaultDumpStyle, mkDumpStyle, defaultUserStyle, + mkUserStyle, cmdlineParserStyle, Depth(..), + + -- * Error handling and debugging utilities + pprPanic, pprSorry, assertPprPanic, pprPanicFastInt, pprPgmError, + pprTrace, warnPprTrace, + trace, pgmError, panic, sorry, panicFastInt, assertPanic, + pprDebugAndThen, + ) where + +import {-# SOURCE #-} DynFlags( DynFlags, + targetPlatform, pprUserLength, pprCols, + useUnicode, useUnicodeSyntax, + unsafeGlobalDynFlags ) +import {-# SOURCE #-} Module( PackageKey, Module, ModuleName, moduleName ) +import {-# SOURCE #-} OccName( OccName ) +import {-# SOURCE #-} StaticFlags( opt_PprStyle_Debug, opt_NoDebugOutput ) + +import FastString +import FastTypes +import qualified Pretty +import Util +import Platform +import Pretty ( Doc, Mode(..) ) +import Panic + +import Data.ByteString (ByteString) +import qualified Data.ByteString as BS +import Data.Char +import qualified Data.Map as M +import Data.Int +import qualified Data.IntMap as IM +import Data.Set (Set) +import qualified Data.Set as Set +import Data.Word +import System.IO ( Handle ) +import System.FilePath +import Text.Printf + +import GHC.Fingerprint +import GHC.Show ( showMultiLineString ) + +{- +************************************************************************ +* * +\subsection{The @PprStyle@ data type} +* * +************************************************************************ +-} + +data PprStyle + = PprUser PrintUnqualified Depth + -- Pretty-print in a way that will make sense to the + -- ordinary user; must be very close to Haskell + -- syntax, etc. + -- Assumes printing tidied code: non-system names are + -- printed without uniques. + + | PprDump PrintUnqualified + -- For -ddump-foo; less verbose than PprDebug, but more than PprUser + -- Does not assume tidied code: non-external names + -- are printed with uniques. + + | PprDebug -- Full debugging output + + | PprCode CodeStyle + -- Print code; either C or assembler + +data CodeStyle = CStyle -- The format of labels differs for C and assembler + | AsmStyle + +data Depth = AllTheWay + | PartWay Int -- 0 => stop + + +-- ----------------------------------------------------------------------------- +-- Printing original names + +-- | When printing code that contains original names, we need to map the +-- original names back to something the user understands. This is the +-- purpose of the triple of functions that gets passed around +-- when rendering 'SDoc'. +data PrintUnqualified = QueryQualify { + queryQualifyName :: QueryQualifyName, + queryQualifyModule :: QueryQualifyModule, + queryQualifyPackage :: QueryQualifyPackage +} + +-- | given an /original/ name, this function tells you which module +-- name it should be qualified with when printing for the user, if +-- any. For example, given @Control.Exception.catch@, which is in scope +-- as @Exception.catch@, this fuction will return @Just "Exception"@. +-- Note that the return value is a ModuleName, not a Module, because +-- in source code, names are qualified by ModuleNames. +type QueryQualifyName = Module -> OccName -> QualifyName + +-- | For a given module, we need to know whether to print it with +-- a package name to disambiguate it. +type QueryQualifyModule = Module -> Bool + +-- | For a given package, we need to know whether to print it with +-- the package key to disambiguate it. +type QueryQualifyPackage = PackageKey -> Bool + +-- See Note [Printing original names] in HscTypes +data QualifyName -- given P:M.T + = NameUnqual -- refer to it as "T" + | NameQual ModuleName -- refer to it as "X.T" for the supplied X + | NameNotInScope1 + -- it is not in scope at all, but M.T is not bound in the current + -- scope, so we can refer to it as "M.T" + | NameNotInScope2 + -- it is not in scope at all, and M.T is already bound in the + -- current scope, so we must refer to it as "P:M.T" + +reallyAlwaysQualifyNames :: QueryQualifyName +reallyAlwaysQualifyNames _ _ = NameNotInScope2 + +-- | NB: This won't ever show package IDs +alwaysQualifyNames :: QueryQualifyName +alwaysQualifyNames m _ = NameQual (moduleName m) + +neverQualifyNames :: QueryQualifyName +neverQualifyNames _ _ = NameUnqual + +alwaysQualifyModules :: QueryQualifyModule +alwaysQualifyModules _ = True + +neverQualifyModules :: QueryQualifyModule +neverQualifyModules _ = False + +alwaysQualifyPackages :: QueryQualifyPackage +alwaysQualifyPackages _ = True + +neverQualifyPackages :: QueryQualifyPackage +neverQualifyPackages _ = False + +reallyAlwaysQualify, alwaysQualify, neverQualify :: PrintUnqualified +reallyAlwaysQualify + = QueryQualify reallyAlwaysQualifyNames + alwaysQualifyModules + alwaysQualifyPackages +alwaysQualify = QueryQualify alwaysQualifyNames + alwaysQualifyModules + alwaysQualifyPackages +neverQualify = QueryQualify neverQualifyNames + neverQualifyModules + neverQualifyPackages + +defaultUserStyle, defaultDumpStyle :: PprStyle + +defaultUserStyle = mkUserStyle neverQualify AllTheWay + -- Print without qualifiers to reduce verbosity, unless -dppr-debug + +defaultDumpStyle | opt_PprStyle_Debug = PprDebug + | otherwise = PprDump neverQualify + +mkDumpStyle :: PrintUnqualified -> PprStyle +mkDumpStyle print_unqual | opt_PprStyle_Debug = PprDebug + | otherwise = PprDump print_unqual + +defaultErrStyle :: DynFlags -> PprStyle +-- Default style for error messages, when we don't know PrintUnqualified +-- It's a bit of a hack because it doesn't take into account what's in scope +-- Only used for desugarer warnings, and typechecker errors in interface sigs +-- NB that -dppr-debug will still get into PprDebug style +defaultErrStyle dflags = mkErrStyle dflags neverQualify + +-- | Style for printing error messages +mkErrStyle :: DynFlags -> PrintUnqualified -> PprStyle +mkErrStyle dflags qual = mkUserStyle qual (PartWay (pprUserLength dflags)) + +cmdlineParserStyle :: PprStyle +cmdlineParserStyle = mkUserStyle alwaysQualify AllTheWay + +mkUserStyle :: PrintUnqualified -> Depth -> PprStyle +mkUserStyle unqual depth + | opt_PprStyle_Debug = PprDebug + | otherwise = PprUser unqual depth + +{- +Orthogonal to the above printing styles are (possibly) some +command-line flags that affect printing (often carried with the +style). The most likely ones are variations on how much type info is +shown. + +The following test decides whether or not we are actually generating +code (either C or assembly), or generating interface files. + +************************************************************************ +* * +\subsection{The @SDoc@ data type} +* * +************************************************************************ +-} + +newtype SDoc = SDoc { runSDoc :: SDocContext -> Doc } + +data SDocContext = SDC + { sdocStyle :: !PprStyle + , sdocLastColour :: !PprColour + -- ^ The most recently used colour. This allows nesting colours. + , sdocDynFlags :: !DynFlags + } + +initSDocContext :: DynFlags -> PprStyle -> SDocContext +initSDocContext dflags sty = SDC + { sdocStyle = sty + , sdocLastColour = colReset + , sdocDynFlags = dflags + } + +withPprStyle :: PprStyle -> SDoc -> SDoc +withPprStyle sty d = SDoc $ \ctxt -> runSDoc d ctxt{sdocStyle=sty} + +withPprStyleDoc :: DynFlags -> PprStyle -> SDoc -> Doc +withPprStyleDoc dflags sty d = runSDoc d (initSDocContext dflags sty) + +pprDeeper :: SDoc -> SDoc +pprDeeper d = SDoc $ \ctx -> case ctx of + SDC{sdocStyle=PprUser _ (PartWay 0)} -> Pretty.text "..." + SDC{sdocStyle=PprUser q (PartWay n)} -> + runSDoc d ctx{sdocStyle = PprUser q (PartWay (n-1))} + _ -> runSDoc d ctx + +pprDeeperList :: ([SDoc] -> SDoc) -> [SDoc] -> SDoc +-- Truncate a list that list that is longer than the current depth +pprDeeperList f ds + | null ds = f [] + | otherwise = SDoc work + where + work ctx@SDC{sdocStyle=PprUser q (PartWay n)} + | n==0 = Pretty.text "..." + | otherwise = + runSDoc (f (go 0 ds)) ctx{sdocStyle = PprUser q (PartWay (n-1))} + where + go _ [] = [] + go i (d:ds) | i >= n = [text "...."] + | otherwise = d : go (i+1) ds + work other_ctx = runSDoc (f ds) other_ctx + +pprSetDepth :: Depth -> SDoc -> SDoc +pprSetDepth depth doc = SDoc $ \ctx -> + case ctx of + SDC{sdocStyle=PprUser q _} -> + runSDoc doc ctx{sdocStyle = PprUser q depth} + _ -> + runSDoc doc ctx + +getPprStyle :: (PprStyle -> SDoc) -> SDoc +getPprStyle df = SDoc $ \ctx -> runSDoc (df (sdocStyle ctx)) ctx + +sdocWithDynFlags :: (DynFlags -> SDoc) -> SDoc +sdocWithDynFlags f = SDoc $ \ctx -> runSDoc (f (sdocDynFlags ctx)) ctx + +sdocWithPlatform :: (Platform -> SDoc) -> SDoc +sdocWithPlatform f = sdocWithDynFlags (f . targetPlatform) + +qualName :: PprStyle -> QueryQualifyName +qualName (PprUser q _) mod occ = queryQualifyName q mod occ +qualName (PprDump q) mod occ = queryQualifyName q mod occ +qualName _other mod _ = NameQual (moduleName mod) + +qualModule :: PprStyle -> QueryQualifyModule +qualModule (PprUser q _) m = queryQualifyModule q m +qualModule (PprDump q) m = queryQualifyModule q m +qualModule _other _m = True + +qualPackage :: PprStyle -> QueryQualifyPackage +qualPackage (PprUser q _) m = queryQualifyPackage q m +qualPackage (PprDump q) m = queryQualifyPackage q m +qualPackage _other _m = True + +queryQual :: PprStyle -> PrintUnqualified +queryQual s = QueryQualify (qualName s) + (qualModule s) + (qualPackage s) + +codeStyle :: PprStyle -> Bool +codeStyle (PprCode _) = True +codeStyle _ = False + +asmStyle :: PprStyle -> Bool +asmStyle (PprCode AsmStyle) = True +asmStyle _other = False + +dumpStyle :: PprStyle -> Bool +dumpStyle (PprDump {}) = True +dumpStyle _other = False + +debugStyle :: PprStyle -> Bool +debugStyle PprDebug = True +debugStyle _other = False + +userStyle :: PprStyle -> Bool +userStyle (PprUser _ _) = True +userStyle _other = False + +ifPprDebug :: SDoc -> SDoc -- Empty for non-debug style +ifPprDebug d = SDoc $ \ctx -> + case ctx of + SDC{sdocStyle=PprDebug} -> runSDoc d ctx + _ -> Pretty.empty + +printForUser :: DynFlags -> Handle -> PrintUnqualified -> SDoc -> IO () +printForUser dflags handle unqual doc + = Pretty.printDoc PageMode (pprCols dflags) handle + (runSDoc doc (initSDocContext dflags (mkUserStyle unqual AllTheWay))) + +printForUserPartWay :: DynFlags -> Handle -> Int -> PrintUnqualified -> SDoc + -> IO () +printForUserPartWay dflags handle d unqual doc + = Pretty.printDoc PageMode (pprCols dflags) handle + (runSDoc doc (initSDocContext dflags (mkUserStyle unqual (PartWay d)))) + +-- printForC, printForAsm do what they sound like +printForC :: DynFlags -> Handle -> SDoc -> IO () +printForC dflags handle doc = + Pretty.printDoc LeftMode (pprCols dflags) handle + (runSDoc doc (initSDocContext dflags (PprCode CStyle))) + +printForAsm :: DynFlags -> Handle -> SDoc -> IO () +printForAsm dflags handle doc = + Pretty.printDoc LeftMode (pprCols dflags) handle + (runSDoc doc (initSDocContext dflags (PprCode AsmStyle))) + +pprCode :: CodeStyle -> SDoc -> SDoc +pprCode cs d = withPprStyle (PprCode cs) d + +mkCodeStyle :: CodeStyle -> PprStyle +mkCodeStyle = PprCode + +-- Can't make SDoc an instance of Show because SDoc is just a function type +-- However, Doc *is* an instance of Show +-- showSDoc just blasts it out as a string +showSDoc :: DynFlags -> SDoc -> String +showSDoc dflags sdoc = renderWithStyle dflags sdoc defaultUserStyle + +showSDocSimple :: SDoc -> String +showSDocSimple sdoc = showSDoc unsafeGlobalDynFlags sdoc + +showPpr :: Outputable a => DynFlags -> a -> String +showPpr dflags thing = showSDoc dflags (ppr thing) + +showSDocUnqual :: DynFlags -> SDoc -> String +-- Only used by Haddock +showSDocUnqual dflags sdoc = showSDoc dflags sdoc + +showSDocForUser :: DynFlags -> PrintUnqualified -> SDoc -> String +-- Allows caller to specify the PrintUnqualified to use +showSDocForUser dflags unqual doc + = renderWithStyle dflags doc (mkUserStyle unqual AllTheWay) + +showSDocDump :: DynFlags -> SDoc -> String +showSDocDump dflags d = renderWithStyle dflags d defaultDumpStyle + +showSDocDebug :: DynFlags -> SDoc -> String +showSDocDebug dflags d = renderWithStyle dflags d PprDebug + +renderWithStyle :: DynFlags -> SDoc -> PprStyle -> String +renderWithStyle dflags sdoc sty + = Pretty.showDoc PageMode (pprCols dflags) $ + runSDoc sdoc (initSDocContext dflags sty) + +-- This shows an SDoc, but on one line only. It's cheaper than a full +-- showSDoc, designed for when we're getting results like "Foo.bar" +-- and "foo{uniq strictness}" so we don't want fancy layout anyway. +showSDocOneLine :: DynFlags -> SDoc -> String +showSDocOneLine dflags d + = Pretty.showDoc OneLineMode (pprCols dflags) $ + runSDoc d (initSDocContext dflags defaultUserStyle) + +showSDocDumpOneLine :: DynFlags -> SDoc -> String +showSDocDumpOneLine dflags d + = Pretty.showDoc OneLineMode irrelevantNCols $ + runSDoc d (initSDocContext dflags defaultDumpStyle) + +irrelevantNCols :: Int +-- Used for OneLineMode and LeftMode when number of cols isn't used +irrelevantNCols = 1 + +docToSDoc :: Doc -> SDoc +docToSDoc d = SDoc (\_ -> d) + +empty :: SDoc +char :: Char -> SDoc +text :: String -> SDoc +ftext :: FastString -> SDoc +ptext :: LitString -> SDoc +ztext :: FastZString -> SDoc +int :: Int -> SDoc +integer :: Integer -> SDoc +float :: Float -> SDoc +double :: Double -> SDoc +rational :: Rational -> SDoc + +empty = docToSDoc $ Pretty.empty +char c = docToSDoc $ Pretty.char c + +text s = docToSDoc $ Pretty.text s +{-# INLINE text #-} -- Inline so that the RULE Pretty.text will fire + +ftext s = docToSDoc $ Pretty.ftext s +ptext s = docToSDoc $ Pretty.ptext s +ztext s = docToSDoc $ Pretty.ztext s +int n = docToSDoc $ Pretty.int n +integer n = docToSDoc $ Pretty.integer n +float n = docToSDoc $ Pretty.float n +double n = docToSDoc $ Pretty.double n +rational n = docToSDoc $ Pretty.rational n + +parens, braces, brackets, quotes, quote, + paBrackets, doubleQuotes, angleBrackets :: SDoc -> SDoc + +parens d = SDoc $ Pretty.parens . runSDoc d +braces d = SDoc $ Pretty.braces . runSDoc d +brackets d = SDoc $ Pretty.brackets . runSDoc d +quote d = SDoc $ Pretty.quote . runSDoc d +doubleQuotes d = SDoc $ Pretty.doubleQuotes . runSDoc d +angleBrackets d = char '<' <> d <> char '>' +paBrackets d = ptext (sLit "[:") <> d <> ptext (sLit ":]") + +cparen :: Bool -> SDoc -> SDoc + +cparen b d = SDoc $ Pretty.cparen b . runSDoc d + +-- 'quotes' encloses something in single quotes... +-- but it omits them if the thing begins or ends in a single quote +-- so that we don't get `foo''. Instead we just have foo'. +quotes d = + sdocWithDynFlags $ \dflags -> + if useUnicode dflags + then char '‘' <> d <> char '’' + else SDoc $ \sty -> + let pp_d = runSDoc d sty + str = show pp_d + in case (str, snocView str) of + (_, Just (_, '\'')) -> pp_d + ('\'' : _, _) -> pp_d + _other -> Pretty.quotes pp_d + +semi, comma, colon, equals, space, dcolon, underscore, dot :: SDoc +arrow, larrow, darrow, arrowt, larrowt, arrowtt, larrowtt :: SDoc +lparen, rparen, lbrack, rbrack, lbrace, rbrace, blankLine :: SDoc + +blankLine = docToSDoc $ Pretty.ptext (sLit "") +dcolon = unicodeSyntax (char '∷') (docToSDoc $ Pretty.ptext (sLit "::")) +arrow = unicodeSyntax (char '→') (docToSDoc $ Pretty.ptext (sLit "->")) +larrow = unicodeSyntax (char '←') (docToSDoc $ Pretty.ptext (sLit "<-")) +darrow = unicodeSyntax (char '⇒') (docToSDoc $ Pretty.ptext (sLit "=>")) +arrowt = unicodeSyntax (char '↣') (docToSDoc $ Pretty.ptext (sLit ">-")) +larrowt = unicodeSyntax (char '↢') (docToSDoc $ Pretty.ptext (sLit "-<")) +arrowtt = unicodeSyntax (char '⤜') (docToSDoc $ Pretty.ptext (sLit ">>-")) +larrowtt = unicodeSyntax (char '⤛') (docToSDoc $ Pretty.ptext (sLit "-<<")) +semi = docToSDoc $ Pretty.semi +comma = docToSDoc $ Pretty.comma +colon = docToSDoc $ Pretty.colon +equals = docToSDoc $ Pretty.equals +space = docToSDoc $ Pretty.space +underscore = char '_' +dot = char '.' +lparen = docToSDoc $ Pretty.lparen +rparen = docToSDoc $ Pretty.rparen +lbrack = docToSDoc $ Pretty.lbrack +rbrack = docToSDoc $ Pretty.rbrack +lbrace = docToSDoc $ Pretty.lbrace +rbrace = docToSDoc $ Pretty.rbrace + +forAllLit :: SDoc +forAllLit = unicodeSyntax (char '∀') (ptext (sLit "forall")) + +unicodeSyntax :: SDoc -> SDoc -> SDoc +unicodeSyntax unicode plain = sdocWithDynFlags $ \dflags -> + if useUnicode dflags && useUnicodeSyntax dflags + then unicode + else plain + +nest :: Int -> SDoc -> SDoc +-- ^ Indent 'SDoc' some specified amount +(<>) :: SDoc -> SDoc -> SDoc +-- ^ Join two 'SDoc' together horizontally without a gap +(<+>) :: SDoc -> SDoc -> SDoc +-- ^ Join two 'SDoc' together horizontally with a gap between them +($$) :: SDoc -> SDoc -> SDoc +-- ^ Join two 'SDoc' together vertically; if there is +-- no vertical overlap it "dovetails" the two onto one line +($+$) :: SDoc -> SDoc -> SDoc +-- ^ Join two 'SDoc' together vertically + +nest n d = SDoc $ Pretty.nest n . runSDoc d +(<>) d1 d2 = SDoc $ \sty -> (Pretty.<>) (runSDoc d1 sty) (runSDoc d2 sty) +(<+>) d1 d2 = SDoc $ \sty -> (Pretty.<+>) (runSDoc d1 sty) (runSDoc d2 sty) +($$) d1 d2 = SDoc $ \sty -> (Pretty.$$) (runSDoc d1 sty) (runSDoc d2 sty) +($+$) d1 d2 = SDoc $ \sty -> (Pretty.$+$) (runSDoc d1 sty) (runSDoc d2 sty) + +hcat :: [SDoc] -> SDoc +-- ^ Concatenate 'SDoc' horizontally +hsep :: [SDoc] -> SDoc +-- ^ Concatenate 'SDoc' horizontally with a space between each one +vcat :: [SDoc] -> SDoc +-- ^ Concatenate 'SDoc' vertically with dovetailing +sep :: [SDoc] -> SDoc +-- ^ Separate: is either like 'hsep' or like 'vcat', depending on what fits +cat :: [SDoc] -> SDoc +-- ^ Catenate: is either like 'hcat' or like 'vcat', depending on what fits +fsep :: [SDoc] -> SDoc +-- ^ A paragraph-fill combinator. It's much like sep, only it +-- keeps fitting things on one line until it can't fit any more. +fcat :: [SDoc] -> SDoc +-- ^ This behaves like 'fsep', but it uses '<>' for horizontal conposition rather than '<+>' + + +hcat ds = SDoc $ \sty -> Pretty.hcat [runSDoc d sty | d <- ds] +hsep ds = SDoc $ \sty -> Pretty.hsep [runSDoc d sty | d <- ds] +vcat ds = SDoc $ \sty -> Pretty.vcat [runSDoc d sty | d <- ds] +sep ds = SDoc $ \sty -> Pretty.sep [runSDoc d sty | d <- ds] +cat ds = SDoc $ \sty -> Pretty.cat [runSDoc d sty | d <- ds] +fsep ds = SDoc $ \sty -> Pretty.fsep [runSDoc d sty | d <- ds] +fcat ds = SDoc $ \sty -> Pretty.fcat [runSDoc d sty | d <- ds] + +hang :: SDoc -- ^ The header + -> Int -- ^ Amount to indent the hung body + -> SDoc -- ^ The hung body, indented and placed below the header + -> SDoc +hang d1 n d2 = SDoc $ \sty -> Pretty.hang (runSDoc d1 sty) n (runSDoc d2 sty) + +punctuate :: SDoc -- ^ The punctuation + -> [SDoc] -- ^ The list that will have punctuation added between every adjacent pair of elements + -> [SDoc] -- ^ Punctuated list +punctuate _ [] = [] +punctuate p (d:ds) = go d ds + where + go d [] = [d] + go d (e:es) = (d <> p) : go e es + +ppWhen, ppUnless :: Bool -> SDoc -> SDoc +ppWhen True doc = doc +ppWhen False _ = empty + +ppUnless True _ = empty +ppUnless False doc = doc + +-- | A colour\/style for use with 'coloured'. +newtype PprColour = PprColour String + +-- Colours + +colType :: PprColour +colType = PprColour "\27[34m" + +colBold :: PprColour +colBold = PprColour "\27[;1m" + +colCoerc :: PprColour +colCoerc = PprColour "\27[34m" + +colDataCon :: PprColour +colDataCon = PprColour "\27[31m" + +colBinder :: PprColour +colBinder = PprColour "\27[32m" + +colReset :: PprColour +colReset = PprColour "\27[0m" + +-- | Apply the given colour\/style for the argument. +-- +-- Only takes effect if colours are enabled. +coloured :: PprColour -> SDoc -> SDoc +-- TODO: coloured _ sdoc ctxt | coloursDisabled = sdoc ctxt +coloured col@(PprColour c) sdoc = + SDoc $ \ctx@SDC{ sdocLastColour = PprColour lc } -> + let ctx' = ctx{ sdocLastColour = col } in + Pretty.zeroWidthText c Pretty.<> runSDoc sdoc ctx' Pretty.<> Pretty.zeroWidthText lc + +bold :: SDoc -> SDoc +bold = coloured colBold + +keyword :: SDoc -> SDoc +keyword = bold + +{- +************************************************************************ +* * +\subsection[Outputable-class]{The @Outputable@ class} +* * +************************************************************************ +-} + +-- | Class designating that some type has an 'SDoc' representation +class Outputable a where + ppr :: a -> SDoc + pprPrec :: Rational -> a -> SDoc + -- 0 binds least tightly + -- We use Rational because there is always a + -- Rational between any other two Rationals + + ppr = pprPrec 0 + pprPrec _ = ppr + +instance Outputable Char where + ppr c = text [c] + +instance Outputable Bool where + ppr True = ptext (sLit "True") + ppr False = ptext (sLit "False") + +instance Outputable Int32 where + ppr n = integer $ fromIntegral n + +instance Outputable Int64 where + ppr n = integer $ fromIntegral n + +instance Outputable Int where + ppr n = int n + +instance Outputable Word16 where + ppr n = integer $ fromIntegral n + +instance Outputable Word32 where + ppr n = integer $ fromIntegral n + +instance Outputable Word where + ppr n = integer $ fromIntegral n + +instance Outputable () where + ppr _ = text "()" + +instance (Outputable a) => Outputable [a] where + ppr xs = brackets (fsep (punctuate comma (map ppr xs))) + +instance (Outputable a) => Outputable (Set a) where + ppr s = braces (fsep (punctuate comma (map ppr (Set.toList s)))) + +instance (Outputable a, Outputable b) => Outputable (a, b) where + ppr (x,y) = parens (sep [ppr x <> comma, ppr y]) + +instance Outputable a => Outputable (Maybe a) where + ppr Nothing = ptext (sLit "Nothing") + ppr (Just x) = ptext (sLit "Just") <+> ppr x + +instance (Outputable a, Outputable b) => Outputable (Either a b) where + ppr (Left x) = ptext (sLit "Left") <+> ppr x + ppr (Right y) = ptext (sLit "Right") <+> ppr y + +-- ToDo: may not be used +instance (Outputable a, Outputable b, Outputable c) => Outputable (a, b, c) where + ppr (x,y,z) = + parens (sep [ppr x <> comma, + ppr y <> comma, + ppr z ]) + +instance (Outputable a, Outputable b, Outputable c, Outputable d) => + Outputable (a, b, c, d) where + ppr (a,b,c,d) = + parens (sep [ppr a <> comma, + ppr b <> comma, + ppr c <> comma, + ppr d]) + +instance (Outputable a, Outputable b, Outputable c, Outputable d, Outputable e) => + Outputable (a, b, c, d, e) where + ppr (a,b,c,d,e) = + parens (sep [ppr a <> comma, + ppr b <> comma, + ppr c <> comma, + ppr d <> comma, + ppr e]) + +instance (Outputable a, Outputable b, Outputable c, Outputable d, Outputable e, Outputable f) => + Outputable (a, b, c, d, e, f) where + ppr (a,b,c,d,e,f) = + parens (sep [ppr a <> comma, + ppr b <> comma, + ppr c <> comma, + ppr d <> comma, + ppr e <> comma, + ppr f]) + +instance (Outputable a, Outputable b, Outputable c, Outputable d, Outputable e, Outputable f, Outputable g) => + Outputable (a, b, c, d, e, f, g) where + ppr (a,b,c,d,e,f,g) = + parens (sep [ppr a <> comma, + ppr b <> comma, + ppr c <> comma, + ppr d <> comma, + ppr e <> comma, + ppr f <> comma, + ppr g]) + +instance Outputable FastString where + ppr fs = ftext fs -- Prints an unadorned string, + -- no double quotes or anything + +instance (Outputable key, Outputable elt) => Outputable (M.Map key elt) where + ppr m = ppr (M.toList m) +instance (Outputable elt) => Outputable (IM.IntMap elt) where + ppr m = ppr (IM.toList m) + +instance Outputable Fingerprint where + ppr (Fingerprint w1 w2) = text (printf "%016x%016x" w1 w2) + +{- +************************************************************************ +* * +\subsection{The @OutputableBndr@ class} +* * +************************************************************************ +-} + +-- | 'BindingSite' is used to tell the thing that prints binder what +-- language construct is binding the identifier. This can be used +-- to decide how much info to print. +data BindingSite = LambdaBind | CaseBind | LetBind + +-- | When we print a binder, we often want to print its type too. +-- The @OutputableBndr@ class encapsulates this idea. +class Outputable a => OutputableBndr a where + pprBndr :: BindingSite -> a -> SDoc + pprBndr _b x = ppr x + + pprPrefixOcc, pprInfixOcc :: a -> SDoc + -- Print an occurrence of the name, suitable either in the + -- prefix position of an application, thus (f a b) or ((+) x) + -- or infix position, thus (a `f` b) or (x + y) + +{- +************************************************************************ +* * +\subsection{Random printing helpers} +* * +************************************************************************ +-} + +-- We have 31-bit Chars and will simply use Show instances of Char and String. + +-- | Special combinator for showing character literals. +pprHsChar :: Char -> SDoc +pprHsChar c | c > '\x10ffff' = char '\\' <> text (show (fromIntegral (ord c) :: Word32)) + | otherwise = text (show c) + +-- | Special combinator for showing string literals. +pprHsString :: FastString -> SDoc +pprHsString fs = vcat (map text (showMultiLineString (unpackFS fs))) + +-- | Special combinator for showing string literals. +pprHsBytes :: ByteString -> SDoc +pprHsBytes bs = let escaped = concatMap escape $ BS.unpack bs + in vcat (map text (showMultiLineString escaped)) <> char '#' + where escape :: Word8 -> String + escape w = let c = chr (fromIntegral w) + in if isAscii c + then [c] + else '\\' : show w + +--------------------- +-- Put a name in parens if it's an operator +pprPrefixVar :: Bool -> SDoc -> SDoc +pprPrefixVar is_operator pp_v + | is_operator = parens pp_v + | otherwise = pp_v + +-- Put a name in backquotes if it's not an operator +pprInfixVar :: Bool -> SDoc -> SDoc +pprInfixVar is_operator pp_v + | is_operator = pp_v + | otherwise = char '`' <> pp_v <> char '`' + +--------------------- +pprFastFilePath :: FastString -> SDoc +pprFastFilePath path = text $ normalise $ unpackFS path + +{- +************************************************************************ +* * +\subsection{Other helper functions} +* * +************************************************************************ +-} + +pprWithCommas :: (a -> SDoc) -- ^ The pretty printing function to use + -> [a] -- ^ The things to be pretty printed + -> SDoc -- ^ 'SDoc' where the things have been pretty printed, + -- comma-separated and finally packed into a paragraph. +pprWithCommas pp xs = fsep (punctuate comma (map pp xs)) + +-- | Returns the separated concatenation of the pretty printed things. +interppSP :: Outputable a => [a] -> SDoc +interppSP xs = sep (map ppr xs) + +-- | Returns the comma-separated concatenation of the pretty printed things. +interpp'SP :: Outputable a => [a] -> SDoc +interpp'SP xs = sep (punctuate comma (map ppr xs)) + +-- | Returns the comma-separated concatenation of the quoted pretty printed things. +-- +-- > [x,y,z] ==> `x', `y', `z' +pprQuotedList :: Outputable a => [a] -> SDoc +pprQuotedList = quotedList . map ppr + +quotedList :: [SDoc] -> SDoc +quotedList xs = hsep (punctuate comma (map quotes xs)) + +quotedListWithOr :: [SDoc] -> SDoc +-- [x,y,z] ==> `x', `y' or `z' +quotedListWithOr xs@(_:_:_) = quotedList (init xs) <+> ptext (sLit "or") <+> quotes (last xs) +quotedListWithOr xs = quotedList xs + +{- +************************************************************************ +* * +\subsection{Printing numbers verbally} +* * +************************************************************************ +-} + +intWithCommas :: Integral a => a -> SDoc +-- Prints a big integer with commas, eg 345,821 +intWithCommas n + | n < 0 = char '-' <> intWithCommas (-n) + | q == 0 = int (fromIntegral r) + | otherwise = intWithCommas q <> comma <> zeroes <> int (fromIntegral r) + where + (q,r) = n `quotRem` 1000 + zeroes | r >= 100 = empty + | r >= 10 = char '0' + | otherwise = ptext (sLit "00") + +-- | Converts an integer to a verbal index: +-- +-- > speakNth 1 = text "first" +-- > speakNth 5 = text "fifth" +-- > speakNth 21 = text "21st" +speakNth :: Int -> SDoc +speakNth 1 = ptext (sLit "first") +speakNth 2 = ptext (sLit "second") +speakNth 3 = ptext (sLit "third") +speakNth 4 = ptext (sLit "fourth") +speakNth 5 = ptext (sLit "fifth") +speakNth 6 = ptext (sLit "sixth") +speakNth n = hcat [ int n, text suffix ] + where + suffix | n <= 20 = "th" -- 11,12,13 are non-std + | last_dig == 1 = "st" + | last_dig == 2 = "nd" + | last_dig == 3 = "rd" + | otherwise = "th" + + last_dig = n `rem` 10 + +-- | Converts an integer to a verbal multiplicity: +-- +-- > speakN 0 = text "none" +-- > speakN 5 = text "five" +-- > speakN 10 = text "10" +speakN :: Int -> SDoc +speakN 0 = ptext (sLit "none") -- E.g. "he has none" +speakN 1 = ptext (sLit "one") -- E.g. "he has one" +speakN 2 = ptext (sLit "two") +speakN 3 = ptext (sLit "three") +speakN 4 = ptext (sLit "four") +speakN 5 = ptext (sLit "five") +speakN 6 = ptext (sLit "six") +speakN n = int n + +-- | Converts an integer and object description to a statement about the +-- multiplicity of those objects: +-- +-- > speakNOf 0 (text "melon") = text "no melons" +-- > speakNOf 1 (text "melon") = text "one melon" +-- > speakNOf 3 (text "melon") = text "three melons" +speakNOf :: Int -> SDoc -> SDoc +speakNOf 0 d = ptext (sLit "no") <+> d <> char 's' +speakNOf 1 d = ptext (sLit "one") <+> d -- E.g. "one argument" +speakNOf n d = speakN n <+> d <> char 's' -- E.g. "three arguments" + +-- | Converts a strictly positive integer into a number of times: +-- +-- > speakNTimes 1 = text "once" +-- > speakNTimes 2 = text "twice" +-- > speakNTimes 4 = text "4 times" +speakNTimes :: Int {- >=1 -} -> SDoc +speakNTimes t | t == 1 = ptext (sLit "once") + | t == 2 = ptext (sLit "twice") + | otherwise = speakN t <+> ptext (sLit "times") + +-- | Determines the pluralisation suffix appropriate for the length of a list: +-- +-- > plural [] = char 's' +-- > plural ["Hello"] = empty +-- > plural ["Hello", "World"] = char 's' +plural :: [a] -> SDoc +plural [_] = empty -- a bit frightening, but there you are +plural _ = char 's' + +-- | Determines the form of to be appropriate for the length of a list: +-- +-- > isOrAre [] = ptext (sLit "are") +-- > isOrAre ["Hello"] = ptext (sLit "is") +-- > isOrAre ["Hello", "World"] = ptext (sLit "are") +isOrAre :: [a] -> SDoc +isOrAre [_] = ptext (sLit "is") +isOrAre _ = ptext (sLit "are") + +{- +************************************************************************ +* * +\subsection{Error handling} +* * +************************************************************************ +-} + +pprPanic :: String -> SDoc -> a +-- ^ Throw an exception saying "bug in GHC" +pprPanic = panicDoc + +pprSorry :: String -> SDoc -> a +-- ^ Throw an exception saying "this isn't finished yet" +pprSorry = sorryDoc + + +pprPgmError :: String -> SDoc -> a +-- ^ Throw an exception saying "bug in pgm being compiled" (used for unusual program errors) +pprPgmError = pgmErrorDoc + + +pprTrace :: String -> SDoc -> a -> a +-- ^ If debug output is on, show some 'SDoc' on the screen +pprTrace str doc x + | opt_NoDebugOutput = x + | otherwise = pprDebugAndThen unsafeGlobalDynFlags trace (text str) doc x + +pprPanicFastInt :: String -> SDoc -> FastInt +-- ^ Specialization of pprPanic that can be safely used with 'FastInt' +pprPanicFastInt heading pretty_msg = panicDocFastInt heading pretty_msg + +warnPprTrace :: Bool -> String -> Int -> SDoc -> a -> a +-- ^ Just warn about an assertion failure, recording the given file and line number. +-- Should typically be accessed with the WARN macros +warnPprTrace _ _ _ _ x | not debugIsOn = x +warnPprTrace _ _file _line _msg x | opt_NoDebugOutput = x +warnPprTrace False _file _line _msg x = x +warnPprTrace True file line msg x + = pprDebugAndThen unsafeGlobalDynFlags trace heading msg x + where + heading = hsep [text "WARNING: file", text file <> comma, text "line", int line] + +assertPprPanic :: String -> Int -> SDoc -> a +-- ^ Panic with an assertation failure, recording the given file and line number. +-- Should typically be accessed with the ASSERT family of macros +assertPprPanic file line msg + = pprPanic "ASSERT failed!" doc + where + doc = sep [ hsep [ text "file", text file + , text "line", int line ] + , msg ] + +pprDebugAndThen :: DynFlags -> (String -> a) -> SDoc -> SDoc -> a +pprDebugAndThen dflags cont heading pretty_msg + = cont (showSDocDump dflags doc) + where + doc = sep [heading, nest 2 pretty_msg] diff --git a/compiler/utils/Outputable.hs-boot b/compiler/utils/Outputable.hs-boot new file mode 100644 index 0000000000..1c15a6982a --- /dev/null +++ b/compiler/utils/Outputable.hs-boot @@ -0,0 +1,3 @@ +module Outputable where + +data SDoc diff --git a/compiler/utils/Outputable.lhs b/compiler/utils/Outputable.lhs deleted file mode 100644 index a4ba48c609..0000000000 --- a/compiler/utils/Outputable.lhs +++ /dev/null @@ -1,1047 +0,0 @@ -% -% (c) The University of Glasgow 2006-2012 -% (c) The GRASP Project, Glasgow University, 1992-1998 -% - -\begin{code} --- | This module defines classes and functions for pretty-printing. It also --- exports a number of helpful debugging and other utilities such as 'trace' and 'panic'. --- --- The interface to this module is very similar to the standard Hughes-PJ pretty printing --- module, except that it exports a number of additional functions that are rarely used, --- and works over the 'SDoc' type. -module Outputable ( - -- * Type classes - Outputable(..), OutputableBndr(..), - - -- * Pretty printing combinators - SDoc, runSDoc, initSDocContext, - docToSDoc, - interppSP, interpp'SP, pprQuotedList, pprWithCommas, quotedListWithOr, - empty, nest, - char, - text, ftext, ptext, ztext, - int, intWithCommas, integer, float, double, rational, - parens, cparen, brackets, braces, quotes, quote, - doubleQuotes, angleBrackets, paBrackets, - semi, comma, colon, dcolon, space, equals, dot, - arrow, larrow, darrow, arrowt, larrowt, arrowtt, larrowtt, - lparen, rparen, lbrack, rbrack, lbrace, rbrace, underscore, - blankLine, forAllLit, - (<>), (<+>), hcat, hsep, - ($$), ($+$), vcat, - sep, cat, - fsep, fcat, - hang, punctuate, ppWhen, ppUnless, - speakNth, speakNTimes, speakN, speakNOf, plural, isOrAre, - - coloured, PprColour, colType, colCoerc, colDataCon, - colBinder, bold, keyword, - - -- * Converting 'SDoc' into strings and outputing it - printForC, printForAsm, printForUser, printForUserPartWay, - pprCode, mkCodeStyle, - showSDoc, showSDocSimple, showSDocOneLine, - showSDocForUser, showSDocDebug, showSDocDump, showSDocDumpOneLine, - showSDocUnqual, showPpr, - renderWithStyle, - - pprInfixVar, pprPrefixVar, - pprHsChar, pprHsString, pprHsBytes, - pprFastFilePath, - - -- * Controlling the style in which output is printed - BindingSite(..), - - PprStyle, CodeStyle(..), PrintUnqualified(..), - QueryQualifyName, QueryQualifyModule, QueryQualifyPackage, - reallyAlwaysQualify, reallyAlwaysQualifyNames, - alwaysQualify, alwaysQualifyNames, alwaysQualifyModules, - neverQualify, neverQualifyNames, neverQualifyModules, - QualifyName(..), queryQual, - sdocWithDynFlags, sdocWithPlatform, - getPprStyle, withPprStyle, withPprStyleDoc, - pprDeeper, pprDeeperList, pprSetDepth, - codeStyle, userStyle, debugStyle, dumpStyle, asmStyle, - ifPprDebug, qualName, qualModule, qualPackage, - mkErrStyle, defaultErrStyle, defaultDumpStyle, mkDumpStyle, defaultUserStyle, - mkUserStyle, cmdlineParserStyle, Depth(..), - - -- * Error handling and debugging utilities - pprPanic, pprSorry, assertPprPanic, pprPanicFastInt, pprPgmError, - pprTrace, warnPprTrace, - trace, pgmError, panic, sorry, panicFastInt, assertPanic, - pprDebugAndThen, - ) where - -import {-# SOURCE #-} DynFlags( DynFlags, - targetPlatform, pprUserLength, pprCols, - useUnicode, useUnicodeSyntax, - unsafeGlobalDynFlags ) -import {-# SOURCE #-} Module( PackageKey, Module, ModuleName, moduleName ) -import {-# SOURCE #-} OccName( OccName ) -import {-# SOURCE #-} StaticFlags( opt_PprStyle_Debug, opt_NoDebugOutput ) - -import FastString -import FastTypes -import qualified Pretty -import Util -import Platform -import Pretty ( Doc, Mode(..) ) -import Panic - -import Data.ByteString (ByteString) -import qualified Data.ByteString as BS -import Data.Char -import qualified Data.Map as M -import Data.Int -import qualified Data.IntMap as IM -import Data.Set (Set) -import qualified Data.Set as Set -import Data.Word -import System.IO ( Handle ) -import System.FilePath -import Text.Printf - -import GHC.Fingerprint -import GHC.Show ( showMultiLineString ) -\end{code} - - - -%************************************************************************ -%* * -\subsection{The @PprStyle@ data type} -%* * -%************************************************************************ - -\begin{code} - -data PprStyle - = PprUser PrintUnqualified Depth - -- Pretty-print in a way that will make sense to the - -- ordinary user; must be very close to Haskell - -- syntax, etc. - -- Assumes printing tidied code: non-system names are - -- printed without uniques. - - | PprDump PrintUnqualified - -- For -ddump-foo; less verbose than PprDebug, but more than PprUser - -- Does not assume tidied code: non-external names - -- are printed with uniques. - - | PprDebug -- Full debugging output - - | PprCode CodeStyle - -- Print code; either C or assembler - -data CodeStyle = CStyle -- The format of labels differs for C and assembler - | AsmStyle - -data Depth = AllTheWay - | PartWay Int -- 0 => stop - - --- ----------------------------------------------------------------------------- --- Printing original names - --- | When printing code that contains original names, we need to map the --- original names back to something the user understands. This is the --- purpose of the triple of functions that gets passed around --- when rendering 'SDoc'. -data PrintUnqualified = QueryQualify { - queryQualifyName :: QueryQualifyName, - queryQualifyModule :: QueryQualifyModule, - queryQualifyPackage :: QueryQualifyPackage -} - --- | given an /original/ name, this function tells you which module --- name it should be qualified with when printing for the user, if --- any. For example, given @Control.Exception.catch@, which is in scope --- as @Exception.catch@, this fuction will return @Just "Exception"@. --- Note that the return value is a ModuleName, not a Module, because --- in source code, names are qualified by ModuleNames. -type QueryQualifyName = Module -> OccName -> QualifyName - --- | For a given module, we need to know whether to print it with --- a package name to disambiguate it. -type QueryQualifyModule = Module -> Bool - --- | For a given package, we need to know whether to print it with --- the package key to disambiguate it. -type QueryQualifyPackage = PackageKey -> Bool - --- See Note [Printing original names] in HscTypes -data QualifyName -- given P:M.T - = NameUnqual -- refer to it as "T" - | NameQual ModuleName -- refer to it as "X.T" for the supplied X - | NameNotInScope1 - -- it is not in scope at all, but M.T is not bound in the current - -- scope, so we can refer to it as "M.T" - | NameNotInScope2 - -- it is not in scope at all, and M.T is already bound in the - -- current scope, so we must refer to it as "P:M.T" - -reallyAlwaysQualifyNames :: QueryQualifyName -reallyAlwaysQualifyNames _ _ = NameNotInScope2 - --- | NB: This won't ever show package IDs -alwaysQualifyNames :: QueryQualifyName -alwaysQualifyNames m _ = NameQual (moduleName m) - -neverQualifyNames :: QueryQualifyName -neverQualifyNames _ _ = NameUnqual - -alwaysQualifyModules :: QueryQualifyModule -alwaysQualifyModules _ = True - -neverQualifyModules :: QueryQualifyModule -neverQualifyModules _ = False - -alwaysQualifyPackages :: QueryQualifyPackage -alwaysQualifyPackages _ = True - -neverQualifyPackages :: QueryQualifyPackage -neverQualifyPackages _ = False - -reallyAlwaysQualify, alwaysQualify, neverQualify :: PrintUnqualified -reallyAlwaysQualify - = QueryQualify reallyAlwaysQualifyNames - alwaysQualifyModules - alwaysQualifyPackages -alwaysQualify = QueryQualify alwaysQualifyNames - alwaysQualifyModules - alwaysQualifyPackages -neverQualify = QueryQualify neverQualifyNames - neverQualifyModules - neverQualifyPackages - -defaultUserStyle, defaultDumpStyle :: PprStyle - -defaultUserStyle = mkUserStyle neverQualify AllTheWay - -- Print without qualifiers to reduce verbosity, unless -dppr-debug - -defaultDumpStyle | opt_PprStyle_Debug = PprDebug - | otherwise = PprDump neverQualify - -mkDumpStyle :: PrintUnqualified -> PprStyle -mkDumpStyle print_unqual | opt_PprStyle_Debug = PprDebug - | otherwise = PprDump print_unqual - -defaultErrStyle :: DynFlags -> PprStyle --- Default style for error messages, when we don't know PrintUnqualified --- It's a bit of a hack because it doesn't take into account what's in scope --- Only used for desugarer warnings, and typechecker errors in interface sigs --- NB that -dppr-debug will still get into PprDebug style -defaultErrStyle dflags = mkErrStyle dflags neverQualify - --- | Style for printing error messages -mkErrStyle :: DynFlags -> PrintUnqualified -> PprStyle -mkErrStyle dflags qual = mkUserStyle qual (PartWay (pprUserLength dflags)) - -cmdlineParserStyle :: PprStyle -cmdlineParserStyle = mkUserStyle alwaysQualify AllTheWay - -mkUserStyle :: PrintUnqualified -> Depth -> PprStyle -mkUserStyle unqual depth - | opt_PprStyle_Debug = PprDebug - | otherwise = PprUser unqual depth -\end{code} - -Orthogonal to the above printing styles are (possibly) some -command-line flags that affect printing (often carried with the -style). The most likely ones are variations on how much type info is -shown. - -The following test decides whether or not we are actually generating -code (either C or assembly), or generating interface files. - -%************************************************************************ -%* * -\subsection{The @SDoc@ data type} -%* * -%************************************************************************ - -\begin{code} -newtype SDoc = SDoc { runSDoc :: SDocContext -> Doc } - -data SDocContext = SDC - { sdocStyle :: !PprStyle - , sdocLastColour :: !PprColour - -- ^ The most recently used colour. This allows nesting colours. - , sdocDynFlags :: !DynFlags - } - -initSDocContext :: DynFlags -> PprStyle -> SDocContext -initSDocContext dflags sty = SDC - { sdocStyle = sty - , sdocLastColour = colReset - , sdocDynFlags = dflags - } - -withPprStyle :: PprStyle -> SDoc -> SDoc -withPprStyle sty d = SDoc $ \ctxt -> runSDoc d ctxt{sdocStyle=sty} - -withPprStyleDoc :: DynFlags -> PprStyle -> SDoc -> Doc -withPprStyleDoc dflags sty d = runSDoc d (initSDocContext dflags sty) - -pprDeeper :: SDoc -> SDoc -pprDeeper d = SDoc $ \ctx -> case ctx of - SDC{sdocStyle=PprUser _ (PartWay 0)} -> Pretty.text "..." - SDC{sdocStyle=PprUser q (PartWay n)} -> - runSDoc d ctx{sdocStyle = PprUser q (PartWay (n-1))} - _ -> runSDoc d ctx - -pprDeeperList :: ([SDoc] -> SDoc) -> [SDoc] -> SDoc --- Truncate a list that list that is longer than the current depth -pprDeeperList f ds - | null ds = f [] - | otherwise = SDoc work - where - work ctx@SDC{sdocStyle=PprUser q (PartWay n)} - | n==0 = Pretty.text "..." - | otherwise = - runSDoc (f (go 0 ds)) ctx{sdocStyle = PprUser q (PartWay (n-1))} - where - go _ [] = [] - go i (d:ds) | i >= n = [text "...."] - | otherwise = d : go (i+1) ds - work other_ctx = runSDoc (f ds) other_ctx - -pprSetDepth :: Depth -> SDoc -> SDoc -pprSetDepth depth doc = SDoc $ \ctx -> - case ctx of - SDC{sdocStyle=PprUser q _} -> - runSDoc doc ctx{sdocStyle = PprUser q depth} - _ -> - runSDoc doc ctx - -getPprStyle :: (PprStyle -> SDoc) -> SDoc -getPprStyle df = SDoc $ \ctx -> runSDoc (df (sdocStyle ctx)) ctx - -sdocWithDynFlags :: (DynFlags -> SDoc) -> SDoc -sdocWithDynFlags f = SDoc $ \ctx -> runSDoc (f (sdocDynFlags ctx)) ctx - -sdocWithPlatform :: (Platform -> SDoc) -> SDoc -sdocWithPlatform f = sdocWithDynFlags (f . targetPlatform) -\end{code} - -\begin{code} -qualName :: PprStyle -> QueryQualifyName -qualName (PprUser q _) mod occ = queryQualifyName q mod occ -qualName (PprDump q) mod occ = queryQualifyName q mod occ -qualName _other mod _ = NameQual (moduleName mod) - -qualModule :: PprStyle -> QueryQualifyModule -qualModule (PprUser q _) m = queryQualifyModule q m -qualModule (PprDump q) m = queryQualifyModule q m -qualModule _other _m = True - -qualPackage :: PprStyle -> QueryQualifyPackage -qualPackage (PprUser q _) m = queryQualifyPackage q m -qualPackage (PprDump q) m = queryQualifyPackage q m -qualPackage _other _m = True - -queryQual :: PprStyle -> PrintUnqualified -queryQual s = QueryQualify (qualName s) - (qualModule s) - (qualPackage s) - -codeStyle :: PprStyle -> Bool -codeStyle (PprCode _) = True -codeStyle _ = False - -asmStyle :: PprStyle -> Bool -asmStyle (PprCode AsmStyle) = True -asmStyle _other = False - -dumpStyle :: PprStyle -> Bool -dumpStyle (PprDump {}) = True -dumpStyle _other = False - -debugStyle :: PprStyle -> Bool -debugStyle PprDebug = True -debugStyle _other = False - -userStyle :: PprStyle -> Bool -userStyle (PprUser _ _) = True -userStyle _other = False - -ifPprDebug :: SDoc -> SDoc -- Empty for non-debug style -ifPprDebug d = SDoc $ \ctx -> - case ctx of - SDC{sdocStyle=PprDebug} -> runSDoc d ctx - _ -> Pretty.empty -\end{code} - -\begin{code} - -printForUser :: DynFlags -> Handle -> PrintUnqualified -> SDoc -> IO () -printForUser dflags handle unqual doc - = Pretty.printDoc PageMode (pprCols dflags) handle - (runSDoc doc (initSDocContext dflags (mkUserStyle unqual AllTheWay))) - -printForUserPartWay :: DynFlags -> Handle -> Int -> PrintUnqualified -> SDoc - -> IO () -printForUserPartWay dflags handle d unqual doc - = Pretty.printDoc PageMode (pprCols dflags) handle - (runSDoc doc (initSDocContext dflags (mkUserStyle unqual (PartWay d)))) - --- printForC, printForAsm do what they sound like -printForC :: DynFlags -> Handle -> SDoc -> IO () -printForC dflags handle doc = - Pretty.printDoc LeftMode (pprCols dflags) handle - (runSDoc doc (initSDocContext dflags (PprCode CStyle))) - -printForAsm :: DynFlags -> Handle -> SDoc -> IO () -printForAsm dflags handle doc = - Pretty.printDoc LeftMode (pprCols dflags) handle - (runSDoc doc (initSDocContext dflags (PprCode AsmStyle))) - -pprCode :: CodeStyle -> SDoc -> SDoc -pprCode cs d = withPprStyle (PprCode cs) d - -mkCodeStyle :: CodeStyle -> PprStyle -mkCodeStyle = PprCode - --- Can't make SDoc an instance of Show because SDoc is just a function type --- However, Doc *is* an instance of Show --- showSDoc just blasts it out as a string -showSDoc :: DynFlags -> SDoc -> String -showSDoc dflags sdoc = renderWithStyle dflags sdoc defaultUserStyle - -showSDocSimple :: SDoc -> String -showSDocSimple sdoc = showSDoc unsafeGlobalDynFlags sdoc - -showPpr :: Outputable a => DynFlags -> a -> String -showPpr dflags thing = showSDoc dflags (ppr thing) - -showSDocUnqual :: DynFlags -> SDoc -> String --- Only used by Haddock -showSDocUnqual dflags sdoc = showSDoc dflags sdoc - -showSDocForUser :: DynFlags -> PrintUnqualified -> SDoc -> String --- Allows caller to specify the PrintUnqualified to use -showSDocForUser dflags unqual doc - = renderWithStyle dflags doc (mkUserStyle unqual AllTheWay) - -showSDocDump :: DynFlags -> SDoc -> String -showSDocDump dflags d = renderWithStyle dflags d defaultDumpStyle - -showSDocDebug :: DynFlags -> SDoc -> String -showSDocDebug dflags d = renderWithStyle dflags d PprDebug - -renderWithStyle :: DynFlags -> SDoc -> PprStyle -> String -renderWithStyle dflags sdoc sty - = Pretty.showDoc PageMode (pprCols dflags) $ - runSDoc sdoc (initSDocContext dflags sty) - --- This shows an SDoc, but on one line only. It's cheaper than a full --- showSDoc, designed for when we're getting results like "Foo.bar" --- and "foo{uniq strictness}" so we don't want fancy layout anyway. -showSDocOneLine :: DynFlags -> SDoc -> String -showSDocOneLine dflags d - = Pretty.showDoc OneLineMode (pprCols dflags) $ - runSDoc d (initSDocContext dflags defaultUserStyle) - -showSDocDumpOneLine :: DynFlags -> SDoc -> String -showSDocDumpOneLine dflags d - = Pretty.showDoc OneLineMode irrelevantNCols $ - runSDoc d (initSDocContext dflags defaultDumpStyle) - -irrelevantNCols :: Int --- Used for OneLineMode and LeftMode when number of cols isn't used -irrelevantNCols = 1 -\end{code} - -\begin{code} -docToSDoc :: Doc -> SDoc -docToSDoc d = SDoc (\_ -> d) - -empty :: SDoc -char :: Char -> SDoc -text :: String -> SDoc -ftext :: FastString -> SDoc -ptext :: LitString -> SDoc -ztext :: FastZString -> SDoc -int :: Int -> SDoc -integer :: Integer -> SDoc -float :: Float -> SDoc -double :: Double -> SDoc -rational :: Rational -> SDoc - -empty = docToSDoc $ Pretty.empty -char c = docToSDoc $ Pretty.char c - -text s = docToSDoc $ Pretty.text s -{-# INLINE text #-} -- Inline so that the RULE Pretty.text will fire - -ftext s = docToSDoc $ Pretty.ftext s -ptext s = docToSDoc $ Pretty.ptext s -ztext s = docToSDoc $ Pretty.ztext s -int n = docToSDoc $ Pretty.int n -integer n = docToSDoc $ Pretty.integer n -float n = docToSDoc $ Pretty.float n -double n = docToSDoc $ Pretty.double n -rational n = docToSDoc $ Pretty.rational n - -parens, braces, brackets, quotes, quote, - paBrackets, doubleQuotes, angleBrackets :: SDoc -> SDoc - -parens d = SDoc $ Pretty.parens . runSDoc d -braces d = SDoc $ Pretty.braces . runSDoc d -brackets d = SDoc $ Pretty.brackets . runSDoc d -quote d = SDoc $ Pretty.quote . runSDoc d -doubleQuotes d = SDoc $ Pretty.doubleQuotes . runSDoc d -angleBrackets d = char '<' <> d <> char '>' -paBrackets d = ptext (sLit "[:") <> d <> ptext (sLit ":]") - -cparen :: Bool -> SDoc -> SDoc - -cparen b d = SDoc $ Pretty.cparen b . runSDoc d - --- 'quotes' encloses something in single quotes... --- but it omits them if the thing begins or ends in a single quote --- so that we don't get `foo''. Instead we just have foo'. -quotes d = - sdocWithDynFlags $ \dflags -> - if useUnicode dflags - then char '‘' <> d <> char '’' - else SDoc $ \sty -> - let pp_d = runSDoc d sty - str = show pp_d - in case (str, snocView str) of - (_, Just (_, '\'')) -> pp_d - ('\'' : _, _) -> pp_d - _other -> Pretty.quotes pp_d - -semi, comma, colon, equals, space, dcolon, underscore, dot :: SDoc -arrow, larrow, darrow, arrowt, larrowt, arrowtt, larrowtt :: SDoc -lparen, rparen, lbrack, rbrack, lbrace, rbrace, blankLine :: SDoc - -blankLine = docToSDoc $ Pretty.ptext (sLit "") -dcolon = unicodeSyntax (char '∷') (docToSDoc $ Pretty.ptext (sLit "::")) -arrow = unicodeSyntax (char '→') (docToSDoc $ Pretty.ptext (sLit "->")) -larrow = unicodeSyntax (char '←') (docToSDoc $ Pretty.ptext (sLit "<-")) -darrow = unicodeSyntax (char '⇒') (docToSDoc $ Pretty.ptext (sLit "=>")) -arrowt = unicodeSyntax (char '↣') (docToSDoc $ Pretty.ptext (sLit ">-")) -larrowt = unicodeSyntax (char '↢') (docToSDoc $ Pretty.ptext (sLit "-<")) -arrowtt = unicodeSyntax (char '⤜') (docToSDoc $ Pretty.ptext (sLit ">>-")) -larrowtt = unicodeSyntax (char '⤛') (docToSDoc $ Pretty.ptext (sLit "-<<")) -semi = docToSDoc $ Pretty.semi -comma = docToSDoc $ Pretty.comma -colon = docToSDoc $ Pretty.colon -equals = docToSDoc $ Pretty.equals -space = docToSDoc $ Pretty.space -underscore = char '_' -dot = char '.' -lparen = docToSDoc $ Pretty.lparen -rparen = docToSDoc $ Pretty.rparen -lbrack = docToSDoc $ Pretty.lbrack -rbrack = docToSDoc $ Pretty.rbrack -lbrace = docToSDoc $ Pretty.lbrace -rbrace = docToSDoc $ Pretty.rbrace - -forAllLit :: SDoc -forAllLit = unicodeSyntax (char '∀') (ptext (sLit "forall")) - -unicodeSyntax :: SDoc -> SDoc -> SDoc -unicodeSyntax unicode plain = sdocWithDynFlags $ \dflags -> - if useUnicode dflags && useUnicodeSyntax dflags - then unicode - else plain - -nest :: Int -> SDoc -> SDoc --- ^ Indent 'SDoc' some specified amount -(<>) :: SDoc -> SDoc -> SDoc --- ^ Join two 'SDoc' together horizontally without a gap -(<+>) :: SDoc -> SDoc -> SDoc --- ^ Join two 'SDoc' together horizontally with a gap between them -($$) :: SDoc -> SDoc -> SDoc --- ^ Join two 'SDoc' together vertically; if there is --- no vertical overlap it "dovetails" the two onto one line -($+$) :: SDoc -> SDoc -> SDoc --- ^ Join two 'SDoc' together vertically - -nest n d = SDoc $ Pretty.nest n . runSDoc d -(<>) d1 d2 = SDoc $ \sty -> (Pretty.<>) (runSDoc d1 sty) (runSDoc d2 sty) -(<+>) d1 d2 = SDoc $ \sty -> (Pretty.<+>) (runSDoc d1 sty) (runSDoc d2 sty) -($$) d1 d2 = SDoc $ \sty -> (Pretty.$$) (runSDoc d1 sty) (runSDoc d2 sty) -($+$) d1 d2 = SDoc $ \sty -> (Pretty.$+$) (runSDoc d1 sty) (runSDoc d2 sty) - -hcat :: [SDoc] -> SDoc --- ^ Concatenate 'SDoc' horizontally -hsep :: [SDoc] -> SDoc --- ^ Concatenate 'SDoc' horizontally with a space between each one -vcat :: [SDoc] -> SDoc --- ^ Concatenate 'SDoc' vertically with dovetailing -sep :: [SDoc] -> SDoc --- ^ Separate: is either like 'hsep' or like 'vcat', depending on what fits -cat :: [SDoc] -> SDoc --- ^ Catenate: is either like 'hcat' or like 'vcat', depending on what fits -fsep :: [SDoc] -> SDoc --- ^ A paragraph-fill combinator. It's much like sep, only it --- keeps fitting things on one line until it can't fit any more. -fcat :: [SDoc] -> SDoc --- ^ This behaves like 'fsep', but it uses '<>' for horizontal conposition rather than '<+>' - - -hcat ds = SDoc $ \sty -> Pretty.hcat [runSDoc d sty | d <- ds] -hsep ds = SDoc $ \sty -> Pretty.hsep [runSDoc d sty | d <- ds] -vcat ds = SDoc $ \sty -> Pretty.vcat [runSDoc d sty | d <- ds] -sep ds = SDoc $ \sty -> Pretty.sep [runSDoc d sty | d <- ds] -cat ds = SDoc $ \sty -> Pretty.cat [runSDoc d sty | d <- ds] -fsep ds = SDoc $ \sty -> Pretty.fsep [runSDoc d sty | d <- ds] -fcat ds = SDoc $ \sty -> Pretty.fcat [runSDoc d sty | d <- ds] - -hang :: SDoc -- ^ The header - -> Int -- ^ Amount to indent the hung body - -> SDoc -- ^ The hung body, indented and placed below the header - -> SDoc -hang d1 n d2 = SDoc $ \sty -> Pretty.hang (runSDoc d1 sty) n (runSDoc d2 sty) - -punctuate :: SDoc -- ^ The punctuation - -> [SDoc] -- ^ The list that will have punctuation added between every adjacent pair of elements - -> [SDoc] -- ^ Punctuated list -punctuate _ [] = [] -punctuate p (d:ds) = go d ds - where - go d [] = [d] - go d (e:es) = (d <> p) : go e es - -ppWhen, ppUnless :: Bool -> SDoc -> SDoc -ppWhen True doc = doc -ppWhen False _ = empty - -ppUnless True _ = empty -ppUnless False doc = doc - --- | A colour\/style for use with 'coloured'. -newtype PprColour = PprColour String - --- Colours - -colType :: PprColour -colType = PprColour "\27[34m" - -colBold :: PprColour -colBold = PprColour "\27[;1m" - -colCoerc :: PprColour -colCoerc = PprColour "\27[34m" - -colDataCon :: PprColour -colDataCon = PprColour "\27[31m" - -colBinder :: PprColour -colBinder = PprColour "\27[32m" - -colReset :: PprColour -colReset = PprColour "\27[0m" - --- | Apply the given colour\/style for the argument. --- --- Only takes effect if colours are enabled. -coloured :: PprColour -> SDoc -> SDoc --- TODO: coloured _ sdoc ctxt | coloursDisabled = sdoc ctxt -coloured col@(PprColour c) sdoc = - SDoc $ \ctx@SDC{ sdocLastColour = PprColour lc } -> - let ctx' = ctx{ sdocLastColour = col } in - Pretty.zeroWidthText c Pretty.<> runSDoc sdoc ctx' Pretty.<> Pretty.zeroWidthText lc - -bold :: SDoc -> SDoc -bold = coloured colBold - -keyword :: SDoc -> SDoc -keyword = bold - -\end{code} - - -%************************************************************************ -%* * -\subsection[Outputable-class]{The @Outputable@ class} -%* * -%************************************************************************ - -\begin{code} --- | Class designating that some type has an 'SDoc' representation -class Outputable a where - ppr :: a -> SDoc - pprPrec :: Rational -> a -> SDoc - -- 0 binds least tightly - -- We use Rational because there is always a - -- Rational between any other two Rationals - - ppr = pprPrec 0 - pprPrec _ = ppr -\end{code} - -\begin{code} -instance Outputable Char where - ppr c = text [c] - -instance Outputable Bool where - ppr True = ptext (sLit "True") - ppr False = ptext (sLit "False") - -instance Outputable Int32 where - ppr n = integer $ fromIntegral n - -instance Outputable Int64 where - ppr n = integer $ fromIntegral n - -instance Outputable Int where - ppr n = int n - -instance Outputable Word16 where - ppr n = integer $ fromIntegral n - -instance Outputable Word32 where - ppr n = integer $ fromIntegral n - -instance Outputable Word where - ppr n = integer $ fromIntegral n - -instance Outputable () where - ppr _ = text "()" - -instance (Outputable a) => Outputable [a] where - ppr xs = brackets (fsep (punctuate comma (map ppr xs))) - -instance (Outputable a) => Outputable (Set a) where - ppr s = braces (fsep (punctuate comma (map ppr (Set.toList s)))) - -instance (Outputable a, Outputable b) => Outputable (a, b) where - ppr (x,y) = parens (sep [ppr x <> comma, ppr y]) - -instance Outputable a => Outputable (Maybe a) where - ppr Nothing = ptext (sLit "Nothing") - ppr (Just x) = ptext (sLit "Just") <+> ppr x - -instance (Outputable a, Outputable b) => Outputable (Either a b) where - ppr (Left x) = ptext (sLit "Left") <+> ppr x - ppr (Right y) = ptext (sLit "Right") <+> ppr y - --- ToDo: may not be used -instance (Outputable a, Outputable b, Outputable c) => Outputable (a, b, c) where - ppr (x,y,z) = - parens (sep [ppr x <> comma, - ppr y <> comma, - ppr z ]) - -instance (Outputable a, Outputable b, Outputable c, Outputable d) => - Outputable (a, b, c, d) where - ppr (a,b,c,d) = - parens (sep [ppr a <> comma, - ppr b <> comma, - ppr c <> comma, - ppr d]) - -instance (Outputable a, Outputable b, Outputable c, Outputable d, Outputable e) => - Outputable (a, b, c, d, e) where - ppr (a,b,c,d,e) = - parens (sep [ppr a <> comma, - ppr b <> comma, - ppr c <> comma, - ppr d <> comma, - ppr e]) - -instance (Outputable a, Outputable b, Outputable c, Outputable d, Outputable e, Outputable f) => - Outputable (a, b, c, d, e, f) where - ppr (a,b,c,d,e,f) = - parens (sep [ppr a <> comma, - ppr b <> comma, - ppr c <> comma, - ppr d <> comma, - ppr e <> comma, - ppr f]) - -instance (Outputable a, Outputable b, Outputable c, Outputable d, Outputable e, Outputable f, Outputable g) => - Outputable (a, b, c, d, e, f, g) where - ppr (a,b,c,d,e,f,g) = - parens (sep [ppr a <> comma, - ppr b <> comma, - ppr c <> comma, - ppr d <> comma, - ppr e <> comma, - ppr f <> comma, - ppr g]) - -instance Outputable FastString where - ppr fs = ftext fs -- Prints an unadorned string, - -- no double quotes or anything - -instance (Outputable key, Outputable elt) => Outputable (M.Map key elt) where - ppr m = ppr (M.toList m) -instance (Outputable elt) => Outputable (IM.IntMap elt) where - ppr m = ppr (IM.toList m) - -instance Outputable Fingerprint where - ppr (Fingerprint w1 w2) = text (printf "%016x%016x" w1 w2) -\end{code} - -%************************************************************************ -%* * -\subsection{The @OutputableBndr@ class} -%* * -%************************************************************************ - -\begin{code} --- | 'BindingSite' is used to tell the thing that prints binder what --- language construct is binding the identifier. This can be used --- to decide how much info to print. -data BindingSite = LambdaBind | CaseBind | LetBind - --- | When we print a binder, we often want to print its type too. --- The @OutputableBndr@ class encapsulates this idea. -class Outputable a => OutputableBndr a where - pprBndr :: BindingSite -> a -> SDoc - pprBndr _b x = ppr x - - pprPrefixOcc, pprInfixOcc :: a -> SDoc - -- Print an occurrence of the name, suitable either in the - -- prefix position of an application, thus (f a b) or ((+) x) - -- or infix position, thus (a `f` b) or (x + y) -\end{code} - -%************************************************************************ -%* * -\subsection{Random printing helpers} -%* * -%************************************************************************ - -\begin{code} --- We have 31-bit Chars and will simply use Show instances of Char and String. - --- | Special combinator for showing character literals. -pprHsChar :: Char -> SDoc -pprHsChar c | c > '\x10ffff' = char '\\' <> text (show (fromIntegral (ord c) :: Word32)) - | otherwise = text (show c) - --- | Special combinator for showing string literals. -pprHsString :: FastString -> SDoc -pprHsString fs = vcat (map text (showMultiLineString (unpackFS fs))) - --- | Special combinator for showing string literals. -pprHsBytes :: ByteString -> SDoc -pprHsBytes bs = let escaped = concatMap escape $ BS.unpack bs - in vcat (map text (showMultiLineString escaped)) <> char '#' - where escape :: Word8 -> String - escape w = let c = chr (fromIntegral w) - in if isAscii c - then [c] - else '\\' : show w - ---------------------- --- Put a name in parens if it's an operator -pprPrefixVar :: Bool -> SDoc -> SDoc -pprPrefixVar is_operator pp_v - | is_operator = parens pp_v - | otherwise = pp_v - --- Put a name in backquotes if it's not an operator -pprInfixVar :: Bool -> SDoc -> SDoc -pprInfixVar is_operator pp_v - | is_operator = pp_v - | otherwise = char '`' <> pp_v <> char '`' - ---------------------- -pprFastFilePath :: FastString -> SDoc -pprFastFilePath path = text $ normalise $ unpackFS path -\end{code} - -%************************************************************************ -%* * -\subsection{Other helper functions} -%* * -%************************************************************************ - -\begin{code} -pprWithCommas :: (a -> SDoc) -- ^ The pretty printing function to use - -> [a] -- ^ The things to be pretty printed - -> SDoc -- ^ 'SDoc' where the things have been pretty printed, - -- comma-separated and finally packed into a paragraph. -pprWithCommas pp xs = fsep (punctuate comma (map pp xs)) - --- | Returns the separated concatenation of the pretty printed things. -interppSP :: Outputable a => [a] -> SDoc -interppSP xs = sep (map ppr xs) - --- | Returns the comma-separated concatenation of the pretty printed things. -interpp'SP :: Outputable a => [a] -> SDoc -interpp'SP xs = sep (punctuate comma (map ppr xs)) - --- | Returns the comma-separated concatenation of the quoted pretty printed things. --- --- > [x,y,z] ==> `x', `y', `z' -pprQuotedList :: Outputable a => [a] -> SDoc -pprQuotedList = quotedList . map ppr - -quotedList :: [SDoc] -> SDoc -quotedList xs = hsep (punctuate comma (map quotes xs)) - -quotedListWithOr :: [SDoc] -> SDoc --- [x,y,z] ==> `x', `y' or `z' -quotedListWithOr xs@(_:_:_) = quotedList (init xs) <+> ptext (sLit "or") <+> quotes (last xs) -quotedListWithOr xs = quotedList xs -\end{code} - - -%************************************************************************ -%* * -\subsection{Printing numbers verbally} -%* * -%************************************************************************ - -\begin{code} -intWithCommas :: Integral a => a -> SDoc --- Prints a big integer with commas, eg 345,821 -intWithCommas n - | n < 0 = char '-' <> intWithCommas (-n) - | q == 0 = int (fromIntegral r) - | otherwise = intWithCommas q <> comma <> zeroes <> int (fromIntegral r) - where - (q,r) = n `quotRem` 1000 - zeroes | r >= 100 = empty - | r >= 10 = char '0' - | otherwise = ptext (sLit "00") - --- | Converts an integer to a verbal index: --- --- > speakNth 1 = text "first" --- > speakNth 5 = text "fifth" --- > speakNth 21 = text "21st" -speakNth :: Int -> SDoc -speakNth 1 = ptext (sLit "first") -speakNth 2 = ptext (sLit "second") -speakNth 3 = ptext (sLit "third") -speakNth 4 = ptext (sLit "fourth") -speakNth 5 = ptext (sLit "fifth") -speakNth 6 = ptext (sLit "sixth") -speakNth n = hcat [ int n, text suffix ] - where - suffix | n <= 20 = "th" -- 11,12,13 are non-std - | last_dig == 1 = "st" - | last_dig == 2 = "nd" - | last_dig == 3 = "rd" - | otherwise = "th" - - last_dig = n `rem` 10 - --- | Converts an integer to a verbal multiplicity: --- --- > speakN 0 = text "none" --- > speakN 5 = text "five" --- > speakN 10 = text "10" -speakN :: Int -> SDoc -speakN 0 = ptext (sLit "none") -- E.g. "he has none" -speakN 1 = ptext (sLit "one") -- E.g. "he has one" -speakN 2 = ptext (sLit "two") -speakN 3 = ptext (sLit "three") -speakN 4 = ptext (sLit "four") -speakN 5 = ptext (sLit "five") -speakN 6 = ptext (sLit "six") -speakN n = int n - --- | Converts an integer and object description to a statement about the --- multiplicity of those objects: --- --- > speakNOf 0 (text "melon") = text "no melons" --- > speakNOf 1 (text "melon") = text "one melon" --- > speakNOf 3 (text "melon") = text "three melons" -speakNOf :: Int -> SDoc -> SDoc -speakNOf 0 d = ptext (sLit "no") <+> d <> char 's' -speakNOf 1 d = ptext (sLit "one") <+> d -- E.g. "one argument" -speakNOf n d = speakN n <+> d <> char 's' -- E.g. "three arguments" - --- | Converts a strictly positive integer into a number of times: --- --- > speakNTimes 1 = text "once" --- > speakNTimes 2 = text "twice" --- > speakNTimes 4 = text "4 times" -speakNTimes :: Int {- >=1 -} -> SDoc -speakNTimes t | t == 1 = ptext (sLit "once") - | t == 2 = ptext (sLit "twice") - | otherwise = speakN t <+> ptext (sLit "times") - --- | Determines the pluralisation suffix appropriate for the length of a list: --- --- > plural [] = char 's' --- > plural ["Hello"] = empty --- > plural ["Hello", "World"] = char 's' -plural :: [a] -> SDoc -plural [_] = empty -- a bit frightening, but there you are -plural _ = char 's' - --- | Determines the form of to be appropriate for the length of a list: --- --- > isOrAre [] = ptext (sLit "are") --- > isOrAre ["Hello"] = ptext (sLit "is") --- > isOrAre ["Hello", "World"] = ptext (sLit "are") -isOrAre :: [a] -> SDoc -isOrAre [_] = ptext (sLit "is") -isOrAre _ = ptext (sLit "are") -\end{code} - - -%************************************************************************ -%* * -\subsection{Error handling} -%* * -%************************************************************************ - -\begin{code} - -pprPanic :: String -> SDoc -> a --- ^ Throw an exception saying "bug in GHC" -pprPanic = panicDoc - -pprSorry :: String -> SDoc -> a --- ^ Throw an exception saying "this isn't finished yet" -pprSorry = sorryDoc - - -pprPgmError :: String -> SDoc -> a --- ^ Throw an exception saying "bug in pgm being compiled" (used for unusual program errors) -pprPgmError = pgmErrorDoc - - -pprTrace :: String -> SDoc -> a -> a --- ^ If debug output is on, show some 'SDoc' on the screen -pprTrace str doc x - | opt_NoDebugOutput = x - | otherwise = pprDebugAndThen unsafeGlobalDynFlags trace (text str) doc x - -pprPanicFastInt :: String -> SDoc -> FastInt --- ^ Specialization of pprPanic that can be safely used with 'FastInt' -pprPanicFastInt heading pretty_msg = panicDocFastInt heading pretty_msg - -warnPprTrace :: Bool -> String -> Int -> SDoc -> a -> a --- ^ Just warn about an assertion failure, recording the given file and line number. --- Should typically be accessed with the WARN macros -warnPprTrace _ _ _ _ x | not debugIsOn = x -warnPprTrace _ _file _line _msg x | opt_NoDebugOutput = x -warnPprTrace False _file _line _msg x = x -warnPprTrace True file line msg x - = pprDebugAndThen unsafeGlobalDynFlags trace heading msg x - where - heading = hsep [text "WARNING: file", text file <> comma, text "line", int line] - -assertPprPanic :: String -> Int -> SDoc -> a --- ^ Panic with an assertation failure, recording the given file and line number. --- Should typically be accessed with the ASSERT family of macros -assertPprPanic file line msg - = pprPanic "ASSERT failed!" doc - where - doc = sep [ hsep [ text "file", text file - , text "line", int line ] - , msg ] - -pprDebugAndThen :: DynFlags -> (String -> a) -> SDoc -> SDoc -> a -pprDebugAndThen dflags cont heading pretty_msg - = cont (showSDocDump dflags doc) - where - doc = sep [heading, nest 2 pretty_msg] -\end{code} - diff --git a/compiler/utils/Outputable.lhs-boot b/compiler/utils/Outputable.lhs-boot deleted file mode 100644 index e013307ef9..0000000000 --- a/compiler/utils/Outputable.lhs-boot +++ /dev/null @@ -1,7 +0,0 @@ - -\begin{code} -module Outputable where - -data SDoc -\end{code} - diff --git a/compiler/utils/Pair.hs b/compiler/utils/Pair.hs new file mode 100644 index 0000000000..f2d39de48e --- /dev/null +++ b/compiler/utils/Pair.hs @@ -0,0 +1,50 @@ +{- +A simple homogeneous pair type with useful Functor, Applicative, and +Traversable instances. +-} + +{-# LANGUAGE CPP #-} + +module Pair ( Pair(..), unPair, toPair, swap ) where + +#include "HsVersions.h" + +import Outputable +import Control.Applicative +#if __GLASGOW_HASKELL__ < 709 +import Data.Foldable +import Data.Monoid +import Data.Traversable +#endif + +data Pair a = Pair { pFst :: a, pSnd :: a } +-- Note that Pair is a *unary* type constructor +-- whereas (,) is binary + +-- The important thing about Pair is that it has a *homogenous* +-- Functor instance, so you can easily apply the same function +-- to both components +instance Functor Pair where + fmap f (Pair x y) = Pair (f x) (f y) + +instance Applicative Pair where + pure x = Pair x x + (Pair f g) <*> (Pair x y) = Pair (f x) (g y) + +instance Foldable Pair where + foldMap f (Pair x y) = f x `mappend` f y + +instance Traversable Pair where + traverse f (Pair x y) = Pair <$> f x <*> f y + +instance Outputable a => Outputable (Pair a) where + ppr (Pair a b) = ppr a <+> char '~' <+> ppr b + +unPair :: Pair a -> (a,a) +unPair (Pair x y) = (x,y) + +toPair :: (a,a) -> Pair a +toPair (x,y) = Pair x y + +swap :: Pair a -> Pair a +swap (Pair x y) = Pair y x diff --git a/compiler/utils/Pair.lhs b/compiler/utils/Pair.lhs deleted file mode 100644 index 529ba669ea..0000000000 --- a/compiler/utils/Pair.lhs +++ /dev/null @@ -1,51 +0,0 @@ - -A simple homogeneous pair type with useful Functor, Applicative, and -Traversable instances. - -\begin{code} -{-# LANGUAGE CPP #-} - -module Pair ( Pair(..), unPair, toPair, swap ) where - -#include "HsVersions.h" - -import Outputable -import Control.Applicative -#if __GLASGOW_HASKELL__ < 709 -import Data.Foldable -import Data.Monoid -import Data.Traversable -#endif - -data Pair a = Pair { pFst :: a, pSnd :: a } --- Note that Pair is a *unary* type constructor --- whereas (,) is binary - --- The important thing about Pair is that it has a *homogenous* --- Functor instance, so you can easily apply the same function --- to both components -instance Functor Pair where - fmap f (Pair x y) = Pair (f x) (f y) - -instance Applicative Pair where - pure x = Pair x x - (Pair f g) <*> (Pair x y) = Pair (f x) (g y) - -instance Foldable Pair where - foldMap f (Pair x y) = f x `mappend` f y - -instance Traversable Pair where - traverse f (Pair x y) = Pair <$> f x <*> f y - -instance Outputable a => Outputable (Pair a) where - ppr (Pair a b) = ppr a <+> char '~' <+> ppr b - -unPair :: Pair a -> (a,a) -unPair (Pair x y) = (x,y) - -toPair :: (a,a) -> Pair a -toPair (x,y) = Pair x y - -swap :: Pair a -> Pair a -swap (Pair x y) = Pair y x -\end{code} diff --git a/compiler/utils/Panic.hs b/compiler/utils/Panic.hs new file mode 100644 index 0000000000..bfb9df3ad3 --- /dev/null +++ b/compiler/utils/Panic.hs @@ -0,0 +1,307 @@ +{- +(c) The University of Glasgow 2006 +(c) The GRASP Project, Glasgow University, 1992-2000 + +Defines basic functions for printing error messages. + +It's hard to put these functions anywhere else without causing +some unnecessary loops in the module dependency graph. +-} + +{-# LANGUAGE CPP, DeriveDataTypeable, ScopedTypeVariables #-} + +module Panic ( + GhcException(..), showGhcException, + throwGhcException, throwGhcExceptionIO, + handleGhcException, + progName, + pgmError, + + panic, sorry, panicFastInt, assertPanic, trace, + panicDoc, sorryDoc, panicDocFastInt, pgmErrorDoc, + + Exception.Exception(..), showException, safeShowException, try, tryMost, throwTo, + + installSignalHandlers, + pushInterruptTargetThread, popInterruptTargetThread +) where +#include "HsVersions.h" + +import {-# SOURCE #-} Outputable (SDoc) + +import Config +import FastTypes +import Exception + +import Control.Concurrent +import Data.Dynamic +import Debug.Trace ( trace ) +import System.IO.Unsafe +import System.Exit +import System.Environment + +#ifndef mingw32_HOST_OS +import System.Posix.Signals +#endif + +#if defined(mingw32_HOST_OS) +import GHC.ConsoleHandler +#endif + +import GHC.Stack +import System.Mem.Weak ( Weak, deRefWeak ) + +-- | GHC's own exception type +-- error messages all take the form: +-- +-- @ +-- : +-- @ +-- +-- If the location is on the command line, or in GHC itself, then +-- ="ghc". All of the error types below correspond to +-- a of "ghc", except for ProgramError (where the string is +-- assumed to contain a location already, so we don't print one). + +data GhcException + = PhaseFailed String -- name of phase + ExitCode -- an external phase (eg. cpp) failed + + -- | Some other fatal signal (SIGHUP,SIGTERM) + | Signal Int + + -- | Prints the short usage msg after the error + | UsageError String + + -- | A problem with the command line arguments, but don't print usage. + | CmdLineError String + + -- | The 'impossible' happened. + | Panic String + | PprPanic String SDoc + + -- | The user tickled something that's known not to work yet, + -- but we're not counting it as a bug. + | Sorry String + | PprSorry String SDoc + + -- | An installation problem. + | InstallationError String + + -- | An error in the user's code, probably. + | ProgramError String + | PprProgramError String SDoc + deriving (Typeable) + +instance Exception GhcException + +instance Show GhcException where + showsPrec _ e@(ProgramError _) = showGhcException e + showsPrec _ e@(CmdLineError _) = showString ": " . showGhcException e + showsPrec _ e = showString progName . showString ": " . showGhcException e + + +-- | The name of this GHC. +progName :: String +progName = unsafePerformIO (getProgName) +{-# NOINLINE progName #-} + + +-- | Short usage information to display when we are given the wrong cmd line arguments. +short_usage :: String +short_usage = "Usage: For basic information, try the `--help' option." + + +-- | Show an exception as a string. +showException :: Exception e => e -> String +showException = show + +-- | Show an exception which can possibly throw other exceptions. +-- Used when displaying exception thrown within TH code. +safeShowException :: Exception e => e -> IO String +safeShowException e = do + -- ensure the whole error message is evaluated inside try + r <- try (return $! forceList (showException e)) + case r of + Right msg -> return msg + Left e' -> safeShowException (e' :: SomeException) + where + forceList [] = [] + forceList xs@(x : xt) = x `seq` forceList xt `seq` xs + +-- | Append a description of the given exception to this string. +showGhcException :: GhcException -> String -> String +showGhcException exception + = case exception of + UsageError str + -> showString str . showChar '\n' . showString short_usage + + PhaseFailed phase code + -> showString "phase `" . showString phase . + showString "' failed (exitcode = " . shows (int_code code) . + showString ")" + + CmdLineError str -> showString str + PprProgramError str _ -> + showGhcException (ProgramError (str ++ "\n<
>")) + ProgramError str -> showString str + InstallationError str -> showString str + Signal n -> showString "signal: " . shows n + + PprPanic s _ -> + showGhcException (Panic (s ++ "\n<
>")) + Panic s + -> showString $ + "panic! (the 'impossible' happened)\n" + ++ " (GHC version " ++ cProjectVersion ++ " for " ++ TargetPlatform_NAME ++ "):\n\t" + ++ s ++ "\n\n" + ++ "Please report this as a GHC bug: http://www.haskell.org/ghc/reportabug\n" + + PprSorry s _ -> + showGhcException (Sorry (s ++ "\n<
>")) + Sorry s + -> showString $ + "sorry! (unimplemented feature or known bug)\n" + ++ " (GHC version " ++ cProjectVersion ++ " for " ++ TargetPlatform_NAME ++ "):\n\t" + ++ s ++ "\n" + + where int_code code = + case code of + ExitSuccess -> (0::Int) + ExitFailure x -> x + + +throwGhcException :: GhcException -> a +throwGhcException = Exception.throw + +throwGhcExceptionIO :: GhcException -> IO a +throwGhcExceptionIO = Exception.throwIO + +handleGhcException :: ExceptionMonad m => (GhcException -> m a) -> m a -> m a +handleGhcException = ghandle + + +-- | Panics and asserts. +panic, sorry, pgmError :: String -> a +panic x = unsafeDupablePerformIO $ do + stack <- ccsToStrings =<< getCurrentCCS x + if null stack + then throwGhcException (Panic x) + else throwGhcException (Panic (x ++ '\n' : renderStack stack)) + +sorry x = throwGhcException (Sorry x) +pgmError x = throwGhcException (ProgramError x) + +panicDoc, sorryDoc, pgmErrorDoc :: String -> SDoc -> a +panicDoc x doc = throwGhcException (PprPanic x doc) +sorryDoc x doc = throwGhcException (PprSorry x doc) +pgmErrorDoc x doc = throwGhcException (PprProgramError x doc) + + +-- | Panic while pretending to return an unboxed int. +-- You can't use the regular panic functions in expressions +-- producing unboxed ints because they have the wrong kind. +panicFastInt :: String -> FastInt +panicFastInt s = case (panic s) of () -> _ILIT(0) + +panicDocFastInt :: String -> SDoc -> FastInt +panicDocFastInt s d = case (panicDoc s d) of () -> _ILIT(0) + + +-- | Throw an failed assertion exception for a given filename and line number. +assertPanic :: String -> Int -> a +assertPanic file line = + Exception.throw (Exception.AssertionFailed + ("ASSERT failed! file " ++ file ++ ", line " ++ show line)) + + +-- | Like try, but pass through UserInterrupt and Panic exceptions. +-- Used when we want soft failures when reading interface files, for example. +-- TODO: I'm not entirely sure if this is catching what we really want to catch +tryMost :: IO a -> IO (Either SomeException a) +tryMost action = do r <- try action + case r of + Left se -> + case fromException se of + -- Some GhcException's we rethrow, + Just (Signal _) -> throwIO se + Just (Panic _) -> throwIO se + -- others we return + Just _ -> return (Left se) + Nothing -> + case fromException se of + -- All IOExceptions are returned + Just (_ :: IOException) -> + return (Left se) + -- Anything else is rethrown + Nothing -> throwIO se + Right v -> return (Right v) + + +-- | Install standard signal handlers for catching ^C, which just throw an +-- exception in the target thread. The current target thread is the +-- thread at the head of the list in the MVar passed to +-- installSignalHandlers. +installSignalHandlers :: IO () +installSignalHandlers = do + main_thread <- myThreadId + pushInterruptTargetThread main_thread + + let + interrupt_exn = (toException UserInterrupt) + + interrupt = do + mt <- peekInterruptTargetThread + case mt of + Nothing -> return () + Just t -> throwTo t interrupt_exn + + -- +#if !defined(mingw32_HOST_OS) + _ <- installHandler sigQUIT (Catch interrupt) Nothing + _ <- installHandler sigINT (Catch interrupt) Nothing + -- see #3656; in the future we should install these automatically for + -- all Haskell programs in the same way that we install a ^C handler. + let fatal_signal n = throwTo main_thread (Signal (fromIntegral n)) + _ <- installHandler sigHUP (Catch (fatal_signal sigHUP)) Nothing + _ <- installHandler sigTERM (Catch (fatal_signal sigTERM)) Nothing + return () +#else + -- GHC 6.3+ has support for console events on Windows + -- NOTE: running GHCi under a bash shell for some reason requires + -- you to press Ctrl-Break rather than Ctrl-C to provoke + -- an interrupt. Ctrl-C is getting blocked somewhere, I don't know + -- why --SDM 17/12/2004 + let sig_handler ControlC = interrupt + sig_handler Break = interrupt + sig_handler _ = return () + + _ <- installHandler (Catch sig_handler) + return () +#endif + +{-# NOINLINE interruptTargetThread #-} +interruptTargetThread :: MVar [Weak ThreadId] +interruptTargetThread = unsafePerformIO (newMVar []) + +pushInterruptTargetThread :: ThreadId -> IO () +pushInterruptTargetThread tid = do + wtid <- mkWeakThreadId tid + modifyMVar_ interruptTargetThread $ return . (wtid :) + +peekInterruptTargetThread :: IO (Maybe ThreadId) +peekInterruptTargetThread = + withMVar interruptTargetThread $ loop + where + loop [] = return Nothing + loop (t:ts) = do + r <- deRefWeak t + case r of + Nothing -> loop ts + Just t -> return (Just t) + +popInterruptTargetThread :: IO () +popInterruptTargetThread = + modifyMVar_ interruptTargetThread $ + \tids -> return $! case tids of [] -> [] + (_:ts) -> ts diff --git a/compiler/utils/Panic.lhs b/compiler/utils/Panic.lhs deleted file mode 100644 index 23bf01cafe..0000000000 --- a/compiler/utils/Panic.lhs +++ /dev/null @@ -1,309 +0,0 @@ -% -% (c) The University of Glasgow 2006 -% (c) The GRASP Project, Glasgow University, 1992-2000 -% -Defines basic functions for printing error messages. - -It's hard to put these functions anywhere else without causing -some unnecessary loops in the module dependency graph. - -\begin{code} -{-# LANGUAGE CPP, DeriveDataTypeable, ScopedTypeVariables #-} - -module Panic ( - GhcException(..), showGhcException, - throwGhcException, throwGhcExceptionIO, - handleGhcException, - progName, - pgmError, - - panic, sorry, panicFastInt, assertPanic, trace, - panicDoc, sorryDoc, panicDocFastInt, pgmErrorDoc, - - Exception.Exception(..), showException, safeShowException, try, tryMost, throwTo, - - installSignalHandlers, - pushInterruptTargetThread, popInterruptTargetThread -) where -#include "HsVersions.h" - -import {-# SOURCE #-} Outputable (SDoc) - -import Config -import FastTypes -import Exception - -import Control.Concurrent -import Data.Dynamic -import Debug.Trace ( trace ) -import System.IO.Unsafe -import System.Exit -import System.Environment - -#ifndef mingw32_HOST_OS -import System.Posix.Signals -#endif - -#if defined(mingw32_HOST_OS) -import GHC.ConsoleHandler -#endif - -import GHC.Stack -import System.Mem.Weak ( Weak, deRefWeak ) - --- | GHC's own exception type --- error messages all take the form: --- --- @ --- : --- @ --- --- If the location is on the command line, or in GHC itself, then --- ="ghc". All of the error types below correspond to --- a of "ghc", except for ProgramError (where the string is --- assumed to contain a location already, so we don't print one). - -data GhcException - = PhaseFailed String -- name of phase - ExitCode -- an external phase (eg. cpp) failed - - -- | Some other fatal signal (SIGHUP,SIGTERM) - | Signal Int - - -- | Prints the short usage msg after the error - | UsageError String - - -- | A problem with the command line arguments, but don't print usage. - | CmdLineError String - - -- | The 'impossible' happened. - | Panic String - | PprPanic String SDoc - - -- | The user tickled something that's known not to work yet, - -- but we're not counting it as a bug. - | Sorry String - | PprSorry String SDoc - - -- | An installation problem. - | InstallationError String - - -- | An error in the user's code, probably. - | ProgramError String - | PprProgramError String SDoc - deriving (Typeable) - -instance Exception GhcException - -instance Show GhcException where - showsPrec _ e@(ProgramError _) = showGhcException e - showsPrec _ e@(CmdLineError _) = showString ": " . showGhcException e - showsPrec _ e = showString progName . showString ": " . showGhcException e - - --- | The name of this GHC. -progName :: String -progName = unsafePerformIO (getProgName) -{-# NOINLINE progName #-} - - --- | Short usage information to display when we are given the wrong cmd line arguments. -short_usage :: String -short_usage = "Usage: For basic information, try the `--help' option." - - --- | Show an exception as a string. -showException :: Exception e => e -> String -showException = show - --- | Show an exception which can possibly throw other exceptions. --- Used when displaying exception thrown within TH code. -safeShowException :: Exception e => e -> IO String -safeShowException e = do - -- ensure the whole error message is evaluated inside try - r <- try (return $! forceList (showException e)) - case r of - Right msg -> return msg - Left e' -> safeShowException (e' :: SomeException) - where - forceList [] = [] - forceList xs@(x : xt) = x `seq` forceList xt `seq` xs - --- | Append a description of the given exception to this string. -showGhcException :: GhcException -> String -> String -showGhcException exception - = case exception of - UsageError str - -> showString str . showChar '\n' . showString short_usage - - PhaseFailed phase code - -> showString "phase `" . showString phase . - showString "' failed (exitcode = " . shows (int_code code) . - showString ")" - - CmdLineError str -> showString str - PprProgramError str _ -> - showGhcException (ProgramError (str ++ "\n<
>")) - ProgramError str -> showString str - InstallationError str -> showString str - Signal n -> showString "signal: " . shows n - - PprPanic s _ -> - showGhcException (Panic (s ++ "\n<
>")) - Panic s - -> showString $ - "panic! (the 'impossible' happened)\n" - ++ " (GHC version " ++ cProjectVersion ++ " for " ++ TargetPlatform_NAME ++ "):\n\t" - ++ s ++ "\n\n" - ++ "Please report this as a GHC bug: http://www.haskell.org/ghc/reportabug\n" - - PprSorry s _ -> - showGhcException (Sorry (s ++ "\n<
>")) - Sorry s - -> showString $ - "sorry! (unimplemented feature or known bug)\n" - ++ " (GHC version " ++ cProjectVersion ++ " for " ++ TargetPlatform_NAME ++ "):\n\t" - ++ s ++ "\n" - - where int_code code = - case code of - ExitSuccess -> (0::Int) - ExitFailure x -> x - - -throwGhcException :: GhcException -> a -throwGhcException = Exception.throw - -throwGhcExceptionIO :: GhcException -> IO a -throwGhcExceptionIO = Exception.throwIO - -handleGhcException :: ExceptionMonad m => (GhcException -> m a) -> m a -> m a -handleGhcException = ghandle - - --- | Panics and asserts. -panic, sorry, pgmError :: String -> a -panic x = unsafeDupablePerformIO $ do - stack <- ccsToStrings =<< getCurrentCCS x - if null stack - then throwGhcException (Panic x) - else throwGhcException (Panic (x ++ '\n' : renderStack stack)) - -sorry x = throwGhcException (Sorry x) -pgmError x = throwGhcException (ProgramError x) - -panicDoc, sorryDoc, pgmErrorDoc :: String -> SDoc -> a -panicDoc x doc = throwGhcException (PprPanic x doc) -sorryDoc x doc = throwGhcException (PprSorry x doc) -pgmErrorDoc x doc = throwGhcException (PprProgramError x doc) - - --- | Panic while pretending to return an unboxed int. --- You can't use the regular panic functions in expressions --- producing unboxed ints because they have the wrong kind. -panicFastInt :: String -> FastInt -panicFastInt s = case (panic s) of () -> _ILIT(0) - -panicDocFastInt :: String -> SDoc -> FastInt -panicDocFastInt s d = case (panicDoc s d) of () -> _ILIT(0) - - --- | Throw an failed assertion exception for a given filename and line number. -assertPanic :: String -> Int -> a -assertPanic file line = - Exception.throw (Exception.AssertionFailed - ("ASSERT failed! file " ++ file ++ ", line " ++ show line)) - - --- | Like try, but pass through UserInterrupt and Panic exceptions. --- Used when we want soft failures when reading interface files, for example. --- TODO: I'm not entirely sure if this is catching what we really want to catch -tryMost :: IO a -> IO (Either SomeException a) -tryMost action = do r <- try action - case r of - Left se -> - case fromException se of - -- Some GhcException's we rethrow, - Just (Signal _) -> throwIO se - Just (Panic _) -> throwIO se - -- others we return - Just _ -> return (Left se) - Nothing -> - case fromException se of - -- All IOExceptions are returned - Just (_ :: IOException) -> - return (Left se) - -- Anything else is rethrown - Nothing -> throwIO se - Right v -> return (Right v) - - --- | Install standard signal handlers for catching ^C, which just throw an --- exception in the target thread. The current target thread is the --- thread at the head of the list in the MVar passed to --- installSignalHandlers. -installSignalHandlers :: IO () -installSignalHandlers = do - main_thread <- myThreadId - pushInterruptTargetThread main_thread - - let - interrupt_exn = (toException UserInterrupt) - - interrupt = do - mt <- peekInterruptTargetThread - case mt of - Nothing -> return () - Just t -> throwTo t interrupt_exn - - -- -#if !defined(mingw32_HOST_OS) - _ <- installHandler sigQUIT (Catch interrupt) Nothing - _ <- installHandler sigINT (Catch interrupt) Nothing - -- see #3656; in the future we should install these automatically for - -- all Haskell programs in the same way that we install a ^C handler. - let fatal_signal n = throwTo main_thread (Signal (fromIntegral n)) - _ <- installHandler sigHUP (Catch (fatal_signal sigHUP)) Nothing - _ <- installHandler sigTERM (Catch (fatal_signal sigTERM)) Nothing - return () -#else - -- GHC 6.3+ has support for console events on Windows - -- NOTE: running GHCi under a bash shell for some reason requires - -- you to press Ctrl-Break rather than Ctrl-C to provoke - -- an interrupt. Ctrl-C is getting blocked somewhere, I don't know - -- why --SDM 17/12/2004 - let sig_handler ControlC = interrupt - sig_handler Break = interrupt - sig_handler _ = return () - - _ <- installHandler (Catch sig_handler) - return () -#endif - -{-# NOINLINE interruptTargetThread #-} -interruptTargetThread :: MVar [Weak ThreadId] -interruptTargetThread = unsafePerformIO (newMVar []) - -pushInterruptTargetThread :: ThreadId -> IO () -pushInterruptTargetThread tid = do - wtid <- mkWeakThreadId tid - modifyMVar_ interruptTargetThread $ return . (wtid :) - -peekInterruptTargetThread :: IO (Maybe ThreadId) -peekInterruptTargetThread = - withMVar interruptTargetThread $ loop - where - loop [] = return Nothing - loop (t:ts) = do - r <- deRefWeak t - case r of - Nothing -> loop ts - Just t -> return (Just t) - -popInterruptTargetThread :: IO () -popInterruptTargetThread = - modifyMVar_ interruptTargetThread $ - \tids -> return $! case tids of [] -> [] - (_:ts) -> ts - -\end{code} diff --git a/compiler/utils/Pretty.hs b/compiler/utils/Pretty.hs new file mode 100644 index 0000000000..5e441838fc --- /dev/null +++ b/compiler/utils/Pretty.hs @@ -0,0 +1,1024 @@ +{- +********************************************************************************* +* * +* John Hughes's and Simon Peyton Jones's Pretty Printer Combinators * +* * +* based on "The Design of a Pretty-printing Library" * +* in Advanced Functional Programming, * +* Johan Jeuring and Erik Meijer (eds), LNCS 925 * +* http://www.cs.chalmers.se/~rjmh/Papers/pretty.ps * +* * +* Heavily modified by Simon Peyton Jones, Dec 96 * +* * +********************************************************************************* + +Version 3.0 28 May 1997 + * Cured massive performance bug. If you write + + foldl <> empty (map (text.show) [1..10000]) + + you get quadratic behaviour with V2.0. Why? For just the same reason as you get + quadratic behaviour with left-associated (++) chains. + + This is really bad news. One thing a pretty-printer abstraction should + certainly guarantee is insensivity to associativity. It matters: suddenly + GHC's compilation times went up by a factor of 100 when I switched to the + new pretty printer. + + I fixed it with a bit of a hack (because I wanted to get GHC back on the + road). I added two new constructors to the Doc type, Above and Beside: + + <> = Beside + $$ = Above + + Then, where I need to get to a "TextBeside" or "NilAbove" form I "force" + the Doc to squeeze out these suspended calls to Beside and Above; but in so + doing I re-associate. It's quite simple, but I'm not satisfied that I've done + the best possible job. I'll send you the code if you are interested. + + * Added new exports: + punctuate, hang + int, integer, float, double, rational, + lparen, rparen, lbrack, rbrack, lbrace, rbrace, + + * fullRender's type signature has changed. Rather than producing a string it + now takes an extra couple of arguments that tells it how to glue fragments + of output together: + + fullRender :: Mode + -> Int -- Line length + -> Float -- Ribbons per line + -> (TextDetails -> a -> a) -- What to do with text + -> a -- What to do at the end + -> Doc + -> a -- Result + + The "fragments" are encapsulated in the TextDetails data type: + data TextDetails = Chr Char + | Str String + | PStr FastString + + The Chr and Str constructors are obvious enough. The PStr constructor has a packed + string (FastString) inside it. It's generated by using the new "ptext" export. + + An advantage of this new setup is that you can get the renderer to do output + directly (by passing in a function of type (TextDetails -> IO () -> IO ()), + rather than producing a string that you then print. + + +Version 2.0 24 April 1997 + * Made empty into a left unit for <> as well as a right unit; + it is also now true that + nest k empty = empty + which wasn't true before. + + * Fixed an obscure bug in sep that occasionally gave very weird behaviour + + * Added $+$ + + * Corrected and tidied up the laws and invariants + +====================================================================== +Relative to John's original paper, there are the following new features: + +1. There's an empty document, "empty". It's a left and right unit for + both <> and $$, and anywhere in the argument list for + sep, hcat, hsep, vcat, fcat etc. + + It is Really Useful in practice. + +2. There is a paragraph-fill combinator, fsep, that's much like sep, + only it keeps fitting things on one line until it can't fit any more. + +3. Some random useful extra combinators are provided. + <+> puts its arguments beside each other with a space between them, + unless either argument is empty in which case it returns the other + + + hcat is a list version of <> + hsep is a list version of <+> + vcat is a list version of $$ + + sep (separate) is either like hsep or like vcat, depending on what fits + + cat is behaves like sep, but it uses <> for horizontal conposition + fcat is behaves like fsep, but it uses <> for horizontal conposition + + These new ones do the obvious things: + char, semi, comma, colon, space, + parens, brackets, braces, + quotes, quote, doubleQuotes + +4. The "above" combinator, $$, now overlaps its two arguments if the + last line of the top argument stops before the first line of the second begins. + For example: text "hi" $$ nest 5 "there" + lays out as + hi there + rather than + hi + there + + There are two places this is really useful + + a) When making labelled blocks, like this: + Left -> code for left + Right -> code for right + LongLongLongLabel -> + code for longlonglonglabel + The block is on the same line as the label if the label is + short, but on the next line otherwise. + + b) When laying out lists like this: + [ first + , second + , third + ] + which some people like. But if the list fits on one line + you want [first, second, third]. You can't do this with + John's original combinators, but it's quite easy with the + new $$. + + The combinator $+$ gives the original "never-overlap" behaviour. + +5. Several different renderers are provided: + * a standard one + * one that uses cut-marks to avoid deeply-nested documents + simply piling up in the right-hand margin + * one that ignores indentation (fewer chars output; good for machines) + * one that ignores indentation and newlines (ditto, only more so) + +6. Numerous implementation tidy-ups + Use of unboxed data types to speed up the implementation +-} + +{-# LANGUAGE BangPatterns, CPP, MagicHash #-} + +module Pretty ( + Doc, -- Abstract + Mode(..), TextDetails(..), + + empty, isEmpty, nest, + + char, text, ftext, ptext, ztext, zeroWidthText, + int, integer, float, double, rational, + parens, brackets, braces, quotes, quote, doubleQuotes, + semi, comma, colon, space, equals, + lparen, rparen, lbrack, rbrack, lbrace, rbrace, cparen, + + (<>), (<+>), hcat, hsep, + ($$), ($+$), vcat, + sep, cat, + fsep, fcat, + + hang, punctuate, + + fullRender, printDoc, printDoc_, showDoc, + bufLeftRender -- performance hack + ) where + +import BufWrite +import FastString +import FastTypes +import Panic +import Numeric (fromRat) +import System.IO + +--for a RULES +import GHC.Base ( unpackCString# ) +import GHC.Exts ( Int# ) +import GHC.Ptr ( Ptr(..) ) + +-- Don't import Util( assertPanic ) because it makes a loop in the module structure + +infixl 6 <> +infixl 6 <+> +infixl 5 $$, $+$ + +-- Disable ASSERT checks; they are expensive! +#define LOCAL_ASSERT(x) + +{- +********************************************************* +* * +\subsection{The interface} +* * +********************************************************* + +The primitive @Doc@ values +-} + +empty :: Doc +isEmpty :: Doc -> Bool +-- | Some text, but without any width. Use for non-printing text +-- such as a HTML or Latex tags +zeroWidthText :: String -> Doc + +text :: String -> Doc +char :: Char -> Doc + +semi, comma, colon, space, equals :: Doc +lparen, rparen, lbrack, rbrack, lbrace, rbrace :: Doc + +parens, brackets, braces :: Doc -> Doc +quotes, quote, doubleQuotes :: Doc -> Doc + +int :: Int -> Doc +integer :: Integer -> Doc +float :: Float -> Doc +double :: Double -> Doc +rational :: Rational -> Doc + +-- Combining @Doc@ values + +(<>) :: Doc -> Doc -> Doc -- Beside +hcat :: [Doc] -> Doc -- List version of <> +(<+>) :: Doc -> Doc -> Doc -- Beside, separated by space +hsep :: [Doc] -> Doc -- List version of <+> + +($$) :: Doc -> Doc -> Doc -- Above; if there is no + -- overlap it "dovetails" the two +vcat :: [Doc] -> Doc -- List version of $$ + +cat :: [Doc] -> Doc -- Either hcat or vcat +sep :: [Doc] -> Doc -- Either hsep or vcat +fcat :: [Doc] -> Doc -- ``Paragraph fill'' version of cat +fsep :: [Doc] -> Doc -- ``Paragraph fill'' version of sep + +nest :: Int -> Doc -> Doc -- Nested + +-- GHC-specific ones. + +hang :: Doc -> Int -> Doc -> Doc +punctuate :: Doc -> [Doc] -> [Doc] -- punctuate p [d1, ... dn] = [d1 <> p, d2 <> p, ... dn-1 <> p, dn] + +-- Displaying @Doc@ values. + +instance Show Doc where + showsPrec _ doc cont = showDocPlus PageMode 100 doc cont + +fullRender :: Mode + -> Int -- Line length + -> Float -- Ribbons per line + -> (TextDetails -> a -> a) -- What to do with text + -> a -- What to do at the end + -> Doc + -> a -- Result + +data Mode = PageMode -- Normal + | ZigZagMode -- With zig-zag cuts + | LeftMode -- No indentation, infinitely long lines + | OneLineMode -- All on one line + +{- +********************************************************* +* * +\subsection{The @Doc@ calculus} +* * +********************************************************* + +The @Doc@ combinators satisfy the following laws: +\begin{verbatim} +Laws for $$ +~~~~~~~~~~~ + (x $$ y) $$ z = x $$ (y $$ z) + empty $$ x = x + x $$ empty = x + + ...ditto $+$... + +Laws for <> +~~~~~~~~~~~ + (x <> y) <> z = x <> (y <> z) + empty <> x = empty + x <> empty = x + + ...ditto <+>... + +Laws for text +~~~~~~~~~~~~~ + text s <> text t = text (s++t) + text "" <> x = x, if x non-empty + +Laws for nest +~~~~~~~~~~~~~ + nest 0 x = x + nest k (nest k' x) = nest (k+k') x + nest k (x <> y) = nest k z <> nest k y + nest k (x $$ y) = nest k x $$ nest k y + nest k empty = empty + x <> nest k y = x <> y, if x non-empty + + - Note the side condition on ! It is this that + makes it OK for empty to be a left unit for <>. + +Miscellaneous +~~~~~~~~~~~~~ + (text s <> x) $$ y = text s <> ((text "" <> x)) $$ + nest (-length s) y) + + (x $$ y) <> z = x $$ (y <> z) + if y non-empty + + +Laws for list versions +~~~~~~~~~~~~~~~~~~~~~~ + sep (ps++[empty]++qs) = sep (ps ++ qs) + ...ditto hsep, hcat, vcat, fill... + + nest k (sep ps) = sep (map (nest k) ps) + ...ditto hsep, hcat, vcat, fill... + +Laws for oneLiner +~~~~~~~~~~~~~~~~~ + oneLiner (nest k p) = nest k (oneLiner p) + oneLiner (x <> y) = oneLiner x <> oneLiner y +\end{verbatim} + + +You might think that the following verion of would +be neater: +\begin{verbatim} +<3 NO> (text s <> x) $$ y = text s <> ((empty <> x)) $$ + nest (-length s) y) +\end{verbatim} +But it doesn't work, for if x=empty, we would have +\begin{verbatim} + text s $$ y = text s <> (empty $$ nest (-length s) y) + = text s <> nest (-length s) y +\end{verbatim} + + + +********************************************************* +* * +\subsection{Simple derived definitions} +* * +********************************************************* +-} + +semi = char ';' +colon = char ':' +comma = char ',' +space = char ' ' +equals = char '=' +lparen = char '(' +rparen = char ')' +lbrack = char '[' +rbrack = char ']' +lbrace = char '{' +rbrace = char '}' + +int n = text (show n) +integer n = text (show n) +float n = text (show n) +double n = text (show n) +rational n = text (show (fromRat n :: Double)) +--rational n = text (show (fromRationalX n)) -- _showRational 30 n) + +quotes p = char '`' <> p <> char '\'' +quote p = char '\'' <> p +doubleQuotes p = char '"' <> p <> char '"' +parens p = char '(' <> p <> char ')' +brackets p = char '[' <> p <> char ']' +braces p = char '{' <> p <> char '}' + +cparen :: Bool -> Doc -> Doc +cparen True = parens +cparen False = id + +hcat = foldr (<>) empty +hsep = foldr (<+>) empty +vcat = foldr ($$) empty + +hang d1 n d2 = sep [d1, nest n d2] + +punctuate _ [] = [] +punctuate p (d:ds) = go d ds + where + go d [] = [d] + go d (e:es) = (d <> p) : go e es + +{- +********************************************************* +* * +\subsection{The @Doc@ data type} +* * +********************************************************* + +A @Doc@ represents a {\em set} of layouts. A @Doc@ with +no occurrences of @Union@ or @NoDoc@ represents just one layout. +-} + +data Doc + = Empty -- empty + | NilAbove Doc -- text "" $$ x + | TextBeside !TextDetails FastInt Doc -- text s <> x + | Nest FastInt Doc -- nest k x + | Union Doc Doc -- ul `union` ur + | NoDoc -- The empty set of documents + | Beside Doc Bool Doc -- True <=> space between + | Above Doc Bool Doc -- True <=> never overlap + +type RDoc = Doc -- RDoc is a "reduced Doc", guaranteed not to have a top-level Above or Beside + + +reduceDoc :: Doc -> RDoc +reduceDoc (Beside p g q) = beside p g (reduceDoc q) +reduceDoc (Above p g q) = above p g (reduceDoc q) +reduceDoc p = p + + +data TextDetails = Chr {-#UNPACK#-}!Char + | Str String + | PStr FastString -- a hashed string + | ZStr FastZString -- a z-encoded string + | LStr {-#UNPACK#-}!LitString FastInt -- a '\0'-terminated + -- array of bytes + +space_text :: TextDetails +space_text = Chr ' ' +nl_text :: TextDetails +nl_text = Chr '\n' + +{- +Here are the invariants: +\begin{itemize} +\item +The argument of @NilAbove@ is never @Empty@. Therefore +a @NilAbove@ occupies at least two lines. + +\item +The arugment of @TextBeside@ is never @Nest@. + +\item +The layouts of the two arguments of @Union@ both flatten to the same string. + +\item +The arguments of @Union@ are either @TextBeside@, or @NilAbove@. + +\item +The right argument of a union cannot be equivalent to the empty set (@NoDoc@). +If the left argument of a union is equivalent to the empty set (@NoDoc@), +then the @NoDoc@ appears in the first line. + +\item +An empty document is always represented by @Empty@. +It can't be hidden inside a @Nest@, or a @Union@ of two @Empty@s. + +\item +The first line of every layout in the left argument of @Union@ +is longer than the first line of any layout in the right argument. +(1) ensures that the left argument has a first line. In view of (3), +this invariant means that the right argument must have at least two +lines. +\end{itemize} +-} + +-- Arg of a NilAbove is always an RDoc +nilAbove_ :: Doc -> Doc +nilAbove_ p = LOCAL_ASSERT( _ok p ) NilAbove p + where + _ok Empty = False + _ok _ = True + +-- Arg of a TextBeside is always an RDoc +textBeside_ :: TextDetails -> FastInt -> Doc -> Doc +textBeside_ s sl p = TextBeside s sl (LOCAL_ASSERT( _ok p ) p) + where + _ok (Nest _ _) = False + _ok _ = True + +-- Arg of Nest is always an RDoc +nest_ :: FastInt -> Doc -> Doc +nest_ k p = Nest k (LOCAL_ASSERT( _ok p ) p) + where + _ok Empty = False + _ok _ = True + +-- Args of union are always RDocs +union_ :: Doc -> Doc -> Doc +union_ p q = Union (LOCAL_ASSERT( _ok p ) p) (LOCAL_ASSERT( _ok q ) q) + where + _ok (TextBeside _ _ _) = True + _ok (NilAbove _) = True + _ok (Union _ _) = True + _ok _ = False + +{- +Notice the difference between + * NoDoc (no documents) + * Empty (one empty document; no height and no width) + * text "" (a document containing the empty string; + one line high, but has no width) + + + +********************************************************* +* * +\subsection{@empty@, @text@, @nest@, @union@} +* * +********************************************************* +-} + +empty = Empty + +isEmpty Empty = True +isEmpty _ = False + +char c = textBeside_ (Chr c) (_ILIT(1)) Empty + +text s = case iUnbox (length s) of {sl -> textBeside_ (Str s) sl Empty} +{-# NOINLINE [0] text #-} -- Give the RULE a chance to fire + -- It must wait till after phase 1 when + -- the unpackCString first is manifested + +ftext :: FastString -> Doc +ftext s = case iUnbox (lengthFS s) of {sl -> textBeside_ (PStr s) sl Empty} +ptext :: LitString -> Doc +ptext s = case iUnbox (lengthLS s) of {sl -> textBeside_ (LStr s sl) sl Empty} +ztext :: FastZString -> Doc +ztext s = case iUnbox (lengthFZS s) of {sl -> textBeside_ (ZStr s) sl Empty} +zeroWidthText s = textBeside_ (Str s) (_ILIT(0)) Empty + +-- RULE that turns (text "abc") into (ptext (A# "abc"#)) to avoid the +-- intermediate packing/unpacking of the string. +{-# RULES + "text/str" forall a. text (unpackCString# a) = ptext (Ptr a) + #-} + +nest k p = mkNest (iUnbox k) (reduceDoc p) -- Externally callable version + +-- mkNest checks for Nest's invariant that it doesn't have an Empty inside it +mkNest :: Int# -> Doc -> Doc +mkNest k (Nest k1 p) = mkNest (k +# k1) p +mkNest _ NoDoc = NoDoc +mkNest _ Empty = Empty +mkNest k p | k ==# _ILIT(0) = p -- Worth a try! +mkNest k p = nest_ k p + +-- mkUnion checks for an empty document +mkUnion :: Doc -> Doc -> Doc +mkUnion Empty _ = Empty +mkUnion p q = p `union_` q + +{- +********************************************************* +* * +\subsection{Vertical composition @$$@} +* * +********************************************************* +-} + +p $$ q = Above p False q +($+$) :: Doc -> Doc -> Doc +p $+$ q = Above p True q + +above :: Doc -> Bool -> RDoc -> RDoc +above (Above p g1 q1) g2 q2 = above p g1 (above q1 g2 q2) +above p@(Beside _ _ _) g q = aboveNest (reduceDoc p) g (_ILIT(0)) (reduceDoc q) +above p g q = aboveNest p g (_ILIT(0)) (reduceDoc q) + +aboveNest :: RDoc -> Bool -> FastInt -> RDoc -> RDoc +-- Specfication: aboveNest p g k q = p $g$ (nest k q) + +aboveNest NoDoc _ _ _ = NoDoc +aboveNest (p1 `Union` p2) g k q = aboveNest p1 g k q `union_` + aboveNest p2 g k q + +aboveNest Empty _ k q = mkNest k q +aboveNest (Nest k1 p) g k q = nest_ k1 (aboveNest p g (k -# k1) q) + -- p can't be Empty, so no need for mkNest + +aboveNest (NilAbove p) g k q = nilAbove_ (aboveNest p g k q) +aboveNest (TextBeside s sl p) g k q = textBeside_ s sl rest + where + !k1 = k -# sl + rest = case p of + Empty -> nilAboveNest g k1 q + _ -> aboveNest p g k1 q +aboveNest _ _ _ _ = panic "aboveNest: Unhandled case" + +nilAboveNest :: Bool -> FastInt -> RDoc -> RDoc +-- Specification: text s <> nilaboveNest g k q +-- = text s <> (text "" $g$ nest k q) + +nilAboveNest _ _ Empty = Empty -- Here's why the "text s <>" is in the spec! +nilAboveNest g k (Nest k1 q) = nilAboveNest g (k +# k1) q + +nilAboveNest g k q | (not g) && (k ># _ILIT(0)) -- No newline if no overlap + = textBeside_ (Str (spaces k)) k q + | otherwise -- Put them really above + = nilAbove_ (mkNest k q) + +{- +********************************************************* +* * +\subsection{Horizontal composition @<>@} +* * +********************************************************* +-} + +p <> q = Beside p False q +p <+> q = Beside p True q + +beside :: Doc -> Bool -> RDoc -> RDoc +-- Specification: beside g p q = p q + +beside NoDoc _ _ = NoDoc +beside (p1 `Union` p2) g q = (beside p1 g q) `union_` (beside p2 g q) +beside Empty _ q = q +beside (Nest k p) g q = nest_ k $! beside p g q -- p non-empty +beside p@(Beside p1 g1 q1) g2 q2 + {- (A `op1` B) `op2` C == A `op1` (B `op2` C) iff op1 == op2 + [ && (op1 == <> || op1 == <+>) ] -} + | g1 == g2 = beside p1 g1 $! beside q1 g2 q2 + | otherwise = beside (reduceDoc p) g2 q2 +beside p@(Above _ _ _) g q = let d = reduceDoc p in d `seq` beside d g q +beside (NilAbove p) g q = nilAbove_ $! beside p g q +beside (TextBeside s sl p) g q = textBeside_ s sl $! rest + where + rest = case p of + Empty -> nilBeside g q + _ -> beside p g q + +nilBeside :: Bool -> RDoc -> RDoc +-- Specification: text "" <> nilBeside g p +-- = text "" p + +nilBeside _ Empty = Empty -- Hence the text "" in the spec +nilBeside g (Nest _ p) = nilBeside g p +nilBeside g p | g = textBeside_ space_text (_ILIT(1)) p + | otherwise = p + +{- +********************************************************* +* * +\subsection{Separate, @sep@, Hughes version} +* * +********************************************************* +-} + +-- Specification: sep ps = oneLiner (hsep ps) +-- `union` +-- vcat ps + +sep = sepX True -- Separate with spaces +cat = sepX False -- Don't + +sepX :: Bool -> [Doc] -> Doc +sepX _ [] = empty +sepX x (p:ps) = sep1 x (reduceDoc p) (_ILIT(0)) ps + + +-- Specification: sep1 g k ys = sep (x : map (nest k) ys) +-- = oneLiner (x nest k (hsep ys)) +-- `union` x $$ nest k (vcat ys) + +sep1 :: Bool -> RDoc -> FastInt -> [Doc] -> RDoc +sep1 _ NoDoc _ _ = NoDoc +sep1 g (p `Union` q) k ys = sep1 g p k ys + `union_` + (aboveNest q False k (reduceDoc (vcat ys))) + +sep1 g Empty k ys = mkNest k (sepX g ys) +sep1 g (Nest n p) k ys = nest_ n (sep1 g p (k -# n) ys) + +sep1 _ (NilAbove p) k ys = nilAbove_ (aboveNest p False k (reduceDoc (vcat ys))) +sep1 g (TextBeside s sl p) k ys = textBeside_ s sl (sepNB g p (k -# sl) ys) +sep1 _ _ _ _ = panic "sep1: Unhandled case" + +-- Specification: sepNB p k ys = sep1 (text "" <> p) k ys +-- Called when we have already found some text in the first item +-- We have to eat up nests + +sepNB :: Bool -> Doc -> FastInt -> [Doc] -> Doc +sepNB g (Nest _ p) k ys = sepNB g p k ys + +sepNB g Empty k ys = oneLiner (nilBeside g (reduceDoc rest)) + `mkUnion` + nilAboveNest False k (reduceDoc (vcat ys)) + where + rest | g = hsep ys + | otherwise = hcat ys + +sepNB g p k ys = sep1 g p k ys + +{- +********************************************************* +* * +\subsection{@fill@} +* * +********************************************************* +-} + +fsep = fill True +fcat = fill False + +-- Specification: +-- fill [] = empty +-- fill [p] = p +-- fill (p1:p2:ps) = oneLiner p1 <#> nest (length p1) +-- (fill (oneLiner p2 : ps)) +-- `union` +-- p1 $$ fill ps + +fill :: Bool -> [Doc] -> Doc +fill _ [] = empty +fill g (p:ps) = fill1 g (reduceDoc p) (_ILIT(0)) ps + + +fill1 :: Bool -> RDoc -> FastInt -> [Doc] -> Doc +fill1 _ NoDoc _ _ = NoDoc +fill1 g (p `Union` q) k ys = fill1 g p k ys + `union_` + (aboveNest q False k (fill g ys)) + +fill1 g Empty k ys = mkNest k (fill g ys) +fill1 g (Nest n p) k ys = nest_ n (fill1 g p (k -# n) ys) + +fill1 g (NilAbove p) k ys = nilAbove_ (aboveNest p False k (fill g ys)) +fill1 g (TextBeside s sl p) k ys = textBeside_ s sl (fillNB g p (k -# sl) ys) +fill1 _ _ _ _ = panic "fill1: Unhandled case" + +fillNB :: Bool -> Doc -> Int# -> [Doc] -> Doc +fillNB g (Nest _ p) k ys = fillNB g p k ys +fillNB _ Empty _ [] = Empty +fillNB g Empty k (y:ys) = nilBeside g (fill1 g (oneLiner (reduceDoc y)) k1 ys) + `mkUnion` + nilAboveNest False k (fill g (y:ys)) + where + !k1 | g = k -# _ILIT(1) + | otherwise = k + +fillNB g p k ys = fill1 g p k ys + +{- +********************************************************* +* * +\subsection{Selecting the best layout} +* * +********************************************************* +-} + +best :: Int -- Line length + -> Int -- Ribbon length + -> RDoc + -> RDoc -- No unions in here! + +best w_ r_ p + = get (iUnbox w_) p + where + !r = iUnbox r_ + get :: FastInt -- (Remaining) width of line + -> Doc -> Doc + get _ Empty = Empty + get _ NoDoc = NoDoc + get w (NilAbove p) = nilAbove_ (get w p) + get w (TextBeside s sl p) = textBeside_ s sl (get1 w sl p) + get w (Nest k p) = nest_ k (get (w -# k) p) + get w (p `Union` q) = nicest w r (get w p) (get w q) + get _ _ = panic "best/get: Unhandled case" + + get1 :: FastInt -- (Remaining) width of line + -> FastInt -- Amount of first line already eaten up + -> Doc -- This is an argument to TextBeside => eat Nests + -> Doc -- No unions in here! + + get1 _ _ Empty = Empty + get1 _ _ NoDoc = NoDoc + get1 w sl (NilAbove p) = nilAbove_ (get (w -# sl) p) + get1 w sl (TextBeside t tl p) = textBeside_ t tl (get1 w (sl +# tl) p) + get1 w sl (Nest _ p) = get1 w sl p + get1 w sl (p `Union` q) = nicest1 w r sl (get1 w sl p) + (get1 w sl q) + get1 _ _ _ = panic "best/get1: Unhandled case" + +nicest :: FastInt -> FastInt -> Doc -> Doc -> Doc +nicest w r p q = nicest1 w r (_ILIT(0)) p q +nicest1 :: FastInt -> FastInt -> Int# -> Doc -> Doc -> Doc +nicest1 w r sl p q | fits ((w `minFastInt` r) -# sl) p = p + | otherwise = q + +fits :: FastInt -- Space available + -> Doc + -> Bool -- True if *first line* of Doc fits in space available + +fits n _ | n <# _ILIT(0) = False +fits _ NoDoc = False +fits _ Empty = True +fits _ (NilAbove _) = True +fits n (TextBeside _ sl p) = fits (n -# sl) p +fits _ _ = panic "fits: Unhandled case" + +{- +@first@ and @nonEmptySet@ are similar to @nicest@ and @fits@, only simpler. +@first@ returns its first argument if it is non-empty, otherwise its second. +-} + +first :: Doc -> Doc -> Doc +first p q | nonEmptySet p = p + | otherwise = q + +nonEmptySet :: Doc -> Bool +nonEmptySet NoDoc = False +nonEmptySet (_ `Union` _) = True +nonEmptySet Empty = True +nonEmptySet (NilAbove _) = True -- NoDoc always in first line +nonEmptySet (TextBeside _ _ p) = nonEmptySet p +nonEmptySet (Nest _ p) = nonEmptySet p +nonEmptySet _ = panic "nonEmptySet: Unhandled case" + +-- @oneLiner@ returns the one-line members of the given set of @Doc@s. + +oneLiner :: Doc -> Doc +oneLiner NoDoc = NoDoc +oneLiner Empty = Empty +oneLiner (NilAbove _) = NoDoc +oneLiner (TextBeside s sl p) = textBeside_ s sl (oneLiner p) +oneLiner (Nest k p) = nest_ k (oneLiner p) +oneLiner (p `Union` _) = oneLiner p +oneLiner _ = panic "oneLiner: Unhandled case" + +{- +********************************************************* +* * +\subsection{Displaying the best layout} +* * +********************************************************* +-} + +showDocPlus :: Mode -> Int -> Doc -> String -> String +showDocPlus mode cols doc rest = fullRender mode cols 1.5 string_txt rest doc + +showDoc :: Mode -> Int -> Doc -> String +showDoc mode cols doc = showDocPlus mode cols doc "" + +string_txt :: TextDetails -> String -> String +string_txt (Chr c) s = c:s +string_txt (Str s1) s2 = s1 ++ s2 +string_txt (PStr s1) s2 = unpackFS s1 ++ s2 +string_txt (ZStr s1) s2 = zString s1 ++ s2 +string_txt (LStr s1 _) s2 = unpackLitString s1 ++ s2 + +fullRender OneLineMode _ _ txt end doc + = lay (reduceDoc doc) + where + lay NoDoc = cant_fail + lay (Union _ q) = lay q -- Second arg can't be NoDoc + lay (Nest _ p) = lay p + lay Empty = end + lay (NilAbove p) = space_text `txt` lay p -- NoDoc always on + -- first line + lay (TextBeside s _ p) = s `txt` lay p + lay _ = panic "fullRender/OneLineMode/lay: Unhandled case" + +fullRender LeftMode _ _ txt end doc + = lay (reduceDoc doc) + where + lay NoDoc = cant_fail + lay (Union p q) = lay (first p q) + lay (Nest _ p) = lay p + lay Empty = end + lay (NilAbove p) = nl_text `txt` lay p -- NoDoc always on first line + lay (TextBeside s _ p) = s `txt` lay p + lay _ = panic "fullRender/LeftMode/lay: Unhandled case" + +fullRender mode line_length ribbons_per_line txt end doc + = display mode line_length ribbon_length txt end best_doc + where + best_doc = best hacked_line_length ribbon_length (reduceDoc doc) + + hacked_line_length, ribbon_length :: Int + ribbon_length = round (fromIntegral line_length / ribbons_per_line) + hacked_line_length = case mode of + ZigZagMode -> maxBound + _ -> line_length + +display :: Mode -> Int -> Int -> (TextDetails -> t -> t) -> t -> Doc -> t +display mode page_width ribbon_width txt end doc + = case (iUnbox page_width) -# (iUnbox ribbon_width) of { gap_width -> + case gap_width `quotFastInt` _ILIT(2) of { shift -> + let + lay k (Nest k1 p) = lay (k +# k1) p + lay _ Empty = end + + lay k (NilAbove p) = nl_text `txt` lay k p + + lay k (TextBeside s sl p) + = case mode of + ZigZagMode | k >=# gap_width + -> nl_text `txt` ( + Str (multi_ch shift '/') `txt` ( + nl_text `txt` ( + lay1 (k -# shift) s sl p))) + + | k <# _ILIT(0) + -> nl_text `txt` ( + Str (multi_ch shift '\\') `txt` ( + nl_text `txt` ( + lay1 (k +# shift) s sl p ))) + + _ -> lay1 k s sl p + lay _ _ = panic "display/lay: Unhandled case" + + lay1 k s sl p = indent k (s `txt` lay2 (k +# sl) p) + + lay2 k (NilAbove p) = nl_text `txt` lay k p + lay2 k (TextBeside s sl p) = s `txt` (lay2 (k +# sl) p) + lay2 k (Nest _ p) = lay2 k p + lay2 _ Empty = end + lay2 _ _ = panic "display/lay2: Unhandled case" + + -- optimise long indentations using LitString chunks of 8 spaces + indent n r | n >=# _ILIT(8) = LStr (sLit " ") (_ILIT(8)) `txt` + indent (n -# _ILIT(8)) r + | otherwise = Str (spaces n) `txt` r + in + lay (_ILIT(0)) doc + }} + +cant_fail :: a +cant_fail = error "easy_display: NoDoc" + +multi_ch :: Int# -> Char -> String +multi_ch n ch | n <=# _ILIT(0) = "" + | otherwise = ch : multi_ch (n -# _ILIT(1)) ch + +spaces :: Int# -> String +spaces n | n <=# _ILIT(0) = "" + | otherwise = ' ' : spaces (n -# _ILIT(1)) + +printDoc :: Mode -> Int -> Handle -> Doc -> IO () +-- printDoc adds a newline to the end +printDoc mode cols hdl doc = printDoc_ mode cols hdl (doc $$ text "") + +printDoc_ :: Mode -> Int -> Handle -> Doc -> IO () +-- printDoc_ does not add a newline at the end, so that +-- successive calls can output stuff on the same line +-- Rather like putStr vs putStrLn +printDoc_ LeftMode _ hdl doc + = do { printLeftRender hdl doc; hFlush hdl } +printDoc_ mode pprCols hdl doc + = do { fullRender mode pprCols 1.5 put done doc ; + hFlush hdl } + where + put (Chr c) next = hPutChar hdl c >> next + put (Str s) next = hPutStr hdl s >> next + put (PStr s) next = hPutStr hdl (unpackFS s) >> next + -- NB. not hPutFS, we want this to go through + -- the I/O library's encoding layer. (#3398) + put (ZStr s) next = hPutFZS hdl s >> next + put (LStr s l) next = hPutLitString hdl s l >> next + + done = return () -- hPutChar hdl '\n' + + -- some versions of hPutBuf will barf if the length is zero +hPutLitString :: Handle -> Ptr a -> Int# -> IO () +hPutLitString handle a l = if l ==# _ILIT(0) + then return () + else hPutBuf handle a (iBox l) + +-- Printing output in LeftMode is performance critical: it's used when +-- dumping C and assembly output, so we allow ourselves a few dirty +-- hacks: +-- +-- (1) we specialise fullRender for LeftMode with IO output. +-- +-- (2) we add a layer of buffering on top of Handles. Handles +-- don't perform well with lots of hPutChars, which is mostly +-- what we're doing here, because Handles have to be thread-safe +-- and async exception-safe. We only have a single thread and don't +-- care about exceptions, so we add a layer of fast buffering +-- over the Handle interface. +-- +-- (3) a few hacks in layLeft below to convince GHC to generate the right +-- code. + +printLeftRender :: Handle -> Doc -> IO () +printLeftRender hdl doc = do + b <- newBufHandle hdl + bufLeftRender b doc + bFlush b + +bufLeftRender :: BufHandle -> Doc -> IO () +bufLeftRender b doc = layLeft b (reduceDoc doc) + +-- HACK ALERT! the "return () >>" below convinces GHC to eta-expand +-- this function with the IO state lambda. Otherwise we end up with +-- closures in all the case branches. +layLeft :: BufHandle -> Doc -> IO () +layLeft b _ | b `seq` False = undefined -- make it strict in b +layLeft _ NoDoc = cant_fail +layLeft b (Union p q) = return () >> layLeft b (first p q) +layLeft b (Nest _ p) = return () >> layLeft b p +layLeft b Empty = bPutChar b '\n' +layLeft b (NilAbove p) = bPutChar b '\n' >> layLeft b p +layLeft b (TextBeside s _ p) = put b s >> layLeft b p + where + put b _ | b `seq` False = undefined + put b (Chr c) = bPutChar b c + put b (Str s) = bPutStr b s + put b (PStr s) = bPutFS b s + put b (ZStr s) = bPutFZS b s + put b (LStr s l) = bPutLitString b s l +layLeft _ _ = panic "layLeft: Unhandled case" diff --git a/compiler/utils/Pretty.lhs b/compiler/utils/Pretty.lhs deleted file mode 100644 index 0357c8cfba..0000000000 --- a/compiler/utils/Pretty.lhs +++ /dev/null @@ -1,1057 +0,0 @@ -%********************************************************************************* -%* * -%* John Hughes's and Simon Peyton Jones's Pretty Printer Combinators * -%* * -%* based on "The Design of a Pretty-printing Library" * -%* in Advanced Functional Programming, * -%* Johan Jeuring and Erik Meijer (eds), LNCS 925 * -%* http://www.cs.chalmers.se/~rjmh/Papers/pretty.ps * -%* * -%* Heavily modified by Simon Peyton Jones, Dec 96 * -%* * -%********************************************************************************* - -Version 3.0 28 May 1997 - * Cured massive performance bug. If you write - - foldl <> empty (map (text.show) [1..10000]) - - you get quadratic behaviour with V2.0. Why? For just the same reason as you get - quadratic behaviour with left-associated (++) chains. - - This is really bad news. One thing a pretty-printer abstraction should - certainly guarantee is insensivity to associativity. It matters: suddenly - GHC's compilation times went up by a factor of 100 when I switched to the - new pretty printer. - - I fixed it with a bit of a hack (because I wanted to get GHC back on the - road). I added two new constructors to the Doc type, Above and Beside: - - <> = Beside - $$ = Above - - Then, where I need to get to a "TextBeside" or "NilAbove" form I "force" - the Doc to squeeze out these suspended calls to Beside and Above; but in so - doing I re-associate. It's quite simple, but I'm not satisfied that I've done - the best possible job. I'll send you the code if you are interested. - - * Added new exports: - punctuate, hang - int, integer, float, double, rational, - lparen, rparen, lbrack, rbrack, lbrace, rbrace, - - * fullRender's type signature has changed. Rather than producing a string it - now takes an extra couple of arguments that tells it how to glue fragments - of output together: - - fullRender :: Mode - -> Int -- Line length - -> Float -- Ribbons per line - -> (TextDetails -> a -> a) -- What to do with text - -> a -- What to do at the end - -> Doc - -> a -- Result - - The "fragments" are encapsulated in the TextDetails data type: - data TextDetails = Chr Char - | Str String - | PStr FastString - - The Chr and Str constructors are obvious enough. The PStr constructor has a packed - string (FastString) inside it. It's generated by using the new "ptext" export. - - An advantage of this new setup is that you can get the renderer to do output - directly (by passing in a function of type (TextDetails -> IO () -> IO ()), - rather than producing a string that you then print. - - -Version 2.0 24 April 1997 - * Made empty into a left unit for <> as well as a right unit; - it is also now true that - nest k empty = empty - which wasn't true before. - - * Fixed an obscure bug in sep that occasionally gave very weird behaviour - - * Added $+$ - - * Corrected and tidied up the laws and invariants - -====================================================================== -Relative to John's original paper, there are the following new features: - -1. There's an empty document, "empty". It's a left and right unit for - both <> and $$, and anywhere in the argument list for - sep, hcat, hsep, vcat, fcat etc. - - It is Really Useful in practice. - -2. There is a paragraph-fill combinator, fsep, that's much like sep, - only it keeps fitting things on one line until it can't fit any more. - -3. Some random useful extra combinators are provided. - <+> puts its arguments beside each other with a space between them, - unless either argument is empty in which case it returns the other - - - hcat is a list version of <> - hsep is a list version of <+> - vcat is a list version of $$ - - sep (separate) is either like hsep or like vcat, depending on what fits - - cat is behaves like sep, but it uses <> for horizontal conposition - fcat is behaves like fsep, but it uses <> for horizontal conposition - - These new ones do the obvious things: - char, semi, comma, colon, space, - parens, brackets, braces, - quotes, quote, doubleQuotes - -4. The "above" combinator, $$, now overlaps its two arguments if the - last line of the top argument stops before the first line of the second begins. - For example: text "hi" $$ nest 5 "there" - lays out as - hi there - rather than - hi - there - - There are two places this is really useful - - a) When making labelled blocks, like this: - Left -> code for left - Right -> code for right - LongLongLongLabel -> - code for longlonglonglabel - The block is on the same line as the label if the label is - short, but on the next line otherwise. - - b) When laying out lists like this: - [ first - , second - , third - ] - which some people like. But if the list fits on one line - you want [first, second, third]. You can't do this with - John's original combinators, but it's quite easy with the - new $$. - - The combinator $+$ gives the original "never-overlap" behaviour. - -5. Several different renderers are provided: - * a standard one - * one that uses cut-marks to avoid deeply-nested documents - simply piling up in the right-hand margin - * one that ignores indentation (fewer chars output; good for machines) - * one that ignores indentation and newlines (ditto, only more so) - -6. Numerous implementation tidy-ups - Use of unboxed data types to speed up the implementation - - - -\begin{code} -{-# LANGUAGE BangPatterns, CPP, MagicHash #-} - -module Pretty ( - Doc, -- Abstract - Mode(..), TextDetails(..), - - empty, isEmpty, nest, - - char, text, ftext, ptext, ztext, zeroWidthText, - int, integer, float, double, rational, - parens, brackets, braces, quotes, quote, doubleQuotes, - semi, comma, colon, space, equals, - lparen, rparen, lbrack, rbrack, lbrace, rbrace, cparen, - - (<>), (<+>), hcat, hsep, - ($$), ($+$), vcat, - sep, cat, - fsep, fcat, - - hang, punctuate, - - fullRender, printDoc, printDoc_, showDoc, - bufLeftRender -- performance hack - ) where - -import BufWrite -import FastString -import FastTypes -import Panic -import Numeric (fromRat) -import System.IO - ---for a RULES -import GHC.Base ( unpackCString# ) -import GHC.Exts ( Int# ) -import GHC.Ptr ( Ptr(..) ) - --- Don't import Util( assertPanic ) because it makes a loop in the module structure - -infixl 6 <> -infixl 6 <+> -infixl 5 $$, $+$ -\end{code} - - -\begin{code} - --- Disable ASSERT checks; they are expensive! -#define LOCAL_ASSERT(x) - -\end{code} - - -%********************************************************* -%* * -\subsection{The interface} -%* * -%********************************************************* - -The primitive @Doc@ values - -\begin{code} -empty :: Doc -isEmpty :: Doc -> Bool --- | Some text, but without any width. Use for non-printing text --- such as a HTML or Latex tags -zeroWidthText :: String -> Doc - -text :: String -> Doc -char :: Char -> Doc - -semi, comma, colon, space, equals :: Doc -lparen, rparen, lbrack, rbrack, lbrace, rbrace :: Doc - -parens, brackets, braces :: Doc -> Doc -quotes, quote, doubleQuotes :: Doc -> Doc - -int :: Int -> Doc -integer :: Integer -> Doc -float :: Float -> Doc -double :: Double -> Doc -rational :: Rational -> Doc -\end{code} - -Combining @Doc@ values - -\begin{code} -(<>) :: Doc -> Doc -> Doc -- Beside -hcat :: [Doc] -> Doc -- List version of <> -(<+>) :: Doc -> Doc -> Doc -- Beside, separated by space -hsep :: [Doc] -> Doc -- List version of <+> - -($$) :: Doc -> Doc -> Doc -- Above; if there is no - -- overlap it "dovetails" the two -vcat :: [Doc] -> Doc -- List version of $$ - -cat :: [Doc] -> Doc -- Either hcat or vcat -sep :: [Doc] -> Doc -- Either hsep or vcat -fcat :: [Doc] -> Doc -- ``Paragraph fill'' version of cat -fsep :: [Doc] -> Doc -- ``Paragraph fill'' version of sep - -nest :: Int -> Doc -> Doc -- Nested -\end{code} - -GHC-specific ones. - -\begin{code} -hang :: Doc -> Int -> Doc -> Doc -punctuate :: Doc -> [Doc] -> [Doc] -- punctuate p [d1, ... dn] = [d1 <> p, d2 <> p, ... dn-1 <> p, dn] -\end{code} - -Displaying @Doc@ values. - -\begin{code} -instance Show Doc where - showsPrec _ doc cont = showDocPlus PageMode 100 doc cont - -fullRender :: Mode - -> Int -- Line length - -> Float -- Ribbons per line - -> (TextDetails -> a -> a) -- What to do with text - -> a -- What to do at the end - -> Doc - -> a -- Result - -data Mode = PageMode -- Normal - | ZigZagMode -- With zig-zag cuts - | LeftMode -- No indentation, infinitely long lines - | OneLineMode -- All on one line -\end{code} - - -%********************************************************* -%* * -\subsection{The @Doc@ calculus} -%* * -%********************************************************* - -The @Doc@ combinators satisfy the following laws: -\begin{verbatim} -Laws for $$ -~~~~~~~~~~~ - (x $$ y) $$ z = x $$ (y $$ z) - empty $$ x = x - x $$ empty = x - - ...ditto $+$... - -Laws for <> -~~~~~~~~~~~ - (x <> y) <> z = x <> (y <> z) - empty <> x = empty - x <> empty = x - - ...ditto <+>... - -Laws for text -~~~~~~~~~~~~~ - text s <> text t = text (s++t) - text "" <> x = x, if x non-empty - -Laws for nest -~~~~~~~~~~~~~ - nest 0 x = x - nest k (nest k' x) = nest (k+k') x - nest k (x <> y) = nest k z <> nest k y - nest k (x $$ y) = nest k x $$ nest k y - nest k empty = empty - x <> nest k y = x <> y, if x non-empty - - - Note the side condition on ! It is this that - makes it OK for empty to be a left unit for <>. - -Miscellaneous -~~~~~~~~~~~~~ - (text s <> x) $$ y = text s <> ((text "" <> x)) $$ - nest (-length s) y) - - (x $$ y) <> z = x $$ (y <> z) - if y non-empty - - -Laws for list versions -~~~~~~~~~~~~~~~~~~~~~~ - sep (ps++[empty]++qs) = sep (ps ++ qs) - ...ditto hsep, hcat, vcat, fill... - - nest k (sep ps) = sep (map (nest k) ps) - ...ditto hsep, hcat, vcat, fill... - -Laws for oneLiner -~~~~~~~~~~~~~~~~~ - oneLiner (nest k p) = nest k (oneLiner p) - oneLiner (x <> y) = oneLiner x <> oneLiner y -\end{verbatim} - - -You might think that the following verion of would -be neater: -\begin{verbatim} -<3 NO> (text s <> x) $$ y = text s <> ((empty <> x)) $$ - nest (-length s) y) -\end{verbatim} -But it doesn't work, for if x=empty, we would have -\begin{verbatim} - text s $$ y = text s <> (empty $$ nest (-length s) y) - = text s <> nest (-length s) y -\end{verbatim} - - - -%********************************************************* -%* * -\subsection{Simple derived definitions} -%* * -%********************************************************* - -\begin{code} -semi = char ';' -colon = char ':' -comma = char ',' -space = char ' ' -equals = char '=' -lparen = char '(' -rparen = char ')' -lbrack = char '[' -rbrack = char ']' -lbrace = char '{' -rbrace = char '}' - -int n = text (show n) -integer n = text (show n) -float n = text (show n) -double n = text (show n) -rational n = text (show (fromRat n :: Double)) ---rational n = text (show (fromRationalX n)) -- _showRational 30 n) - -quotes p = char '`' <> p <> char '\'' -quote p = char '\'' <> p -doubleQuotes p = char '"' <> p <> char '"' -parens p = char '(' <> p <> char ')' -brackets p = char '[' <> p <> char ']' -braces p = char '{' <> p <> char '}' - -cparen :: Bool -> Doc -> Doc -cparen True = parens -cparen False = id - -hcat = foldr (<>) empty -hsep = foldr (<+>) empty -vcat = foldr ($$) empty - -hang d1 n d2 = sep [d1, nest n d2] - -punctuate _ [] = [] -punctuate p (d:ds) = go d ds - where - go d [] = [d] - go d (e:es) = (d <> p) : go e es -\end{code} - - -%********************************************************* -%* * -\subsection{The @Doc@ data type} -%* * -%********************************************************* - -A @Doc@ represents a {\em set} of layouts. A @Doc@ with -no occurrences of @Union@ or @NoDoc@ represents just one layout. -\begin{code} -data Doc - = Empty -- empty - | NilAbove Doc -- text "" $$ x - | TextBeside !TextDetails FastInt Doc -- text s <> x - | Nest FastInt Doc -- nest k x - | Union Doc Doc -- ul `union` ur - | NoDoc -- The empty set of documents - | Beside Doc Bool Doc -- True <=> space between - | Above Doc Bool Doc -- True <=> never overlap - -type RDoc = Doc -- RDoc is a "reduced Doc", guaranteed not to have a top-level Above or Beside - - -reduceDoc :: Doc -> RDoc -reduceDoc (Beside p g q) = beside p g (reduceDoc q) -reduceDoc (Above p g q) = above p g (reduceDoc q) -reduceDoc p = p - - -data TextDetails = Chr {-#UNPACK#-}!Char - | Str String - | PStr FastString -- a hashed string - | ZStr FastZString -- a z-encoded string - | LStr {-#UNPACK#-}!LitString FastInt -- a '\0'-terminated - -- array of bytes - -space_text :: TextDetails -space_text = Chr ' ' -nl_text :: TextDetails -nl_text = Chr '\n' -\end{code} - -Here are the invariants: -\begin{itemize} -\item -The argument of @NilAbove@ is never @Empty@. Therefore -a @NilAbove@ occupies at least two lines. - -\item -The arugment of @TextBeside@ is never @Nest@. - -\item -The layouts of the two arguments of @Union@ both flatten to the same string. - -\item -The arguments of @Union@ are either @TextBeside@, or @NilAbove@. - -\item -The right argument of a union cannot be equivalent to the empty set (@NoDoc@). -If the left argument of a union is equivalent to the empty set (@NoDoc@), -then the @NoDoc@ appears in the first line. - -\item -An empty document is always represented by @Empty@. -It can't be hidden inside a @Nest@, or a @Union@ of two @Empty@s. - -\item -The first line of every layout in the left argument of @Union@ -is longer than the first line of any layout in the right argument. -(1) ensures that the left argument has a first line. In view of (3), -this invariant means that the right argument must have at least two -lines. -\end{itemize} - -\begin{code} --- Arg of a NilAbove is always an RDoc -nilAbove_ :: Doc -> Doc -nilAbove_ p = LOCAL_ASSERT( _ok p ) NilAbove p - where - _ok Empty = False - _ok _ = True - --- Arg of a TextBeside is always an RDoc -textBeside_ :: TextDetails -> FastInt -> Doc -> Doc -textBeside_ s sl p = TextBeside s sl (LOCAL_ASSERT( _ok p ) p) - where - _ok (Nest _ _) = False - _ok _ = True - --- Arg of Nest is always an RDoc -nest_ :: FastInt -> Doc -> Doc -nest_ k p = Nest k (LOCAL_ASSERT( _ok p ) p) - where - _ok Empty = False - _ok _ = True - --- Args of union are always RDocs -union_ :: Doc -> Doc -> Doc -union_ p q = Union (LOCAL_ASSERT( _ok p ) p) (LOCAL_ASSERT( _ok q ) q) - where - _ok (TextBeside _ _ _) = True - _ok (NilAbove _) = True - _ok (Union _ _) = True - _ok _ = False -\end{code} - -Notice the difference between - * NoDoc (no documents) - * Empty (one empty document; no height and no width) - * text "" (a document containing the empty string; - one line high, but has no width) - - - -%********************************************************* -%* * -\subsection{@empty@, @text@, @nest@, @union@} -%* * -%********************************************************* - -\begin{code} -empty = Empty - -isEmpty Empty = True -isEmpty _ = False - -char c = textBeside_ (Chr c) (_ILIT(1)) Empty - -text s = case iUnbox (length s) of {sl -> textBeside_ (Str s) sl Empty} -{-# NOINLINE [0] text #-} -- Give the RULE a chance to fire - -- It must wait till after phase 1 when - -- the unpackCString first is manifested - -ftext :: FastString -> Doc -ftext s = case iUnbox (lengthFS s) of {sl -> textBeside_ (PStr s) sl Empty} -ptext :: LitString -> Doc -ptext s = case iUnbox (lengthLS s) of {sl -> textBeside_ (LStr s sl) sl Empty} -ztext :: FastZString -> Doc -ztext s = case iUnbox (lengthFZS s) of {sl -> textBeside_ (ZStr s) sl Empty} -zeroWidthText s = textBeside_ (Str s) (_ILIT(0)) Empty - --- RULE that turns (text "abc") into (ptext (A# "abc"#)) to avoid the --- intermediate packing/unpacking of the string. -{-# RULES - "text/str" forall a. text (unpackCString# a) = ptext (Ptr a) - #-} - -nest k p = mkNest (iUnbox k) (reduceDoc p) -- Externally callable version - --- mkNest checks for Nest's invariant that it doesn't have an Empty inside it -mkNest :: Int# -> Doc -> Doc -mkNest k (Nest k1 p) = mkNest (k +# k1) p -mkNest _ NoDoc = NoDoc -mkNest _ Empty = Empty -mkNest k p | k ==# _ILIT(0) = p -- Worth a try! -mkNest k p = nest_ k p - --- mkUnion checks for an empty document -mkUnion :: Doc -> Doc -> Doc -mkUnion Empty _ = Empty -mkUnion p q = p `union_` q -\end{code} - -%********************************************************* -%* * -\subsection{Vertical composition @$$@} -%* * -%********************************************************* - - -\begin{code} -p $$ q = Above p False q -($+$) :: Doc -> Doc -> Doc -p $+$ q = Above p True q - -above :: Doc -> Bool -> RDoc -> RDoc -above (Above p g1 q1) g2 q2 = above p g1 (above q1 g2 q2) -above p@(Beside _ _ _) g q = aboveNest (reduceDoc p) g (_ILIT(0)) (reduceDoc q) -above p g q = aboveNest p g (_ILIT(0)) (reduceDoc q) - -aboveNest :: RDoc -> Bool -> FastInt -> RDoc -> RDoc --- Specfication: aboveNest p g k q = p $g$ (nest k q) - -aboveNest NoDoc _ _ _ = NoDoc -aboveNest (p1 `Union` p2) g k q = aboveNest p1 g k q `union_` - aboveNest p2 g k q - -aboveNest Empty _ k q = mkNest k q -aboveNest (Nest k1 p) g k q = nest_ k1 (aboveNest p g (k -# k1) q) - -- p can't be Empty, so no need for mkNest - -aboveNest (NilAbove p) g k q = nilAbove_ (aboveNest p g k q) -aboveNest (TextBeside s sl p) g k q = textBeside_ s sl rest - where - !k1 = k -# sl - rest = case p of - Empty -> nilAboveNest g k1 q - _ -> aboveNest p g k1 q -aboveNest _ _ _ _ = panic "aboveNest: Unhandled case" -\end{code} - -\begin{code} -nilAboveNest :: Bool -> FastInt -> RDoc -> RDoc --- Specification: text s <> nilaboveNest g k q --- = text s <> (text "" $g$ nest k q) - -nilAboveNest _ _ Empty = Empty -- Here's why the "text s <>" is in the spec! -nilAboveNest g k (Nest k1 q) = nilAboveNest g (k +# k1) q - -nilAboveNest g k q | (not g) && (k ># _ILIT(0)) -- No newline if no overlap - = textBeside_ (Str (spaces k)) k q - | otherwise -- Put them really above - = nilAbove_ (mkNest k q) -\end{code} - - -%********************************************************* -%* * -\subsection{Horizontal composition @<>@} -%* * -%********************************************************* - -\begin{code} -p <> q = Beside p False q -p <+> q = Beside p True q - -beside :: Doc -> Bool -> RDoc -> RDoc --- Specification: beside g p q = p q - -beside NoDoc _ _ = NoDoc -beside (p1 `Union` p2) g q = (beside p1 g q) `union_` (beside p2 g q) -beside Empty _ q = q -beside (Nest k p) g q = nest_ k $! beside p g q -- p non-empty -beside p@(Beside p1 g1 q1) g2 q2 - {- (A `op1` B) `op2` C == A `op1` (B `op2` C) iff op1 == op2 - [ && (op1 == <> || op1 == <+>) ] -} - | g1 == g2 = beside p1 g1 $! beside q1 g2 q2 - | otherwise = beside (reduceDoc p) g2 q2 -beside p@(Above _ _ _) g q = let d = reduceDoc p in d `seq` beside d g q -beside (NilAbove p) g q = nilAbove_ $! beside p g q -beside (TextBeside s sl p) g q = textBeside_ s sl $! rest - where - rest = case p of - Empty -> nilBeside g q - _ -> beside p g q -\end{code} - -\begin{code} -nilBeside :: Bool -> RDoc -> RDoc --- Specification: text "" <> nilBeside g p --- = text "" p - -nilBeside _ Empty = Empty -- Hence the text "" in the spec -nilBeside g (Nest _ p) = nilBeside g p -nilBeside g p | g = textBeside_ space_text (_ILIT(1)) p - | otherwise = p -\end{code} - -%********************************************************* -%* * -\subsection{Separate, @sep@, Hughes version} -%* * -%********************************************************* - -\begin{code} --- Specification: sep ps = oneLiner (hsep ps) --- `union` --- vcat ps - -sep = sepX True -- Separate with spaces -cat = sepX False -- Don't - -sepX :: Bool -> [Doc] -> Doc -sepX _ [] = empty -sepX x (p:ps) = sep1 x (reduceDoc p) (_ILIT(0)) ps - - --- Specification: sep1 g k ys = sep (x : map (nest k) ys) --- = oneLiner (x nest k (hsep ys)) --- `union` x $$ nest k (vcat ys) - -sep1 :: Bool -> RDoc -> FastInt -> [Doc] -> RDoc -sep1 _ NoDoc _ _ = NoDoc -sep1 g (p `Union` q) k ys = sep1 g p k ys - `union_` - (aboveNest q False k (reduceDoc (vcat ys))) - -sep1 g Empty k ys = mkNest k (sepX g ys) -sep1 g (Nest n p) k ys = nest_ n (sep1 g p (k -# n) ys) - -sep1 _ (NilAbove p) k ys = nilAbove_ (aboveNest p False k (reduceDoc (vcat ys))) -sep1 g (TextBeside s sl p) k ys = textBeside_ s sl (sepNB g p (k -# sl) ys) -sep1 _ _ _ _ = panic "sep1: Unhandled case" - --- Specification: sepNB p k ys = sep1 (text "" <> p) k ys --- Called when we have already found some text in the first item --- We have to eat up nests - -sepNB :: Bool -> Doc -> FastInt -> [Doc] -> Doc -sepNB g (Nest _ p) k ys = sepNB g p k ys - -sepNB g Empty k ys = oneLiner (nilBeside g (reduceDoc rest)) - `mkUnion` - nilAboveNest False k (reduceDoc (vcat ys)) - where - rest | g = hsep ys - | otherwise = hcat ys - -sepNB g p k ys = sep1 g p k ys -\end{code} - -%********************************************************* -%* * -\subsection{@fill@} -%* * -%********************************************************* - -\begin{code} -fsep = fill True -fcat = fill False - --- Specification: --- fill [] = empty --- fill [p] = p --- fill (p1:p2:ps) = oneLiner p1 <#> nest (length p1) --- (fill (oneLiner p2 : ps)) --- `union` --- p1 $$ fill ps - -fill :: Bool -> [Doc] -> Doc -fill _ [] = empty -fill g (p:ps) = fill1 g (reduceDoc p) (_ILIT(0)) ps - - -fill1 :: Bool -> RDoc -> FastInt -> [Doc] -> Doc -fill1 _ NoDoc _ _ = NoDoc -fill1 g (p `Union` q) k ys = fill1 g p k ys - `union_` - (aboveNest q False k (fill g ys)) - -fill1 g Empty k ys = mkNest k (fill g ys) -fill1 g (Nest n p) k ys = nest_ n (fill1 g p (k -# n) ys) - -fill1 g (NilAbove p) k ys = nilAbove_ (aboveNest p False k (fill g ys)) -fill1 g (TextBeside s sl p) k ys = textBeside_ s sl (fillNB g p (k -# sl) ys) -fill1 _ _ _ _ = panic "fill1: Unhandled case" - -fillNB :: Bool -> Doc -> Int# -> [Doc] -> Doc -fillNB g (Nest _ p) k ys = fillNB g p k ys -fillNB _ Empty _ [] = Empty -fillNB g Empty k (y:ys) = nilBeside g (fill1 g (oneLiner (reduceDoc y)) k1 ys) - `mkUnion` - nilAboveNest False k (fill g (y:ys)) - where - !k1 | g = k -# _ILIT(1) - | otherwise = k - -fillNB g p k ys = fill1 g p k ys -\end{code} - - -%********************************************************* -%* * -\subsection{Selecting the best layout} -%* * -%********************************************************* - -\begin{code} -best :: Int -- Line length - -> Int -- Ribbon length - -> RDoc - -> RDoc -- No unions in here! - -best w_ r_ p - = get (iUnbox w_) p - where - !r = iUnbox r_ - get :: FastInt -- (Remaining) width of line - -> Doc -> Doc - get _ Empty = Empty - get _ NoDoc = NoDoc - get w (NilAbove p) = nilAbove_ (get w p) - get w (TextBeside s sl p) = textBeside_ s sl (get1 w sl p) - get w (Nest k p) = nest_ k (get (w -# k) p) - get w (p `Union` q) = nicest w r (get w p) (get w q) - get _ _ = panic "best/get: Unhandled case" - - get1 :: FastInt -- (Remaining) width of line - -> FastInt -- Amount of first line already eaten up - -> Doc -- This is an argument to TextBeside => eat Nests - -> Doc -- No unions in here! - - get1 _ _ Empty = Empty - get1 _ _ NoDoc = NoDoc - get1 w sl (NilAbove p) = nilAbove_ (get (w -# sl) p) - get1 w sl (TextBeside t tl p) = textBeside_ t tl (get1 w (sl +# tl) p) - get1 w sl (Nest _ p) = get1 w sl p - get1 w sl (p `Union` q) = nicest1 w r sl (get1 w sl p) - (get1 w sl q) - get1 _ _ _ = panic "best/get1: Unhandled case" - -nicest :: FastInt -> FastInt -> Doc -> Doc -> Doc -nicest w r p q = nicest1 w r (_ILIT(0)) p q -nicest1 :: FastInt -> FastInt -> Int# -> Doc -> Doc -> Doc -nicest1 w r sl p q | fits ((w `minFastInt` r) -# sl) p = p - | otherwise = q - -fits :: FastInt -- Space available - -> Doc - -> Bool -- True if *first line* of Doc fits in space available - -fits n _ | n <# _ILIT(0) = False -fits _ NoDoc = False -fits _ Empty = True -fits _ (NilAbove _) = True -fits n (TextBeside _ sl p) = fits (n -# sl) p -fits _ _ = panic "fits: Unhandled case" -\end{code} - -@first@ and @nonEmptySet@ are similar to @nicest@ and @fits@, only simpler. -@first@ returns its first argument if it is non-empty, otherwise its second. - -\begin{code} -first :: Doc -> Doc -> Doc -first p q | nonEmptySet p = p - | otherwise = q - -nonEmptySet :: Doc -> Bool -nonEmptySet NoDoc = False -nonEmptySet (_ `Union` _) = True -nonEmptySet Empty = True -nonEmptySet (NilAbove _) = True -- NoDoc always in first line -nonEmptySet (TextBeside _ _ p) = nonEmptySet p -nonEmptySet (Nest _ p) = nonEmptySet p -nonEmptySet _ = panic "nonEmptySet: Unhandled case" -\end{code} - -@oneLiner@ returns the one-line members of the given set of @Doc@s. - -\begin{code} -oneLiner :: Doc -> Doc -oneLiner NoDoc = NoDoc -oneLiner Empty = Empty -oneLiner (NilAbove _) = NoDoc -oneLiner (TextBeside s sl p) = textBeside_ s sl (oneLiner p) -oneLiner (Nest k p) = nest_ k (oneLiner p) -oneLiner (p `Union` _) = oneLiner p -oneLiner _ = panic "oneLiner: Unhandled case" -\end{code} - - - -%********************************************************* -%* * -\subsection{Displaying the best layout} -%* * -%********************************************************* - - -\begin{code} -showDocPlus :: Mode -> Int -> Doc -> String -> String -showDocPlus mode cols doc rest = fullRender mode cols 1.5 string_txt rest doc - -showDoc :: Mode -> Int -> Doc -> String -showDoc mode cols doc = showDocPlus mode cols doc "" - -string_txt :: TextDetails -> String -> String -string_txt (Chr c) s = c:s -string_txt (Str s1) s2 = s1 ++ s2 -string_txt (PStr s1) s2 = unpackFS s1 ++ s2 -string_txt (ZStr s1) s2 = zString s1 ++ s2 -string_txt (LStr s1 _) s2 = unpackLitString s1 ++ s2 -\end{code} - -\begin{code} - -fullRender OneLineMode _ _ txt end doc - = lay (reduceDoc doc) - where - lay NoDoc = cant_fail - lay (Union _ q) = lay q -- Second arg can't be NoDoc - lay (Nest _ p) = lay p - lay Empty = end - lay (NilAbove p) = space_text `txt` lay p -- NoDoc always on - -- first line - lay (TextBeside s _ p) = s `txt` lay p - lay _ = panic "fullRender/OneLineMode/lay: Unhandled case" - -fullRender LeftMode _ _ txt end doc - = lay (reduceDoc doc) - where - lay NoDoc = cant_fail - lay (Union p q) = lay (first p q) - lay (Nest _ p) = lay p - lay Empty = end - lay (NilAbove p) = nl_text `txt` lay p -- NoDoc always on first line - lay (TextBeside s _ p) = s `txt` lay p - lay _ = panic "fullRender/LeftMode/lay: Unhandled case" - -fullRender mode line_length ribbons_per_line txt end doc - = display mode line_length ribbon_length txt end best_doc - where - best_doc = best hacked_line_length ribbon_length (reduceDoc doc) - - hacked_line_length, ribbon_length :: Int - ribbon_length = round (fromIntegral line_length / ribbons_per_line) - hacked_line_length = case mode of - ZigZagMode -> maxBound - _ -> line_length - -display :: Mode -> Int -> Int -> (TextDetails -> t -> t) -> t -> Doc -> t -display mode page_width ribbon_width txt end doc - = case (iUnbox page_width) -# (iUnbox ribbon_width) of { gap_width -> - case gap_width `quotFastInt` _ILIT(2) of { shift -> - let - lay k (Nest k1 p) = lay (k +# k1) p - lay _ Empty = end - - lay k (NilAbove p) = nl_text `txt` lay k p - - lay k (TextBeside s sl p) - = case mode of - ZigZagMode | k >=# gap_width - -> nl_text `txt` ( - Str (multi_ch shift '/') `txt` ( - nl_text `txt` ( - lay1 (k -# shift) s sl p))) - - | k <# _ILIT(0) - -> nl_text `txt` ( - Str (multi_ch shift '\\') `txt` ( - nl_text `txt` ( - lay1 (k +# shift) s sl p ))) - - _ -> lay1 k s sl p - lay _ _ = panic "display/lay: Unhandled case" - - lay1 k s sl p = indent k (s `txt` lay2 (k +# sl) p) - - lay2 k (NilAbove p) = nl_text `txt` lay k p - lay2 k (TextBeside s sl p) = s `txt` (lay2 (k +# sl) p) - lay2 k (Nest _ p) = lay2 k p - lay2 _ Empty = end - lay2 _ _ = panic "display/lay2: Unhandled case" - - -- optimise long indentations using LitString chunks of 8 spaces - indent n r | n >=# _ILIT(8) = LStr (sLit " ") (_ILIT(8)) `txt` - indent (n -# _ILIT(8)) r - | otherwise = Str (spaces n) `txt` r - in - lay (_ILIT(0)) doc - }} - -cant_fail :: a -cant_fail = error "easy_display: NoDoc" - -multi_ch :: Int# -> Char -> String -multi_ch n ch | n <=# _ILIT(0) = "" - | otherwise = ch : multi_ch (n -# _ILIT(1)) ch - -spaces :: Int# -> String -spaces n | n <=# _ILIT(0) = "" - | otherwise = ' ' : spaces (n -# _ILIT(1)) - -\end{code} - -\begin{code} -printDoc :: Mode -> Int -> Handle -> Doc -> IO () --- printDoc adds a newline to the end -printDoc mode cols hdl doc = printDoc_ mode cols hdl (doc $$ text "") - -printDoc_ :: Mode -> Int -> Handle -> Doc -> IO () --- printDoc_ does not add a newline at the end, so that --- successive calls can output stuff on the same line --- Rather like putStr vs putStrLn -printDoc_ LeftMode _ hdl doc - = do { printLeftRender hdl doc; hFlush hdl } -printDoc_ mode pprCols hdl doc - = do { fullRender mode pprCols 1.5 put done doc ; - hFlush hdl } - where - put (Chr c) next = hPutChar hdl c >> next - put (Str s) next = hPutStr hdl s >> next - put (PStr s) next = hPutStr hdl (unpackFS s) >> next - -- NB. not hPutFS, we want this to go through - -- the I/O library's encoding layer. (#3398) - put (ZStr s) next = hPutFZS hdl s >> next - put (LStr s l) next = hPutLitString hdl s l >> next - - done = return () -- hPutChar hdl '\n' - - -- some versions of hPutBuf will barf if the length is zero -hPutLitString :: Handle -> Ptr a -> Int# -> IO () -hPutLitString handle a l = if l ==# _ILIT(0) - then return () - else hPutBuf handle a (iBox l) - --- Printing output in LeftMode is performance critical: it's used when --- dumping C and assembly output, so we allow ourselves a few dirty --- hacks: --- --- (1) we specialise fullRender for LeftMode with IO output. --- --- (2) we add a layer of buffering on top of Handles. Handles --- don't perform well with lots of hPutChars, which is mostly --- what we're doing here, because Handles have to be thread-safe --- and async exception-safe. We only have a single thread and don't --- care about exceptions, so we add a layer of fast buffering --- over the Handle interface. --- --- (3) a few hacks in layLeft below to convince GHC to generate the right --- code. - -printLeftRender :: Handle -> Doc -> IO () -printLeftRender hdl doc = do - b <- newBufHandle hdl - bufLeftRender b doc - bFlush b - -bufLeftRender :: BufHandle -> Doc -> IO () -bufLeftRender b doc = layLeft b (reduceDoc doc) - --- HACK ALERT! the "return () >>" below convinces GHC to eta-expand --- this function with the IO state lambda. Otherwise we end up with --- closures in all the case branches. -layLeft :: BufHandle -> Doc -> IO () -layLeft b _ | b `seq` False = undefined -- make it strict in b -layLeft _ NoDoc = cant_fail -layLeft b (Union p q) = return () >> layLeft b (first p q) -layLeft b (Nest _ p) = return () >> layLeft b p -layLeft b Empty = bPutChar b '\n' -layLeft b (NilAbove p) = bPutChar b '\n' >> layLeft b p -layLeft b (TextBeside s _ p) = put b s >> layLeft b p - where - put b _ | b `seq` False = undefined - put b (Chr c) = bPutChar b c - put b (Str s) = bPutStr b s - put b (PStr s) = bPutFS b s - put b (ZStr s) = bPutFZS b s - put b (LStr s l) = bPutLitString b s l -layLeft _ _ = panic "layLeft: Unhandled case" -\end{code} diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs new file mode 100644 index 0000000000..570282da57 --- /dev/null +++ b/compiler/utils/StringBuffer.hs @@ -0,0 +1,257 @@ +{- +(c) The University of Glasgow 2006 +(c) The University of Glasgow, 1997-2006 + + +Buffers for scanning string input stored in external arrays. +-} + +{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples #-} +{-# OPTIONS_GHC -O -funbox-strict-fields #-} +-- We always optimise this, otherwise performance of a non-optimised +-- compiler is severely affected + +module StringBuffer + ( + StringBuffer(..), + -- non-abstract for vs\/HaskellService + + -- * Creation\/destruction + hGetStringBuffer, + hGetStringBufferBlock, + appendStringBuffers, + stringToStringBuffer, + + -- * Inspection + nextChar, + currentChar, + prevChar, + atEnd, + + -- * Moving and comparison + stepOn, + offsetBytes, + byteDiff, + + -- * Conversion + lexemeToString, + lexemeToFastString, + + -- * Parsing integers + parseUnsignedInteger, + ) where + +#include "HsVersions.h" + +import Encoding +import FastString +import FastTypes +import FastFunctions +import Outputable +import Util + +import Data.Maybe +import Control.Exception +import System.IO +import System.IO.Unsafe ( unsafePerformIO ) + +import GHC.Exts + +#if __GLASGOW_HASKELL__ >= 709 +import Foreign +#else +import Foreign.Safe +#endif + +-- ----------------------------------------------------------------------------- +-- The StringBuffer type + +-- |A StringBuffer is an internal pointer to a sized chunk of bytes. +-- The bytes are intended to be *immutable*. There are pure +-- operations to read the contents of a StringBuffer. +-- +-- A StringBuffer may have a finalizer, depending on how it was +-- obtained. +-- +data StringBuffer + = StringBuffer { + buf :: {-# UNPACK #-} !(ForeignPtr Word8), + len :: {-# UNPACK #-} !Int, -- length + cur :: {-# UNPACK #-} !Int -- current pos + } + -- The buffer is assumed to be UTF-8 encoded, and furthermore + -- we add three '\0' bytes to the end as sentinels so that the + -- decoder doesn't have to check for overflow at every single byte + -- of a multibyte sequence. + +instance Show StringBuffer where + showsPrec _ s = showString "" + +-- ----------------------------------------------------------------------------- +-- Creation / Destruction + +hGetStringBuffer :: FilePath -> IO StringBuffer +hGetStringBuffer fname = do + h <- openBinaryFile fname ReadMode + size_i <- hFileSize h + offset_i <- skipBOM h size_i 0 -- offset is 0 initially + let size = fromIntegral $ size_i - offset_i + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + r <- if size == 0 then return 0 else hGetBuf h ptr size + hClose h + if (r /= size) + then ioError (userError "short read of file") + else newUTF8StringBuffer buf ptr size + +hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer +hGetStringBufferBlock handle wanted + = do size_i <- hFileSize handle + offset_i <- hTell handle >>= skipBOM handle size_i + let size = min wanted (fromIntegral $ size_i-offset_i) + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> + do r <- if size == 0 then return 0 else hGetBuf handle ptr size + if r /= size + then ioError (userError $ "short read of file: "++show(r,size,size_i,handle)) + else newUTF8StringBuffer buf ptr size + +-- | Skip the byte-order mark if there is one (see #1744 and #6016), +-- and return the new position of the handle in bytes. +-- +-- This is better than treating #FEFF as whitespace, +-- because that would mess up layout. We don't have a concept +-- of zero-width whitespace in Haskell: all whitespace codepoints +-- have a width of one column. +skipBOM :: Handle -> Integer -> Integer -> IO Integer +skipBOM h size offset = + -- Only skip BOM at the beginning of a file. + if size > 0 && offset == 0 + then do + -- Validate assumption that handle is in binary mode. + ASSERTM( hGetEncoding h >>= return . isNothing ) + -- Temporarily select text mode to make `hLookAhead` and + -- `hGetChar` return full Unicode characters. + bracket_ (hSetBinaryMode h False) (hSetBinaryMode h True) $ do + c <- hLookAhead h + if c == '\xfeff' + then hGetChar h >> hTell h + else return offset + else return offset + +newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer +newUTF8StringBuffer buf ptr size = do + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return $ StringBuffer buf size 0 + +appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer +appendStringBuffers sb1 sb2 + = do newBuf <- mallocForeignPtrArray (size+3) + withForeignPtr newBuf $ \ptr -> + withForeignPtr (buf sb1) $ \sb1Ptr -> + withForeignPtr (buf sb2) $ \sb2Ptr -> + do copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len + copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len + pokeArray (ptr `advancePtr` size) [0,0,0] + return (StringBuffer newBuf size 0) + where sb1_len = calcLen sb1 + sb2_len = calcLen sb2 + calcLen sb = len sb - cur sb + size = sb1_len + sb2_len + +stringToStringBuffer :: String -> StringBuffer +stringToStringBuffer str = + unsafePerformIO $ do + let size = utf8EncodedLength str + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + utf8EncodeString ptr str + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return (StringBuffer buf size 0) + +-- ----------------------------------------------------------------------------- +-- Grab a character + +-- Getting our fingers dirty a little here, but this is performance-critical +{-# INLINE nextChar #-} +nextChar :: StringBuffer -> (Char,StringBuffer) +nextChar (StringBuffer buf len (I# cur#)) = + inlinePerformIO $ do + withForeignPtr buf $ \(Ptr a#) -> do + case utf8DecodeChar# (a# `plusAddr#` cur#) of + (# c#, nBytes# #) -> + let cur' = I# (cur# +# nBytes#) in + return (C# c#, StringBuffer buf len cur') + +currentChar :: StringBuffer -> Char +currentChar = fst . nextChar + +prevChar :: StringBuffer -> Char -> Char +prevChar (StringBuffer _ _ 0) deflt = deflt +prevChar (StringBuffer buf _ cur) _ = + inlinePerformIO $ do + withForeignPtr buf $ \p -> do + p' <- utf8PrevChar (p `plusPtr` cur) + return (fst (utf8DecodeChar p')) + +-- ----------------------------------------------------------------------------- +-- Moving + +stepOn :: StringBuffer -> StringBuffer +stepOn s = snd (nextChar s) + +offsetBytes :: Int -> StringBuffer -> StringBuffer +offsetBytes i s = s { cur = cur s + i } + +byteDiff :: StringBuffer -> StringBuffer -> Int +byteDiff s1 s2 = cur s2 - cur s1 + +atEnd :: StringBuffer -> Bool +atEnd (StringBuffer _ l c) = l == c + +-- ----------------------------------------------------------------------------- +-- Conversion + +lexemeToString :: StringBuffer -> Int {-bytes-} -> String +lexemeToString _ 0 = "" +lexemeToString (StringBuffer buf _ cur) bytes = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + utf8DecodeString (ptr `plusPtr` cur) bytes + +lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString +lexemeToFastString _ 0 = nilFS +lexemeToFastString (StringBuffer buf _ cur) len = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + return $! mkFastStringBytes (ptr `plusPtr` cur) len + +-- ----------------------------------------------------------------------------- +-- Parsing integer strings in various bases +{- +byteOff :: StringBuffer -> Int -> Char +byteOff (StringBuffer buf _ cur) i = + inlinePerformIO $ withForeignPtr buf $ \ptr -> do +-- return $! cBox (indexWord8OffFastPtrAsFastChar +-- (pUnbox ptr) (iUnbox (cur+i))) +--or +-- w <- peek (ptr `plusPtr` (cur+i)) +-- return (unsafeChr (fromIntegral (w::Word8))) +-} +-- | XXX assumes ASCII digits only (by using byteOff) +parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer +parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int + = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let + --LOL, in implementations where the indexing needs slow unsafePerformIO, + --this is less (not more) efficient than using the IO monad explicitly + --here. + !ptr' = pUnbox ptr + byteOff i = cBox (indexWord8OffFastPtrAsFastChar ptr' (iUnbox (cur + i))) + go i x | i == len = x + | otherwise = case byteOff i of + char -> go (i + 1) (x * radix + toInteger (char_to_int char)) + in go 0 0 diff --git a/compiler/utils/StringBuffer.lhs b/compiler/utils/StringBuffer.lhs deleted file mode 100644 index 9e6e6c1824..0000000000 --- a/compiler/utils/StringBuffer.lhs +++ /dev/null @@ -1,259 +0,0 @@ -% -% (c) The University of Glasgow 2006 -% (c) The University of Glasgow, 1997-2006 -% - -Buffers for scanning string input stored in external arrays. - -\begin{code} -{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples #-} -{-# OPTIONS_GHC -O -funbox-strict-fields #-} --- We always optimise this, otherwise performance of a non-optimised --- compiler is severely affected - -module StringBuffer - ( - StringBuffer(..), - -- non-abstract for vs\/HaskellService - - -- * Creation\/destruction - hGetStringBuffer, - hGetStringBufferBlock, - appendStringBuffers, - stringToStringBuffer, - - -- * Inspection - nextChar, - currentChar, - prevChar, - atEnd, - - -- * Moving and comparison - stepOn, - offsetBytes, - byteDiff, - - -- * Conversion - lexemeToString, - lexemeToFastString, - - -- * Parsing integers - parseUnsignedInteger, - ) where - -#include "HsVersions.h" - -import Encoding -import FastString -import FastTypes -import FastFunctions -import Outputable -import Util - -import Data.Maybe -import Control.Exception -import System.IO -import System.IO.Unsafe ( unsafePerformIO ) - -import GHC.Exts - -#if __GLASGOW_HASKELL__ >= 709 -import Foreign -#else -import Foreign.Safe -#endif - --- ----------------------------------------------------------------------------- --- The StringBuffer type - --- |A StringBuffer is an internal pointer to a sized chunk of bytes. --- The bytes are intended to be *immutable*. There are pure --- operations to read the contents of a StringBuffer. --- --- A StringBuffer may have a finalizer, depending on how it was --- obtained. --- -data StringBuffer - = StringBuffer { - buf :: {-# UNPACK #-} !(ForeignPtr Word8), - len :: {-# UNPACK #-} !Int, -- length - cur :: {-# UNPACK #-} !Int -- current pos - } - -- The buffer is assumed to be UTF-8 encoded, and furthermore - -- we add three '\0' bytes to the end as sentinels so that the - -- decoder doesn't have to check for overflow at every single byte - -- of a multibyte sequence. - -instance Show StringBuffer where - showsPrec _ s = showString "" - --- ----------------------------------------------------------------------------- --- Creation / Destruction - -hGetStringBuffer :: FilePath -> IO StringBuffer -hGetStringBuffer fname = do - h <- openBinaryFile fname ReadMode - size_i <- hFileSize h - offset_i <- skipBOM h size_i 0 -- offset is 0 initially - let size = fromIntegral $ size_i - offset_i - buf <- mallocForeignPtrArray (size+3) - withForeignPtr buf $ \ptr -> do - r <- if size == 0 then return 0 else hGetBuf h ptr size - hClose h - if (r /= size) - then ioError (userError "short read of file") - else newUTF8StringBuffer buf ptr size - -hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer -hGetStringBufferBlock handle wanted - = do size_i <- hFileSize handle - offset_i <- hTell handle >>= skipBOM handle size_i - let size = min wanted (fromIntegral $ size_i-offset_i) - buf <- mallocForeignPtrArray (size+3) - withForeignPtr buf $ \ptr -> - do r <- if size == 0 then return 0 else hGetBuf handle ptr size - if r /= size - then ioError (userError $ "short read of file: "++show(r,size,size_i,handle)) - else newUTF8StringBuffer buf ptr size - --- | Skip the byte-order mark if there is one (see #1744 and #6016), --- and return the new position of the handle in bytes. --- --- This is better than treating #FEFF as whitespace, --- because that would mess up layout. We don't have a concept --- of zero-width whitespace in Haskell: all whitespace codepoints --- have a width of one column. -skipBOM :: Handle -> Integer -> Integer -> IO Integer -skipBOM h size offset = - -- Only skip BOM at the beginning of a file. - if size > 0 && offset == 0 - then do - -- Validate assumption that handle is in binary mode. - ASSERTM( hGetEncoding h >>= return . isNothing ) - -- Temporarily select text mode to make `hLookAhead` and - -- `hGetChar` return full Unicode characters. - bracket_ (hSetBinaryMode h False) (hSetBinaryMode h True) $ do - c <- hLookAhead h - if c == '\xfeff' - then hGetChar h >> hTell h - else return offset - else return offset - -newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer -newUTF8StringBuffer buf ptr size = do - pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - -- sentinels for UTF-8 decoding - return $ StringBuffer buf size 0 - -appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer -appendStringBuffers sb1 sb2 - = do newBuf <- mallocForeignPtrArray (size+3) - withForeignPtr newBuf $ \ptr -> - withForeignPtr (buf sb1) $ \sb1Ptr -> - withForeignPtr (buf sb2) $ \sb2Ptr -> - do copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len - copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len - pokeArray (ptr `advancePtr` size) [0,0,0] - return (StringBuffer newBuf size 0) - where sb1_len = calcLen sb1 - sb2_len = calcLen sb2 - calcLen sb = len sb - cur sb - size = sb1_len + sb2_len - -stringToStringBuffer :: String -> StringBuffer -stringToStringBuffer str = - unsafePerformIO $ do - let size = utf8EncodedLength str - buf <- mallocForeignPtrArray (size+3) - withForeignPtr buf $ \ptr -> do - utf8EncodeString ptr str - pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - -- sentinels for UTF-8 decoding - return (StringBuffer buf size 0) - --- ----------------------------------------------------------------------------- --- Grab a character - --- Getting our fingers dirty a little here, but this is performance-critical -{-# INLINE nextChar #-} -nextChar :: StringBuffer -> (Char,StringBuffer) -nextChar (StringBuffer buf len (I# cur#)) = - inlinePerformIO $ do - withForeignPtr buf $ \(Ptr a#) -> do - case utf8DecodeChar# (a# `plusAddr#` cur#) of - (# c#, nBytes# #) -> - let cur' = I# (cur# +# nBytes#) in - return (C# c#, StringBuffer buf len cur') - -currentChar :: StringBuffer -> Char -currentChar = fst . nextChar - -prevChar :: StringBuffer -> Char -> Char -prevChar (StringBuffer _ _ 0) deflt = deflt -prevChar (StringBuffer buf _ cur) _ = - inlinePerformIO $ do - withForeignPtr buf $ \p -> do - p' <- utf8PrevChar (p `plusPtr` cur) - return (fst (utf8DecodeChar p')) - --- ----------------------------------------------------------------------------- --- Moving - -stepOn :: StringBuffer -> StringBuffer -stepOn s = snd (nextChar s) - -offsetBytes :: Int -> StringBuffer -> StringBuffer -offsetBytes i s = s { cur = cur s + i } - -byteDiff :: StringBuffer -> StringBuffer -> Int -byteDiff s1 s2 = cur s2 - cur s1 - -atEnd :: StringBuffer -> Bool -atEnd (StringBuffer _ l c) = l == c - --- ----------------------------------------------------------------------------- --- Conversion - -lexemeToString :: StringBuffer -> Int {-bytes-} -> String -lexemeToString _ 0 = "" -lexemeToString (StringBuffer buf _ cur) bytes = - inlinePerformIO $ - withForeignPtr buf $ \ptr -> - utf8DecodeString (ptr `plusPtr` cur) bytes - -lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString -lexemeToFastString _ 0 = nilFS -lexemeToFastString (StringBuffer buf _ cur) len = - inlinePerformIO $ - withForeignPtr buf $ \ptr -> - return $! mkFastStringBytes (ptr `plusPtr` cur) len - --- ----------------------------------------------------------------------------- --- Parsing integer strings in various bases -{- -byteOff :: StringBuffer -> Int -> Char -byteOff (StringBuffer buf _ cur) i = - inlinePerformIO $ withForeignPtr buf $ \ptr -> do --- return $! cBox (indexWord8OffFastPtrAsFastChar --- (pUnbox ptr) (iUnbox (cur+i))) ---or --- w <- peek (ptr `plusPtr` (cur+i)) --- return (unsafeChr (fromIntegral (w::Word8))) --} --- | XXX assumes ASCII digits only (by using byteOff) -parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer -parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int - = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let - --LOL, in implementations where the indexing needs slow unsafePerformIO, - --this is less (not more) efficient than using the IO monad explicitly - --here. - !ptr' = pUnbox ptr - byteOff i = cBox (indexWord8OffFastPtrAsFastChar ptr' (iUnbox (cur + i))) - go i x | i == len = x - | otherwise = case byteOff i of - char -> go (i + 1) (x * radix + toInteger (char_to_int char)) - in go 0 0 - -\end{code} diff --git a/compiler/utils/UniqFM.hs b/compiler/utils/UniqFM.hs new file mode 100644 index 0000000000..8f962d4f5e --- /dev/null +++ b/compiler/utils/UniqFM.hs @@ -0,0 +1,311 @@ +{- +(c) The University of Glasgow 2006 +(c) The AQUA Project, Glasgow University, 1994-1998 + + +UniqFM: Specialised finite maps, for things with @Uniques@. + +Basically, the things need to be in class @Uniquable@, and we use the +@getUnique@ method to grab their @Uniques@. + +(A similar thing to @UniqSet@, as opposed to @Set@.) + +The interface is based on @FiniteMap@s, but the implementation uses +@Data.IntMap@, which is both maintained and faster than the past +implementation (see commit log). + +The @UniqFM@ interface maps directly to Data.IntMap, only +``Data.IntMap.union'' is left-biased and ``plusUFM'' right-biased +and ``addToUFM\_C'' and ``Data.IntMap.insertWith'' differ in the order +of arguments of combining function. +-} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# OPTIONS_GHC -Wall #-} + +module UniqFM ( + -- * Unique-keyed mappings + UniqFM, -- abstract type + + -- ** Manipulating those mappings + emptyUFM, + unitUFM, + unitDirectlyUFM, + listToUFM, + listToUFM_Directly, + listToUFM_C, + addToUFM,addToUFM_C,addToUFM_Acc, + addListToUFM,addListToUFM_C, + addToUFM_Directly, + addListToUFM_Directly, + adjustUFM, alterUFM, + adjustUFM_Directly, + delFromUFM, + delFromUFM_Directly, + delListFromUFM, + plusUFM, + plusUFM_C, + plusUFM_CD, + minusUFM, + intersectUFM, + intersectUFM_C, + foldUFM, foldUFM_Directly, + mapUFM, mapUFM_Directly, + elemUFM, elemUFM_Directly, + filterUFM, filterUFM_Directly, partitionUFM, + sizeUFM, + isNullUFM, + lookupUFM, lookupUFM_Directly, + lookupWithDefaultUFM, lookupWithDefaultUFM_Directly, + eltsUFM, keysUFM, splitUFM, + ufmToSet_Directly, + ufmToList, + joinUFM, pprUniqFM + ) where + +import FastString +import Unique ( Uniquable(..), Unique, getKey ) +import Outputable + +import Compiler.Hoopl hiding (Unique) + +import qualified Data.IntMap as M +import qualified Data.IntSet as S +import qualified Data.Foldable as Foldable +import qualified Data.Traversable as Traversable +import Data.Typeable +import Data.Data +#if __GLASGOW_HASKELL__ < 709 +import Data.Monoid +#endif + +{- +************************************************************************ +* * +\subsection{The signature of the module} +* * +************************************************************************ +-} + +emptyUFM :: UniqFM elt +isNullUFM :: UniqFM elt -> Bool +unitUFM :: Uniquable key => key -> elt -> UniqFM elt +unitDirectlyUFM -- got the Unique already + :: Unique -> elt -> UniqFM elt +listToUFM :: Uniquable key => [(key,elt)] -> UniqFM elt +listToUFM_Directly + :: [(Unique, elt)] -> UniqFM elt +listToUFM_C :: Uniquable key => (elt -> elt -> elt) + -> [(key, elt)] + -> UniqFM elt + +addToUFM :: Uniquable key => UniqFM elt -> key -> elt -> UniqFM elt +addListToUFM :: Uniquable key => UniqFM elt -> [(key,elt)] -> UniqFM elt +addListToUFM_Directly :: UniqFM elt -> [(Unique,elt)] -> UniqFM elt +addToUFM_Directly + :: UniqFM elt -> Unique -> elt -> UniqFM elt + +addToUFM_C :: Uniquable key => (elt -> elt -> elt) -- old -> new -> result + -> UniqFM elt -- old + -> key -> elt -- new + -> UniqFM elt -- result + +addToUFM_Acc :: Uniquable key => + (elt -> elts -> elts) -- Add to existing + -> (elt -> elts) -- New element + -> UniqFM elts -- old + -> key -> elt -- new + -> UniqFM elts -- result + +alterUFM :: Uniquable key => + (Maybe elt -> Maybe elt) -- How to adjust + -> UniqFM elt -- old + -> key -- new + -> UniqFM elt -- result + +addListToUFM_C :: Uniquable key => (elt -> elt -> elt) + -> UniqFM elt -> [(key,elt)] + -> UniqFM elt + +adjustUFM :: Uniquable key => (elt -> elt) -> UniqFM elt -> key -> UniqFM elt +adjustUFM_Directly :: (elt -> elt) -> UniqFM elt -> Unique -> UniqFM elt + +delFromUFM :: Uniquable key => UniqFM elt -> key -> UniqFM elt +delListFromUFM :: Uniquable key => UniqFM elt -> [key] -> UniqFM elt +delFromUFM_Directly :: UniqFM elt -> Unique -> UniqFM elt + +-- Bindings in right argument shadow those in the left +plusUFM :: UniqFM elt -> UniqFM elt -> UniqFM elt + +plusUFM_C :: (elt -> elt -> elt) + -> UniqFM elt -> UniqFM elt -> UniqFM elt + +-- | `plusUFM_CD f m1 d1 m2 d2` merges the maps using `f` as the +-- combinding function and `d1` resp. `d2` as the default value if +-- there is no entry in `m1` reps. `m2`. The domain is the union of +-- the domains of `m1` and `m2`. +-- +-- Representative example: +-- +-- @ +-- plusUFM_CD f {A: 1, B: 2} 23 {B: 3, C: 4} 42 +-- == {A: f 1 42, B: f 2 3, C: f 23 4 } +-- @ +plusUFM_CD :: (elt -> elt -> elt) + -> UniqFM elt -> elt -> UniqFM elt -> elt -> UniqFM elt + +minusUFM :: UniqFM elt1 -> UniqFM elt2 -> UniqFM elt1 + +intersectUFM :: UniqFM elt -> UniqFM elt -> UniqFM elt +intersectUFM_C :: (elt1 -> elt2 -> elt3) + -> UniqFM elt1 -> UniqFM elt2 -> UniqFM elt3 + +foldUFM :: (elt -> a -> a) -> a -> UniqFM elt -> a +foldUFM_Directly:: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a +mapUFM :: (elt1 -> elt2) -> UniqFM elt1 -> UniqFM elt2 +mapUFM_Directly :: (Unique -> elt1 -> elt2) -> UniqFM elt1 -> UniqFM elt2 +filterUFM :: (elt -> Bool) -> UniqFM elt -> UniqFM elt +filterUFM_Directly :: (Unique -> elt -> Bool) -> UniqFM elt -> UniqFM elt +partitionUFM :: (elt -> Bool) -> UniqFM elt -> (UniqFM elt, UniqFM elt) + +sizeUFM :: UniqFM elt -> Int +--hashUFM :: UniqFM elt -> Int +elemUFM :: Uniquable key => key -> UniqFM elt -> Bool +elemUFM_Directly:: Unique -> UniqFM elt -> Bool + +splitUFM :: Uniquable key => UniqFM elt -> key -> (UniqFM elt, Maybe elt, UniqFM elt) + -- Splits a UFM into things less than, equal to, and greater than the key +lookupUFM :: Uniquable key => UniqFM elt -> key -> Maybe elt +lookupUFM_Directly -- when you've got the Unique already + :: UniqFM elt -> Unique -> Maybe elt +lookupWithDefaultUFM + :: Uniquable key => UniqFM elt -> elt -> key -> elt +lookupWithDefaultUFM_Directly + :: UniqFM elt -> elt -> Unique -> elt +keysUFM :: UniqFM elt -> [Unique] -- Get the keys +eltsUFM :: UniqFM elt -> [elt] +ufmToSet_Directly :: UniqFM elt -> S.IntSet +ufmToList :: UniqFM elt -> [(Unique, elt)] + +{- +************************************************************************ +* * +\subsection{Monoid interface} +* * +************************************************************************ +-} + +instance Monoid (UniqFM a) where + mempty = emptyUFM + mappend = plusUFM + +{- +************************************************************************ +* * +\subsection{Implementation using ``Data.IntMap''} +* * +************************************************************************ +-} + +newtype UniqFM ele = UFM (M.IntMap ele) + deriving (Data, Eq, Foldable.Foldable, Functor, Traversable.Traversable, + Typeable) + +emptyUFM = UFM M.empty +isNullUFM (UFM m) = M.null m +unitUFM k v = UFM (M.singleton (getKey $ getUnique k) v) +unitDirectlyUFM u v = UFM (M.singleton (getKey u) v) +listToUFM = foldl (\m (k, v) -> addToUFM m k v) emptyUFM +listToUFM_Directly = foldl (\m (u, v) -> addToUFM_Directly m u v) emptyUFM +listToUFM_C f = foldl (\m (k, v) -> addToUFM_C f m k v) emptyUFM + +alterUFM f (UFM m) k = UFM (M.alter f (getKey $ getUnique k) m) +addToUFM (UFM m) k v = UFM (M.insert (getKey $ getUnique k) v m) +addListToUFM = foldl (\m (k, v) -> addToUFM m k v) +addListToUFM_Directly = foldl (\m (k, v) -> addToUFM_Directly m k v) +addToUFM_Directly (UFM m) u v = UFM (M.insert (getKey u) v m) + +-- Arguments of combining function of M.insertWith and addToUFM_C are flipped. +addToUFM_C f (UFM m) k v = + UFM (M.insertWith (flip f) (getKey $ getUnique k) v m) +addToUFM_Acc exi new (UFM m) k v = + UFM (M.insertWith (\_new old -> exi v old) (getKey $ getUnique k) (new v) m) +addListToUFM_C f = foldl (\m (k, v) -> addToUFM_C f m k v) + +adjustUFM f (UFM m) k = UFM (M.adjust f (getKey $ getUnique k) m) +adjustUFM_Directly f (UFM m) u = UFM (M.adjust f (getKey u) m) + +delFromUFM (UFM m) k = UFM (M.delete (getKey $ getUnique k) m) +delListFromUFM = foldl delFromUFM +delFromUFM_Directly (UFM m) u = UFM (M.delete (getKey u) m) + +-- M.union is left-biased, plusUFM should be right-biased. +plusUFM (UFM x) (UFM y) = UFM (M.union y x) + -- Note (M.union y x), with arguments flipped + -- M.union is left-biased, plusUFM should be right-biased. + +plusUFM_C f (UFM x) (UFM y) = UFM (M.unionWith f x y) + +plusUFM_CD f (UFM xm) dx (UFM ym) dy + = UFM $ M.mergeWithKey + (\_ x y -> Just (x `f` y)) + (M.map (\x -> x `f` dy)) + (M.map (\y -> dx `f` y)) + xm ym +minusUFM (UFM x) (UFM y) = UFM (M.difference x y) +intersectUFM (UFM x) (UFM y) = UFM (M.intersection x y) +intersectUFM_C f (UFM x) (UFM y) = UFM (M.intersectionWith f x y) + +foldUFM k z (UFM m) = M.fold k z m +foldUFM_Directly k z (UFM m) = M.foldWithKey (k . getUnique) z m +mapUFM f (UFM m) = UFM (M.map f m) +mapUFM_Directly f (UFM m) = UFM (M.mapWithKey (f . getUnique) m) +filterUFM p (UFM m) = UFM (M.filter p m) +filterUFM_Directly p (UFM m) = UFM (M.filterWithKey (p . getUnique) m) +partitionUFM p (UFM m) = case M.partition p m of + (left, right) -> (UFM left, UFM right) + +sizeUFM (UFM m) = M.size m +elemUFM k (UFM m) = M.member (getKey $ getUnique k) m +elemUFM_Directly u (UFM m) = M.member (getKey u) m + +splitUFM (UFM m) k = case M.splitLookup (getKey $ getUnique k) m of + (less, equal, greater) -> (UFM less, equal, UFM greater) +lookupUFM (UFM m) k = M.lookup (getKey $ getUnique k) m +lookupUFM_Directly (UFM m) u = M.lookup (getKey u) m +lookupWithDefaultUFM (UFM m) v k = M.findWithDefault v (getKey $ getUnique k) m +lookupWithDefaultUFM_Directly (UFM m) v u = M.findWithDefault v (getKey u) m +keysUFM (UFM m) = map getUnique $ M.keys m +eltsUFM (UFM m) = M.elems m +ufmToSet_Directly (UFM m) = M.keysSet m +ufmToList (UFM m) = map (\(k, v) -> (getUnique k, v)) $ M.toList m + +-- Hoopl +joinUFM :: JoinFun v -> JoinFun (UniqFM v) +joinUFM eltJoin l (OldFact old) (NewFact new) = foldUFM_Directly add (NoChange, old) new + where add k new_v (ch, joinmap) = + case lookupUFM_Directly joinmap k of + Nothing -> (SomeChange, addToUFM_Directly joinmap k new_v) + Just old_v -> case eltJoin l (OldFact old_v) (NewFact new_v) of + (SomeChange, v') -> (SomeChange, addToUFM_Directly joinmap k v') + (NoChange, _) -> (ch, joinmap) + +{- +************************************************************************ +* * +\subsection{Output-ery} +* * +************************************************************************ +-} + +instance Outputable a => Outputable (UniqFM a) where + ppr ufm = pprUniqFM ppr ufm + +pprUniqFM :: (a -> SDoc) -> UniqFM a -> SDoc +pprUniqFM ppr_elt ufm + = brackets $ fsep $ punctuate comma $ + [ ppr uq <+> ptext (sLit ":->") <+> ppr_elt elt + | (uq, elt) <- ufmToList ufm ] diff --git a/compiler/utils/UniqFM.lhs b/compiler/utils/UniqFM.lhs deleted file mode 100644 index f0f903522b..0000000000 --- a/compiler/utils/UniqFM.lhs +++ /dev/null @@ -1,314 +0,0 @@ -% -% (c) The University of Glasgow 2006 -% (c) The AQUA Project, Glasgow University, 1994-1998 -% - -UniqFM: Specialised finite maps, for things with @Uniques@. - -Basically, the things need to be in class @Uniquable@, and we use the -@getUnique@ method to grab their @Uniques@. - -(A similar thing to @UniqSet@, as opposed to @Set@.) - -The interface is based on @FiniteMap@s, but the implementation uses -@Data.IntMap@, which is both maintained and faster than the past -implementation (see commit log). - -The @UniqFM@ interface maps directly to Data.IntMap, only -``Data.IntMap.union'' is left-biased and ``plusUFM'' right-biased -and ``addToUFM\_C'' and ``Data.IntMap.insertWith'' differ in the order -of arguments of combining function. - -\begin{code} -{-# LANGUAGE CPP #-} -{-# LANGUAGE DeriveDataTypeable #-} -{-# LANGUAGE DeriveTraversable #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# OPTIONS_GHC -Wall #-} - -module UniqFM ( - -- * Unique-keyed mappings - UniqFM, -- abstract type - - -- ** Manipulating those mappings - emptyUFM, - unitUFM, - unitDirectlyUFM, - listToUFM, - listToUFM_Directly, - listToUFM_C, - addToUFM,addToUFM_C,addToUFM_Acc, - addListToUFM,addListToUFM_C, - addToUFM_Directly, - addListToUFM_Directly, - adjustUFM, alterUFM, - adjustUFM_Directly, - delFromUFM, - delFromUFM_Directly, - delListFromUFM, - plusUFM, - plusUFM_C, - plusUFM_CD, - minusUFM, - intersectUFM, - intersectUFM_C, - foldUFM, foldUFM_Directly, - mapUFM, mapUFM_Directly, - elemUFM, elemUFM_Directly, - filterUFM, filterUFM_Directly, partitionUFM, - sizeUFM, - isNullUFM, - lookupUFM, lookupUFM_Directly, - lookupWithDefaultUFM, lookupWithDefaultUFM_Directly, - eltsUFM, keysUFM, splitUFM, - ufmToSet_Directly, - ufmToList, - joinUFM, pprUniqFM - ) where - -import FastString -import Unique ( Uniquable(..), Unique, getKey ) -import Outputable - -import Compiler.Hoopl hiding (Unique) - -import qualified Data.IntMap as M -import qualified Data.IntSet as S -import qualified Data.Foldable as Foldable -import qualified Data.Traversable as Traversable -import Data.Typeable -import Data.Data -#if __GLASGOW_HASKELL__ < 709 -import Data.Monoid -#endif -\end{code} - -%************************************************************************ -%* * -\subsection{The signature of the module} -%* * -%************************************************************************ - -\begin{code} -emptyUFM :: UniqFM elt -isNullUFM :: UniqFM elt -> Bool -unitUFM :: Uniquable key => key -> elt -> UniqFM elt -unitDirectlyUFM -- got the Unique already - :: Unique -> elt -> UniqFM elt -listToUFM :: Uniquable key => [(key,elt)] -> UniqFM elt -listToUFM_Directly - :: [(Unique, elt)] -> UniqFM elt -listToUFM_C :: Uniquable key => (elt -> elt -> elt) - -> [(key, elt)] - -> UniqFM elt - -addToUFM :: Uniquable key => UniqFM elt -> key -> elt -> UniqFM elt -addListToUFM :: Uniquable key => UniqFM elt -> [(key,elt)] -> UniqFM elt -addListToUFM_Directly :: UniqFM elt -> [(Unique,elt)] -> UniqFM elt -addToUFM_Directly - :: UniqFM elt -> Unique -> elt -> UniqFM elt - -addToUFM_C :: Uniquable key => (elt -> elt -> elt) -- old -> new -> result - -> UniqFM elt -- old - -> key -> elt -- new - -> UniqFM elt -- result - -addToUFM_Acc :: Uniquable key => - (elt -> elts -> elts) -- Add to existing - -> (elt -> elts) -- New element - -> UniqFM elts -- old - -> key -> elt -- new - -> UniqFM elts -- result - -alterUFM :: Uniquable key => - (Maybe elt -> Maybe elt) -- How to adjust - -> UniqFM elt -- old - -> key -- new - -> UniqFM elt -- result - -addListToUFM_C :: Uniquable key => (elt -> elt -> elt) - -> UniqFM elt -> [(key,elt)] - -> UniqFM elt - -adjustUFM :: Uniquable key => (elt -> elt) -> UniqFM elt -> key -> UniqFM elt -adjustUFM_Directly :: (elt -> elt) -> UniqFM elt -> Unique -> UniqFM elt - -delFromUFM :: Uniquable key => UniqFM elt -> key -> UniqFM elt -delListFromUFM :: Uniquable key => UniqFM elt -> [key] -> UniqFM elt -delFromUFM_Directly :: UniqFM elt -> Unique -> UniqFM elt - --- Bindings in right argument shadow those in the left -plusUFM :: UniqFM elt -> UniqFM elt -> UniqFM elt - -plusUFM_C :: (elt -> elt -> elt) - -> UniqFM elt -> UniqFM elt -> UniqFM elt - --- | `plusUFM_CD f m1 d1 m2 d2` merges the maps using `f` as the --- combinding function and `d1` resp. `d2` as the default value if --- there is no entry in `m1` reps. `m2`. The domain is the union of --- the domains of `m1` and `m2`. --- --- Representative example: --- --- @ --- plusUFM_CD f {A: 1, B: 2} 23 {B: 3, C: 4} 42 --- == {A: f 1 42, B: f 2 3, C: f 23 4 } --- @ -plusUFM_CD :: (elt -> elt -> elt) - -> UniqFM elt -> elt -> UniqFM elt -> elt -> UniqFM elt - -minusUFM :: UniqFM elt1 -> UniqFM elt2 -> UniqFM elt1 - -intersectUFM :: UniqFM elt -> UniqFM elt -> UniqFM elt -intersectUFM_C :: (elt1 -> elt2 -> elt3) - -> UniqFM elt1 -> UniqFM elt2 -> UniqFM elt3 - -foldUFM :: (elt -> a -> a) -> a -> UniqFM elt -> a -foldUFM_Directly:: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a -mapUFM :: (elt1 -> elt2) -> UniqFM elt1 -> UniqFM elt2 -mapUFM_Directly :: (Unique -> elt1 -> elt2) -> UniqFM elt1 -> UniqFM elt2 -filterUFM :: (elt -> Bool) -> UniqFM elt -> UniqFM elt -filterUFM_Directly :: (Unique -> elt -> Bool) -> UniqFM elt -> UniqFM elt -partitionUFM :: (elt -> Bool) -> UniqFM elt -> (UniqFM elt, UniqFM elt) - -sizeUFM :: UniqFM elt -> Int ---hashUFM :: UniqFM elt -> Int -elemUFM :: Uniquable key => key -> UniqFM elt -> Bool -elemUFM_Directly:: Unique -> UniqFM elt -> Bool - -splitUFM :: Uniquable key => UniqFM elt -> key -> (UniqFM elt, Maybe elt, UniqFM elt) - -- Splits a UFM into things less than, equal to, and greater than the key -lookupUFM :: Uniquable key => UniqFM elt -> key -> Maybe elt -lookupUFM_Directly -- when you've got the Unique already - :: UniqFM elt -> Unique -> Maybe elt -lookupWithDefaultUFM - :: Uniquable key => UniqFM elt -> elt -> key -> elt -lookupWithDefaultUFM_Directly - :: UniqFM elt -> elt -> Unique -> elt -keysUFM :: UniqFM elt -> [Unique] -- Get the keys -eltsUFM :: UniqFM elt -> [elt] -ufmToSet_Directly :: UniqFM elt -> S.IntSet -ufmToList :: UniqFM elt -> [(Unique, elt)] - -\end{code} - -%************************************************************************ -%* * -\subsection{Monoid interface} -%* * -%************************************************************************ - -\begin{code} -instance Monoid (UniqFM a) where - mempty = emptyUFM - mappend = plusUFM -\end{code} - -%************************************************************************ -%* * -\subsection{Implementation using ``Data.IntMap''} -%* * -%************************************************************************ - -\begin{code} -newtype UniqFM ele = UFM (M.IntMap ele) - deriving (Data, Eq, Foldable.Foldable, Functor, Traversable.Traversable, - Typeable) - -emptyUFM = UFM M.empty -isNullUFM (UFM m) = M.null m -unitUFM k v = UFM (M.singleton (getKey $ getUnique k) v) -unitDirectlyUFM u v = UFM (M.singleton (getKey u) v) -listToUFM = foldl (\m (k, v) -> addToUFM m k v) emptyUFM -listToUFM_Directly = foldl (\m (u, v) -> addToUFM_Directly m u v) emptyUFM -listToUFM_C f = foldl (\m (k, v) -> addToUFM_C f m k v) emptyUFM - -alterUFM f (UFM m) k = UFM (M.alter f (getKey $ getUnique k) m) -addToUFM (UFM m) k v = UFM (M.insert (getKey $ getUnique k) v m) -addListToUFM = foldl (\m (k, v) -> addToUFM m k v) -addListToUFM_Directly = foldl (\m (k, v) -> addToUFM_Directly m k v) -addToUFM_Directly (UFM m) u v = UFM (M.insert (getKey u) v m) - --- Arguments of combining function of M.insertWith and addToUFM_C are flipped. -addToUFM_C f (UFM m) k v = - UFM (M.insertWith (flip f) (getKey $ getUnique k) v m) -addToUFM_Acc exi new (UFM m) k v = - UFM (M.insertWith (\_new old -> exi v old) (getKey $ getUnique k) (new v) m) -addListToUFM_C f = foldl (\m (k, v) -> addToUFM_C f m k v) - -adjustUFM f (UFM m) k = UFM (M.adjust f (getKey $ getUnique k) m) -adjustUFM_Directly f (UFM m) u = UFM (M.adjust f (getKey u) m) - -delFromUFM (UFM m) k = UFM (M.delete (getKey $ getUnique k) m) -delListFromUFM = foldl delFromUFM -delFromUFM_Directly (UFM m) u = UFM (M.delete (getKey u) m) - --- M.union is left-biased, plusUFM should be right-biased. -plusUFM (UFM x) (UFM y) = UFM (M.union y x) - -- Note (M.union y x), with arguments flipped - -- M.union is left-biased, plusUFM should be right-biased. - -plusUFM_C f (UFM x) (UFM y) = UFM (M.unionWith f x y) - -plusUFM_CD f (UFM xm) dx (UFM ym) dy - = UFM $ M.mergeWithKey - (\_ x y -> Just (x `f` y)) - (M.map (\x -> x `f` dy)) - (M.map (\y -> dx `f` y)) - xm ym -minusUFM (UFM x) (UFM y) = UFM (M.difference x y) -intersectUFM (UFM x) (UFM y) = UFM (M.intersection x y) -intersectUFM_C f (UFM x) (UFM y) = UFM (M.intersectionWith f x y) - -foldUFM k z (UFM m) = M.fold k z m -foldUFM_Directly k z (UFM m) = M.foldWithKey (k . getUnique) z m -mapUFM f (UFM m) = UFM (M.map f m) -mapUFM_Directly f (UFM m) = UFM (M.mapWithKey (f . getUnique) m) -filterUFM p (UFM m) = UFM (M.filter p m) -filterUFM_Directly p (UFM m) = UFM (M.filterWithKey (p . getUnique) m) -partitionUFM p (UFM m) = case M.partition p m of - (left, right) -> (UFM left, UFM right) - -sizeUFM (UFM m) = M.size m -elemUFM k (UFM m) = M.member (getKey $ getUnique k) m -elemUFM_Directly u (UFM m) = M.member (getKey u) m - -splitUFM (UFM m) k = case M.splitLookup (getKey $ getUnique k) m of - (less, equal, greater) -> (UFM less, equal, UFM greater) -lookupUFM (UFM m) k = M.lookup (getKey $ getUnique k) m -lookupUFM_Directly (UFM m) u = M.lookup (getKey u) m -lookupWithDefaultUFM (UFM m) v k = M.findWithDefault v (getKey $ getUnique k) m -lookupWithDefaultUFM_Directly (UFM m) v u = M.findWithDefault v (getKey u) m -keysUFM (UFM m) = map getUnique $ M.keys m -eltsUFM (UFM m) = M.elems m -ufmToSet_Directly (UFM m) = M.keysSet m -ufmToList (UFM m) = map (\(k, v) -> (getUnique k, v)) $ M.toList m - --- Hoopl -joinUFM :: JoinFun v -> JoinFun (UniqFM v) -joinUFM eltJoin l (OldFact old) (NewFact new) = foldUFM_Directly add (NoChange, old) new - where add k new_v (ch, joinmap) = - case lookupUFM_Directly joinmap k of - Nothing -> (SomeChange, addToUFM_Directly joinmap k new_v) - Just old_v -> case eltJoin l (OldFact old_v) (NewFact new_v) of - (SomeChange, v') -> (SomeChange, addToUFM_Directly joinmap k v') - (NoChange, _) -> (ch, joinmap) - -\end{code} - -%************************************************************************ -%* * -\subsection{Output-ery} -%* * -%************************************************************************ - -\begin{code} -instance Outputable a => Outputable (UniqFM a) where - ppr ufm = pprUniqFM ppr ufm - -pprUniqFM :: (a -> SDoc) -> UniqFM a -> SDoc -pprUniqFM ppr_elt ufm - = brackets $ fsep $ punctuate comma $ - [ ppr uq <+> ptext (sLit ":->") <+> ppr_elt elt - | (uq, elt) <- ufmToList ufm ] -\end{code} diff --git a/compiler/utils/UniqSet.hs b/compiler/utils/UniqSet.hs new file mode 100644 index 0000000000..5a82303157 --- /dev/null +++ b/compiler/utils/UniqSet.hs @@ -0,0 +1,115 @@ +{- +(c) The University of Glasgow 2006 +(c) The AQUA Project, Glasgow University, 1994-1998 + +\section[UniqSet]{Specialised sets, for things with @Uniques@} + +Based on @UniqFMs@ (as you would expect). + +Basically, the things need to be in class @Uniquable@. +-} + +module UniqSet ( + -- * Unique set type + UniqSet, -- type synonym for UniqFM a + + -- ** Manipulating these sets + emptyUniqSet, + unitUniqSet, + mkUniqSet, + addOneToUniqSet, addOneToUniqSet_C, addListToUniqSet, + delOneFromUniqSet, delOneFromUniqSet_Directly, delListFromUniqSet, + unionUniqSets, unionManyUniqSets, + minusUniqSet, + intersectUniqSets, + foldUniqSet, + mapUniqSet, + elementOfUniqSet, + elemUniqSet_Directly, + filterUniqSet, + sizeUniqSet, + isEmptyUniqSet, + lookupUniqSet, + uniqSetToList, + partitionUniqSet + ) where + +import UniqFM +import Unique + +{- +************************************************************************ +* * +\subsection{The signature of the module} +* * +************************************************************************ +-} + +emptyUniqSet :: UniqSet a +unitUniqSet :: Uniquable a => a -> UniqSet a +mkUniqSet :: Uniquable a => [a] -> UniqSet a + +addOneToUniqSet :: Uniquable a => UniqSet a -> a -> UniqSet a +addOneToUniqSet_C :: Uniquable a => (a -> a -> a) -> UniqSet a -> a -> UniqSet a +addListToUniqSet :: Uniquable a => UniqSet a -> [a] -> UniqSet a + +delOneFromUniqSet :: Uniquable a => UniqSet a -> a -> UniqSet a +delOneFromUniqSet_Directly :: Uniquable a => UniqSet a -> Unique -> UniqSet a +delListFromUniqSet :: Uniquable a => UniqSet a -> [a] -> UniqSet a + +unionUniqSets :: UniqSet a -> UniqSet a -> UniqSet a +unionManyUniqSets :: [UniqSet a] -> UniqSet a +minusUniqSet :: UniqSet a -> UniqSet a -> UniqSet a +intersectUniqSets :: UniqSet a -> UniqSet a -> UniqSet a + +foldUniqSet :: (a -> b -> b) -> b -> UniqSet a -> b +mapUniqSet :: (a -> b) -> UniqSet a -> UniqSet b +elementOfUniqSet :: Uniquable a => a -> UniqSet a -> Bool +elemUniqSet_Directly :: Unique -> UniqSet a -> Bool +filterUniqSet :: (a -> Bool) -> UniqSet a -> UniqSet a +partitionUniqSet :: (a -> Bool) -> UniqSet a -> (UniqSet a, UniqSet a) + +sizeUniqSet :: UniqSet a -> Int +isEmptyUniqSet :: UniqSet a -> Bool +lookupUniqSet :: Uniquable a => UniqSet a -> a -> Maybe a +uniqSetToList :: UniqSet a -> [a] + +{- +************************************************************************ +* * +\subsection{Implementation using ``UniqFM''} +* * +************************************************************************ +-} + +type UniqSet a = UniqFM a + +emptyUniqSet = emptyUFM +unitUniqSet x = unitUFM x x +mkUniqSet = foldl addOneToUniqSet emptyUniqSet + +addOneToUniqSet set x = addToUFM set x x +addOneToUniqSet_C f set x = addToUFM_C f set x x +addListToUniqSet = foldl addOneToUniqSet + +delOneFromUniqSet = delFromUFM +delOneFromUniqSet_Directly = delFromUFM_Directly +delListFromUniqSet = delListFromUFM + +unionUniqSets = plusUFM +unionManyUniqSets [] = emptyUniqSet +unionManyUniqSets sets = foldr1 unionUniqSets sets +minusUniqSet = minusUFM +intersectUniqSets = intersectUFM + +foldUniqSet = foldUFM +mapUniqSet = mapUFM +elementOfUniqSet = elemUFM +elemUniqSet_Directly = elemUFM_Directly +filterUniqSet = filterUFM +partitionUniqSet = partitionUFM + +sizeUniqSet = sizeUFM +isEmptyUniqSet = isNullUFM +lookupUniqSet = lookupUFM +uniqSetToList = eltsUFM diff --git a/compiler/utils/UniqSet.lhs b/compiler/utils/UniqSet.lhs deleted file mode 100644 index fae5ddabb6..0000000000 --- a/compiler/utils/UniqSet.lhs +++ /dev/null @@ -1,119 +0,0 @@ -% -% (c) The University of Glasgow 2006 -% (c) The AQUA Project, Glasgow University, 1994-1998 -% -\section[UniqSet]{Specialised sets, for things with @Uniques@} - -Based on @UniqFMs@ (as you would expect). - -Basically, the things need to be in class @Uniquable@. - -\begin{code} -module UniqSet ( - -- * Unique set type - UniqSet, -- type synonym for UniqFM a - - -- ** Manipulating these sets - emptyUniqSet, - unitUniqSet, - mkUniqSet, - addOneToUniqSet, addOneToUniqSet_C, addListToUniqSet, - delOneFromUniqSet, delOneFromUniqSet_Directly, delListFromUniqSet, - unionUniqSets, unionManyUniqSets, - minusUniqSet, - intersectUniqSets, - foldUniqSet, - mapUniqSet, - elementOfUniqSet, - elemUniqSet_Directly, - filterUniqSet, - sizeUniqSet, - isEmptyUniqSet, - lookupUniqSet, - uniqSetToList, - partitionUniqSet - ) where - -import UniqFM -import Unique - -\end{code} - -%************************************************************************ -%* * -\subsection{The signature of the module} -%* * -%************************************************************************ - -\begin{code} -emptyUniqSet :: UniqSet a -unitUniqSet :: Uniquable a => a -> UniqSet a -mkUniqSet :: Uniquable a => [a] -> UniqSet a - -addOneToUniqSet :: Uniquable a => UniqSet a -> a -> UniqSet a -addOneToUniqSet_C :: Uniquable a => (a -> a -> a) -> UniqSet a -> a -> UniqSet a -addListToUniqSet :: Uniquable a => UniqSet a -> [a] -> UniqSet a - -delOneFromUniqSet :: Uniquable a => UniqSet a -> a -> UniqSet a -delOneFromUniqSet_Directly :: Uniquable a => UniqSet a -> Unique -> UniqSet a -delListFromUniqSet :: Uniquable a => UniqSet a -> [a] -> UniqSet a - -unionUniqSets :: UniqSet a -> UniqSet a -> UniqSet a -unionManyUniqSets :: [UniqSet a] -> UniqSet a -minusUniqSet :: UniqSet a -> UniqSet a -> UniqSet a -intersectUniqSets :: UniqSet a -> UniqSet a -> UniqSet a - -foldUniqSet :: (a -> b -> b) -> b -> UniqSet a -> b -mapUniqSet :: (a -> b) -> UniqSet a -> UniqSet b -elementOfUniqSet :: Uniquable a => a -> UniqSet a -> Bool -elemUniqSet_Directly :: Unique -> UniqSet a -> Bool -filterUniqSet :: (a -> Bool) -> UniqSet a -> UniqSet a -partitionUniqSet :: (a -> Bool) -> UniqSet a -> (UniqSet a, UniqSet a) - -sizeUniqSet :: UniqSet a -> Int -isEmptyUniqSet :: UniqSet a -> Bool -lookupUniqSet :: Uniquable a => UniqSet a -> a -> Maybe a -uniqSetToList :: UniqSet a -> [a] -\end{code} - -%************************************************************************ -%* * -\subsection{Implementation using ``UniqFM''} -%* * -%************************************************************************ - -\begin{code} - -type UniqSet a = UniqFM a - -emptyUniqSet = emptyUFM -unitUniqSet x = unitUFM x x -mkUniqSet = foldl addOneToUniqSet emptyUniqSet - -addOneToUniqSet set x = addToUFM set x x -addOneToUniqSet_C f set x = addToUFM_C f set x x -addListToUniqSet = foldl addOneToUniqSet - -delOneFromUniqSet = delFromUFM -delOneFromUniqSet_Directly = delFromUFM_Directly -delListFromUniqSet = delListFromUFM - -unionUniqSets = plusUFM -unionManyUniqSets [] = emptyUniqSet -unionManyUniqSets sets = foldr1 unionUniqSets sets -minusUniqSet = minusUFM -intersectUniqSets = intersectUFM - -foldUniqSet = foldUFM -mapUniqSet = mapUFM -elementOfUniqSet = elemUFM -elemUniqSet_Directly = elemUFM_Directly -filterUniqSet = filterUFM -partitionUniqSet = partitionUFM - -sizeUniqSet = sizeUFM -isEmptyUniqSet = isNullUFM -lookupUniqSet = lookupUFM -uniqSetToList = eltsUFM - -\end{code} diff --git a/compiler/utils/Util.hs b/compiler/utils/Util.hs new file mode 100644 index 0000000000..7d44a5004b --- /dev/null +++ b/compiler/utils/Util.hs @@ -0,0 +1,1097 @@ +-- (c) The University of Glasgow 2006 + +{-# LANGUAGE CPP #-} + +-- | Highly random utility functions +-- +module Util ( + -- * Flags dependent on the compiler build + ghciSupported, debugIsOn, ncgDebugIsOn, + ghciTablesNextToCode, + isWindowsHost, isDarwinHost, + + -- * General list processing + zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal, + zipLazy, stretchZipWith, zipWithAndUnzip, + + unzipWith, + + mapFst, mapSnd, chkAppend, + mapAndUnzip, mapAndUnzip3, mapAccumL2, + nOfThem, filterOut, partitionWith, splitEithers, + + dropWhileEndLE, + + foldl1', foldl2, count, all2, + + lengthExceeds, lengthIs, lengthAtLeast, + listLengthCmp, atLength, + equalLength, compareLength, leLength, + + isSingleton, only, singleton, + notNull, snocView, + + isIn, isn'tIn, + + -- * Tuples + fstOf3, sndOf3, thirdOf3, + firstM, first3M, + third3, + uncurry3, + + -- * List operations controlled by another list + takeList, dropList, splitAtList, split, + dropTail, + + -- * For loop + nTimes, + + -- * Sorting + sortWith, minWith, nubSort, + + -- * Comparisons + isEqual, eqListBy, eqMaybeBy, + thenCmp, cmpList, + removeSpaces, + + -- * Edit distance + fuzzyMatch, fuzzyLookup, + + -- * Transitive closures + transitiveClosure, + + -- * Strictness + seqList, + + -- * Module names + looksLikeModuleName, + + -- * Argument processing + getCmd, toCmdArgs, toArgs, + + -- * Floating point + readRational, + + -- * read helpers + maybeRead, maybeReadFuzzy, + + -- * IO-ish utilities + doesDirNameExist, + getModificationUTCTime, + modificationTimeIfExists, + + global, consIORef, globalM, + + -- * Filenames and paths + Suffix, + splitLongestPrefix, + escapeSpaces, + Direction(..), reslash, + makeRelativeTo, + + -- * Utils for defining Data instances + abstractConstr, abstractDataType, mkNoRepType, + + -- * Utils for printing C code + charToC, + + -- * Hashing + hashString, + ) where + +#include "HsVersions.h" + +import Exception +import Panic + +import Data.Data +import Data.IORef ( IORef, newIORef, atomicModifyIORef ) +import System.IO.Unsafe ( unsafePerformIO ) +import Data.List hiding (group) + +#ifdef DEBUG +import FastTypes +#endif + +import Control.Monad ( liftM ) +import System.IO.Error as IO ( isDoesNotExistError ) +import System.Directory ( doesDirectoryExist, getModificationTime ) +import System.FilePath + +import Data.Char ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit ) +import Data.Int +import Data.Ratio ( (%) ) +import Data.Ord ( comparing ) +import Data.Bits +import Data.Word +import qualified Data.IntMap as IM +import qualified Data.Set as Set + +import Data.Time + +infixr 9 `thenCmp` + +{- +************************************************************************ +* * +\subsection{Is DEBUG on, are we on Windows, etc?} +* * +************************************************************************ + +These booleans are global constants, set by CPP flags. They allow us to +recompile a single module (this one) to change whether or not debug output +appears. They sometimes let us avoid even running CPP elsewhere. + +It's important that the flags are literal constants (True/False). Then, +with -0, tests of the flags in other modules will simplify to the correct +branch of the conditional, thereby dropping debug code altogether when +the flags are off. +-} + +ghciSupported :: Bool +#ifdef GHCI +ghciSupported = True +#else +ghciSupported = False +#endif + +debugIsOn :: Bool +#ifdef DEBUG +debugIsOn = True +#else +debugIsOn = False +#endif + +ncgDebugIsOn :: Bool +#ifdef NCG_DEBUG +ncgDebugIsOn = True +#else +ncgDebugIsOn = False +#endif + +ghciTablesNextToCode :: Bool +#ifdef GHCI_TABLES_NEXT_TO_CODE +ghciTablesNextToCode = True +#else +ghciTablesNextToCode = False +#endif + +isWindowsHost :: Bool +#ifdef mingw32_HOST_OS +isWindowsHost = True +#else +isWindowsHost = False +#endif + +isDarwinHost :: Bool +#ifdef darwin_HOST_OS +isDarwinHost = True +#else +isDarwinHost = False +#endif + +{- +************************************************************************ +* * +\subsection{A for loop} +* * +************************************************************************ +-} + +-- | Compose a function with itself n times. (nth rather than twice) +nTimes :: Int -> (a -> a) -> (a -> a) +nTimes 0 _ = id +nTimes 1 f = f +nTimes n f = f . nTimes (n-1) f + +fstOf3 :: (a,b,c) -> a +sndOf3 :: (a,b,c) -> b +thirdOf3 :: (a,b,c) -> c +fstOf3 (a,_,_) = a +sndOf3 (_,b,_) = b +thirdOf3 (_,_,c) = c + +third3 :: (c -> d) -> (a, b, c) -> (a, b, d) +third3 f (a, b, c) = (a, b, f c) + +uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d +uncurry3 f (a, b, c) = f a b c + +firstM :: Monad m => (a -> m c) -> (a, b) -> m (c, b) +firstM f (x, y) = liftM (\x' -> (x', y)) (f x) + +first3M :: Monad m => (a -> m d) -> (a, b, c) -> m (d, b, c) +first3M f (x, y, z) = liftM (\x' -> (x', y, z)) (f x) + +{- +************************************************************************ +* * +\subsection[Utils-lists]{General list processing} +* * +************************************************************************ +-} + +filterOut :: (a->Bool) -> [a] -> [a] +-- ^ Like filter, only it reverses the sense of the test +filterOut _ [] = [] +filterOut p (x:xs) | p x = filterOut p xs + | otherwise = x : filterOut p xs + +partitionWith :: (a -> Either b c) -> [a] -> ([b], [c]) +-- ^ Uses a function to determine which of two output lists an input element should join +partitionWith _ [] = ([],[]) +partitionWith f (x:xs) = case f x of + Left b -> (b:bs, cs) + Right c -> (bs, c:cs) + where (bs,cs) = partitionWith f xs + +splitEithers :: [Either a b] -> ([a], [b]) +-- ^ Teases a list of 'Either's apart into two lists +splitEithers [] = ([],[]) +splitEithers (e : es) = case e of + Left x -> (x:xs, ys) + Right y -> (xs, y:ys) + where (xs,ys) = splitEithers es + +chkAppend :: [a] -> [a] -> [a] +-- Checks for the second arguemnt being empty +-- Used in situations where that situation is common +chkAppend xs ys + | null ys = xs + | otherwise = xs ++ ys + +{- +A paranoid @zip@ (and some @zipWith@ friends) that checks the lists +are of equal length. Alastair Reid thinks this should only happen if +DEBUGging on; hey, why not? +-} + +zipEqual :: String -> [a] -> [b] -> [(a,b)] +zipWithEqual :: String -> (a->b->c) -> [a]->[b]->[c] +zipWith3Equal :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d] +zipWith4Equal :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e] + +#ifndef DEBUG +zipEqual _ = zip +zipWithEqual _ = zipWith +zipWith3Equal _ = zipWith3 +zipWith4Equal _ = zipWith4 +#else +zipEqual _ [] [] = [] +zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs +zipEqual msg _ _ = panic ("zipEqual: unequal lists:"++msg) + +zipWithEqual msg z (a:as) (b:bs)= z a b : zipWithEqual msg z as bs +zipWithEqual _ _ [] [] = [] +zipWithEqual msg _ _ _ = panic ("zipWithEqual: unequal lists:"++msg) + +zipWith3Equal msg z (a:as) (b:bs) (c:cs) + = z a b c : zipWith3Equal msg z as bs cs +zipWith3Equal _ _ [] [] [] = [] +zipWith3Equal msg _ _ _ _ = panic ("zipWith3Equal: unequal lists:"++msg) + +zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds) + = z a b c d : zipWith4Equal msg z as bs cs ds +zipWith4Equal _ _ [] [] [] [] = [] +zipWith4Equal msg _ _ _ _ _ = panic ("zipWith4Equal: unequal lists:"++msg) +#endif + +-- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~) +zipLazy :: [a] -> [b] -> [(a,b)] +zipLazy [] _ = [] +zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys + +stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c] +-- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in +-- the places where @p@ returns @True@ + +stretchZipWith _ _ _ [] _ = [] +stretchZipWith p z f (x:xs) ys + | p x = f x z : stretchZipWith p z f xs ys + | otherwise = case ys of + [] -> [] + (y:ys) -> f x y : stretchZipWith p z f xs ys + +mapFst :: (a->c) -> [(a,b)] -> [(c,b)] +mapSnd :: (b->c) -> [(a,b)] -> [(a,c)] + +mapFst f xys = [(f x, y) | (x,y) <- xys] +mapSnd f xys = [(x, f y) | (x,y) <- xys] + +mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c]) + +mapAndUnzip _ [] = ([], []) +mapAndUnzip f (x:xs) + = let (r1, r2) = f x + (rs1, rs2) = mapAndUnzip f xs + in + (r1:rs1, r2:rs2) + +mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d]) + +mapAndUnzip3 _ [] = ([], [], []) +mapAndUnzip3 f (x:xs) + = let (r1, r2, r3) = f x + (rs1, rs2, rs3) = mapAndUnzip3 f xs + in + (r1:rs1, r2:rs2, r3:rs3) + +zipWithAndUnzip :: (a -> b -> (c,d)) -> [a] -> [b] -> ([c],[d]) +zipWithAndUnzip f (a:as) (b:bs) + = let (r1, r2) = f a b + (rs1, rs2) = zipWithAndUnzip f as bs + in + (r1:rs1, r2:rs2) +zipWithAndUnzip _ _ _ = ([],[]) + +mapAccumL2 :: (s1 -> s2 -> a -> (s1, s2, b)) -> s1 -> s2 -> [a] -> (s1, s2, [b]) +mapAccumL2 f s1 s2 xs = (s1', s2', ys) + where ((s1', s2'), ys) = mapAccumL (\(s1, s2) x -> case f s1 s2 x of + (s1', s2', y) -> ((s1', s2'), y)) + (s1, s2) xs + +nOfThem :: Int -> a -> [a] +nOfThem n thing = replicate n thing + +-- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely: +-- +-- @ +-- atLength atLenPred atEndPred ls n +-- | n < 0 = atLenPred n +-- | length ls < n = atEndPred (n - length ls) +-- | otherwise = atLenPred (drop n ls) +-- @ +atLength :: ([a] -> b) + -> (Int -> b) + -> [a] + -> Int + -> b +atLength atLenPred atEndPred ls n + | n < 0 = atEndPred n + | otherwise = go n ls + where + go n [] = atEndPred n + go 0 ls = atLenPred ls + go n (_:xs) = go (n-1) xs + +-- Some special cases of atLength: + +lengthExceeds :: [a] -> Int -> Bool +-- ^ > (lengthExceeds xs n) = (length xs > n) +lengthExceeds = atLength notNull (const False) + +lengthAtLeast :: [a] -> Int -> Bool +lengthAtLeast = atLength notNull (== 0) + +lengthIs :: [a] -> Int -> Bool +lengthIs = atLength null (==0) + +listLengthCmp :: [a] -> Int -> Ordering +listLengthCmp = atLength atLen atEnd + where + atEnd 0 = EQ + atEnd x + | x > 0 = LT -- not yet seen 'n' elts, so list length is < n. + | otherwise = GT + + atLen [] = EQ + atLen _ = GT + +equalLength :: [a] -> [b] -> Bool +equalLength [] [] = True +equalLength (_:xs) (_:ys) = equalLength xs ys +equalLength _ _ = False + +compareLength :: [a] -> [b] -> Ordering +compareLength [] [] = EQ +compareLength (_:xs) (_:ys) = compareLength xs ys +compareLength [] _ = LT +compareLength _ [] = GT + +leLength :: [a] -> [b] -> Bool +-- ^ True if length xs <= length ys +leLength xs ys = case compareLength xs ys of + LT -> True + EQ -> True + GT -> False + +---------------------------- +singleton :: a -> [a] +singleton x = [x] + +isSingleton :: [a] -> Bool +isSingleton [_] = True +isSingleton _ = False + +notNull :: [a] -> Bool +notNull [] = False +notNull _ = True + +only :: [a] -> a +#ifdef DEBUG +only [a] = a +#else +only (a:_) = a +#endif +only _ = panic "Util: only" + +-- Debugging/specialising versions of \tr{elem} and \tr{notElem} + +isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool + +# ifndef DEBUG +isIn _msg x ys = x `elem` ys +isn'tIn _msg x ys = x `notElem` ys + +# else /* DEBUG */ +isIn msg x ys + = elem100 (_ILIT(0)) x ys + where + elem100 _ _ [] = False + elem100 i x (y:ys) + | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg) + (x `elem` (y:ys)) + | otherwise = x == y || elem100 (i +# _ILIT(1)) x ys + +isn'tIn msg x ys + = notElem100 (_ILIT(0)) x ys + where + notElem100 _ _ [] = True + notElem100 i x (y:ys) + | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg) + (x `notElem` (y:ys)) + | otherwise = x /= y && notElem100 (i +# _ILIT(1)) x ys +# endif /* DEBUG */ + +{- +************************************************************************ +* * +\subsubsection{Sort utils} +* * +************************************************************************ +-} + +sortWith :: Ord b => (a->b) -> [a] -> [a] +sortWith get_key xs = sortBy (comparing get_key) xs + +minWith :: Ord b => (a -> b) -> [a] -> a +minWith get_key xs = ASSERT( not (null xs) ) + head (sortWith get_key xs) + +nubSort :: Ord a => [a] -> [a] +nubSort = Set.toAscList . Set.fromList + +{- +************************************************************************ +* * +\subsection[Utils-transitive-closure]{Transitive closure} +* * +************************************************************************ + +This algorithm for transitive closure is straightforward, albeit quadratic. +-} + +transitiveClosure :: (a -> [a]) -- Successor function + -> (a -> a -> Bool) -- Equality predicate + -> [a] + -> [a] -- The transitive closure + +transitiveClosure succ eq xs + = go [] xs + where + go done [] = done + go done (x:xs) | x `is_in` done = go done xs + | otherwise = go (x:done) (succ x ++ xs) + + _ `is_in` [] = False + x `is_in` (y:ys) | eq x y = True + | otherwise = x `is_in` ys + +{- +************************************************************************ +* * +\subsection[Utils-accum]{Accumulating} +* * +************************************************************************ + +A combination of foldl with zip. It works with equal length lists. +-} + +foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc +foldl2 _ z [] [] = z +foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs +foldl2 _ _ _ _ = panic "Util: foldl2" + +all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool +-- True if the lists are the same length, and +-- all corresponding elements satisfy the predicate +all2 _ [] [] = True +all2 p (x:xs) (y:ys) = p x y && all2 p xs ys +all2 _ _ _ = False + +-- Count the number of times a predicate is true + +count :: (a -> Bool) -> [a] -> Int +count _ [] = 0 +count p (x:xs) | p x = 1 + count p xs + | otherwise = count p xs + +{- +@splitAt@, @take@, and @drop@ but with length of another +list giving the break-off point: +-} + +takeList :: [b] -> [a] -> [a] +takeList [] _ = [] +takeList (_:xs) ls = + case ls of + [] -> [] + (y:ys) -> y : takeList xs ys + +dropList :: [b] -> [a] -> [a] +dropList [] xs = xs +dropList _ xs@[] = xs +dropList (_:xs) (_:ys) = dropList xs ys + + +splitAtList :: [b] -> [a] -> ([a], [a]) +splitAtList [] xs = ([], xs) +splitAtList _ xs@[] = (xs, xs) +splitAtList (_:xs) (y:ys) = (y:ys', ys'') + where + (ys', ys'') = splitAtList xs ys + +-- drop from the end of a list +dropTail :: Int -> [a] -> [a] +-- Specification: dropTail n = reverse . drop n . reverse +-- Better implemention due to Joachim Breitner +-- http://www.joachim-breitner.de/blog/archives/600-On-taking-the-last-n-elements-of-a-list.html +dropTail n xs + = go (drop n xs) xs + where + go (_:ys) (x:xs) = x : go ys xs + go _ _ = [] -- Stop when ys runs out + -- It'll always run out before xs does + +-- dropWhile from the end of a list. This is similar to Data.List.dropWhileEnd, +-- but is lazy in the elements and strict in the spine. For reasonably short lists, +-- such as path names and typical lines of text, dropWhileEndLE is generally +-- faster than dropWhileEnd. Its advantage is magnified when the predicate is +-- expensive--using dropWhileEndLE isSpace to strip the space off a line of text +-- is generally much faster than using dropWhileEnd isSpace for that purpose. +-- Specification: dropWhileEndLE p = reverse . dropWhile p . reverse +-- Pay attention to the short-circuit (&&)! The order of its arguments is the only +-- difference between dropWhileEnd and dropWhileEndLE. +dropWhileEndLE :: (a -> Bool) -> [a] -> [a] +dropWhileEndLE p = foldr (\x r -> if null r && p x then [] else x:r) [] + +snocView :: [a] -> Maybe ([a],a) + -- Split off the last element +snocView [] = Nothing +snocView xs = go [] xs + where + -- Invariant: second arg is non-empty + go acc [x] = Just (reverse acc, x) + go acc (x:xs) = go (x:acc) xs + go _ [] = panic "Util: snocView" + +split :: Char -> String -> [String] +split c s = case rest of + [] -> [chunk] + _:rest -> chunk : split c rest + where (chunk, rest) = break (==c) s + +{- +************************************************************************ +* * +\subsection[Utils-comparison]{Comparisons} +* * +************************************************************************ +-} + +isEqual :: Ordering -> Bool +-- Often used in (isEqual (a `compare` b)) +isEqual GT = False +isEqual EQ = True +isEqual LT = False + +thenCmp :: Ordering -> Ordering -> Ordering +{-# INLINE thenCmp #-} +thenCmp EQ ordering = ordering +thenCmp ordering _ = ordering + +eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool +eqListBy _ [] [] = True +eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys +eqListBy _ _ _ = False + +eqMaybeBy :: (a ->a->Bool) -> Maybe a -> Maybe a -> Bool +eqMaybeBy _ Nothing Nothing = True +eqMaybeBy eq (Just x) (Just y) = eq x y +eqMaybeBy _ _ _ = False + +cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering + -- `cmpList' uses a user-specified comparer + +cmpList _ [] [] = EQ +cmpList _ [] _ = LT +cmpList _ _ [] = GT +cmpList cmp (a:as) (b:bs) + = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx } + +removeSpaces :: String -> String +removeSpaces = dropWhileEndLE isSpace . dropWhile isSpace + +{- +************************************************************************ +* * +\subsection{Edit distance} +* * +************************************************************************ +-} + +-- | Find the "restricted" Damerau-Levenshtein edit distance between two strings. +-- See: . +-- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing +-- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro). +-- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and +-- http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation +restrictedDamerauLevenshteinDistance :: String -> String -> Int +restrictedDamerauLevenshteinDistance str1 str2 + = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2 + where + m = length str1 + n = length str2 + +restrictedDamerauLevenshteinDistanceWithLengths + :: Int -> Int -> String -> String -> Int +restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2 + | m <= n + = if n <= 32 -- n must be larger so this check is sufficient + then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2 + else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2 + + | otherwise + = if m <= 32 -- m must be larger so this check is sufficient + then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1 + else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1 + +restrictedDamerauLevenshteinDistance' + :: (Bits bv, Num bv) => bv -> Int -> Int -> String -> String -> Int +restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2 + | [] <- str1 = n + | otherwise = extractAnswer $ + foldl' (restrictedDamerauLevenshteinDistanceWorker + (matchVectors str1) top_bit_mask vector_mask) + (0, 0, m_ones, 0, m) str2 + where + m_ones@vector_mask = (2 ^ m) - 1 + top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy + extractAnswer (_, _, _, _, distance) = distance + +restrictedDamerauLevenshteinDistanceWorker + :: (Bits bv, Num bv) => IM.IntMap bv -> bv -> bv + -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int) +restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask + (pm, d0, vp, vn, distance) char2 + = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $ + seq pm' $ seq d0' $ seq vp' $ seq vn' $ + seq distance'' $ seq char2 $ + (pm', d0', vp', vn', distance'') + where + pm' = IM.findWithDefault 0 (ord char2) str1_mvs + + d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm) + .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn + -- No need to mask the shiftL because of the restricted range of pm + + hp' = vn .|. sizedComplement vector_mask (d0' .|. vp) + hn' = d0' .&. vp + + hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask + hn'_shift = (hn' `shiftL` 1) .&. vector_mask + vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift) + vn' = d0' .&. hp'_shift + + distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance + distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance' + +sizedComplement :: Bits bv => bv -> bv -> bv +sizedComplement vector_mask vect = vector_mask `xor` vect + +matchVectors :: (Bits bv, Num bv) => String -> IM.IntMap bv +matchVectors = snd . foldl' go (0 :: Int, IM.empty) + where + go (ix, im) char = let ix' = ix + 1 + im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im + in seq ix' $ seq im' $ (ix', im') + +{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' + :: Word32 -> Int -> Int -> String -> String -> Int #-} +{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' + :: Integer -> Int -> Int -> String -> String -> Int #-} + +{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker + :: IM.IntMap Word32 -> Word32 -> Word32 + -> (Word32, Word32, Word32, Word32, Int) + -> Char -> (Word32, Word32, Word32, Word32, Int) #-} +{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker + :: IM.IntMap Integer -> Integer -> Integer + -> (Integer, Integer, Integer, Integer, Int) + -> Char -> (Integer, Integer, Integer, Integer, Int) #-} + +{-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-} +{-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-} + +{-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-} +{-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-} + +fuzzyMatch :: String -> [String] -> [String] +fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals] + +-- | Search for possible matches to the users input in the given list, +-- returning a small number of ranked results +fuzzyLookup :: String -> [(String,a)] -> [a] +fuzzyLookup user_entered possibilites + = map fst $ take mAX_RESULTS $ sortBy (comparing snd) + [ (poss_val, distance) | (poss_str, poss_val) <- possibilites + , let distance = restrictedDamerauLevenshteinDistance + poss_str user_entered + , distance <= fuzzy_threshold ] + where + -- Work out an approriate match threshold: + -- We report a candidate if its edit distance is <= the threshold, + -- The threshhold is set to about a quarter of the # of characters the user entered + -- Length Threshold + -- 1 0 -- Don't suggest *any* candidates + -- 2 1 -- for single-char identifiers + -- 3 1 + -- 4 1 + -- 5 1 + -- 6 2 + -- + fuzzy_threshold = truncate $ fromIntegral (length user_entered + 2) / (4 :: Rational) + mAX_RESULTS = 3 + +{- +************************************************************************ +* * +\subsection[Utils-pairs]{Pairs} +* * +************************************************************************ +-} + +unzipWith :: (a -> b -> c) -> [(a, b)] -> [c] +unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs + +seqList :: [a] -> b -> b +seqList [] b = b +seqList (x:xs) b = x `seq` seqList xs b + +-- Global variables: + +global :: a -> IORef a +global a = unsafePerformIO (newIORef a) + +consIORef :: IORef [a] -> a -> IO () +consIORef var x = do + atomicModifyIORef var (\xs -> (x:xs,())) + +globalM :: IO a -> IORef a +globalM ma = unsafePerformIO (ma >>= newIORef) + +-- Module names: + +looksLikeModuleName :: String -> Bool +looksLikeModuleName [] = False +looksLikeModuleName (c:cs) = isUpper c && go cs + where go [] = True + go ('.':cs) = looksLikeModuleName cs + go (c:cs) = (isAlphaNum c || c == '_' || c == '\'') && go cs + +{- +Akin to @Prelude.words@, but acts like the Bourne shell, treating +quoted strings as Haskell Strings, and also parses Haskell [String] +syntax. +-} + +getCmd :: String -> Either String -- Error + (String, String) -- (Cmd, Rest) +getCmd s = case break isSpace $ dropWhile isSpace s of + ([], _) -> Left ("Couldn't find command in " ++ show s) + res -> Right res + +toCmdArgs :: String -> Either String -- Error + (String, [String]) -- (Cmd, Args) +toCmdArgs s = case getCmd s of + Left err -> Left err + Right (cmd, s') -> case toArgs s' of + Left err -> Left err + Right args -> Right (cmd, args) + +toArgs :: String -> Either String -- Error + [String] -- Args +toArgs str + = case dropWhile isSpace str of + s@('[':_) -> case reads s of + [(args, spaces)] + | all isSpace spaces -> + Right args + _ -> + Left ("Couldn't read " ++ show str ++ "as [String]") + s -> toArgs' s + where + toArgs' s = case dropWhile isSpace s of + [] -> Right [] + ('"' : _) -> case reads s of + [(arg, rest)] + -- rest must either be [] or start with a space + | all isSpace (take 1 rest) -> + case toArgs' rest of + Left err -> Left err + Right args -> Right (arg : args) + _ -> + Left ("Couldn't read " ++ show s ++ "as String") + s' -> case break isSpace s' of + (arg, s'') -> case toArgs' s'' of + Left err -> Left err + Right args -> Right (arg : args) + +{- +-- ----------------------------------------------------------------------------- +-- Floats +-} + +readRational__ :: ReadS Rational -- NB: doesn't handle leading "-" +readRational__ r = do + (n,d,s) <- readFix r + (k,t) <- readExp s + return ((n%1)*10^^(k-d), t) + where + readFix r = do + (ds,s) <- lexDecDigits r + (ds',t) <- lexDotDigits s + return (read (ds++ds'), length ds', t) + + readExp (e:s) | e `elem` "eE" = readExp' s + readExp s = return (0,s) + + readExp' ('+':s) = readDec s + readExp' ('-':s) = do (k,t) <- readDec s + return (-k,t) + readExp' s = readDec s + + readDec s = do + (ds,r) <- nonnull isDigit s + return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ], + r) + + lexDecDigits = nonnull isDigit + + lexDotDigits ('.':s) = return (span isDigit s) + lexDotDigits s = return ("",s) + + nonnull p s = do (cs@(_:_),t) <- return (span p s) + return (cs,t) + +readRational :: String -> Rational -- NB: *does* handle a leading "-" +readRational top_s + = case top_s of + '-' : xs -> - (read_me xs) + xs -> read_me xs + where + read_me s + = case (do { (x,"") <- readRational__ s ; return x }) of + [x] -> x + [] -> error ("readRational: no parse:" ++ top_s) + _ -> error ("readRational: ambiguous parse:" ++ top_s) + + +----------------------------------------------------------------------------- +-- read helpers + +maybeRead :: Read a => String -> Maybe a +maybeRead str = case reads str of + [(x, "")] -> Just x + _ -> Nothing + +maybeReadFuzzy :: Read a => String -> Maybe a +maybeReadFuzzy str = case reads str of + [(x, s)] + | all isSpace s -> + Just x + _ -> + Nothing + +----------------------------------------------------------------------------- +-- Verify that the 'dirname' portion of a FilePath exists. +-- +doesDirNameExist :: FilePath -> IO Bool +doesDirNameExist fpath = doesDirectoryExist (takeDirectory fpath) + +----------------------------------------------------------------------------- +-- Backwards compatibility definition of getModificationTime + +getModificationUTCTime :: FilePath -> IO UTCTime +getModificationUTCTime = getModificationTime + +-- -------------------------------------------------------------- +-- check existence & modification time at the same time + +modificationTimeIfExists :: FilePath -> IO (Maybe UTCTime) +modificationTimeIfExists f = do + (do t <- getModificationUTCTime f; return (Just t)) + `catchIO` \e -> if isDoesNotExistError e + then return Nothing + else ioError e + +-- split a string at the last character where 'pred' is True, +-- returning a pair of strings. The first component holds the string +-- up (but not including) the last character for which 'pred' returned +-- True, the second whatever comes after (but also not including the +-- last character). +-- +-- If 'pred' returns False for all characters in the string, the original +-- string is returned in the first component (and the second one is just +-- empty). +splitLongestPrefix :: String -> (Char -> Bool) -> (String,String) +splitLongestPrefix str pred + | null r_pre = (str, []) + | otherwise = (reverse (tail r_pre), reverse r_suf) + -- 'tail' drops the char satisfying 'pred' + where (r_suf, r_pre) = break pred (reverse str) + +escapeSpaces :: String -> String +escapeSpaces = foldr (\c s -> if isSpace c then '\\':c:s else c:s) "" + +type Suffix = String + +-------------------------------------------------------------- +-- * Search path +-------------------------------------------------------------- + +data Direction = Forwards | Backwards + +reslash :: Direction -> FilePath -> FilePath +reslash d = f + where f ('/' : xs) = slash : f xs + f ('\\' : xs) = slash : f xs + f (x : xs) = x : f xs + f "" = "" + slash = case d of + Forwards -> '/' + Backwards -> '\\' + +makeRelativeTo :: FilePath -> FilePath -> FilePath +this `makeRelativeTo` that = directory thisFilename + where (thisDirectory, thisFilename) = splitFileName this + thatDirectory = dropFileName that + directory = joinPath $ f (splitPath thisDirectory) + (splitPath thatDirectory) + + f (x : xs) (y : ys) + | x == y = f xs ys + f xs ys = replicate (length ys) ".." ++ xs + +{- +************************************************************************ +* * +\subsection[Utils-Data]{Utils for defining Data instances} +* * +************************************************************************ + +These functions helps us to define Data instances for abstract types. +-} + +abstractConstr :: String -> Constr +abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix + +abstractDataType :: String -> DataType +abstractDataType n = mkDataType n [abstractConstr n] + +{- +************************************************************************ +* * +\subsection[Utils-C]{Utils for printing C code} +* * +************************************************************************ +-} + +charToC :: Word8 -> String +charToC w = + case chr (fromIntegral w) of + '\"' -> "\\\"" + '\'' -> "\\\'" + '\\' -> "\\\\" + c | c >= ' ' && c <= '~' -> [c] + | otherwise -> ['\\', + chr (ord '0' + ord c `div` 64), + chr (ord '0' + ord c `div` 8 `mod` 8), + chr (ord '0' + ord c `mod` 8)] + +{- +************************************************************************ +* * +\subsection[Utils-Hashing]{Utils for hashing} +* * +************************************************************************ +-} + +-- | A sample hash function for Strings. We keep multiplying by the +-- golden ratio and adding. The implementation is: +-- +-- > hashString = foldl' f golden +-- > where f m c = fromIntegral (ord c) * magic + hashInt32 m +-- > magic = 0xdeadbeef +-- +-- Where hashInt32 works just as hashInt shown above. +-- +-- Knuth argues that repeated multiplication by the golden ratio +-- will minimize gaps in the hash space, and thus it's a good choice +-- for combining together multiple keys to form one. +-- +-- Here we know that individual characters c are often small, and this +-- produces frequent collisions if we use ord c alone. A +-- particular problem are the shorter low ASCII and ISO-8859-1 +-- character strings. We pre-multiply by a magic twiddle factor to +-- obtain a good distribution. In fact, given the following test: +-- +-- > testp :: Int32 -> Int +-- > testp k = (n - ) . length . group . sort . map hs . take n $ ls +-- > where ls = [] : [c : l | l <- ls, c <- ['\0'..'\xff']] +-- > hs = foldl' f golden +-- > f m c = fromIntegral (ord c) * k + hashInt32 m +-- > n = 100000 +-- +-- We discover that testp magic = 0. +hashString :: String -> Int32 +hashString = foldl' f golden + where f m c = fromIntegral (ord c) * magic + hashInt32 m + magic = fromIntegral (0xdeadbeef :: Word32) + +golden :: Int32 +golden = 1013904242 -- = round ((sqrt 5 - 1) * 2^32) :: Int32 +-- was -1640531527 = round ((sqrt 5 - 1) * 2^31) :: Int32 +-- but that has bad mulHi properties (even adding 2^32 to get its inverse) +-- Whereas the above works well and contains no hash duplications for +-- [-32767..65536] + +-- | A sample (and useful) hash function for Int32, +-- implemented by extracting the uppermost 32 bits of the 64-bit +-- result of multiplying by a 33-bit constant. The constant is from +-- Knuth, derived from the golden ratio: +-- +-- > golden = round ((sqrt 5 - 1) * 2^32) +-- +-- We get good key uniqueness on small inputs +-- (a problem with previous versions): +-- (length $ group $ sort $ map hashInt32 [-32767..65536]) == 65536 + 32768 +-- +hashInt32 :: Int32 -> Int32 +hashInt32 x = mulHi x golden + x + +-- hi 32 bits of a x-bit * 32 bit -> 64-bit multiply +mulHi :: Int32 -> Int32 -> Int32 +mulHi a b = fromIntegral (r `shiftR` 32) + where r :: Int64 + r = fromIntegral a * fromIntegral b diff --git a/compiler/utils/Util.lhs b/compiler/utils/Util.lhs deleted file mode 100644 index df293f091b..0000000000 --- a/compiler/utils/Util.lhs +++ /dev/null @@ -1,1135 +0,0 @@ -% -% (c) The University of Glasgow 2006 -% - -\begin{code} -{-# LANGUAGE CPP #-} - --- | Highly random utility functions --- -module Util ( - -- * Flags dependent on the compiler build - ghciSupported, debugIsOn, ncgDebugIsOn, - ghciTablesNextToCode, - isWindowsHost, isDarwinHost, - - -- * General list processing - zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal, - zipLazy, stretchZipWith, zipWithAndUnzip, - - unzipWith, - - mapFst, mapSnd, chkAppend, - mapAndUnzip, mapAndUnzip3, mapAccumL2, - nOfThem, filterOut, partitionWith, splitEithers, - - dropWhileEndLE, - - foldl1', foldl2, count, all2, - - lengthExceeds, lengthIs, lengthAtLeast, - listLengthCmp, atLength, - equalLength, compareLength, leLength, - - isSingleton, only, singleton, - notNull, snocView, - - isIn, isn'tIn, - - -- * Tuples - fstOf3, sndOf3, thirdOf3, - firstM, first3M, - third3, - uncurry3, - - -- * List operations controlled by another list - takeList, dropList, splitAtList, split, - dropTail, - - -- * For loop - nTimes, - - -- * Sorting - sortWith, minWith, nubSort, - - -- * Comparisons - isEqual, eqListBy, eqMaybeBy, - thenCmp, cmpList, - removeSpaces, - - -- * Edit distance - fuzzyMatch, fuzzyLookup, - - -- * Transitive closures - transitiveClosure, - - -- * Strictness - seqList, - - -- * Module names - looksLikeModuleName, - - -- * Argument processing - getCmd, toCmdArgs, toArgs, - - -- * Floating point - readRational, - - -- * read helpers - maybeRead, maybeReadFuzzy, - - -- * IO-ish utilities - doesDirNameExist, - getModificationUTCTime, - modificationTimeIfExists, - - global, consIORef, globalM, - - -- * Filenames and paths - Suffix, - splitLongestPrefix, - escapeSpaces, - Direction(..), reslash, - makeRelativeTo, - - -- * Utils for defining Data instances - abstractConstr, abstractDataType, mkNoRepType, - - -- * Utils for printing C code - charToC, - - -- * Hashing - hashString, - ) where - -#include "HsVersions.h" - -import Exception -import Panic - -import Data.Data -import Data.IORef ( IORef, newIORef, atomicModifyIORef ) -import System.IO.Unsafe ( unsafePerformIO ) -import Data.List hiding (group) - -#ifdef DEBUG -import FastTypes -#endif - -import Control.Monad ( liftM ) -import System.IO.Error as IO ( isDoesNotExistError ) -import System.Directory ( doesDirectoryExist, getModificationTime ) -import System.FilePath - -import Data.Char ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit ) -import Data.Int -import Data.Ratio ( (%) ) -import Data.Ord ( comparing ) -import Data.Bits -import Data.Word -import qualified Data.IntMap as IM -import qualified Data.Set as Set - -import Data.Time - -infixr 9 `thenCmp` -\end{code} - -%************************************************************************ -%* * -\subsection{Is DEBUG on, are we on Windows, etc?} -%* * -%************************************************************************ - -These booleans are global constants, set by CPP flags. They allow us to -recompile a single module (this one) to change whether or not debug output -appears. They sometimes let us avoid even running CPP elsewhere. - -It's important that the flags are literal constants (True/False). Then, -with -0, tests of the flags in other modules will simplify to the correct -branch of the conditional, thereby dropping debug code altogether when -the flags are off. - -\begin{code} -ghciSupported :: Bool -#ifdef GHCI -ghciSupported = True -#else -ghciSupported = False -#endif - -debugIsOn :: Bool -#ifdef DEBUG -debugIsOn = True -#else -debugIsOn = False -#endif - -ncgDebugIsOn :: Bool -#ifdef NCG_DEBUG -ncgDebugIsOn = True -#else -ncgDebugIsOn = False -#endif - -ghciTablesNextToCode :: Bool -#ifdef GHCI_TABLES_NEXT_TO_CODE -ghciTablesNextToCode = True -#else -ghciTablesNextToCode = False -#endif - -isWindowsHost :: Bool -#ifdef mingw32_HOST_OS -isWindowsHost = True -#else -isWindowsHost = False -#endif - -isDarwinHost :: Bool -#ifdef darwin_HOST_OS -isDarwinHost = True -#else -isDarwinHost = False -#endif -\end{code} - -%************************************************************************ -%* * -\subsection{A for loop} -%* * -%************************************************************************ - -\begin{code} --- | Compose a function with itself n times. (nth rather than twice) -nTimes :: Int -> (a -> a) -> (a -> a) -nTimes 0 _ = id -nTimes 1 f = f -nTimes n f = f . nTimes (n-1) f -\end{code} - -\begin{code} -fstOf3 :: (a,b,c) -> a -sndOf3 :: (a,b,c) -> b -thirdOf3 :: (a,b,c) -> c -fstOf3 (a,_,_) = a -sndOf3 (_,b,_) = b -thirdOf3 (_,_,c) = c - -third3 :: (c -> d) -> (a, b, c) -> (a, b, d) -third3 f (a, b, c) = (a, b, f c) - -uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d -uncurry3 f (a, b, c) = f a b c -\end{code} - -\begin{code} -firstM :: Monad m => (a -> m c) -> (a, b) -> m (c, b) -firstM f (x, y) = liftM (\x' -> (x', y)) (f x) - -first3M :: Monad m => (a -> m d) -> (a, b, c) -> m (d, b, c) -first3M f (x, y, z) = liftM (\x' -> (x', y, z)) (f x) -\end{code} - -%************************************************************************ -%* * -\subsection[Utils-lists]{General list processing} -%* * -%************************************************************************ - -\begin{code} -filterOut :: (a->Bool) -> [a] -> [a] --- ^ Like filter, only it reverses the sense of the test -filterOut _ [] = [] -filterOut p (x:xs) | p x = filterOut p xs - | otherwise = x : filterOut p xs - -partitionWith :: (a -> Either b c) -> [a] -> ([b], [c]) --- ^ Uses a function to determine which of two output lists an input element should join -partitionWith _ [] = ([],[]) -partitionWith f (x:xs) = case f x of - Left b -> (b:bs, cs) - Right c -> (bs, c:cs) - where (bs,cs) = partitionWith f xs - -splitEithers :: [Either a b] -> ([a], [b]) --- ^ Teases a list of 'Either's apart into two lists -splitEithers [] = ([],[]) -splitEithers (e : es) = case e of - Left x -> (x:xs, ys) - Right y -> (xs, y:ys) - where (xs,ys) = splitEithers es - -chkAppend :: [a] -> [a] -> [a] --- Checks for the second arguemnt being empty --- Used in situations where that situation is common -chkAppend xs ys - | null ys = xs - | otherwise = xs ++ ys -\end{code} - -A paranoid @zip@ (and some @zipWith@ friends) that checks the lists -are of equal length. Alastair Reid thinks this should only happen if -DEBUGging on; hey, why not? - -\begin{code} -zipEqual :: String -> [a] -> [b] -> [(a,b)] -zipWithEqual :: String -> (a->b->c) -> [a]->[b]->[c] -zipWith3Equal :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d] -zipWith4Equal :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e] - -#ifndef DEBUG -zipEqual _ = zip -zipWithEqual _ = zipWith -zipWith3Equal _ = zipWith3 -zipWith4Equal _ = zipWith4 -#else -zipEqual _ [] [] = [] -zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs -zipEqual msg _ _ = panic ("zipEqual: unequal lists:"++msg) - -zipWithEqual msg z (a:as) (b:bs)= z a b : zipWithEqual msg z as bs -zipWithEqual _ _ [] [] = [] -zipWithEqual msg _ _ _ = panic ("zipWithEqual: unequal lists:"++msg) - -zipWith3Equal msg z (a:as) (b:bs) (c:cs) - = z a b c : zipWith3Equal msg z as bs cs -zipWith3Equal _ _ [] [] [] = [] -zipWith3Equal msg _ _ _ _ = panic ("zipWith3Equal: unequal lists:"++msg) - -zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds) - = z a b c d : zipWith4Equal msg z as bs cs ds -zipWith4Equal _ _ [] [] [] [] = [] -zipWith4Equal msg _ _ _ _ _ = panic ("zipWith4Equal: unequal lists:"++msg) -#endif -\end{code} - -\begin{code} --- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~) -zipLazy :: [a] -> [b] -> [(a,b)] -zipLazy [] _ = [] -zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys -\end{code} - - -\begin{code} -stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c] --- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in --- the places where @p@ returns @True@ - -stretchZipWith _ _ _ [] _ = [] -stretchZipWith p z f (x:xs) ys - | p x = f x z : stretchZipWith p z f xs ys - | otherwise = case ys of - [] -> [] - (y:ys) -> f x y : stretchZipWith p z f xs ys -\end{code} - - -\begin{code} -mapFst :: (a->c) -> [(a,b)] -> [(c,b)] -mapSnd :: (b->c) -> [(a,b)] -> [(a,c)] - -mapFst f xys = [(f x, y) | (x,y) <- xys] -mapSnd f xys = [(x, f y) | (x,y) <- xys] - -mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c]) - -mapAndUnzip _ [] = ([], []) -mapAndUnzip f (x:xs) - = let (r1, r2) = f x - (rs1, rs2) = mapAndUnzip f xs - in - (r1:rs1, r2:rs2) - -mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d]) - -mapAndUnzip3 _ [] = ([], [], []) -mapAndUnzip3 f (x:xs) - = let (r1, r2, r3) = f x - (rs1, rs2, rs3) = mapAndUnzip3 f xs - in - (r1:rs1, r2:rs2, r3:rs3) - -zipWithAndUnzip :: (a -> b -> (c,d)) -> [a] -> [b] -> ([c],[d]) -zipWithAndUnzip f (a:as) (b:bs) - = let (r1, r2) = f a b - (rs1, rs2) = zipWithAndUnzip f as bs - in - (r1:rs1, r2:rs2) -zipWithAndUnzip _ _ _ = ([],[]) - -mapAccumL2 :: (s1 -> s2 -> a -> (s1, s2, b)) -> s1 -> s2 -> [a] -> (s1, s2, [b]) -mapAccumL2 f s1 s2 xs = (s1', s2', ys) - where ((s1', s2'), ys) = mapAccumL (\(s1, s2) x -> case f s1 s2 x of - (s1', s2', y) -> ((s1', s2'), y)) - (s1, s2) xs -\end{code} - -\begin{code} -nOfThem :: Int -> a -> [a] -nOfThem n thing = replicate n thing - --- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely: --- --- @ --- atLength atLenPred atEndPred ls n --- | n < 0 = atLenPred n --- | length ls < n = atEndPred (n - length ls) --- | otherwise = atLenPred (drop n ls) --- @ -atLength :: ([a] -> b) - -> (Int -> b) - -> [a] - -> Int - -> b -atLength atLenPred atEndPred ls n - | n < 0 = atEndPred n - | otherwise = go n ls - where - go n [] = atEndPred n - go 0 ls = atLenPred ls - go n (_:xs) = go (n-1) xs - --- Some special cases of atLength: - -lengthExceeds :: [a] -> Int -> Bool --- ^ > (lengthExceeds xs n) = (length xs > n) -lengthExceeds = atLength notNull (const False) - -lengthAtLeast :: [a] -> Int -> Bool -lengthAtLeast = atLength notNull (== 0) - -lengthIs :: [a] -> Int -> Bool -lengthIs = atLength null (==0) - -listLengthCmp :: [a] -> Int -> Ordering -listLengthCmp = atLength atLen atEnd - where - atEnd 0 = EQ - atEnd x - | x > 0 = LT -- not yet seen 'n' elts, so list length is < n. - | otherwise = GT - - atLen [] = EQ - atLen _ = GT - -equalLength :: [a] -> [b] -> Bool -equalLength [] [] = True -equalLength (_:xs) (_:ys) = equalLength xs ys -equalLength _ _ = False - -compareLength :: [a] -> [b] -> Ordering -compareLength [] [] = EQ -compareLength (_:xs) (_:ys) = compareLength xs ys -compareLength [] _ = LT -compareLength _ [] = GT - -leLength :: [a] -> [b] -> Bool --- ^ True if length xs <= length ys -leLength xs ys = case compareLength xs ys of - LT -> True - EQ -> True - GT -> False - ----------------------------- -singleton :: a -> [a] -singleton x = [x] - -isSingleton :: [a] -> Bool -isSingleton [_] = True -isSingleton _ = False - -notNull :: [a] -> Bool -notNull [] = False -notNull _ = True - -only :: [a] -> a -#ifdef DEBUG -only [a] = a -#else -only (a:_) = a -#endif -only _ = panic "Util: only" -\end{code} - -Debugging/specialising versions of \tr{elem} and \tr{notElem} - -\begin{code} -isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool - -# ifndef DEBUG -isIn _msg x ys = x `elem` ys -isn'tIn _msg x ys = x `notElem` ys - -# else /* DEBUG */ -isIn msg x ys - = elem100 (_ILIT(0)) x ys - where - elem100 _ _ [] = False - elem100 i x (y:ys) - | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg) - (x `elem` (y:ys)) - | otherwise = x == y || elem100 (i +# _ILIT(1)) x ys - -isn'tIn msg x ys - = notElem100 (_ILIT(0)) x ys - where - notElem100 _ _ [] = True - notElem100 i x (y:ys) - | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg) - (x `notElem` (y:ys)) - | otherwise = x /= y && notElem100 (i +# _ILIT(1)) x ys -# endif /* DEBUG */ -\end{code} - -%************************************************************************ -%* * -\subsubsection{Sort utils} -%* * -%************************************************************************ - -\begin{code} -sortWith :: Ord b => (a->b) -> [a] -> [a] -sortWith get_key xs = sortBy (comparing get_key) xs - -minWith :: Ord b => (a -> b) -> [a] -> a -minWith get_key xs = ASSERT( not (null xs) ) - head (sortWith get_key xs) - -nubSort :: Ord a => [a] -> [a] -nubSort = Set.toAscList . Set.fromList -\end{code} - -%************************************************************************ -%* * -\subsection[Utils-transitive-closure]{Transitive closure} -%* * -%************************************************************************ - -This algorithm for transitive closure is straightforward, albeit quadratic. - -\begin{code} -transitiveClosure :: (a -> [a]) -- Successor function - -> (a -> a -> Bool) -- Equality predicate - -> [a] - -> [a] -- The transitive closure - -transitiveClosure succ eq xs - = go [] xs - where - go done [] = done - go done (x:xs) | x `is_in` done = go done xs - | otherwise = go (x:done) (succ x ++ xs) - - _ `is_in` [] = False - x `is_in` (y:ys) | eq x y = True - | otherwise = x `is_in` ys -\end{code} - -%************************************************************************ -%* * -\subsection[Utils-accum]{Accumulating} -%* * -%************************************************************************ - -A combination of foldl with zip. It works with equal length lists. - -\begin{code} -foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc -foldl2 _ z [] [] = z -foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs -foldl2 _ _ _ _ = panic "Util: foldl2" - -all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool --- True if the lists are the same length, and --- all corresponding elements satisfy the predicate -all2 _ [] [] = True -all2 p (x:xs) (y:ys) = p x y && all2 p xs ys -all2 _ _ _ = False -\end{code} - -Count the number of times a predicate is true - -\begin{code} -count :: (a -> Bool) -> [a] -> Int -count _ [] = 0 -count p (x:xs) | p x = 1 + count p xs - | otherwise = count p xs -\end{code} - -@splitAt@, @take@, and @drop@ but with length of another -list giving the break-off point: - -\begin{code} -takeList :: [b] -> [a] -> [a] -takeList [] _ = [] -takeList (_:xs) ls = - case ls of - [] -> [] - (y:ys) -> y : takeList xs ys - -dropList :: [b] -> [a] -> [a] -dropList [] xs = xs -dropList _ xs@[] = xs -dropList (_:xs) (_:ys) = dropList xs ys - - -splitAtList :: [b] -> [a] -> ([a], [a]) -splitAtList [] xs = ([], xs) -splitAtList _ xs@[] = (xs, xs) -splitAtList (_:xs) (y:ys) = (y:ys', ys'') - where - (ys', ys'') = splitAtList xs ys - --- drop from the end of a list -dropTail :: Int -> [a] -> [a] --- Specification: dropTail n = reverse . drop n . reverse --- Better implemention due to Joachim Breitner --- http://www.joachim-breitner.de/blog/archives/600-On-taking-the-last-n-elements-of-a-list.html -dropTail n xs - = go (drop n xs) xs - where - go (_:ys) (x:xs) = x : go ys xs - go _ _ = [] -- Stop when ys runs out - -- It'll always run out before xs does - --- dropWhile from the end of a list. This is similar to Data.List.dropWhileEnd, --- but is lazy in the elements and strict in the spine. For reasonably short lists, --- such as path names and typical lines of text, dropWhileEndLE is generally --- faster than dropWhileEnd. Its advantage is magnified when the predicate is --- expensive--using dropWhileEndLE isSpace to strip the space off a line of text --- is generally much faster than using dropWhileEnd isSpace for that purpose. --- Specification: dropWhileEndLE p = reverse . dropWhile p . reverse --- Pay attention to the short-circuit (&&)! The order of its arguments is the only --- difference between dropWhileEnd and dropWhileEndLE. -dropWhileEndLE :: (a -> Bool) -> [a] -> [a] -dropWhileEndLE p = foldr (\x r -> if null r && p x then [] else x:r) [] - -snocView :: [a] -> Maybe ([a],a) - -- Split off the last element -snocView [] = Nothing -snocView xs = go [] xs - where - -- Invariant: second arg is non-empty - go acc [x] = Just (reverse acc, x) - go acc (x:xs) = go (x:acc) xs - go _ [] = panic "Util: snocView" - -split :: Char -> String -> [String] -split c s = case rest of - [] -> [chunk] - _:rest -> chunk : split c rest - where (chunk, rest) = break (==c) s -\end{code} - - -%************************************************************************ -%* * -\subsection[Utils-comparison]{Comparisons} -%* * -%************************************************************************ - -\begin{code} -isEqual :: Ordering -> Bool --- Often used in (isEqual (a `compare` b)) -isEqual GT = False -isEqual EQ = True -isEqual LT = False - -thenCmp :: Ordering -> Ordering -> Ordering -{-# INLINE thenCmp #-} -thenCmp EQ ordering = ordering -thenCmp ordering _ = ordering - -eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool -eqListBy _ [] [] = True -eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys -eqListBy _ _ _ = False - -eqMaybeBy :: (a ->a->Bool) -> Maybe a -> Maybe a -> Bool -eqMaybeBy _ Nothing Nothing = True -eqMaybeBy eq (Just x) (Just y) = eq x y -eqMaybeBy _ _ _ = False - -cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering - -- `cmpList' uses a user-specified comparer - -cmpList _ [] [] = EQ -cmpList _ [] _ = LT -cmpList _ _ [] = GT -cmpList cmp (a:as) (b:bs) - = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx } -\end{code} - -\begin{code} -removeSpaces :: String -> String -removeSpaces = dropWhileEndLE isSpace . dropWhile isSpace -\end{code} - -%************************************************************************ -%* * -\subsection{Edit distance} -%* * -%************************************************************************ - -\begin{code} --- | Find the "restricted" Damerau-Levenshtein edit distance between two strings. --- See: . --- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing --- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro). --- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and --- http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation -restrictedDamerauLevenshteinDistance :: String -> String -> Int -restrictedDamerauLevenshteinDistance str1 str2 - = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2 - where - m = length str1 - n = length str2 - -restrictedDamerauLevenshteinDistanceWithLengths - :: Int -> Int -> String -> String -> Int -restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2 - | m <= n - = if n <= 32 -- n must be larger so this check is sufficient - then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2 - else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2 - - | otherwise - = if m <= 32 -- m must be larger so this check is sufficient - then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1 - else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1 - -restrictedDamerauLevenshteinDistance' - :: (Bits bv, Num bv) => bv -> Int -> Int -> String -> String -> Int -restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2 - | [] <- str1 = n - | otherwise = extractAnswer $ - foldl' (restrictedDamerauLevenshteinDistanceWorker - (matchVectors str1) top_bit_mask vector_mask) - (0, 0, m_ones, 0, m) str2 - where - m_ones@vector_mask = (2 ^ m) - 1 - top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy - extractAnswer (_, _, _, _, distance) = distance - -restrictedDamerauLevenshteinDistanceWorker - :: (Bits bv, Num bv) => IM.IntMap bv -> bv -> bv - -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int) -restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask - (pm, d0, vp, vn, distance) char2 - = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $ - seq pm' $ seq d0' $ seq vp' $ seq vn' $ - seq distance'' $ seq char2 $ - (pm', d0', vp', vn', distance'') - where - pm' = IM.findWithDefault 0 (ord char2) str1_mvs - - d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm) - .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn - -- No need to mask the shiftL because of the restricted range of pm - - hp' = vn .|. sizedComplement vector_mask (d0' .|. vp) - hn' = d0' .&. vp - - hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask - hn'_shift = (hn' `shiftL` 1) .&. vector_mask - vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift) - vn' = d0' .&. hp'_shift - - distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance - distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance' - -sizedComplement :: Bits bv => bv -> bv -> bv -sizedComplement vector_mask vect = vector_mask `xor` vect - -matchVectors :: (Bits bv, Num bv) => String -> IM.IntMap bv -matchVectors = snd . foldl' go (0 :: Int, IM.empty) - where - go (ix, im) char = let ix' = ix + 1 - im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im - in seq ix' $ seq im' $ (ix', im') - -{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' - :: Word32 -> Int -> Int -> String -> String -> Int #-} -{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' - :: Integer -> Int -> Int -> String -> String -> Int #-} - -{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker - :: IM.IntMap Word32 -> Word32 -> Word32 - -> (Word32, Word32, Word32, Word32, Int) - -> Char -> (Word32, Word32, Word32, Word32, Int) #-} -{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker - :: IM.IntMap Integer -> Integer -> Integer - -> (Integer, Integer, Integer, Integer, Int) - -> Char -> (Integer, Integer, Integer, Integer, Int) #-} - -{-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-} -{-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-} - -{-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-} -{-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-} - -fuzzyMatch :: String -> [String] -> [String] -fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals] - --- | Search for possible matches to the users input in the given list, --- returning a small number of ranked results -fuzzyLookup :: String -> [(String,a)] -> [a] -fuzzyLookup user_entered possibilites - = map fst $ take mAX_RESULTS $ sortBy (comparing snd) - [ (poss_val, distance) | (poss_str, poss_val) <- possibilites - , let distance = restrictedDamerauLevenshteinDistance - poss_str user_entered - , distance <= fuzzy_threshold ] - where - -- Work out an approriate match threshold: - -- We report a candidate if its edit distance is <= the threshold, - -- The threshhold is set to about a quarter of the # of characters the user entered - -- Length Threshold - -- 1 0 -- Don't suggest *any* candidates - -- 2 1 -- for single-char identifiers - -- 3 1 - -- 4 1 - -- 5 1 - -- 6 2 - -- - fuzzy_threshold = truncate $ fromIntegral (length user_entered + 2) / (4 :: Rational) - mAX_RESULTS = 3 -\end{code} - -%************************************************************************ -%* * -\subsection[Utils-pairs]{Pairs} -%* * -%************************************************************************ - -\begin{code} -unzipWith :: (a -> b -> c) -> [(a, b)] -> [c] -unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs -\end{code} - -\begin{code} -seqList :: [a] -> b -> b -seqList [] b = b -seqList (x:xs) b = x `seq` seqList xs b -\end{code} - -Global variables: - -\begin{code} -global :: a -> IORef a -global a = unsafePerformIO (newIORef a) -\end{code} - -\begin{code} -consIORef :: IORef [a] -> a -> IO () -consIORef var x = do - atomicModifyIORef var (\xs -> (x:xs,())) -\end{code} - -\begin{code} -globalM :: IO a -> IORef a -globalM ma = unsafePerformIO (ma >>= newIORef) -\end{code} - -Module names: - -\begin{code} -looksLikeModuleName :: String -> Bool -looksLikeModuleName [] = False -looksLikeModuleName (c:cs) = isUpper c && go cs - where go [] = True - go ('.':cs) = looksLikeModuleName cs - go (c:cs) = (isAlphaNum c || c == '_' || c == '\'') && go cs -\end{code} - -Akin to @Prelude.words@, but acts like the Bourne shell, treating -quoted strings as Haskell Strings, and also parses Haskell [String] -syntax. - -\begin{code} -getCmd :: String -> Either String -- Error - (String, String) -- (Cmd, Rest) -getCmd s = case break isSpace $ dropWhile isSpace s of - ([], _) -> Left ("Couldn't find command in " ++ show s) - res -> Right res - -toCmdArgs :: String -> Either String -- Error - (String, [String]) -- (Cmd, Args) -toCmdArgs s = case getCmd s of - Left err -> Left err - Right (cmd, s') -> case toArgs s' of - Left err -> Left err - Right args -> Right (cmd, args) - -toArgs :: String -> Either String -- Error - [String] -- Args -toArgs str - = case dropWhile isSpace str of - s@('[':_) -> case reads s of - [(args, spaces)] - | all isSpace spaces -> - Right args - _ -> - Left ("Couldn't read " ++ show str ++ "as [String]") - s -> toArgs' s - where - toArgs' s = case dropWhile isSpace s of - [] -> Right [] - ('"' : _) -> case reads s of - [(arg, rest)] - -- rest must either be [] or start with a space - | all isSpace (take 1 rest) -> - case toArgs' rest of - Left err -> Left err - Right args -> Right (arg : args) - _ -> - Left ("Couldn't read " ++ show s ++ "as String") - s' -> case break isSpace s' of - (arg, s'') -> case toArgs' s'' of - Left err -> Left err - Right args -> Right (arg : args) -\end{code} - --- ----------------------------------------------------------------------------- --- Floats - -\begin{code} -readRational__ :: ReadS Rational -- NB: doesn't handle leading "-" -readRational__ r = do - (n,d,s) <- readFix r - (k,t) <- readExp s - return ((n%1)*10^^(k-d), t) - where - readFix r = do - (ds,s) <- lexDecDigits r - (ds',t) <- lexDotDigits s - return (read (ds++ds'), length ds', t) - - readExp (e:s) | e `elem` "eE" = readExp' s - readExp s = return (0,s) - - readExp' ('+':s) = readDec s - readExp' ('-':s) = do (k,t) <- readDec s - return (-k,t) - readExp' s = readDec s - - readDec s = do - (ds,r) <- nonnull isDigit s - return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ], - r) - - lexDecDigits = nonnull isDigit - - lexDotDigits ('.':s) = return (span isDigit s) - lexDotDigits s = return ("",s) - - nonnull p s = do (cs@(_:_),t) <- return (span p s) - return (cs,t) - -readRational :: String -> Rational -- NB: *does* handle a leading "-" -readRational top_s - = case top_s of - '-' : xs -> - (read_me xs) - xs -> read_me xs - where - read_me s - = case (do { (x,"") <- readRational__ s ; return x }) of - [x] -> x - [] -> error ("readRational: no parse:" ++ top_s) - _ -> error ("readRational: ambiguous parse:" ++ top_s) - - ------------------------------------------------------------------------------ --- read helpers - -maybeRead :: Read a => String -> Maybe a -maybeRead str = case reads str of - [(x, "")] -> Just x - _ -> Nothing - -maybeReadFuzzy :: Read a => String -> Maybe a -maybeReadFuzzy str = case reads str of - [(x, s)] - | all isSpace s -> - Just x - _ -> - Nothing - ------------------------------------------------------------------------------ --- Verify that the 'dirname' portion of a FilePath exists. --- -doesDirNameExist :: FilePath -> IO Bool -doesDirNameExist fpath = doesDirectoryExist (takeDirectory fpath) - ------------------------------------------------------------------------------ --- Backwards compatibility definition of getModificationTime - -getModificationUTCTime :: FilePath -> IO UTCTime -getModificationUTCTime = getModificationTime - --- -------------------------------------------------------------- --- check existence & modification time at the same time - -modificationTimeIfExists :: FilePath -> IO (Maybe UTCTime) -modificationTimeIfExists f = do - (do t <- getModificationUTCTime f; return (Just t)) - `catchIO` \e -> if isDoesNotExistError e - then return Nothing - else ioError e - --- split a string at the last character where 'pred' is True, --- returning a pair of strings. The first component holds the string --- up (but not including) the last character for which 'pred' returned --- True, the second whatever comes after (but also not including the --- last character). --- --- If 'pred' returns False for all characters in the string, the original --- string is returned in the first component (and the second one is just --- empty). -splitLongestPrefix :: String -> (Char -> Bool) -> (String,String) -splitLongestPrefix str pred - | null r_pre = (str, []) - | otherwise = (reverse (tail r_pre), reverse r_suf) - -- 'tail' drops the char satisfying 'pred' - where (r_suf, r_pre) = break pred (reverse str) - -escapeSpaces :: String -> String -escapeSpaces = foldr (\c s -> if isSpace c then '\\':c:s else c:s) "" - -type Suffix = String - --------------------------------------------------------------- --- * Search path --------------------------------------------------------------- - -data Direction = Forwards | Backwards - -reslash :: Direction -> FilePath -> FilePath -reslash d = f - where f ('/' : xs) = slash : f xs - f ('\\' : xs) = slash : f xs - f (x : xs) = x : f xs - f "" = "" - slash = case d of - Forwards -> '/' - Backwards -> '\\' - -makeRelativeTo :: FilePath -> FilePath -> FilePath -this `makeRelativeTo` that = directory thisFilename - where (thisDirectory, thisFilename) = splitFileName this - thatDirectory = dropFileName that - directory = joinPath $ f (splitPath thisDirectory) - (splitPath thatDirectory) - - f (x : xs) (y : ys) - | x == y = f xs ys - f xs ys = replicate (length ys) ".." ++ xs -\end{code} - -%************************************************************************ -%* * -\subsection[Utils-Data]{Utils for defining Data instances} -%* * -%************************************************************************ - -These functions helps us to define Data instances for abstract types. - -\begin{code} -abstractConstr :: String -> Constr -abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix -\end{code} - -\begin{code} -abstractDataType :: String -> DataType -abstractDataType n = mkDataType n [abstractConstr n] -\end{code} - -%************************************************************************ -%* * -\subsection[Utils-C]{Utils for printing C code} -%* * -%************************************************************************ - -\begin{code} -charToC :: Word8 -> String -charToC w = - case chr (fromIntegral w) of - '\"' -> "\\\"" - '\'' -> "\\\'" - '\\' -> "\\\\" - c | c >= ' ' && c <= '~' -> [c] - | otherwise -> ['\\', - chr (ord '0' + ord c `div` 64), - chr (ord '0' + ord c `div` 8 `mod` 8), - chr (ord '0' + ord c `mod` 8)] -\end{code} - -%************************************************************************ -%* * -\subsection[Utils-Hashing]{Utils for hashing} -%* * -%************************************************************************ - -\begin{code} --- | A sample hash function for Strings. We keep multiplying by the --- golden ratio and adding. The implementation is: --- --- > hashString = foldl' f golden --- > where f m c = fromIntegral (ord c) * magic + hashInt32 m --- > magic = 0xdeadbeef --- --- Where hashInt32 works just as hashInt shown above. --- --- Knuth argues that repeated multiplication by the golden ratio --- will minimize gaps in the hash space, and thus it's a good choice --- for combining together multiple keys to form one. --- --- Here we know that individual characters c are often small, and this --- produces frequent collisions if we use ord c alone. A --- particular problem are the shorter low ASCII and ISO-8859-1 --- character strings. We pre-multiply by a magic twiddle factor to --- obtain a good distribution. In fact, given the following test: --- --- > testp :: Int32 -> Int --- > testp k = (n - ) . length . group . sort . map hs . take n $ ls --- > where ls = [] : [c : l | l <- ls, c <- ['\0'..'\xff']] --- > hs = foldl' f golden --- > f m c = fromIntegral (ord c) * k + hashInt32 m --- > n = 100000 --- --- We discover that testp magic = 0. -hashString :: String -> Int32 -hashString = foldl' f golden - where f m c = fromIntegral (ord c) * magic + hashInt32 m - magic = fromIntegral (0xdeadbeef :: Word32) - -golden :: Int32 -golden = 1013904242 -- = round ((sqrt 5 - 1) * 2^32) :: Int32 --- was -1640531527 = round ((sqrt 5 - 1) * 2^31) :: Int32 --- but that has bad mulHi properties (even adding 2^32 to get its inverse) --- Whereas the above works well and contains no hash duplications for --- [-32767..65536] - --- | A sample (and useful) hash function for Int32, --- implemented by extracting the uppermost 32 bits of the 64-bit --- result of multiplying by a 33-bit constant. The constant is from --- Knuth, derived from the golden ratio: --- --- > golden = round ((sqrt 5 - 1) * 2^32) --- --- We get good key uniqueness on small inputs --- (a problem with previous versions): --- (length $ group $ sort $ map hashInt32 [-32767..65536]) == 65536 + 32768 --- -hashInt32 :: Int32 -> Int32 -hashInt32 x = mulHi x golden + x - --- hi 32 bits of a x-bit * 32 bit -> 64-bit multiply -mulHi :: Int32 -> Int32 -> Int32 -mulHi a b = fromIntegral (r `shiftR` 32) - where r :: Int64 - r = fromIntegral a * fromIntegral b -\end{code} - -- cgit v1.2.1