diff options
122 files changed, 1421 insertions, 400 deletions
diff --git a/.circleci/config.yml b/.circleci/config.yml index b4a99473c8..2868f8d8eb 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -31,6 +31,11 @@ aliases: run: name: Configure command: ./configure + - &configure_unix_32 + run: + name: Configure + command: | + setarch i386 ./configure --with-ghc=/opt/ghc-i386/8.2.2/bin/ghc - &configure_bsd run: name: Configure @@ -207,6 +212,24 @@ jobs: - *make - *test + "validate-i386-linux": + resource_class: xlarge + docker: + - image: mrkkrp/ghcci-i386-linux:0.0.1 + environment: + <<: *buildenv + steps: + - checkout + - *prepare + - *submodules + - *boot + - *configure_unix_32 + - *make + - *test + - *bindist + - *collectartifacts + - *storeartifacts + workflows: version: 2 validate: @@ -216,6 +239,7 @@ workflows: # - validate-x86_64-freebsd - validate-x86_64-darwin - validate-x86_64-linux-llvm + - validate-i386-linux - validate-hadrian-x86_64-linux nightly: diff --git a/.circleci/images/i386-linux/Dockerfile b/.circleci/images/i386-linux/Dockerfile new file mode 100644 index 0000000000..7d3e968195 --- /dev/null +++ b/.circleci/images/i386-linux/Dockerfile @@ -0,0 +1,30 @@ +# This Dockerfile tries to replicate haskell:8.2 a bit, but it does so on +# top of i368/debian:jessie instead of debian:jessie because I had troubles +# making i386 GHC bindist working there. + +FROM i386/debian:jessie + +ENV LANG C.UTF-8 + +# Install the necessary packages, including HVR stuff. +RUN echo 'deb http://ppa.launchpad.net/hvr/ghc/ubuntu trusty main' > /etc/apt/sources.list.d/ghc.list +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F6F88286 +RUN apt-get update -qq +RUN apt-get install -qy git make automake autoconf gcc perl python3 texinfo xz-utils lbzip2 bzip2 patch openssh-client sudo curl zlib1g-dev libtinfo-dev libsqlite3-0 libsqlite3-dev ca-certificates g++ cabal-install-2.0 ghc-8.2.2 happy-1.19.5 alex-3.1.7 +ENV PATH /opt/cabal/2.0/bin:/opt/ghc/8.2.2/bin:/opt/happy/1.19.5/bin:/opt/alex/3.1.7/bin:$PATH + +# Get i386 GHC bindist for 32 bit CI builds. +RUN cd /tmp && curl https://downloads.haskell.org/~ghc/8.2.2/ghc-8.2.2-i386-deb8-linux.tar.xz | tar -Jx +RUN cd /tmp/ghc-8.2.2 && setarch i386 ./configure --prefix=/opt/ghc-i386/8.2.2 CFLAGS=-m32 --target=i386-unknown-linux --build=i386-unknown-linux --host=i386-unknown-linux +RUN cd /tmp/ghc-8.2.2 && make install +RUN rm -rf /tmp/ghc-8.2.2 +ENV PATH /opt/ghc-i386/8.2.2/bin:$PATH + +# Create a normal user. +RUN adduser ghc --gecos "GHC builds" --disabled-password +RUN echo "ghc ALL = NOPASSWD : ALL" > /etc/sudoers.d/ghc +USER ghc + +WORKDIR /home/ghc/ + +CMD ["bash"] diff --git a/aclocal.m4 b/aclocal.m4 index 58d43e1ef0..e19cbf2aa9 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -1970,7 +1970,7 @@ AC_DEFUN([GHC_CONVERT_OS],[ $3="openbsd" ;; # As far as I'm aware, none of these have relevant variants - freebsd|netbsd|dragonfly|hpux|linuxaout|kfreebsdgnu|freebsd2|solaris2|mingw32|darwin|nextstep2|nextstep3|sunos4|ultrix|haiku) + freebsd|netbsd|dragonfly|hpux|linuxaout|kfreebsdgnu|freebsd2|mingw32|darwin|nextstep2|nextstep3|sunos4|ultrix|haiku) $3="$1" ;; aix*) # e.g. powerpc-ibm-aix7.1.3.0 @@ -1979,6 +1979,9 @@ AC_DEFUN([GHC_CONVERT_OS],[ darwin*) # e.g. aarch64-apple-darwin14 $3="darwin" ;; + solaris2*) + $3="solaris2" + ;; freebsd*) # like i686-gentoo-freebsd7 # i686-gentoo-freebsd8 # i686-gentoo-freebsd8.2 diff --git a/compiler/basicTypes/Name.hs b/compiler/basicTypes/Name.hs index 02eb0678ee..6941dd9c55 100644 --- a/compiler/basicTypes/Name.hs +++ b/compiler/basicTypes/Name.hs @@ -70,7 +70,7 @@ module Name ( NamedThing(..), getSrcLoc, getSrcSpan, getOccString, getOccFS, - pprInfixName, pprPrefixName, pprModulePrefix, + pprInfixName, pprPrefixName, pprModulePrefix, pprNameUnqualified, nameStableString, -- Re-export the OccName stuff @@ -535,6 +535,10 @@ pprName (Name {n_sort = sort, n_uniq = uniq, n_occ = occ}) System -> pprSystem sty uniq occ Internal -> pprInternal sty uniq occ +-- | Print the string of Name unqualifiedly directly. +pprNameUnqualified :: Name -> SDoc +pprNameUnqualified Name { n_occ = occ } = ppr_occ_name occ + pprExternal :: PprStyle -> Unique -> Module -> OccName -> Bool -> BuiltInSyntax -> SDoc pprExternal sty uniq mod occ is_wired is_builtin | codeStyle sty = ppr mod <> char '_' <> ppr_z_occ_name occ diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index 9f832731b1..50d48afb38 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -1,5 +1,5 @@ -- Cmm representations using Hoopl's Graph CmmNode e x. -{-# LANGUAGE CPP, GADTs #-} +{-# LANGUAGE GADTs #-} module Cmm ( -- * Cmm top-level datatypes diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs index fce8f7dae8..c91d553c47 100644 --- a/compiler/cmm/CmmCommonBlockElim.hs +++ b/compiler/cmm/CmmCommonBlockElim.hs @@ -64,7 +64,9 @@ elimCommonBlocks :: CmmGraph -> CmmGraph elimCommonBlocks g = replaceLabels env $ copyTicks env g where env = iterate mapEmpty blocks_with_key - groups = groupByInt hash_block (postorderDfs g) + -- The order of blocks doesn't matter here, but revPostorder also drops any + -- unreachable blocks, which is useful. + groups = groupByInt hash_block (revPostorder g) blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups] -- Invariant: The blocks in the list are pairwise distinct diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index da365cfe7f..9f091da8c2 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -174,10 +174,9 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } | otherwise = (entry_id, shortcut_map) - -- blocks is a list of blocks in DFS postorder, while blockmap is - -- a map of blocks. We process each element from blocks and update - -- blockmap accordingly - blocks = postorderDfs g + -- blocks are sorted in reverse postorder, but we want to go from the exit + -- towards beginning, so we use foldr below. + blocks = revPostorder g blockmap = foldl' (flip addBlock) emptyBody blocks -- Accumulator contains three components: @@ -435,7 +434,7 @@ removeUnreachableBlocksProc proc@(CmmProc info lbl live g) | otherwise = env used_blocks :: [CmmBlock] - used_blocks = postorderDfs g + used_blocks = revPostorder g used_lbls :: LabelSet used_lbls = setFromList $ map entryLabel used_blocks diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index bae5a739ca..946e146f9e 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -1,5 +1,4 @@ {-# LANGUAGE BangPatterns #-} -{-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs index 3f1633404c..d2525d1ffd 100644 --- a/compiler/cmm/CmmLayoutStack.hs +++ b/compiler/cmm/CmmLayoutStack.hs @@ -244,7 +244,7 @@ cmmLayoutStack dflags procpoints entry_args -- We need liveness info. Dead assignments are removed later -- by the sinking pass. let liveness = cmmLocalLiveness dflags graph - blocks = postorderDfs graph + blocks = revPostorder graph (final_stackmaps, _final_high_sp, new_blocks) <- mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 6b4d792122..e837d29783 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -422,14 +422,6 @@ That's what the constant-folding operations on comparison operators do above. -- ----------------------------------------------------------------------------- -- Utils -isLit :: CmmExpr -> Bool -isLit (CmmLit _) = True -isLit _ = False - -isComparisonExpr :: CmmExpr -> Bool -isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op -isComparisonExpr _ = False - isPicReg :: CmmExpr -> Bool isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True isPicReg _ = False diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs index eeae96083a..bef8f384b8 100644 --- a/compiler/cmm/CmmProcPoint.hs +++ b/compiler/cmm/CmmProcPoint.hs @@ -190,7 +190,7 @@ minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph -- Given the set of successors of calls (which must be proc-points) -- figure out the minimal set of necessary proc-points minimalProcPointSet platform callProcPoints g - = extendPPSet platform g (postorderDfs g) callProcPoints + = extendPPSet platform g (revPostorder g) callProcPoints extendPPSet :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> UniqSM ProcPointSet @@ -242,11 +242,11 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap (CmmProc (TopInfo {info_tbls = info_tbls}) top_l _ g@(CmmGraph {g_entry=entry})) = do -- Build a map from procpoints to the blocks they reach - let addBlock + let add_block :: LabelMap (LabelMap CmmBlock) -> CmmBlock -> LabelMap (LabelMap CmmBlock) - addBlock graphEnv b = + add_block graphEnv b = case mapLookup bid procMap of Just ProcPoint -> add graphEnv bid bid b Just (ReachedBy set) -> @@ -265,7 +265,7 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap regSetToList $ expectJust "ppLiveness" $ mapLookup pp liveness - graphEnv <- return $ foldlGraphBlocks addBlock mapEmpty g + graphEnv <- return $ foldlGraphBlocks add_block mapEmpty g -- Build a map from proc point BlockId to pairs of: -- * Labels for their new procedures @@ -330,7 +330,7 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap -- replace branches to procpoints with branches to jumps blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv' -- add the jump blocks to the graph - blockEnv''' = foldl' (flip insertBlock) blockEnv'' jumpBlocks + blockEnv''' = foldl' (flip addBlock) blockEnv'' jumpBlocks let g' = ofBlockMap ppId blockEnv''' -- pprTrace "g' pre jumps" (ppr g') $ do return (mapInsert ppId g' newGraphEnv) @@ -374,8 +374,8 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap -- reversed later. let (_, block_order) = foldl' add_block_num (0::Int, mapEmpty :: LabelMap Int) - (postorderDfs g) - add_block_num (!i, !map) block = + (revPostorder g) + add_block_num (i, map) block = (i + 1, mapInsert (entryLabel block) i map) sort_fn (bid, _) (bid', _) = compare (expectJust "block_order" $ mapLookup bid block_order) diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs index 487f0bc244..43444639e1 100644 --- a/compiler/cmm/CmmSink.hs +++ b/compiler/cmm/CmmSink.hs @@ -173,7 +173,7 @@ cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks liveness = cmmLocalLiveness dflags graph getLive l = mapFindWithDefault Set.empty l liveness - blocks = postorderDfs graph + blocks = revPostorder graph join_pts = findJoinPoints blocks @@ -458,17 +458,7 @@ tryToInline dflags live node assigs = go usages node emptyLRegSet assigs occurs_once = not l_live && l_usages == Just 1 occurs_none = not l_live && l_usages == Nothing - inl_node = case mapExpDeep inl_exp node of - -- See Note [Improving conditionals] - CmmCondBranch (CmmMachOp (MO_Ne w) args) - ti fi l - -> CmmCondBranch (cmmMachOpFold dflags (MO_Eq w) args) - fi ti (inv_likeliness l) - node' -> node' - - inv_likeliness :: Maybe Bool -> Maybe Bool - inv_likeliness Nothing = Nothing - inv_likeliness (Just l) = Just (not l) + inl_node = improveConditional (mapExpDeep inl_exp node) inl_exp :: CmmExpr -> CmmExpr -- inl_exp is where the inlining actually takes place! @@ -479,22 +469,43 @@ tryToInline dflags live node assigs = go usages node emptyLRegSet assigs inl_exp (CmmMachOp op args) = cmmMachOpFold dflags op args inl_exp other = other -{- Note [Improving conditionals] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Given - CmmCondBranch ((a >## b) != 1) t f -where a,b, are Floats, the constant folder /cannot/ turn it into - CmmCondBranch (a <=## b) t f -because comparison on floats are not invertible -(see CmmMachOp.maybeInvertComparison). -What we want instead is simply to reverse the true/false branches thus +{- Note [improveConditional] + +cmmMachOpFold tries to simplify conditionals to turn things like + (a == b) != 1 +into + (a != b) +but there's one case it can't handle: when the comparison is over +floating-point values, we can't invert it, because floating-point +comparisions aren't invertible (because NaN). + +But we *can* optimise this conditional by swapping the true and false +branches. Given CmmCondBranch ((a >## b) != 1) t f ---> +we can turn it into CmmCondBranch (a >## b) f t -And we do that right here in tryToInline, just as we do cmmMachOpFold. +So here we catch conditionals that weren't optimised by cmmMachOpFold, +and apply above transformation to eliminate the comparison against 1. + +It's tempting to just turn every != into == and then let cmmMachOpFold +do its thing, but that risks changing a nice fall-through conditional +into one that requires two jumps. (see swapcond_last in +CmmContFlowOpt), so instead we carefully look for just the cases where +we can eliminate a comparison. -} +improveConditional :: CmmNode O x -> CmmNode O x +improveConditional + (CmmCondBranch (CmmMachOp mop [x, CmmLit (CmmInt 1 _)]) t f l) + | neLike mop, isComparisonExpr x + = CmmCondBranch x f t (fmap not l) + where + neLike (MO_Ne _) = True + neLike (MO_U_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1 + neLike (MO_S_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1 + neLike _ = False +improveConditional other = other -- Note [dependent assignments] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index 4a1d874d8f..53dbcddfbb 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP, GADTs, RankNTypes #-} +{-# LANGUAGE GADTs, RankNTypes #-} ----------------------------------------------------------------------------- -- @@ -35,7 +35,7 @@ module CmmUtils( cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord, cmmToWord, - isTrivialCmmExpr, hasNoGlobalRegs, + isTrivialCmmExpr, hasNoGlobalRegs, isLit, isComparisonExpr, baseExpr, spExpr, hpExpr, spLimExpr, hpLimExpr, currentTSOExpr, currentNurseryExpr, cccsExpr, @@ -56,17 +56,15 @@ module CmmUtils( -- * Operations that probably don't belong here modifyGraph, - ofBlockMap, toBlockMap, insertBlock, + ofBlockMap, toBlockMap, ofBlockList, toBlockList, bodyToBlockList, toBlockListEntryFirst, toBlockListEntryFirstFalseFallthrough, - foldlGraphBlocks, mapGraphNodes, postorderDfs, mapGraphNodes1, + foldlGraphBlocks, mapGraphNodes, revPostorder, mapGraphNodes1, -- * Ticks blockTicks ) where -#include "HsVersions.h" - import GhcPrelude import TyCon ( PrimRep(..), PrimElemRep(..) ) @@ -78,11 +76,9 @@ import BlockId import CLabel import Outputable import DynFlags -import Util import CodeGen.Platform import Data.Word -import Data.Maybe import Data.Bits import Hoopl.Graph import Hoopl.Label @@ -389,6 +385,14 @@ hasNoGlobalRegs (CmmReg (CmmLocal _)) = True hasNoGlobalRegs (CmmRegOff (CmmLocal _) _) = True hasNoGlobalRegs _ = False +isLit :: CmmExpr -> Bool +isLit (CmmLit _) = True +isLit _ = False + +isComparisonExpr :: CmmExpr -> Bool +isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op +isComparisonExpr _ = False + --------------------------------------------------- -- -- Tagging @@ -487,12 +491,6 @@ toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO} -insertBlock :: CmmBlock -> LabelMap CmmBlock -> LabelMap CmmBlock -insertBlock block map = - ASSERT(isNothing $ mapLookup id map) - mapInsert id block map - where id = entryLabel block - toBlockList :: CmmGraph -> [CmmBlock] toBlockList g = mapElems $ toBlockMap g @@ -558,8 +556,9 @@ mapGraphNodes1 f = modifyGraph (mapGraph f) foldlGraphBlocks :: (a -> CmmBlock -> a) -> a -> CmmGraph -> a foldlGraphBlocks k z g = mapFoldl k z $ toBlockMap g -postorderDfs :: CmmGraph -> [CmmBlock] -postorderDfs g = {-# SCC "postorderDfs" #-} postorder_dfs_from (toBlockMap g) (g_entry g) +revPostorder :: CmmGraph -> [CmmBlock] +revPostorder g = {-# SCC "revPostorder" #-} + revPostorderFrom (toBlockMap g) (g_entry g) ------------------------------------------------- -- Tick utilities diff --git a/compiler/cmm/Hoopl/Collections.hs b/compiler/cmm/Hoopl/Collections.hs index b8072b37a7..ef7de4a078 100644 --- a/compiler/cmm/Hoopl/Collections.hs +++ b/compiler/cmm/Hoopl/Collections.hs @@ -12,7 +12,7 @@ module Hoopl.Collections import GhcPrelude -import qualified Data.IntMap as M +import qualified Data.IntMap.Strict as M import qualified Data.IntSet as S import Data.List (foldl', foldl1') @@ -66,6 +66,7 @@ class IsMap map where mapInsert :: KeyOf map -> a -> map a -> map a mapInsertWith :: (a -> a -> a) -> KeyOf map -> a -> map a -> map a mapDelete :: KeyOf map -> map a -> map a + mapAlter :: (Maybe a -> Maybe a) -> KeyOf map -> map a -> map a mapUnion :: map a -> map a -> map a mapUnionWithKey :: (KeyOf map -> a -> a -> a) -> map a -> map a -> map a @@ -143,6 +144,7 @@ instance IsMap UniqueMap where mapInsert k v (UM m) = UM (M.insert k v m) mapInsertWith f k v (UM m) = UM (M.insertWith f k v m) mapDelete k (UM m) = UM (M.delete k m) + mapAlter f k (UM m) = UM (M.alter f k m) mapUnion (UM x) (UM y) = UM (M.union x y) mapUnionWithKey f (UM x) (UM y) = UM (M.unionWithKey f x y) diff --git a/compiler/cmm/Hoopl/Dataflow.hs b/compiler/cmm/Hoopl/Dataflow.hs index 0b0434bb36..2538b70ee3 100644 --- a/compiler/cmm/Hoopl/Dataflow.hs +++ b/compiler/cmm/Hoopl/Dataflow.hs @@ -111,8 +111,7 @@ analyzeCmm dir lattice transfer cmmGraph initFact = blockMap = case hooplGraph of GMany NothingO bm NothingO -> bm - entries = if mapNull initFact then [entry] else mapKeys initFact - in fixpointAnalysis dir lattice transfer entries blockMap initFact + in fixpointAnalysis dir lattice transfer entry blockMap initFact -- Fixpoint algorithm. fixpointAnalysis @@ -120,16 +119,16 @@ fixpointAnalysis Direction -> DataflowLattice f -> TransferFun f - -> [Label] + -> Label -> LabelMap CmmBlock -> FactBase f -> FactBase f -fixpointAnalysis direction lattice do_block entries blockmap = loop start +fixpointAnalysis direction lattice do_block entry blockmap = loop start where -- Sorting the blocks helps to minimize the number of times we need to -- process blocks. For instance, for forward analysis we want to look at -- blocks in reverse postorder. Also, see comments for sortBlocks. - blocks = sortBlocks direction entries blockmap + blocks = sortBlocks direction entry blockmap num_blocks = length blocks block_arr = {-# SCC "block_arr" #-} listArray (0, num_blocks - 1) blocks start = {-# SCC "start" #-} IntSet.fromDistinctAscList @@ -174,9 +173,8 @@ rewriteCmm dir lattice rwFun cmmGraph initFact = do blockMap1 = case hooplGraph of GMany NothingO bm NothingO -> bm - entries = if mapNull initFact then [entry] else mapKeys initFact (blockMap2, facts) <- - fixpointRewrite dir lattice rwFun entries blockMap1 initFact + fixpointRewrite dir lattice rwFun entry blockMap1 initFact return (cmmGraph {g_graph = GMany NothingO blockMap2 NothingO}, facts) fixpointRewrite @@ -184,16 +182,16 @@ fixpointRewrite Direction -> DataflowLattice f -> RewriteFun f - -> [Label] + -> Label -> LabelMap CmmBlock -> FactBase f -> UniqSM (LabelMap CmmBlock, FactBase f) -fixpointRewrite dir lattice do_block entries blockmap = loop start blockmap +fixpointRewrite dir lattice do_block entry blockmap = loop start blockmap where -- Sorting the blocks helps to minimize the number of times we need to -- process blocks. For instance, for forward analysis we want to look at -- blocks in reverse postorder. Also, see comments for sortBlocks. - blocks = sortBlocks dir entries blockmap + blocks = sortBlocks dir entry blockmap num_blocks = length blocks block_arr = {-# SCC "block_arr_rewrite" #-} listArray (0, num_blocks - 1) blocks @@ -268,20 +266,15 @@ we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4. -- | Sort the blocks into the right order for analysis. This means reverse -- postorder for a forward analysis. For the backward one, we simply reverse -- that (see Note [Backward vs forward analysis]). --- --- Note: We're using Hoopl's confusingly named `postorder_dfs_from` but AFAICS --- it returns the *reverse* postorder of the blocks (it visits blocks in the --- postorder and uses (:) to collect them, which gives the reverse of the --- visitation order). sortBlocks :: NonLocal n - => Direction -> [Label] -> LabelMap (Block n C C) -> [Block n C C] -sortBlocks direction entries blockmap = + => Direction -> Label -> LabelMap (Block n C C) -> [Block n C C] +sortBlocks direction entry blockmap = case direction of Fwd -> fwd Bwd -> reverse fwd where - fwd = postorder_dfs_from blockmap entries + fwd = revPostorderFrom blockmap entry -- Note [Backward vs forward analysis] -- diff --git a/compiler/cmm/Hoopl/Graph.hs b/compiler/cmm/Hoopl/Graph.hs index ca482ab4a8..0142f70c76 100644 --- a/compiler/cmm/Hoopl/Graph.hs +++ b/compiler/cmm/Hoopl/Graph.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE RankNTypes #-} @@ -14,11 +15,12 @@ module Hoopl.Graph , labelsDefined , mapGraph , mapGraphBlocks - , postorder_dfs_from + , revPostorderFrom ) where import GhcPrelude +import Util import Hoopl.Label import Hoopl.Block @@ -51,13 +53,14 @@ emptyBody = mapEmpty bodyList :: Body' block n -> [(Label,block n C C)] bodyList body = mapToList body -addBlock :: NonLocal thing - => thing C C -> LabelMap (thing C C) - -> LabelMap (thing C C) -addBlock b body - | mapMember lbl body = error $ "duplicate label " ++ show lbl ++ " in graph" - | otherwise = mapInsert lbl b body - where lbl = entryLabel b +addBlock + :: (NonLocal block, HasDebugCallStack) + => block C C -> LabelMap (block C C) -> LabelMap (block C C) +addBlock block body = mapAlter add lbl body + where + lbl = entryLabel block + add Nothing = Just block + add _ = error $ "duplicate label " ++ show lbl ++ " in graph" -- --------------------------------------------------------------------------- @@ -119,22 +122,10 @@ labelsDefined (GMany _ body x) = mapFoldlWithKey addEntry (exitLabel x) body ---------------------------------------------------------------- -class LabelsPtr l where - targetLabels :: l -> [Label] - -instance NonLocal n => LabelsPtr (n e C) where - targetLabels n = successors n - -instance LabelsPtr Label where - targetLabels l = [l] - -instance LabelsPtr LabelSet where - targetLabels = setElems - -instance LabelsPtr l => LabelsPtr [l] where - targetLabels = concatMap targetLabels - --- | This is the most important traversal over this data structure. It drops +-- | Returns a list of blocks reachable from the provided Labels in the reverse +-- postorder. +-- +-- This is the most important traversal over this data structure. It drops -- unreachable code and puts blocks in an order that is good for solving forward -- dataflow problems quickly. The reverse order is good for solving backward -- dataflow problems quickly. The forward order is also reasonably good for @@ -143,59 +134,52 @@ instance LabelsPtr l => LabelsPtr [l] where -- that you would need a more serious analysis, probably based on dominators, to -- identify loop headers. -- --- The ubiquity of 'postorder_dfs' is one reason for the ubiquity of the 'LGraph' --- representation, when for most purposes the plain 'Graph' representation is --- more mathematically elegant (but results in more complicated code). --- --- Here's an easy way to go wrong! Consider +-- For forward analyses we want reverse postorder visitation, consider: -- @ -- A -> [B,C] -- B -> D -- C -> D -- @ --- Then ordinary dfs would give [A,B,D,C] which has a back ref from C to D. --- Better to get [A,B,C,D] - - --- | Traversal: 'postorder_dfs' returns a list of blocks reachable --- from the entry of enterable graph. The entry and exit are *not* included. --- The list has the following property: --- --- Say a "back reference" exists if one of a block's --- control-flow successors precedes it in the output list --- --- Then there are as few back references as possible --- --- The output is suitable for use in --- a forward dataflow problem. For a backward problem, simply reverse --- the list. ('postorder_dfs' is sufficiently tricky to implement that --- one doesn't want to try and maintain both forward and backward --- versions.) - -postorder_dfs_from_except :: forall block e . (NonLocal block, LabelsPtr e) - => LabelMap (block C C) -> e -> LabelSet -> [block C C] -postorder_dfs_from_except blocks b visited = - vchildren (get_children b) (\acc _visited -> acc) [] visited - where - vnode :: block C C -> ([block C C] -> LabelSet -> a) -> [block C C] -> LabelSet -> a - vnode block cont acc visited = - if setMember id visited then - cont acc visited - else - let cont' acc visited = cont (block:acc) visited in - vchildren (get_children block) cont' acc (setInsert id visited) - where id = entryLabel block - vchildren :: forall a. [block C C] -> ([block C C] -> LabelSet -> a) -> [block C C] -> LabelSet -> a - vchildren bs cont acc visited = next bs acc visited - where next children acc visited = - case children of [] -> cont acc visited - (b:bs) -> vnode b (next bs) acc visited - get_children :: forall l. LabelsPtr l => l -> [block C C] - get_children block = foldr add_id [] $ targetLabels block - add_id id rst = case lookupFact id blocks of - Just b -> b : rst - Nothing -> rst - -postorder_dfs_from - :: (NonLocal block, LabelsPtr b) => LabelMap (block C C) -> b -> [block C C] -postorder_dfs_from blocks b = postorder_dfs_from_except blocks b setEmpty +-- Postorder: [D, C, B, A] (or [D, B, C, A]) +-- Reverse postorder: [A, B, C, D] (or [A, C, B, D]) +-- This matters for, e.g., forward analysis, because we want to analyze *both* +-- B and C before we analyze D. +revPostorderFrom + :: forall block. (NonLocal block) + => LabelMap (block C C) -> Label -> [block C C] +revPostorderFrom graph start = go start_worklist setEmpty [] + where + start_worklist = lookup_for_descend start Nil + + -- To compute the postorder we need to "visit" a block (mark as done) + -- *after* visiting all its successors. So we need to know whether we + -- already processed all successors of each block (and @NonLocal@ allows + -- arbitrary many successors). So we use an explicit stack with an extra bit + -- of information: + -- * @ConsTodo@ means to explore the block if it wasn't visited before + -- * @ConsMark@ means that all successors were already done and we can add + -- the block to the result. + -- + -- NOTE: We add blocks to the result list in postorder, but we *prepend* + -- them (i.e., we use @(:)@), which means that the final list is in reverse + -- postorder. + go :: DfsStack (block C C) -> LabelSet -> [block C C] -> [block C C] + go Nil !_ !result = result + go (ConsMark block rest) !wip_or_done !result = + go rest wip_or_done (block : result) + go (ConsTodo block rest) !wip_or_done !result + | entryLabel block `setMember` wip_or_done = go rest wip_or_done result + | otherwise = + let new_worklist = + foldr lookup_for_descend + (ConsMark block rest) + (successors block) + in go new_worklist (setInsert (entryLabel block) wip_or_done) result + + lookup_for_descend :: Label -> DfsStack (block C C) -> DfsStack (block C C) + lookup_for_descend label wl + | Just b <- mapLookup label graph = ConsTodo b wl + | otherwise = + error $ "Label that doesn't have a block?! " ++ show label + +data DfsStack a = ConsTodo a (DfsStack a) | ConsMark a (DfsStack a) | Nil diff --git a/compiler/cmm/Hoopl/Label.hs b/compiler/cmm/Hoopl/Label.hs index 8096fab073..6eae115779 100644 --- a/compiler/cmm/Hoopl/Label.hs +++ b/compiler/cmm/Hoopl/Label.hs @@ -87,6 +87,7 @@ instance IsMap LabelMap where mapInsert (Label k) v (LM m) = LM (mapInsert k v m) mapInsertWith f (Label k) v (LM m) = LM (mapInsertWith f k v m) mapDelete (Label k) (LM m) = LM (mapDelete k m) + mapAlter f (Label k) (LM m) = LM (mapAlter f k m) mapUnion (LM x) (LM y) = LM (mapUnion x y) mapUnionWithKey f (LM x) (LM y) = LM (mapUnionWithKey (f . mkHooplLabel) x y) diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs index d9f140254c..70229d067d 100644 --- a/compiler/cmm/MkGraph.hs +++ b/compiler/cmm/MkGraph.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns, CPP, GADTs #-} +{-# LANGUAGE BangPatterns, GADTs #-} module MkGraph ( CmmAGraph, CmmAGraphScoped, CgStmt(..) @@ -21,7 +21,7 @@ module MkGraph ) where -import GhcPrelude (($),Int,Bool,Eq(..)) -- avoid importing (<*>) +import GhcPrelude hiding ( (<*>) ) -- avoid importing (<*>) import BlockId import Cmm @@ -37,10 +37,7 @@ import ForeignCall import OrdList import SMRep (ByteOff) import UniqSupply - -import Control.Monad -import Data.List -import Data.Maybe +import Util ----------------------------------------------------------------------------- @@ -184,12 +181,10 @@ mkNop :: CmmAGraph mkNop = nilOL mkComment :: FastString -> CmmAGraph -#if defined(DEBUG) --- SDM: generating all those comments takes time, this saved about 4% for me -mkComment fs = mkMiddle $ CmmComment fs -#else -mkComment _ = nilOL -#endif +mkComment fs + -- SDM: generating all those comments takes time, this saved about 4% for me + | debugIsOn = mkMiddle $ CmmComment fs + | otherwise = nilOL ---------- Assignment and store mkAssign :: CmmReg -> CmmExpr -> CmmAGraph diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index 6a93ea818e..c9a6003aaf 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -141,8 +141,8 @@ pprCmmGraph g = text "{" <> text "offset" $$ nest 2 (vcat $ map ppr blocks) $$ text "}" - where blocks = postorderDfs g - -- postorderDfs has the side-effect of discarding unreachable code, + where blocks = revPostorder g + -- revPostorder has the side-effect of discarding unreachable code, -- so pretty-printed Cmm will omit any unreachable blocks. This can -- sometimes be confusing. diff --git a/compiler/cmm/PprCmmDecl.hs b/compiler/cmm/PprCmmDecl.hs index 9b3cecc3b8..9dd2332b67 100644 --- a/compiler/cmm/PprCmmDecl.hs +++ b/compiler/cmm/PprCmmDecl.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - ---------------------------------------------------------------------------- -- -- Pretty-printing of common Cmm types @@ -54,7 +52,6 @@ import System.IO -- Temp Jan08 import SMRep -#include "../includes/rts/storage/FunTypes.h" pprCmms :: (Outputable info, Outputable g) diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index c20f1fd1d0..6a2840294a 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP, GADTs #-} +{-# LANGUAGE GADTs #-} ----------------------------------------------------------------------------- -- @@ -10,8 +10,6 @@ module CgUtils ( fixStgRegisters ) where -#include "HsVersions.h" - import GhcPrelude import CodeGen.Platform diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 9ef552d336..b29394da6f 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - ----------------------------------------------------------------------------- -- -- Stg to C-- code generation: bindings @@ -15,8 +13,6 @@ module StgCmmBind ( pushUpdateFrame, emitUpdateFrame ) where -#include "HsVersions.h" - import GhcPrelude hiding ((<*>)) import StgCmmExpr diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 3fcc935121..22fcfaf412 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -1,5 +1,4 @@ {-# LANGUAGE CPP #-} -{-# OPTIONS_GHC -fno-warn-unused-do-bind #-} ----------------------------------------------------------------------------- -- @@ -61,7 +60,8 @@ cgExpr :: StgExpr -> FCode ReturnKind cgExpr (StgApp fun args) = cgIdApp fun args -{- seq# a s ==> a -} +-- seq# a s ==> a +-- See Note [seq# magic] in PrelRules cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) = cgIdApp a [] @@ -409,7 +409,8 @@ cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts ; v_info <- getCgIdInfo v ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr))) (idInfoToAmode v_info) - ; bindArgToReg (NonVoid bndr) + -- Add bndr to the environment + ; _ <- bindArgToReg (NonVoid bndr) ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts } where reps_compatible = ((==) `on` (primRepSlot . idPrimRep)) v bndr @@ -435,7 +436,8 @@ it would be better to invoke some kind of panic function here. cgCase scrut@(StgApp v []) _ (PrimAlt _) _ = do { dflags <- getDynFlags ; mb_cc <- maybeSaveCostCentre True - ; withSequel (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut) + ; _ <- withSequel + (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut) ; restoreCurrentCostCentre mb_cc ; emitComment $ mkFastString "should be unreachable code" ; l <- newBlockId @@ -446,13 +448,14 @@ cgCase scrut@(StgApp v []) _ (PrimAlt _) _ {- Note [Handle seq#] ~~~~~~~~~~~~~~~~~~~~~ -case seq# a s of v - (# s', a' #) -> e +See Note [seq# magic] in PrelRules. +The special case for seq# in cgCase does this: + case seq# a s of v + (# s', a' #) -> e ==> - -case a of v - (# s', a' #) -> e + case a of v + (# s', a' #) -> e (taking advantage of the fact that the return convention for (# State#, a #) is the same as the return convention for just 'a') @@ -460,6 +463,7 @@ is the same as the return convention for just 'a') cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts = -- Note [Handle seq#] + -- And see Note [seq# magic] in PrelRules -- Use the same return convention as vanilla 'a'. cgCase (StgApp a []) bndr alt_type alts diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index d0ad17f59b..c1103e7d77 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - ----------------------------------------------------------------------------- -- -- Code generation for foreign calls. @@ -20,8 +18,6 @@ module StgCmmForeign ( emitCloseNursery, ) where -#include "HsVersions.h" - import GhcPrelude hiding( succ, (<*>) ) import StgSyn @@ -408,8 +404,8 @@ Opening the nursery corresponds to the following code: @ tso = CurrentTSO; cn = CurrentNursery; - bdfree = CurrentNuresry->free; - bdstart = CurrentNuresry->start; + bdfree = CurrentNursery->free; + bdstart = CurrentNursery->start; // We *add* the currently occupied portion of the nursery block to // the allocation limit, because we will subtract it again in diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index 07633ed4ae..3be35b35fa 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - ----------------------------------------------------------------------------- -- -- Stg to C--: heap management functions @@ -22,8 +20,6 @@ module StgCmmHeap ( emitSetDynHdr ) where -#include "HsVersions.h" - import GhcPrelude hiding ((<*>)) import StgSyn diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs index 7c3864296c..cc941a2e57 100644 --- a/compiler/codeGen/StgCmmMonad.hs +++ b/compiler/codeGen/StgCmmMonad.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP, GADTs, UnboxedTuples #-} +{-# LANGUAGE GADTs, UnboxedTuples #-} ----------------------------------------------------------------------------- -- @@ -58,8 +58,6 @@ module StgCmmMonad ( CgInfoDownwards(..), CgState(..) -- non-abstract ) where -#include "HsVersions.h" - import GhcPrelude hiding( sequence, succ ) import Cmm @@ -79,6 +77,7 @@ import Unique import UniqSupply import FastString import Outputable +import Util import Control.Monad import Data.List @@ -696,11 +695,9 @@ emitLabel id = do tscope <- getTickScope emitCgStmt (CgLabel id tscope) emitComment :: FastString -> FCode () -#if 0 /* def DEBUG */ -emitComment s = emitCgStmt (CgStmt (CmmComment s)) -#else -emitComment _ = return () -#endif +emitComment s + | debugIsOn = emitCgStmt (CgStmt (CmmComment s)) + | otherwise = return () emitTick :: CmmTickish -> FCode () emitTick = emitCgStmt . CgStmt . CmmTick diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index a0bca5d661..15c31ca59c 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - ----------------------------------------------------------------------------- -- -- Code generation for profiling @@ -25,8 +23,6 @@ module StgCmmProf ( ldvEnter, ldvEnterClosure, ldvRecordCreate ) where -#include "HsVersions.h" - import GhcPrelude import StgCmmClosure diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs index a7d158ce3a..8f3074856a 100644 --- a/compiler/codeGen/StgCmmTicky.hs +++ b/compiler/codeGen/StgCmmTicky.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns, CPP #-} +{-# LANGUAGE BangPatterns #-} ----------------------------------------------------------------------------- -- @@ -104,8 +104,6 @@ module StgCmmTicky ( tickySlowCall, tickySlowCallPat, ) where -#include "HsVersions.h" - import GhcPrelude import StgCmmArgRep ( slowCallPattern , toArgRep , argRepString ) diff --git a/compiler/coreSyn/CoreUtils.hs b/compiler/coreSyn/CoreUtils.hs index 4db9d8fc29..5608afc334 100644 --- a/compiler/coreSyn/CoreUtils.hs +++ b/compiler/coreSyn/CoreUtils.hs @@ -1449,8 +1449,11 @@ app_ok primop_ok fun args -- Often there is a literal divisor, and this -- can get rid of a thunk in an inner loop + | SeqOp <- op -- See Note [seq# and expr_ok] + -> all (expr_ok primop_ok) args + | otherwise - -> primop_ok op -- Check the primop itself + -> primop_ok op -- Check the primop itself && and (zipWith arg_ok arg_tys args) -- Check the arguments _other -> isUnliftedType (idType fun) -- c.f. the Var case of exprIsHNF @@ -1607,6 +1610,25 @@ See also Note [dataToTag#] in primops.txt.pp. Bottom line: * in exprOkForSpeculation we simply ignore all lifted arguments. + * except see Note [seq# and expr_ok] for an exception + + +Note [seq# and expr_ok] +~~~~~~~~~~~~~~~~~~~~~~~ +Recall that + seq# :: forall a s . a -> State# s -> (# State# s, a #) +must always evaluate its first argument. So it's really a +counter-example to Note [Primops with lifted arguments]. In +the case of seq# we must check the argument to seq#. Remember +item (d) of the specification of exprOkForSpeculation: + + -- Precisely, it returns @True@ iff: + -- a) The expression guarantees to terminate, + ... + -- d) without throwing a Haskell exception + +The lack of this special case caused Trac #5129 to go bad again. +See comment:24 and following ************************************************************************ diff --git a/compiler/hsSyn/Convert.hs b/compiler/hsSyn/Convert.hs index 531f146a9d..644075810c 100644 --- a/compiler/hsSyn/Convert.hs +++ b/compiler/hsSyn/Convert.hs @@ -1625,8 +1625,14 @@ thRdrName loc ctxt_ns th_occ th_name occ :: OccName.OccName occ = mk_occ ctxt_ns th_occ +-- Return an unqualified exact RdrName if we're dealing with built-in syntax. +-- See Trac #13776. thOrigRdrName :: String -> TH.NameSpace -> PkgName -> ModName -> RdrName -thOrigRdrName occ th_ns pkg mod = (mkOrig $! (mkModule (mk_pkg pkg) (mk_mod mod))) $! (mk_occ (mk_ghc_ns th_ns) occ) +thOrigRdrName occ th_ns pkg mod = + let occ' = mk_occ (mk_ghc_ns th_ns) occ + in case isBuiltInOcc_maybe occ' of + Just name -> nameRdrName name + Nothing -> (mkOrig $! (mkModule (mk_pkg pkg) (mk_mod mod))) $! occ' thRdrNameGuesses :: TH.Name -> [RdrName] thRdrNameGuesses (TH.Name occ flavour) diff --git a/compiler/iface/IfaceType.hs b/compiler/iface/IfaceType.hs index 0c5922eb53..d0adce99ee 100644 --- a/compiler/iface/IfaceType.hs +++ b/compiler/iface/IfaceType.hs @@ -257,6 +257,10 @@ data IfaceCoercion | IfaceForAllCo IfaceTvBndr IfaceCoercion IfaceCoercion | IfaceCoVarCo IfLclName | IfaceAxiomInstCo IfExtName BranchIndex [IfaceCoercion] + | IfaceAxiomRuleCo IfLclName [IfaceCoercion] + -- There are only a fixed number of CoAxiomRules, so it suffices + -- to use an IfaceLclName to distinguish them. + -- See Note [Adding built-in type families] in TcTypeNats | IfaceUnivCo IfaceUnivCoProv Role IfaceType IfaceType | IfaceSymCo IfaceCoercion | IfaceTransCo IfaceCoercion IfaceCoercion @@ -266,7 +270,6 @@ data IfaceCoercion | IfaceCoherenceCo IfaceCoercion IfaceCoercion | IfaceKindCo IfaceCoercion | IfaceSubCo IfaceCoercion - | IfaceAxiomRuleCo IfLclName [IfaceCoercion] | IfaceFreeCoVar CoVar -- See Note [Free tyvars in IfaceType] | IfaceHoleCo CoVar -- ^ See Note [Holes in IfaceCoercion] diff --git a/compiler/iface/TcIface.hs b/compiler/iface/TcIface.hs index 70438f6337..ca1a17dba4 100644 --- a/compiler/iface/TcIface.hs +++ b/compiler/iface/TcIface.hs @@ -1359,7 +1359,7 @@ tcIfaceCo = go <*> go c2 go (IfaceKindCo c) = KindCo <$> go c go (IfaceSubCo c) = SubCo <$> go c - go (IfaceAxiomRuleCo ax cos) = AxiomRuleCo <$> go_axiom_rule ax + go (IfaceAxiomRuleCo ax cos) = AxiomRuleCo <$> tcIfaceCoAxiomRule ax <*> mapM go cos go (IfaceFreeCoVar c) = pprPanic "tcIfaceCo:IfaceFreeCoVar" (ppr c) go (IfaceHoleCo c) = pprPanic "tcIfaceCo:IfaceHoleCo" (ppr c) @@ -1367,12 +1367,6 @@ tcIfaceCo = go go_var :: FastString -> IfL CoVar go_var = tcIfaceLclId - go_axiom_rule :: FastString -> IfL CoAxiomRule - go_axiom_rule n = - case Map.lookup n typeNatCoAxiomRules of - Just ax -> return ax - _ -> pprPanic "go_axiom_rule" (ppr n) - tcIfaceUnivCoProv :: IfaceUnivCoProv -> IfL UnivCoProvenance tcIfaceUnivCoProv IfaceUnsafeCoerceProv = return UnsafeCoerceProv tcIfaceUnivCoProv (IfacePhantomProv kco) = PhantomProv <$> tcIfaceCo kco @@ -1808,6 +1802,16 @@ tcIfaceCoAxiom :: Name -> IfL (CoAxiom Branched) tcIfaceCoAxiom name = do { thing <- tcIfaceImplicit name ; return (tyThingCoAxiom thing) } + +tcIfaceCoAxiomRule :: IfLclName -> IfL CoAxiomRule +-- Unlike CoAxioms, which arise form user 'type instance' declarations, +-- there are a fixed set of CoAxiomRules, +-- currently enumerated in typeNatCoAxiomRules +tcIfaceCoAxiomRule n + = case Map.lookup n typeNatCoAxiomRules of + Just ax -> return ax + _ -> pprPanic "tcIfaceCoAxiomRule" (ppr n) + tcIfaceDataCon :: Name -> IfL DataCon tcIfaceDataCon name = do { thing <- tcIfaceGlobal name ; case thing of diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 7b9cb13254..0d018a7ec4 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -59,6 +59,7 @@ module DynFlags ( tablesNextToCode, mkTablesNextToCode, makeDynFlagsConsistent, shouldUseColor, + shouldUseHexWordLiterals, positionIndependent, optimisationFlags, @@ -449,6 +450,7 @@ data GeneralFlag | Opt_KillOneShot | Opt_FullLaziness | Opt_FloatIn + | Opt_LateSpecialise | Opt_Specialise | Opt_SpecialiseAggressively | Opt_CrossModuleSpecialise @@ -566,6 +568,7 @@ data GeneralFlag | Opt_NoSortValidSubstitutions | Opt_AbstractRefSubstitutions | Opt_ShowLoadedModules + | Opt_HexWordLiterals -- See Note [Print Hexadecimal Literals] -- Suppress all coercions, them replacing with '...' | Opt_SuppressCoercions @@ -631,6 +634,7 @@ optimisationFlags = EnumSet.fromList , Opt_KillOneShot , Opt_FullLaziness , Opt_FloatIn + , Opt_LateSpecialise , Opt_Specialise , Opt_SpecialiseAggressively , Opt_CrossModuleSpecialise @@ -1482,6 +1486,10 @@ data RtsOptsEnabled shouldUseColor :: DynFlags -> Bool shouldUseColor dflags = overrideWith (canUseColor dflags) (useColor dflags) +shouldUseHexWordLiterals :: DynFlags -> Bool +shouldUseHexWordLiterals dflags = + Opt_HexWordLiterals `EnumSet.member` generalFlags dflags + -- | Are we building with @-fPIE@ or @-fPIC@ enabled? positionIndependent :: DynFlags -> Bool positionIndependent dflags = gopt Opt_PIC dflags || gopt Opt_PIE dflags @@ -3007,6 +3015,8 @@ dynamic_flags_deps = [ (NoArg (setRtsOptsEnabled RtsOptsNone)) , make_ord_flag defGhcFlag "no-rtsopts-suggestions" (noArg (\d -> d {rtsOptsSuggestions = False})) + , make_ord_flag defGhcFlag "dhex-word-literals" + (NoArg (setGeneralFlag Opt_HexWordLiterals)) , make_ord_flag defGhcFlag "ghcversion-file" (hasArg addGhcVersionFile) , make_ord_flag defGhcFlag "main-is" (SepArg setMainIs) @@ -3929,6 +3939,7 @@ fFlagsDeps = [ flagSpec "kill-absence" Opt_KillAbsence, flagSpec "kill-one-shot" Opt_KillOneShot, flagSpec "late-dmd-anal" Opt_LateDmdAnal, + flagSpec "late-specialise" Opt_LateSpecialise, flagSpec "liberate-case" Opt_LiberateCase, flagSpec "llvm-pass-vectors-in-regs" Opt_LlvmPassVectorsInRegisters, flagHiddenSpec "llvm-tbaa" Opt_LlvmTBAA, diff --git a/compiler/main/DynFlags.hs-boot b/compiler/main/DynFlags.hs-boot index a8efb6013d..7440e5db00 100644 --- a/compiler/main/DynFlags.hs-boot +++ b/compiler/main/DynFlags.hs-boot @@ -5,13 +5,15 @@ import Platform data DynFlags data DumpFlag +data GeneralFlag -targetPlatform :: DynFlags -> Platform -pprUserLength :: DynFlags -> Int -pprCols :: DynFlags -> Int -unsafeGlobalDynFlags :: DynFlags -useUnicode :: DynFlags -> Bool -useUnicodeSyntax :: DynFlags -> Bool -shouldUseColor :: DynFlags -> Bool -hasPprDebug :: DynFlags -> Bool -hasNoDebugOutput :: DynFlags -> Bool +targetPlatform :: DynFlags -> Platform +pprUserLength :: DynFlags -> Int +pprCols :: DynFlags -> Int +unsafeGlobalDynFlags :: DynFlags +useUnicode :: DynFlags -> Bool +useUnicodeSyntax :: DynFlags -> Bool +shouldUseColor :: DynFlags -> Bool +shouldUseHexWordLiterals :: DynFlags -> Bool +hasPprDebug :: DynFlags -> Bool +hasNoDebugOutput :: DynFlags -> Bool diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index e2c568c836..7c345f2328 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -382,6 +382,14 @@ iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do mov_lo = MR rlo expr_reg return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) rlo + +iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do + (expr_reg,expr_code) <- getSomeReg expr + (rlo, rhi) <- getNewRegPairNat II32 + let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31)) + mov_lo = MR rlo expr_reg + return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) + rlo iselExpr64 expr = pprPanic "iselExpr64(powerpc)" (pprExpr expr) diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen64.hs b/compiler/nativeGen/SPARC/CodeGen/Gen64.hs index 8549fa07ee..6fa7482f9b 100644 --- a/compiler/nativeGen/SPARC/CodeGen/Gen64.hs +++ b/compiler/nativeGen/SPARC/CodeGen/Gen64.hs @@ -193,6 +193,24 @@ iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) return $ ChildCode64 code r_dst_lo +-- only W32 supported for now +iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) + = do + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + + -- compute expr and load it into r_dst_lo + (a_reg, a_code) <- getSomeReg expr + + dflags <- getDynFlags + let platform = targetPlatform dflags + code = a_code + `appOL` toOL + [ SRA a_reg (RIImm (ImmInt 31)) r_dst_hi + , mkRegRegMoveInstr platform a_reg r_dst_lo ] + + return $ ChildCode64 code r_dst_lo + iselExpr64 expr = pprPanic "iselExpr64(sparc)" (ppr expr) diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 09757e769e..a0b0673d27 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -468,6 +468,20 @@ iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do r_dst_lo ) +iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do + fn <- getAnyReg expr + r_dst_lo <- getNewRegNat II32 + let r_dst_hi = getHiVRegFromLo r_dst_lo + code = fn r_dst_lo + return ( + ChildCode64 (code `snocOL` + MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL` + CLTD II32 `snocOL` + MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL` + MOV II32 (OpReg edx) (OpReg r_dst_hi)) + r_dst_lo + ) + iselExpr64 expr = pprPanic "iselExpr64(i386)" (ppr expr) diff --git a/compiler/prelude/PrelRules.hs b/compiler/prelude/PrelRules.hs index 73484b7c35..14e3f0f36e 100644 --- a/compiler/prelude/PrelRules.hs +++ b/compiler/prelude/PrelRules.hs @@ -942,7 +942,56 @@ dataToTagRule = a `mplus` b ************************************************************************ -} --- seq# :: forall a s . a -> State# s -> (# State# s, a #) +{- Note [seq# magic] +~~~~~~~~~~~~~~~~~~~~ +The primop + seq# :: forall a s . a -> State# s -> (# State# s, a #) + +is /not/ the same as the Prelude function seq :: a -> b -> b +as you can see from its type. In fact, seq# is the implementation +mechanism for 'evaluate' + + evaluate :: a -> IO a + evaluate a = IO $ \s -> seq# a s + +The semantics of seq# is + * evaluate its first argument + * and return it + +Things to note + +* Why do we need a primop at all? That is, instead of + case seq# x s of (# x, s #) -> blah + why not instead say this? + case x of { DEFAULT -> blah) + + Reason (see Trac #5129): if we saw + catch# (\s -> case x of { DEFAULT -> raiseIO# exn s }) handler + + then we'd drop the 'case x' because the body of the case is bottom + anyway. But we don't want to do that; the whole /point/ of + seq#/evaluate is to evaluate 'x' first in the IO monad. + + In short, we /always/ evaluate the first argument and never + just discard it. + +* Why return the value? So that we can control sharing of seq'd + values: in + let x = e in x `seq` ... x ... + We don't want to inline x, so better to represent it as + let x = e in case seq# x RW of (# _, x' #) -> ... x' ... + also it matches the type of rseq in the Eval monad. + +Implementing seq#. The compiler has magic for SeqOp in + +- PrelRules.seqRule: eliminate (seq# <whnf> s) + +- StgCmmExpr.cgExpr, and cgCase: special case for seq# + +- CoreUtils.exprOkForSpeculation; + see Note [seq# and expr_ok] in CoreUtils +-} + seqRule :: RuleM CoreExpr seqRule = do [Type ty_a, Type _ty_s, a, s] <- getArgs diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index 038d350a76..996e0bb3e8 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -2697,13 +2697,7 @@ primop SparkOp "spark#" GenPrimOp primop SeqOp "seq#" GenPrimOp a -> State# s -> (# State# s, a #) - - -- why return the value? So that we can control sharing of seq'd - -- values: in - -- let x = e in x `seq` ... x ... - -- we don't want to inline x, so better to represent it as - -- let x = e in case seq# x RW of (# _, x' #) -> ... x' ... - -- also it matches the type of rseq in the Eval monad. + -- See Note [seq# magic] in PrelRules primop GetSparkOp "getSpark#" GenPrimOp State# s -> (# State# s, Int#, a #) @@ -2942,6 +2936,20 @@ primop TraceMarkerOp "traceMarker#" GenPrimOp has_side_effects = True out_of_line = True +primop GetThreadAllocationCounter "getThreadAllocationCounter#" GenPrimOp + State# RealWorld -> (# State# RealWorld, INT64 #) + { Retrieves the allocation counter for the current thread. } + with + has_side_effects = True + out_of_line = True + +primop SetThreadAllocationCounter "setThreadAllocationCounter#" GenPrimOp + INT64 -> State# RealWorld -> State# RealWorld + { Sets the allocation counter for the current thread to the given value. } + with + has_side_effects = True + out_of_line = True + ------------------------------------------------------------------------ section "Safe coercions" ------------------------------------------------------------------------ diff --git a/compiler/rename/RnNames.hs b/compiler/rename/RnNames.hs index 769b34e45b..af00056271 100644 --- a/compiler/rename/RnNames.hs +++ b/compiler/rename/RnNames.hs @@ -1401,9 +1401,12 @@ warnUnusedImport flag fld_env (L loc decl, used, unused) pp_mod = ppr (unLoc (ideclName decl)) pp_not_used = text "is redundant" + -- In warning message, pretty-print identifiers unqualified unconditionally + -- to improve the consistent for ambiguous/unambiguous identifiers. + -- See trac#14881. ppr_possible_field n = case lookupNameEnv fld_env n of - Just (fld, p) -> ppr p <> parens (ppr fld) - Nothing -> ppr n + Just (fld, p) -> pprNameUnqualified p <> parens (ppr fld) + Nothing -> pprNameUnqualified n -- Print unused names in a deterministic (lexicographic) order sort_unused = pprWithCommas ppr_possible_field $ diff --git a/compiler/simplCore/SimplCore.hs b/compiler/simplCore/SimplCore.hs index 41f0a9a495..2bea6dd05d 100644 --- a/compiler/simplCore/SimplCore.hs +++ b/compiler/simplCore/SimplCore.hs @@ -17,7 +17,8 @@ import CoreSyn import HscTypes import CSE ( cseProgram ) import Rules ( mkRuleBase, unionRuleBase, - extendRuleBaseList, ruleCheckProgram, addRuleInfo, ) + extendRuleBaseList, ruleCheckProgram, addRuleInfo, + getRules ) import PprCore ( pprCoreBindings, pprCoreExpr ) import OccurAnal ( occurAnalysePgm, occurAnalyseExpr ) import IdInfo @@ -130,6 +131,7 @@ getCoreToDo dflags spec_constr = gopt Opt_SpecConstr dflags liberate_case = gopt Opt_LiberateCase dflags late_dmd_anal = gopt Opt_LateDmdAnal dflags + late_specialise = gopt Opt_LateSpecialise dflags static_args = gopt Opt_StaticArgumentTransformation dflags rules_on = gopt Opt_EnableRewriteRules dflags eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags @@ -350,6 +352,10 @@ getCoreToDo dflags maybe_rule_check (Phase 0), + runWhen late_specialise + (CoreDoPasses [ CoreDoSpecialising + , simpl_phase 0 ["post-late-spec"] max_iter]), + -- Final clean-up simplification: simpl_phase 0 ["final"] max_iter, @@ -520,10 +526,12 @@ ruleCheckPass current_phase pat guts = { rb <- getRuleBase ; dflags <- getDynFlags ; vis_orphs <- getVisibleOrphanMods + ; let rule_fn fn = getRules (RuleEnv rb vis_orphs) fn + ++ (mg_rules guts) ; liftIO $ putLogMsg dflags NoReason Err.SevDump noSrcSpan (defaultDumpStyle dflags) (ruleCheckProgram current_phase pat - (RuleEnv rb vis_orphs) (mg_binds guts)) + rule_fn (mg_binds guts)) ; return guts } doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts diff --git a/compiler/specialise/Rules.hs b/compiler/specialise/Rules.hs index 319404ef15..b6025955ac 100644 --- a/compiler/specialise/Rules.hs +++ b/compiler/specialise/Rules.hs @@ -1148,10 +1148,10 @@ is so important. -- string for the purposes of error reporting ruleCheckProgram :: CompilerPhase -- ^ Rule activation test -> String -- ^ Rule pattern - -> RuleEnv -- ^ Database of rules + -> (Id -> [CoreRule]) -- ^ Rules for an Id -> CoreProgram -- ^ Bindings to check in -> SDoc -- ^ Resulting check message -ruleCheckProgram phase rule_pat rule_base binds +ruleCheckProgram phase rule_pat rules binds | isEmptyBag results = text "Rule check results: no rule application sites" | otherwise @@ -1164,7 +1164,7 @@ ruleCheckProgram phase rule_pat rule_base binds , rc_id_unf = idUnfolding -- Not quite right -- Should use activeUnfolding , rc_pattern = rule_pat - , rc_rule_base = rule_base } + , rc_rules = rules } results = unionManyBags (map (ruleCheckBind env) binds) line = text (replicate 20 '-') @@ -1172,7 +1172,7 @@ data RuleCheckEnv = RuleCheckEnv { rc_is_active :: Activation -> Bool, rc_id_unf :: IdUnfoldingFun, rc_pattern :: String, - rc_rule_base :: RuleEnv + rc_rules :: Id -> [CoreRule] } ruleCheckBind :: RuleCheckEnv -> CoreBind -> Bag SDoc @@ -1206,7 +1206,7 @@ ruleCheckFun env fn args | null name_match_rules = emptyBag | otherwise = unitBag (ruleAppCheck_help env fn args name_match_rules) where - name_match_rules = filter match (getRules (rc_rule_base env) fn) + name_match_rules = filter match (rc_rules env fn) match rule = (rc_pattern env) `isPrefixOf` unpackFS (ruleName rule) ruleAppCheck_help :: RuleCheckEnv -> Id -> [CoreExpr] -> [CoreRule] -> SDoc diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs index efd56ce77c..f62f7d0778 100644 --- a/compiler/specialise/SpecConstr.hs +++ b/compiler/specialise/SpecConstr.hs @@ -57,6 +57,7 @@ import UniqFM import MonadUtils import Control.Monad ( zipWithM ) import Data.List +import Data.Maybe ( fromMaybe ) import PrelNames ( specTyConName ) import Module import TyCon ( TyCon ) @@ -1509,6 +1510,7 @@ data OneSpec = OS { os_pat :: CallPat -- Call pattern that generated this specialisation , os_rule :: CoreRule -- Rule connecting original id with the specialisation , os_id :: OutId -- Spec id + , os_orig_id :: OutId -- The original id , os_rhs :: OutExpr } -- Spec rhs noSpecInfo :: SpecInfo @@ -1522,7 +1524,8 @@ specNonRec :: ScEnv -- plus details of specialisations specNonRec env body_usg rhs_info - = specialise env (scu_calls body_usg) rhs_info + = addPatUsages env (scu_calls body_usg) <$> + specialise env (scu_calls body_usg) rhs_info (noSpecInfo { si_mb_unspec = Just (ri_rhs_usg rhs_info) }) ---------------------- @@ -1533,7 +1536,8 @@ specRec :: TopLevelFlag -> ScEnv -- plus details of specialisations specRec top_lvl env body_usg rhs_infos - = go 1 seed_calls nullUsage init_spec_infos + = addPatUsagess env (scu_calls body_usg) <$> + go 1 seed_calls nullUsage init_spec_infos where (seed_calls, init_spec_infos) -- Note [Seeding top-level recursive groups] | isTopLevel top_lvl @@ -1754,8 +1758,64 @@ spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number) -- See Note [Transfer activation] ; return (spec_usg, OS { os_pat = call_pat, os_rule = rule , os_id = spec_id + , os_orig_id = fn , os_rhs = spec_rhs }) } +-- See Note [ArgOcc from calls to specialized functions] +addPatUsagess :: ScEnv -> CallEnv -> (ScUsage, [SpecInfo]) -> (ScUsage, [SpecInfo]) +addPatUsagess env body_calls (usg, spec_infos) = (usg `combineUsage` extra_usages, spec_infos) + where extra_usages = combineUsages [ extraPatUsages env body_calls si | si <- spec_infos ] + +addPatUsages :: ScEnv -> CallEnv -> (ScUsage, SpecInfo) -> (ScUsage, SpecInfo) +addPatUsages env body_calls (usg, spec_info) = (usg `combineUsage` extra_usage, spec_info) + where extra_usage = extraPatUsages env body_calls spec_info + +extraPatUsages :: ScEnv -> CallEnv -> SpecInfo -> ScUsage +extraPatUsages env body_calls si = combineUsages + [ patToCallUsage env call_pat call + | os <- si_specs si + , let fn = os_orig_id os + call_pat = os_pat os + , pprTrace "add_pat_usages" (ppr fn <+> ppr call_pat) True + , call <- fromMaybe [] $ lookupVarEnv body_calls fn + ] + +patToCallUsage :: ScEnv -> CallPat -> Call -> ScUsage +patToCallUsage env (_qvars, pats) (Call _ args _) + = pprTrace "patToCallUsage" (ppr pats <+> ppr args <+> ppr usage) $ + usage + where + usage = combineUsages $ zipWith go pats args + + go :: CoreExpr -> CoreExpr -> ScUsage + -- The interesting case + go pat (Var v) + | Just RecArg <- lookupHowBound env v + , arg_occ@ScrutOcc{} <- patToArgOcc pat -- skip if we get UnkOcc + = nullUsage { scu_occs = unitVarEnv v arg_occ } + + -- Transparent cases + go (Tick _ p) e = go p e + go (Cast p _) e = go p e + go p (Tick _ e) = go p e + go p (Cast e _) = go p e + + + -- Traverse the tree + go (App pf pa) (App f a) + = go pf f `combineUsage` go pa a + + -- Boring catch-all + go _ _ = nullUsage + +patToArgOcc :: CoreExpr -> ArgOcc +patToArgOcc e@App{} + | (Var f, args) <- collectArgs e + , Just dc <- isDataConWorkId_maybe f + = let arg_occs = [ patToArgOcc arg | arg <- args, not (isTypeArg arg) ] + in ScrutOcc $ unitUFM dc arg_occs +patToArgOcc _ + = UnkOcc -- See Note [Strictness information in worker binders] handOutStrictnessInformation :: [Demand] -> [Var] -> [Var] @@ -1792,6 +1852,42 @@ calcSpecStrictness fn qvars pats go_one env _ _ = env {- +Note [ArgOcc from calls to specialized functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We collect the ArgOcc to find out which parameters are being scrutinized in the +body function, and only generate specializations when they would lead to some +optimization: In + + foo x = … case x of (a,b) -> … + +We are willing to specialize foo. If we have + + foo x = … bar x … + where bar y = … + +we normally don’t. But what if we specialize bar? Then we have + + foo x = … bar x … + where $sbar a b = … + bar y = … + {-# RULE forall a b. bar (a,b) = $sbar a b #-} + +and now it would be beneficial to create a specialized version of foo that +calls $sbar directly. + +To achieve this, after we specialize bar, we look at the calls to it (found in +scu_calls), and all the specializations that we created. If there is a call `bar x` +and a specialization pattern `(x,y)`, then we treat that as if we found a case +analysis of x, and include `x ↦ ScrutOcc` in scu_occs. This unblocks specialization +of foo, and so on. + +(We might want to generalize this to any call to `baz x` where `baz` has +rewrite rules that match on constructor arguments, not only for when _we_ _just_ +created specializations.) + +(See #14951) + Note [spec_usg includes rhs_usg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In calls to 'specialise', the returned ScUsage must include the rhs_usg in diff --git a/compiler/typecheck/TcInstDcls.hs b/compiler/typecheck/TcInstDcls.hs index 9f623fc0a5..7e44c4a32e 100644 --- a/compiler/typecheck/TcInstDcls.hs +++ b/compiler/typecheck/TcInstDcls.hs @@ -41,6 +41,7 @@ import TcUnify import CoreSyn ( Expr(..), mkApps, mkVarApps, mkLams ) import MkCore ( nO_METHOD_BINDING_ERROR_ID ) import CoreUnfold ( mkInlineUnfoldingWithArity, mkDFunUnfolding ) +import Kind import Type import TcEvidence import TyCon @@ -680,7 +681,7 @@ tcDataFamInstDecl mb_clsinfo -- Deal with any kind signature. -- See also Note [Arity of data families] in FamInstEnv ; (extra_tcbs, final_res_kind) <- tcDataKindSig full_tcbs res_kind' - ; checkTc (isLiftedTypeKind final_res_kind) (badKindSig True res_kind') + ; checkTc (tcIsStarKind final_res_kind) (badKindSig True res_kind') ; let extra_pats = map (mkTyVarTy . binderVar) extra_tcbs all_pats = pats' `chkAppend` extra_pats @@ -722,7 +723,7 @@ tcDataFamInstDecl mb_clsinfo ; checkValidFamPats mb_clsinfo fam_tc tvs' [] pats' extra_pats pp_hs_pats -- Result kind must be '*' (otherwise, we have too few patterns) - ; checkTc (isLiftedTypeKind final_res_kind) $ + ; checkTc (tcIsStarKind final_res_kind) $ tooFewParmsErr (tyConArity fam_tc) ; checkValidTyCon rep_tc diff --git a/compiler/typecheck/TcTyClsDecls.hs b/compiler/typecheck/TcTyClsDecls.hs index 7436b0d690..39697d6d94 100644 --- a/compiler/typecheck/TcTyClsDecls.hs +++ b/compiler/typecheck/TcTyClsDecls.hs @@ -886,7 +886,7 @@ tcFamDecl1 parent (FamilyDecl { fdInfo = fam_info, fdLName = tc_lname@(L _ tc_na -- Data families might have a variable return kind. -- See See Note [Arity of data families] in FamInstEnv. ; (extra_binders, final_res_kind) <- tcDataKindSig binders res_kind - ; checkTc (isLiftedTypeKind final_res_kind + ; checkTc (tcIsStarKind final_res_kind || isJust (tcGetCastedTyVar_maybe final_res_kind)) (badKindSig False res_kind) @@ -1038,7 +1038,7 @@ tcDataDefn roles_info ; let hsc_src = tcg_src tcg_env ; (extra_bndrs, final_res_kind) <- tcDataKindSig tycon_binders res_kind ; unless (mk_permissive_kind hsc_src cons) $ - checkTc (isLiftedTypeKind final_res_kind) (badKindSig True res_kind) + checkTc (tcIsStarKind final_res_kind) (badKindSig True res_kind) ; let final_bndrs = tycon_binders `chkAppend` extra_bndrs roles = roles_info tc_name diff --git a/compiler/typecheck/TcTypeNats.hs b/compiler/typecheck/TcTypeNats.hs index 78e0b96e11..24e12cd15c 100644 --- a/compiler/typecheck/TcTypeNats.hs +++ b/compiler/typecheck/TcTypeNats.hs @@ -5,6 +5,9 @@ module TcTypeNats , typeNatCoAxiomRules , BuiltInSynFamily(..) + -- If you define a new built-in type family, make sure to export its TyCon + -- from here as well. + -- See Note [Adding built-in type families] , typeNatAddTyCon , typeNatMulTyCon , typeNatExpTyCon @@ -53,10 +56,86 @@ import Data.Maybe ( isJust ) import Control.Monad ( guard ) import Data.List ( isPrefixOf, isSuffixOf ) +{- +Note [Type-level literals] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +There are currently two forms of type-level literals: natural numbers, and +symbols (even though this module is named TcTypeNats, it covers both). + +Type-level literals are supported by CoAxiomRules (conditional axioms), which +power the built-in type families (see Note [Adding built-in type families]). +Currently, all built-in type families are for the express purpose of supporting +type-level literals. + +See also the Wiki page: + + https://ghc.haskell.org/trac/ghc/wiki/TypeNats + +Note [Adding built-in type families] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There are a few steps to adding a built-in type family: + +* Adding a unique for the type family TyCon + + These go in PrelNames. It will likely be of the form + @myTyFamNameKey = mkPreludeTyConUnique xyz@, where @xyz@ is a number that + has not been chosen before in PrelNames. There are several examples already + in PrelNames—see, for instance, typeNatAddTyFamNameKey. + +* Adding the type family TyCon itself + + This goes in TcTypeNats. There are plenty of examples of how to define + these—see, for instance, typeNatAddTyCon. + + Once your TyCon has been defined, be sure to: + + - Export it from TcTypeNats. (Not doing so caused #14632.) + - Include it in the typeNatTyCons list, defined in TcTypeNats. + +* Exposing associated type family axioms + + When defining the type family TyCon, you will need to define an axiom for + the type family in general (see, for instance, axAddDef), and perhaps other + auxiliary axioms for special cases of the type family (see, for instance, + axAdd0L and axAdd0R). + + After you have defined all of these axioms, be sure to include them in the + typeNatCoAxiomRules list, defined in TcTypeNats. + (Not doing so caused #14934.) + +* Define the type family somewhere + + Finally, you will need to define the type family somewhere, likely in @base@. + Currently, all of the built-in type families are defined in GHC.TypeLits or + GHC.TypeNats, so those are likely candidates. + + Since the behavior of your built-in type family is specified in TcTypeNats, + you should give an open type family definition with no instances, like so: + + type family MyTypeFam (m :: Nat) (n :: Nat) :: Nat + + Changing the argument and result kinds as appropriate. + +* Update the relevant test cases + + The GHC test suite will likely need to be updated after you add your built-in + type family. For instance: + + - The T9181 test prints the :browse contents of GHC.TypeLits, so if you added + a test there, the expected output of T9181 will need to change. + - The TcTypeNatSimple and TcTypeSymbolSimple tests have compile-time unit + tests, as well as TcTypeNatSimpleRun and TcTypeSymbolSimpleRun, which have + runtime unit tests. Consider adding further unit tests to those if your + built-in type family deals with Nats or Symbols, respectively. +-} + {------------------------------------------------------------------------------- Built-in type constructors for functions on type-level nats -} +-- The list of built-in type family TyCons that GHC uses. +-- If you define a built-in type family, make sure to add it to this list. +-- See Note [Adding built-in type families] typeNatTyCons :: [TyCon] typeNatTyCons = [ typeNatAddTyCon @@ -266,6 +345,7 @@ Built-in rules axioms -- If you add additional rules, please remember to add them to -- `typeNatCoAxiomRules` also. +-- See Note [Adding built-in type families] axAddDef , axMulDef , axExpDef @@ -375,6 +455,9 @@ axAppendSymbol0R = mkAxiom1 "Concat0R" axAppendSymbol0L = mkAxiom1 "Concat0L" $ \(Pair s t) -> (s `appendSymbol` mkStrLitTy nilFS) === t +-- The list of built-in type family axioms that GHC uses. +-- If you define new axioms, make sure to include them in this list. +-- See Note [Adding built-in type families] typeNatCoAxiomRules :: Map.Map FastString CoAxiomRule typeNatCoAxiomRules = Map.fromList $ map (\x -> (coaxrName x, x)) [ axAddDef @@ -398,6 +481,7 @@ typeNatCoAxiomRules = Map.fromList $ map (\x -> (coaxrName x, x)) , axCmpSymbolRefl , axLeq0L , axSubDef + , axSub0R , axAppendSymbol0R , axAppendSymbol0L , axDivDef diff --git a/compiler/types/TyCoRep.hs b/compiler/types/TyCoRep.hs index 588963d012..cc425991ae 100644 --- a/compiler/types/TyCoRep.hs +++ b/compiler/types/TyCoRep.hs @@ -814,16 +814,17 @@ data Coercion -- any left over, we use AppCo. -- See [Coercion axioms applied to coercions] + | AxiomRuleCo CoAxiomRule [Coercion] + -- AxiomRuleCo is very like AxiomInstCo, but for a CoAxiomRule + -- The number coercions should match exactly the expectations + -- of the CoAxiomRule (i.e., the rule is fully saturated). + | UnivCo UnivCoProvenance Role Type Type -- :: _ -> "e" -> _ -> _ -> e | SymCo Coercion -- :: e -> e | TransCo Coercion Coercion -- :: e -> e -> e - -- The number coercions should match exactly the expectations - -- of the CoAxiomRule (i.e., the rule is fully saturated). - | AxiomRuleCo CoAxiomRule [Coercion] - | NthCo Int Coercion -- Zero-indexed; decomposes (T t0 ... tn) -- :: _ -> e -> ?? (inverse of TyConAppCo, see Note [TyConAppCo roles]) -- Using NthCo on a ForAllCo gives an N coercion always diff --git a/compiler/utils/Outputable.hs b/compiler/utils/Outputable.hs index 793b8fb139..2b03555bab 100644 --- a/compiler/utils/Outputable.hs +++ b/compiler/utils/Outputable.hs @@ -22,7 +22,7 @@ module Outputable ( empty, isEmpty, nest, char, text, ftext, ptext, ztext, - int, intWithCommas, integer, float, double, rational, doublePrec, + int, intWithCommas, integer, word, float, double, rational, doublePrec, parens, cparen, brackets, braces, quotes, quote, doubleQuotes, angleBrackets, paBrackets, semi, comma, colon, dcolon, space, equals, dot, vbar, @@ -91,7 +91,8 @@ import GhcPrelude import {-# SOURCE #-} DynFlags( DynFlags, hasPprDebug, hasNoDebugOutput, targetPlatform, pprUserLength, pprCols, useUnicode, useUnicodeSyntax, - shouldUseColor, unsafeGlobalDynFlags ) + shouldUseColor, unsafeGlobalDynFlags, + shouldUseHexWordLiterals ) import {-# SOURCE #-} Module( UnitId, Module, ModuleName, moduleName ) import {-# SOURCE #-} OccName( OccName ) @@ -555,6 +556,7 @@ ptext :: LitString -> SDoc ztext :: FastZString -> SDoc int :: Int -> SDoc integer :: Integer -> SDoc +word :: Integer -> SDoc float :: Float -> SDoc double :: Double -> SDoc rational :: Rational -> SDoc @@ -573,6 +575,11 @@ integer n = docToSDoc $ Pretty.integer n float n = docToSDoc $ Pretty.float n double n = docToSDoc $ Pretty.double n rational n = docToSDoc $ Pretty.rational n +word n = sdocWithDynFlags $ \dflags -> + -- See Note [Print Hexadecimal Literals] in Pretty.hs + if shouldUseHexWordLiterals dflags + then docToSDoc $ Pretty.hex n + else docToSDoc $ Pretty.integer n -- | @doublePrec p n@ shows a floating point number @n@ with @p@ -- digits of precision after the decimal point. @@ -969,9 +976,9 @@ pprPrimChar :: Char -> SDoc pprPrimInt, pprPrimWord, pprPrimInt64, pprPrimWord64 :: Integer -> SDoc pprPrimChar c = pprHsChar c <> primCharSuffix pprPrimInt i = integer i <> primIntSuffix -pprPrimWord w = integer w <> primWordSuffix +pprPrimWord w = word w <> primWordSuffix pprPrimInt64 i = integer i <> primInt64Suffix -pprPrimWord64 w = integer w <> primWord64Suffix +pprPrimWord64 w = word w <> primWord64Suffix --------------------- -- Put a name in parens if it's an operator diff --git a/compiler/utils/Pretty.hs b/compiler/utils/Pretty.hs index f4987d3751..9a12c7dae9 100644 --- a/compiler/utils/Pretty.hs +++ b/compiler/utils/Pretty.hs @@ -72,7 +72,7 @@ module Pretty ( -- ** Converting values into documents char, text, ftext, ptext, ztext, sizedText, zeroWidthText, - int, integer, float, double, rational, + int, integer, float, double, rational, hex, -- ** Simple derived documents semi, comma, colon, space, equals, @@ -117,6 +117,7 @@ import BufWrite import FastString import Panic import System.IO +import Numeric (showHex) --for a RULES import GHC.Base ( unpackCString# ) @@ -404,11 +405,18 @@ integer :: Integer -> Doc -- ^ @integer n = text (show n)@ float :: Float -> Doc -- ^ @float n = text (show n)@ double :: Double -> Doc -- ^ @double n = text (show n)@ rational :: Rational -> Doc -- ^ @rational n = text (show n)@ +hex :: Integer -> Doc -- ^ See Note [Print Hexadecimal Literals] int n = text (show n) integer n = text (show n) float n = text (show n) double n = text (show n) rational n = text (show n) +hex n = text ('0' : 'x' : padded) + where + str = showHex n "" + strLen = max 1 (length str) + len = 2 ^ (ceiling (logBase 2 (fromIntegral strLen :: Double)) :: Int) + padded = replicate (len - strLen) '0' ++ str parens :: Doc -> Doc -- ^ Wrap document in @(...)@ brackets :: Doc -> Doc -- ^ Wrap document in @[...]@ @@ -423,6 +431,57 @@ parens p = char '(' <> p <> char ')' brackets p = char '[' <> p <> char ']' braces p = char '{' <> p <> char '}' +{- +Note [Print Hexadecimal Literals] + +Relevant discussions: + * Phabricator: https://phabricator.haskell.org/D4465 + * GHC Trac: https://ghc.haskell.org/trac/ghc/ticket/14872 + +There is a flag `-dword-hex-literals` that causes literals of +type `Word#` or `Word64#` to be displayed in hexadecimal instead +of decimal when dumping GHC core. It also affects the presentation +of these in GHC's error messages. Additionally, the hexadecimal +encoding of these numbers is zero-padded so that its length is +a power of two. As an example of what this does, +consider the following haskell file `Literals.hs`: + + module Literals where + + alpha :: Int + alpha = 100 + 200 + + beta :: Word -> Word + beta x = x + div maxBound 255 + div 0xFFFFFFFF 255 + 0x0202 + +We get the following dumped core when we compile on a 64-bit +machine with ghc -O2 -fforce-recomp -ddump-simpl -dsuppress-all +-dhex-word-literals literals.hs: + + ==================== Tidy Core ==================== + + ... omitted for brevity ... + + -- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} + alpha + alpha = I# 300# + + -- RHS size: {terms: 12, types: 3, coercions: 0, joins: 0/0} + beta + beta + = \ x_aYE -> + case x_aYE of { W# x#_a1v0 -> + W# + (plusWord# + (plusWord# (plusWord# x#_a1v0 0x0101010101010101##) 0x01010101##) + 0x0202##) + } + +Notice that the word literals are in hexadecimals and that they have +been padded with zeroes so that their lengths are 16, 8, and 4, respectively. + +-} + -- | Apply 'parens' to 'Doc' if boolean is true. maybeParens :: Bool -> Doc -> Doc maybeParens False = id diff --git a/compiler/utils/UniqFM.hs b/compiler/utils/UniqFM.hs index 2a9b806178..a80880f4e5 100644 --- a/compiler/utils/UniqFM.hs +++ b/compiler/utils/UniqFM.hs @@ -78,12 +78,10 @@ import Outputable import Data.List (foldl') import qualified Data.IntMap as M -import qualified Data.IntMap.Merge.Lazy as M -import Control.Applicative (Const (..)) -import qualified Data.Monoid as Mon import qualified Data.IntSet as S import Data.Data import qualified Data.Semigroup as Semi +import Data.Functor.Classes (Eq1 (..)) newtype UniqFM ele = UFM (M.IntMap ele) @@ -342,10 +340,7 @@ ufmToIntMap (UFM m) = m -- Determines whether two 'UniqFm's contain the same keys. equalKeysUFM :: UniqFM a -> UniqFM b -> Bool -equalKeysUFM (UFM m1) (UFM m2) = Mon.getAll $ getConst $ - M.mergeA (M.traverseMissing (\_ _ -> Const (Mon.All False))) - (M.traverseMissing (\_ _ -> Const (Mon.All False))) - (M.zipWithAMatched (\_ _ _ -> Const (Mon.All True))) m1 m2 +equalKeysUFM (UFM m1) (UFM m2) = liftEq (\_ _ -> True) m1 m2 -- Instances diff --git a/configure.ac b/configure.ac index e75fc6c7dd..1bab56d7fe 100644 --- a/configure.ac +++ b/configure.ac @@ -55,13 +55,14 @@ AC_SUBST([release], [1]) # First off, a distrib sanity check.. AC_CONFIG_SRCDIR([mk/config.mk.in]) -dnl * We require autoconf version 2.60 -dnl We need 2.50 due to the use of AC_SYS_LARGEFILE and AC_MSG_NOTICE. -dnl We need 2.52 due to the use of AS_TR_CPP and AS_TR_SH. -dnl Using autoconf 2.59 started to give nonsense like this -dnl #define SIZEOF_CHAR 0 -dnl recently. -AC_PREREQ([2.60]) +dnl * We require autoconf version 2.69 due to +dnl https://bugs.ruby-lang.org/issues/8179. Also see #14910. +dnl * We need 2.50 due to the use of AC_SYS_LARGEFILE and AC_MSG_NOTICE. +dnl * We need 2.52 due to the use of AS_TR_CPP and AS_TR_SH. +dnl * Using autoconf 2.59 started to give nonsense like this +dnl #define SIZEOF_CHAR 0 +dnl recently. +AC_PREREQ([2.69]) # ------------------------------------------------------------------------- # Prepare to generate the following header files diff --git a/distrib/configure.ac.in b/distrib/configure.ac.in index 95ad1986bb..ed1c29671a 100644 --- a/distrib/configure.ac.in +++ b/distrib/configure.ac.in @@ -6,6 +6,9 @@ dnl AC_INIT([The Glorious Glasgow Haskell Compilation System], [@ProjectVersion@], [glasgow-haskell-bugs@haskell.org], [ghc-AC_PACKAGE_VERSION]) +dnl See /configure.ac for rationale. +AC_PREREQ([2.69]) + dnl-------------------------------------------------------------------- dnl * Deal with arguments telling us gmp is somewhere odd dnl-------------------------------------------------------------------- diff --git a/docs/users_guide/debugging.rst b/docs/users_guide/debugging.rst index d11cc04fd0..c6d90e642d 100644 --- a/docs/users_guide/debugging.rst +++ b/docs/users_guide/debugging.rst @@ -588,6 +588,20 @@ Formatting dumps let expressions. This is helpful when your code does a lot of unboxing. +.. ghc-flag:: -dhex-word-literals + :shortdesc: Print values of type `Word#` in hexadecimal. + :type: dynamic + + Print values of type `Word#` and `Word64#` (but not values of + type `Int#` and `Int64#`) in hexadecimal instead of decimal. + The hexadecimal is zero-padded to make the length of the + representation a power of two. For example: `0x0A0A##`, + `0x000FFFFF##`, `0xC##`. This flag may be helpful when you + are producing a bit pattern that to expect to work correctly on a 32-bit + or a 64-bit architecture. Dumping hexadecimal literals after + optimizations and constant folding makes it easier to confirm + that the generated bit pattern is correct. + .. ghc-flag:: -dno-debug-output :shortdesc: Suppress unsolicited debugging output :type: dynamic diff --git a/docs/users_guide/ghc_packages.py b/docs/users_guide/ghc_packages.py index d4a688b370..6419834e1e 100644 --- a/docs/users_guide/ghc_packages.py +++ b/docs/users_guide/ghc_packages.py @@ -8,13 +8,13 @@ from utils import build_table_from_list def read_cabal_file(pkg_path): import re cabal_file = open(pkg_path, 'r').read() - pkg_name = re.search(r'[nN]ame:\s*([-a-zA-Z0-9]+)', cabal_file) + pkg_name = re.search(r'^[nN]ame\s*:\s*([-a-zA-Z0-9]+)', cabal_file, re.MULTILINE) if pkg_name is not None: pkg_name = pkg_name.group(1) else: raise RuntimeError("Failed to parse `Name:` field from %s" % pkg_path) - pkg_version = re.search(r'[vV]ersion:\s*(\d+(\.\d+)*)', cabal_file) + pkg_version = re.search(r'^[vV]ersion\s*:\s*(\d+(\.\d+)*)', cabal_file, re.MULTILINE) if pkg_version is not None: pkg_version = pkg_version.group(1) else: diff --git a/docs/users_guide/glasgow_exts.rst b/docs/users_guide/glasgow_exts.rst index c6cff92790..49c6ed4709 100644 --- a/docs/users_guide/glasgow_exts.rst +++ b/docs/users_guide/glasgow_exts.rst @@ -9442,7 +9442,7 @@ The following things have kind ``Constraint``: - Anything whose form is not yet known, but the user has declared to have kind ``Constraint`` (for which they need to import it from ``GHC.Exts``). So for example - ``type Foo (f :: \* -> Constraint) = forall b. f b => b -> b`` + ``type Foo (f :: * -> Constraint) = forall b. f b => b -> b`` is allowed, as well as examples involving type families: :: type family Typ a b :: Constraint diff --git a/docs/users_guide/runtime_control.rst b/docs/users_guide/runtime_control.rst index 009e3ae887..92bc739dfe 100644 --- a/docs/users_guide/runtime_control.rst +++ b/docs/users_guide/runtime_control.rst @@ -776,6 +776,7 @@ RTS options to produce runtime statistics -s [⟨file⟩] -S [⟨file⟩] --machine-readable + --internal-counters These options produce runtime-system statistics, such as the amount of time spent executing the program and in the garbage collector, @@ -785,7 +786,10 @@ RTS options to produce runtime statistics line of output in the same format as GHC's ``-Rghc-timing`` option, ``-s`` produces a more detailed summary at the end of the program, and ``-S`` additionally produces information about each and every - garbage collection. + garbage collection. Passing ``--internal-counters`` to a threaded + runtime will cause a detailed summary to include various internal + counts accumulated during the run; note that these are unspecified + and may change between releases. The output is placed in ⟨file⟩. If ⟨file⟩ is omitted, then the output is sent to ``stderr``. diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst index 3566462eeb..d6c24de502 100644 --- a/docs/users_guide/using-optimisation.rst +++ b/docs/users_guide/using-optimisation.rst @@ -884,6 +884,20 @@ by saying ``-fno-wombat``. which they are called in this module. Note that specialisation must be enabled (by ``-fspecialise``) for this to have any effect. +.. ghc-flag:: -flate-specialise + :shortdesc: Run a late specialisation pass + :type: dynamic + :reverse: -fno-late-specialise + :default: off + + Runs another specialisation pass towards the end of the optimisation + pipeline. This can catch specialisation opportunities which arose from + the previous specialisation pass or other inlining. + + You might want to use this if you are you have a type class method + which returns a constrained type. For example, a type class where one + of the methods implements a traversal. + .. ghc-flag:: -fsolve-constant-dicts :shortdesc: When solving constraints, try to eagerly solve super classes using available dictionaries. diff --git a/includes/Cmm.h b/includes/Cmm.h index 57d78ccaa5..18b2aaf324 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -161,9 +161,11 @@ /* TO_W_(n) converts n to W_ type from a smaller type */ #if SIZEOF_W == 4 +#define TO_I64(x) %sx64(x) #define TO_W_(x) %sx32(x) #define HALF_W_(x) %lobits16(x) #elif SIZEOF_W == 8 +#define TO_I64(x) (x) #define TO_W_(x) %sx64(x) #define HALF_W_(x) %lobits32(x) #endif diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h index 27a5080220..6f011cbf6e 100644 --- a/includes/RtsAPI.h +++ b/includes/RtsAPI.h @@ -210,6 +210,30 @@ typedef struct _RTSStats { GCDetails gc; + // ----------------------------------- + // Internal Counters + + // The number of times a GC thread spun on its 'gc_spin' lock. + // Will be zero if the rts was not built with PROF_SPIN + uint64_t gc_spin_spin; + // The number of times a GC thread yielded on its 'gc_spin' lock. + // Will be zero if the rts was not built with PROF_SPIN + uint64_t gc_spin_yield; + // The number of times a GC thread spun on its 'mut_spin' lock. + // Will be zero if the rts was not built with PROF_SPIN + uint64_t mut_spin_spin; + // The number of times a GC thread yielded on its 'mut_spin' lock. + // Will be zero if the rts was not built with PROF_SPIN + uint64_t mut_spin_yield; + // The number of times a GC thread has checked for work across all parallel + // GCs + uint64_t any_work; + // The number of times a GC thread has checked for work and found none across + // all parallel GCs + uint64_t no_work; + // The number of times a GC thread has iterated it's outer loop across all + // parallel GCs + uint64_t scav_find_work; } RTSStats; void getRTSStats (RTSStats *s); diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h index aed4dca384..6487947749 100644 --- a/includes/rts/Flags.h +++ b/includes/rts/Flags.h @@ -195,6 +195,7 @@ typedef struct _MISC_FLAGS { bool generate_dump_file; bool generate_stack_trace; bool machineReadable; + bool internalCounters; /* See Note [Internal Counter Stats] */ StgWord linkerMemBase; /* address to ask the OS for memory * for the linker, NULL ==> off */ } MISC_FLAGS; diff --git a/includes/rts/SpinLock.h b/includes/rts/SpinLock.h index 6530a3a2f0..1dca02f795 100644 --- a/includes/rts/SpinLock.h +++ b/includes/rts/SpinLock.h @@ -27,7 +27,8 @@ typedef struct SpinLock_ { StgWord lock; - StgWord64 spin; // DEBUG version counts how much it spins + StgWord64 spin; // incremented every time we spin in ACQUIRE_SPIN_LOCK + StgWord64 yield; // incremented every time we yield in ACQUIRE_SPIN_LOCK } SpinLock; #else typedef StgWord SpinLock; @@ -49,6 +50,7 @@ INLINE_HEADER void ACQUIRE_SPIN_LOCK(SpinLock * p) p->spin++; busy_wait_nop(); } + p->yield++; yieldThread(); } while (1); } @@ -66,6 +68,7 @@ INLINE_HEADER void initSpinLock(SpinLock * p) write_barrier(); p->lock = 1; p->spin = 0; + p->yield = 0; } #else diff --git a/includes/rts/Threads.h b/includes/rts/Threads.h index fceacdc75d..f72f5ed121 100644 --- a/includes/rts/Threads.h +++ b/includes/rts/Threads.h @@ -43,8 +43,6 @@ StgRegTable * resumeThread (void *); // int cmp_thread (StgPtr tso1, StgPtr tso2); int rts_getThreadId (StgPtr tso); -HsInt64 rts_getThreadAllocationCounter (StgPtr tso); -void rts_setThreadAllocationCounter (StgPtr tso, HsInt64 i); void rts_enableThreadAllocationLimit (StgPtr tso); void rts_disableThreadAllocationLimit (StgPtr tso); diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index 2aed7c57ee..d4182dd7f9 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -120,7 +120,7 @@ typedef struct generation_ { // stats information uint32_t collections; uint32_t par_collections; - uint32_t failed_promotions; + uint32_t failed_promotions; // Currently unused // ------------------------------------ // Fields below are used during GC only diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index 76cfbd6c8c..1fbfab9fbe 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -468,6 +468,9 @@ RTS_FUN_DECL(stg_traceCcszh); RTS_FUN_DECL(stg_clearCCSzh); RTS_FUN_DECL(stg_traceEventzh); RTS_FUN_DECL(stg_traceMarkerzh); +RTS_FUN_DECL(stg_getThreadAllocationCounterzh); +RTS_FUN_DECL(stg_setThreadAllocationCounterzh); + /* Other misc stuff */ // See wiki:Commentary/Compiler/Backends/PprC#Prototypes diff --git a/libraries/array b/libraries/array -Subproject 9d63218fd067ff4885c0efa43b388238421a5c8 +Subproject 1d0435f4937f03901e32304e279f46ce19b0f08 diff --git a/libraries/base/GHC/Conc/Sync.hs b/libraries/base/GHC/Conc/Sync.hs index 517c20e45f..94601f356d 100644 --- a/libraries/base/GHC/Conc/Sync.hs +++ b/libraries/base/GHC/Conc/Sync.hs @@ -105,6 +105,7 @@ import Data.Maybe import GHC.Base import {-# SOURCE #-} GHC.IO.Handle ( hFlush ) import {-# SOURCE #-} GHC.IO.Handle.FD ( stdout ) +import GHC.Int import GHC.IO import GHC.IO.Encoding.UTF8 import GHC.IO.Exception @@ -194,18 +195,16 @@ instance Ord ThreadId where -- -- @since 4.8.0.0 setAllocationCounter :: Int64 -> IO () -setAllocationCounter i = do - ThreadId t <- myThreadId - rts_setThreadAllocationCounter t i +setAllocationCounter (I64# i) = IO $ \s -> + case setThreadAllocationCounter# i s of s' -> (# s', () #) -- | Return the current value of the allocation counter for the -- current thread. -- -- @since 4.8.0.0 getAllocationCounter :: IO Int64 -getAllocationCounter = do - ThreadId t <- myThreadId - rts_getThreadAllocationCounter t +getAllocationCounter = IO $ \s -> + case getThreadAllocationCounter# s of (# s', ctr #) -> (# s', I64# ctr #) -- | Enables the allocation counter to be treated as a limit for the -- current thread. When the allocation limit is enabled, if the @@ -242,16 +241,6 @@ disableAllocationLimit = do ThreadId t <- myThreadId rts_disableThreadAllocationLimit t --- We cannot do these operations safely on another thread, because on --- a 32-bit machine we cannot do atomic operations on a 64-bit value. --- Therefore, we only expose APIs that allow getting and setting the --- limit of the current thread. -foreign import ccall unsafe "rts_setThreadAllocationCounter" - rts_setThreadAllocationCounter :: ThreadId# -> Int64 -> IO () - -foreign import ccall unsafe "rts_getThreadAllocationCounter" - rts_getThreadAllocationCounter :: ThreadId# -> IO Int64 - foreign import ccall unsafe "rts_enableThreadAllocationLimit" rts_enableThreadAllocationLimit :: ThreadId# -> IO () diff --git a/libraries/base/GHC/RTS/Flags.hsc b/libraries/base/GHC/RTS/Flags.hsc index 3e712ca900..046975577e 100644 --- a/libraries/base/GHC/RTS/Flags.hsc +++ b/libraries/base/GHC/RTS/Flags.hsc @@ -138,6 +138,7 @@ data MiscFlags = MiscFlags , generateCrashDumpFile :: Bool , generateStackTrace :: Bool , machineReadable :: Bool + , internalCounters :: Bool , linkerMemBase :: Word -- ^ address to ask the OS for memory for the linker, 0 ==> off } deriving ( Show -- ^ @since 4.8.0.0 @@ -441,6 +442,8 @@ getMiscFlags = do (#{peek MISC_FLAGS, generate_stack_trace} ptr :: IO CBool)) <*> (toBool <$> (#{peek MISC_FLAGS, machineReadable} ptr :: IO CBool)) + <*> (toBool <$> + (#{peek MISC_FLAGS, internalCounters} ptr :: IO CBool)) <*> #{peek MISC_FLAGS, linkerMemBase} ptr getDebugFlags :: IO DebugFlags diff --git a/libraries/integer-gmp/configure.ac b/libraries/integer-gmp/configure.ac index c19dbbc4a8..3aebeba5d5 100644 --- a/libraries/integer-gmp/configure.ac +++ b/libraries/integer-gmp/configure.ac @@ -1,4 +1,4 @@ -AC_PREREQ(2.60) +AC_PREREQ(2.69) AC_INIT([Haskell integer (GMP)], [1.0], [libraries@haskell.org], [integer]) # Safety check: Ensure that we are in the correct source directory. diff --git a/rts/Messages.c b/rts/Messages.c index 8fab314bc4..a9c794d823 100644 --- a/rts/Messages.c +++ b/rts/Messages.c @@ -129,6 +129,9 @@ loop: } else if (i == &stg_WHITEHOLE_info) { +#if defined(PROF_SPIN) + ++whitehole_executeMessage_spin; +#endif goto loop; } else diff --git a/rts/Messages.h b/rts/Messages.h index e60f19dc1d..18371564c4 100644 --- a/rts/Messages.h +++ b/rts/Messages.h @@ -31,3 +31,7 @@ doneWithMsgThrowTo (MessageThrowTo *m) } #include "EndPrivate.h" + +#if defined(THREADED_RTS) && defined(PROF_SPIN) +extern volatile StgWord64 whitehole_executeMessage_spin; +#endif diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 67a238488c..e3f6e4cd19 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -2491,3 +2491,23 @@ stg_traceMarkerzh ( W_ msg ) return (); } + +stg_getThreadAllocationCounterzh () +{ + // Account for the allocation in the current block + W_ offset; + offset = Hp - bdescr_start(CurrentNursery); + return (StgTSO_alloc_limit(CurrentTSO) - TO_I64(offset)); +} + +stg_setThreadAllocationCounterzh ( I64 counter ) +{ + // Allocation in the current block will be subtracted by + // getThreadAllocationCounter#, so we have to offset any existing + // allocation here. See also openNursery/closeNursery in + // compiler/codeGen/StgCmmForeign.hs. + W_ offset; + offset = Hp - bdescr_start(CurrentNursery); + StgTSO_alloc_limit(CurrentTSO) = counter + TO_I64(offset); + return (); +} diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index 7b38bbd6fd..b674e9b685 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -232,6 +232,7 @@ void initRtsFlagsDefaults(void) RtsFlags.MiscFlags.generate_stack_trace = true; RtsFlags.MiscFlags.generate_dump_file = false; RtsFlags.MiscFlags.machineReadable = false; + RtsFlags.MiscFlags.internalCounters = false; RtsFlags.MiscFlags.linkerMemBase = 0; #if defined(THREADED_RTS) @@ -888,6 +889,11 @@ error = true; OPTION_UNSAFE; RtsFlags.MiscFlags.machineReadable = true; } + else if (strequal("internal-counters", + &rts_argv[arg][2])) { + OPTION_SAFE; + RtsFlags.MiscFlags.internalCounters = true; + } else if (strequal("info", &rts_argv[arg][2])) { OPTION_SAFE; diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index e53a056a4c..d5800fd336 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -743,8 +743,6 @@ SymI_HasProto(rts_isProfiled) \ SymI_HasProto(rts_isDynamic) \ SymI_HasProto(rts_setInCallCapability) \ - SymI_HasProto(rts_getThreadAllocationCounter) \ - SymI_HasProto(rts_setThreadAllocationCounter) \ SymI_HasProto(rts_enableThreadAllocationLimit) \ SymI_HasProto(rts_disableThreadAllocationLimit) \ SymI_HasProto(rts_setMainThread) \ @@ -895,6 +893,8 @@ SymI_HasProto(stg_traceCcszh) \ SymI_HasProto(stg_traceEventzh) \ SymI_HasProto(stg_traceMarkerzh) \ + SymI_HasProto(stg_getThreadAllocationCounterzh) \ + SymI_HasProto(stg_setThreadAllocationCounterzh) \ SymI_HasProto(getMonotonicNSec) \ SymI_HasProto(lockFile) \ SymI_HasProto(unlockFile) \ diff --git a/rts/SMPClosureOps.h b/rts/SMPClosureOps.h index 4ea1c55976..1d18e1b018 100644 --- a/rts/SMPClosureOps.h +++ b/rts/SMPClosureOps.h @@ -38,6 +38,11 @@ EXTERN_INLINE void unlockClosure(StgClosure *p, const StgInfoTable *info); #if defined(THREADED_RTS) +#if defined(PROF_SPIN) +extern volatile StgWord64 whitehole_lockClosure_spin; +extern volatile StgWord64 whitehole_lockClosure_yield; +#endif + /* ----------------------------------------------------------------------------- * Locking/unlocking closures * @@ -56,7 +61,14 @@ EXTERN_INLINE StgInfoTable *reallyLockClosure(StgClosure *p) do { info = xchg((P_)(void *)&p->header.info, (W_)&stg_WHITEHOLE_info); if (info != (W_)&stg_WHITEHOLE_info) return (StgInfoTable *)info; +#if defined(PROF_SPIN) + ++whitehole_lockClosure_spin; +#endif + busy_wait_nop(); } while (++i < SPIN_COUNT); +#if defined(PROF_SPIN) + ++whitehole_lockClosure_yield; +#endif yieldThread(); } while (1); } diff --git a/rts/Stats.c b/rts/Stats.c index 26bdac0ea5..7eb93bef33 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -16,10 +16,15 @@ #include "Profiling.h" #include "GetTime.h" #include "sm/Storage.h" -#include "sm/GC.h" // gc_alloc_block_sync, whitehole_gc_spin #include "sm/GCThread.h" #include "sm/BlockAlloc.h" +// for spin/yield counters +#include "sm/GC.h" +#include "ThreadPaused.h" +#include "Messages.h" + + #define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION) static Time @@ -43,6 +48,13 @@ static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time #define PROF_VAL(x) 0 #endif +#if defined(PROF_SPIN) +volatile StgWord64 whitehole_lockClosure_spin = 0; +volatile StgWord64 whitehole_lockClosure_yield = 0; +volatile StgWord64 whitehole_threadPaused_spin = 0; +volatile StgWord64 whitehole_executeMessage_spin = 0; +#endif + // // All the stats! // @@ -150,6 +162,13 @@ initStats0(void) .par_copied_bytes = 0, .cumulative_par_max_copied_bytes = 0, .cumulative_par_balanced_copied_bytes = 0, + .gc_spin_spin = 0, + .gc_spin_yield = 0, + .mut_spin_spin = 0, + .mut_spin_yield = 0, + .any_work = 0, + .no_work = 0, + .scav_find_work = 0, .mutator_cpu_ns = 0, .mutator_elapsed_ns = 0, .gc_cpu_ns = 0, @@ -283,10 +302,11 @@ stat_startGC (Capability *cap, gc_thread *gct) -------------------------------------------------------------------------- */ void -stat_endGC (Capability *cap, gc_thread *gct, - W_ live, W_ copied, W_ slop, uint32_t gen, - uint32_t par_n_threads, W_ par_max_copied, - W_ par_balanced_copied) +stat_endGC (Capability *cap, gc_thread *gct, W_ live, W_ copied, W_ slop, + uint32_t gen, uint32_t par_n_threads, W_ par_max_copied, + W_ par_balanced_copied, W_ gc_spin_spin, W_ gc_spin_yield, + W_ mut_spin_spin, W_ mut_spin_yield, W_ any_work, W_ no_work, + W_ scav_find_work) { // ------------------------------------------------- // Collect all the stats about this GC in stats.gc. We always do this since @@ -350,6 +370,13 @@ stat_endGC (Capability *cap, gc_thread *gct, stats.gc.par_max_copied_bytes; stats.cumulative_par_balanced_copied_bytes += stats.gc.par_balanced_copied_bytes; + stats.any_work += any_work; + stats.no_work += no_work; + stats.scav_find_work += scav_find_work; + stats.gc_spin_spin += gc_spin_spin; + stats.gc_spin_yield += gc_spin_yield; + stats.mut_spin_spin += mut_spin_spin; + stats.mut_spin_yield += mut_spin_yield; } stats.gc_cpu_ns += stats.gc.cpu_ns; stats.gc_elapsed_ns += stats.gc.elapsed_ns; @@ -764,18 +791,96 @@ stat_exit (void) PROF_VAL(RPe_tot_time + HCe_tot_time) - init_elapsed) * 100 / TimeToSecondsDbl(tot_elapsed)); + // See Note [Internal Counter Stats] for a description of the + // following counters. If you add a counter here, please remember + // to update the Note. + if (RtsFlags.MiscFlags.internalCounters) { #if defined(THREADED_RTS) && defined(PROF_SPIN) - { uint32_t g; + const int32_t col_width[] = {4, -30, 14, 14}; + statsPrintf("Internal Counters:\n"); + statsPrintf("%*s" "%*s" "%*s" "%*s" "\n" + , col_width[0], "" + , col_width[1], "SpinLock" + , col_width[2], "Spins" + , col_width[3], "Yields"); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "gc_alloc_block_sync" + , col_width[2], gc_alloc_block_sync.spin + , col_width[3], gc_alloc_block_sync.yield); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "gc_spin" + , col_width[2], stats.gc_spin_spin + , col_width[3], stats.gc_spin_yield); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "mut_spin" + , col_width[2], stats.mut_spin_spin + , col_width[3], stats.mut_spin_yield); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n" + , col_width[0], "" + , col_width[1], "whitehole_gc" + , col_width[2], whitehole_gc_spin + , col_width[3], "n/a"); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n" + , col_width[0], "" + , col_width[1], "whitehole_threadPaused" + , col_width[2], whitehole_threadPaused_spin + , col_width[3], "n/a"); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n" + , col_width[0], "" + , col_width[1], "whitehole_executeMessage" + , col_width[2], whitehole_executeMessage_spin + , col_width[3], "n/a"); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "whitehole_lockClosure" + , col_width[2], whitehole_lockClosure_spin + , col_width[3], whitehole_lockClosure_yield); + // waitForGcThreads isn't really spin-locking(see the function) + // but these numbers still seem useful. + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "waitForGcThreads" + , col_width[2], waitForGcThreads_spin + , col_width[3], waitForGcThreads_yield); - statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin); - statsPrintf("whitehole_gc_spin: %"FMT_Word64"\n" - , whitehole_gc_spin); for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin); + int prefix_length = 0; + statsPrintf("%*s" "gen[%" FMT_Word32 "%n", + col_width[0], "", g, &prefix_length); + prefix_length -= col_width[0]; + int suffix_length = col_width[1] + prefix_length; + suffix_length = + suffix_length > 0 ? col_width[1] : suffix_length; + + statsPrintf("%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" + , suffix_length, "].sync" + , col_width[2], generations[g].sync.spin + , col_width[3], generations[g].sync.yield); } - } + statsPrintf("\n"); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "any_work" + , col_width[2], stats.any_work); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "no_work" + , col_width[2], stats.no_work); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "scav_find_work" + , col_width[2], stats.scav_find_work); +#elif defined(THREADED_RTS) // THREADED_RTS && PROF_SPIN + statsPrintf("Internal Counters require the RTS to be built " + "with PROF_SPIN"); // PROF_SPIN is not #defined here +#else // THREADED_RTS + statsPrintf("Internal Counters require the threaded RTS"); #endif + } } if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) { @@ -917,6 +1022,68 @@ the number of gc threads is limited to the number of cores. See #13830 */ +/* +Note [Internal Counter Stats] +----------------------------- +What do the counts at the end of a '+RTS -s --internal-counters' report mean? +They are detailed below. Most of these counters are used by multiple threads +with no attempt at synchronisation. This means that reported values may be +lower than the true value and this becomes more likely and more severe as +contention increases. + +The first counters are for various SpinLock-like constructs in the RTS. See +Spinlock.h for the definition of a SpinLock. We maintain up two counters per +SpinLock: +* spin: The number of busy-spins over the length of the program. +* yield: The number of times the SpinLock spun SPIN_COUNT times without success + and called yieldThread(). +Not all of these are actual SpinLocks, see the details below. + +Actual SpinLocks: +* gc_alloc_block: + This SpinLock protects the block allocator and free list manager. See + BlockAlloc.c. +* gc_spin and mut_spin: + These SpinLocks are used to herd gc worker threads during parallel garbage + collection. See gcWorkerThread, wakeup_gc_threads and releaseGCThreads. +* gen[g].sync: + These SpinLocks, one per generation, protect the generations[g] data + structure during garbage collection. + +waitForGcThreads: + These counters are incremented while we wait for all threads to be ready + for a parallel garbage collection. We yield more than we spin in this case. + +In several places in the runtime we must take a lock on a closure. To do this, +we replace it's info table with stg_WHITEHOLE_info, spinning if it is already +a white-hole. Sometimes we yieldThread() if we spin too long, sometimes we +don't. We count these white-hole spins and include them in the SpinLocks table. +If a particular loop does not yield, we put "n/a" in the table. They are named +for the function that has the spinning loop except that several loops in the +garbage collector accumulate into whitehole_gc. +TODO: Should these counters be more or less granular? + +white-hole spin counters: +* whitehole_gc +* whitehole_lockClosure +* whitehole_executeMessage +* whitehole_threadPaused + + +We count the number of calls of several functions in the parallel garbage +collector. + +Parallel garbage collector counters: +* any_work: + A cheap function called whenever a gc_thread is ready for work. Does + not do any work. +* no_work: + Incremented whenever any_work finds no work. +* scav_find_work: + Called to do work when any_work return true. + +*/ + /* ----------------------------------------------------------------------------- stat_describe_gens diff --git a/rts/Stats.h b/rts/Stats.h index 5d9cf04fa7..1c56344f81 100644 --- a/rts/Stats.h +++ b/rts/Stats.h @@ -30,7 +30,10 @@ void stat_startGCSync(struct gc_thread_ *_gct); void stat_startGC(Capability *cap, struct gc_thread_ *_gct); void stat_endGC (Capability *cap, struct gc_thread_ *_gct, W_ live, W_ copied, W_ slop, uint32_t gen, uint32_t n_gc_threads, - W_ par_max_copied, W_ par_balanced_copied); + W_ par_max_copied, W_ par_balanced_copied, + W_ gc_spin_spin, W_ gc_spin_yield, W_ mut_spin_spin, + W_ mut_spin_yield, W_ any_work, W_ no_work, + W_ scav_find_work); #if defined(PROFILING) void stat_startRP(void); diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index 361989d0d2..595d3ce6c2 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -375,11 +375,21 @@ loop: // spin until the WHITEHOLE is updated info = StgHeader_info(node); if (info == stg_WHITEHOLE_info) { +#if defined(PROF_SPIN) + W_[whitehole_lockClosure_spin] = + W_[whitehole_lockClosure_spin] + 1; +#endif i = i + 1; if (i == SPIN_COUNT) { i = 0; +#if defined(PROF_SPIN) + W_[whitehole_lockClosure_yield] = + W_[whitehole_lockClosure_yield] + 1; +#endif ccall yieldThread(); } + // TODO: We should busy_wait_nop() here, but that's not currently + // defined in CMM. goto loop; } jump %ENTRY_CODE(info) (node); diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c index 7ade0a64c5..3f7bddeb79 100644 --- a/rts/ThreadPaused.c +++ b/rts/ThreadPaused.c @@ -329,6 +329,10 @@ threadPaused(Capability *cap, StgTSO *tso) if (cur_bh_info != bh_info) { bh_info = cur_bh_info; +#if defined(PROF_SPIN) + ++whitehole_threadPaused_spin; +#endif + busy_wait_nop(); goto retry; } #endif diff --git a/rts/ThreadPaused.h b/rts/ThreadPaused.h index 4d762f9aed..ee25189c20 100644 --- a/rts/ThreadPaused.h +++ b/rts/ThreadPaused.h @@ -8,4 +8,12 @@ #pragma once +#include "BeginPrivate.h" + RTS_PRIVATE void threadPaused ( Capability *cap, StgTSO * ); + +#include "EndPrivate.h" + +#if defined(THREADED_RTS) && defined(PROF_SPIN) +extern volatile StgWord64 whitehole_threadPaused_spin; +#endif diff --git a/rts/Threads.c b/rts/Threads.c index b76917773a..be6962246d 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -165,19 +165,8 @@ rts_getThreadId(StgPtr tso) } /* --------------------------------------------------------------------------- - * Getting & setting the thread allocation limit + * Enabling and disabling the thread allocation limit * ------------------------------------------------------------------------ */ -HsInt64 rts_getThreadAllocationCounter(StgPtr tso) -{ - // NB. doesn't take into account allocation in the current nursery - // block, so it might be off by up to 4k. - return PK_Int64((W_*)&(((StgTSO *)tso)->alloc_limit)); -} - -void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i) -{ - ASSIGN_Int64((W_*)&(((StgTSO *)tso)->alloc_limit), i); -} void rts_enableThreadAllocationLimit(StgPtr tso) { diff --git a/rts/posix/itimer/Pthread.c b/rts/posix/itimer/Pthread.c index e15ac2521e..c45d57e6dc 100644 --- a/rts/posix/itimer/Pthread.c +++ b/rts/posix/itimer/Pthread.c @@ -10,7 +10,7 @@ * We use a realtime timer by default. I found this much more * reliable than a CPU timer: * - * Experiments with different frequences: using + * Experiments with different frequencies: using * CLOCK_REALTIME/CLOCK_MONOTONIC on Linux 2.6.32, * 1000us has <1% impact on runtime * 100us has ~2% impact on runtime diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index bb54c7efc3..27f280665e 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -1058,9 +1058,14 @@ selector_chain: // In threaded mode, we'll use WHITEHOLE to lock the selector // thunk while we evaluate it. { - do { + while(true) { info_ptr = xchg((StgPtr)&p->header.info, (W_)&stg_WHITEHOLE_info); - } while (info_ptr == (W_)&stg_WHITEHOLE_info); + if (info_ptr != (W_)&stg_WHITEHOLE_info) { break; } +#if defined(PROF_SPIN) + ++whitehole_gc_spin; +#endif + busy_wait_nop(); + } // make sure someone else didn't get here first... if (IS_FORWARDING_PTR(info_ptr) || diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 54797ba0f0..d61ca41a6b 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -139,6 +139,9 @@ uint32_t n_gc_threads; static long copied; // *words* copied & scavenged during this GC #if defined(PROF_SPIN) && defined(THREADED_RTS) +// spin and yield counts for the quasi-SpinLock in waitForGcThreads +volatile StgWord64 waitForGcThreads_spin = 0; +volatile StgWord64 waitForGcThreads_yield = 0; volatile StgWord64 whitehole_gc_spin = 0; #endif @@ -198,7 +201,9 @@ GarbageCollect (uint32_t collect_gen, { bdescr *bd; generation *gen; - StgWord live_blocks, live_words, par_max_copied, par_balanced_copied; + StgWord live_blocks, live_words, par_max_copied, par_balanced_copied, + gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield, + any_work, no_work, scav_find_work; #if defined(THREADED_RTS) gc_thread *saved_gct; #endif @@ -471,32 +476,53 @@ GarbageCollect (uint32_t collect_gen, copied = 0; par_max_copied = 0; par_balanced_copied = 0; + gc_spin_spin = 0; + gc_spin_yield = 0; + mut_spin_spin = 0; + mut_spin_yield = 0; + any_work = 0; + no_work = 0; + scav_find_work = 0; { uint32_t i; uint64_t par_balanced_copied_acc = 0; + const gc_thread* thread; for (i=0; i < n_gc_threads; i++) { copied += gc_threads[i]->copied; } for (i=0; i < n_gc_threads; i++) { + thread = gc_threads[i]; if (n_gc_threads > 1) { debugTrace(DEBUG_gc,"thread %d:", i); - debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_)); - debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_)); - debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work); - debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work); - debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work); + debugTrace(DEBUG_gc," copied %ld", + thread->copied * sizeof(W_)); + debugTrace(DEBUG_gc," scanned %ld", + thread->scanned * sizeof(W_)); + debugTrace(DEBUG_gc," any_work %ld", + thread->any_work); + debugTrace(DEBUG_gc," no_work %ld", + thread->no_work); + debugTrace(DEBUG_gc," scav_find_work %ld", + thread->scav_find_work); + +#if defined(THREADED_RTS) && defined(PROF_SPIN) + gc_spin_spin += thread->gc_spin.spin; + gc_spin_yield += thread->gc_spin.yield; + mut_spin_spin += thread->mut_spin.spin; + mut_spin_yield += thread->mut_spin.yield; +#endif + + any_work += thread->any_work; + no_work += thread->no_work; + scav_find_work += thread->scav_find_work; + + par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied); + par_balanced_copied_acc += + stg_min(n_gc_threads * gc_threads[i]->copied, copied); } - par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied); - par_balanced_copied_acc += - stg_min(n_gc_threads * gc_threads[i]->copied, copied); - } - if (n_gc_threads == 1) { - par_max_copied = 0; - par_balanced_copied = 0; } - else - { + if (n_gc_threads > 1) { // See Note [Work Balance] for an explanation of this computation par_balanced_copied = (par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) / @@ -834,7 +860,9 @@ GarbageCollect (uint32_t collect_gen, // ok, GC over: tell the stats department what happened. stat_endGC(cap, gct, live_words, copied, live_blocks * BLOCK_SIZE_W - live_words /* slop */, - N, n_gc_threads, par_max_copied, par_balanced_copied); + N, n_gc_threads, par_max_copied, par_balanced_copied, + gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield, + any_work, no_work, scav_find_work); #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { @@ -1186,6 +1214,9 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[]) } } if (!retry) break; +#if defined(PROF_SPIN) + waitForGcThreads_yield++; +#endif yieldThread(); } @@ -1196,6 +1227,14 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[]) rtsConfig.longGCSync(cap->no, t2 - t0); t1 = t2; } + if (retry) { +#if defined(PROF_SPIN) + // This is a bit strange, we'll get more yields than spins. + // I guess that means it's not a spin-lock at all, but these + // numbers are still useful (I think). + waitForGcThreads_spin++; +#endif + } } if (RtsFlags.GcFlags.longGCSync != 0 && diff --git a/rts/sm/GC.h b/rts/sm/GC.h index 78f054931a..7fce87edd4 100644 --- a/rts/sm/GC.h +++ b/rts/sm/GC.h @@ -47,6 +47,8 @@ extern uint32_t mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS, #if defined(PROF_SPIN) && defined(THREADED_RTS) extern volatile StgWord64 whitehole_gc_spin; +extern volatile StgWord64 waitForGcThreads_spin; +extern volatile StgWord64 waitForGcThreads_yield; #endif void gcWorkerThread (Capability *cap); diff --git a/testsuite/tests/cmm/Makefile b/testsuite/tests/cmm/Makefile new file mode 100644 index 0000000000..9a36a1c5fe --- /dev/null +++ b/testsuite/tests/cmm/Makefile @@ -0,0 +1,3 @@ +TOP=../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk diff --git a/testsuite/tests/cmm/should_run/HooplPostorder.hs b/testsuite/tests/cmm/should_run/HooplPostorder.hs new file mode 100644 index 0000000000..d7a8bbaef1 --- /dev/null +++ b/testsuite/tests/cmm/should_run/HooplPostorder.hs @@ -0,0 +1,69 @@ +module Main where + +import Hoopl.Block +import Hoopl.Collections +import Hoopl.Graph +import Hoopl.Label + +import Data.Maybe + +data TestBlock e x = TB { label_ :: Label, successors_ :: [Label] } + deriving (Eq, Show) + +instance NonLocal TestBlock where + entryLabel = label_ + successors = successors_ + +-- Test the classical diamond shape graph. +test_diamond :: LabelMap (TestBlock C C) +test_diamond = mapFromList $ map (\b -> (label_ b, b)) blocks + where + blocks = + [ TB (mkHooplLabel 1) [mkHooplLabel 2, mkHooplLabel 3] + , TB (mkHooplLabel 2) [mkHooplLabel 4] + , TB (mkHooplLabel 3) [mkHooplLabel 4] + , TB (mkHooplLabel 4) [] + ] + +-- Test that the backedge doesn't change anything. +test_diamond_backedge :: LabelMap (TestBlock C C) +test_diamond_backedge = mapFromList $ map (\b -> (label_ b, b)) blocks + where + blocks = + [ TB (mkHooplLabel 1) [mkHooplLabel 2, mkHooplLabel 3] + , TB (mkHooplLabel 2) [mkHooplLabel 4] + , TB (mkHooplLabel 3) [mkHooplLabel 4] + , TB (mkHooplLabel 4) [mkHooplLabel 1] + ] + +-- Test that the "bypass" edge from 1 to 4 doesn't change anything. +test_3 :: LabelMap (TestBlock C C) +test_3 = mapFromList $ map (\b -> (label_ b, b)) blocks + where + blocks = + [ TB (mkHooplLabel 1) [mkHooplLabel 2, mkHooplLabel 4] + , TB (mkHooplLabel 2) [mkHooplLabel 4] + , TB (mkHooplLabel 4) [] + ] + +-- Like test_3 but with different order of successors for the entry point. +test_4 :: LabelMap (TestBlock C C) +test_4 = mapFromList $ map (\b -> (label_ b, b)) blocks + where + blocks = + [ TB (mkHooplLabel 1) [mkHooplLabel 4, mkHooplLabel 2] + , TB (mkHooplLabel 2) [mkHooplLabel 4] + , TB (mkHooplLabel 4) [] + ] + + +main :: IO () +main = do + let result = revPostorderFrom test_diamond (mkHooplLabel 1) + putStrLn (show $ map label_ result) + let result = revPostorderFrom test_diamond_backedge (mkHooplLabel 1) + putStrLn (show $ map label_ result) + let result = revPostorderFrom test_3 (mkHooplLabel 1) + putStrLn (show $ map label_ result) + let result = revPostorderFrom test_4 (mkHooplLabel 1) + putStrLn (show $ map label_ result) diff --git a/testsuite/tests/cmm/should_run/HooplPostorder.stdout b/testsuite/tests/cmm/should_run/HooplPostorder.stdout new file mode 100644 index 0000000000..7e704c224b --- /dev/null +++ b/testsuite/tests/cmm/should_run/HooplPostorder.stdout @@ -0,0 +1,4 @@ +[L1,L3,L2,L4] +[L1,L3,L2,L4] +[L1,L2,L4] +[L1,L2,L4] diff --git a/testsuite/tests/cmm/should_run/Makefile b/testsuite/tests/cmm/should_run/Makefile new file mode 100644 index 0000000000..9101fbd40a --- /dev/null +++ b/testsuite/tests/cmm/should_run/Makefile @@ -0,0 +1,3 @@ +TOP=../../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk diff --git a/testsuite/tests/cmm/should_run/all.T b/testsuite/tests/cmm/should_run/all.T new file mode 100644 index 0000000000..00838075cf --- /dev/null +++ b/testsuite/tests/cmm/should_run/all.T @@ -0,0 +1,4 @@ +test('HooplPostorder', + extra_run_opts('"' + config.libdir + '"'), + compile_and_run, + ['-package ghc']) diff --git a/testsuite/tests/codeGen/should_run/T5129.hs b/testsuite/tests/codeGen/should_run/T5129.hs index 6bc1912754..2808f54eae 100644 --- a/testsuite/tests/codeGen/should_run/T5129.hs +++ b/testsuite/tests/codeGen/should_run/T5129.hs @@ -10,12 +10,13 @@ throwIfNegative n | n < 0 = error "negative" data HUnitFailure = HUnitFailure String deriving (Show,Typeable) instance Exception HUnitFailure +assertFailure :: String -> a -- Not an IO function! assertFailure msg = E.throw (HUnitFailure msg) -case_negative = - handleJust errorCalls (const $ return ()) $ do - evaluate $ throwIfNegative (-1) - assertFailure "must throw when given a negative number" +main :: IO () +main = + handleJust errorCalls (const (return ())) (do + evaluate (throwIfNegative (-1)) -- Pure expression evaluated in IO + assertFailure "must throw when given a negative number") where errorCalls (ErrorCall _) = Just () -main = case_negative diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index 9403c4b1e1..55386e402b 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -90,7 +90,13 @@ test('T3677', extra_run_opts('+RTS -K8k -RTS'), compile_and_run, ['']) test('T4441', normal, compile_and_run, ['']) test('T5149', omit_ways(['ghci']), multi_compile_and_run, ['T5149', [('T5149_cmm.cmm', '')], '']) -test('T5129', normal, compile_and_run, ['']) + +test('T5129', + # The bug is in simplifier when run with -O1 and above, so only run it + # optimised, using any backend. + only_ways(['optasm']), + compile_and_run, ['']) + test('T5626', exit_code(1), compile_and_run, ['']) test('T5747', when(arch('i386'), extra_hc_opts('-msse2')), compile_and_run, ['-O2']) test('T5785', normal, compile_and_run, ['']) diff --git a/testsuite/tests/rename/should_compile/T14881.hs b/testsuite/tests/rename/should_compile/T14881.hs new file mode 100644 index 0000000000..c1b955c0f2 --- /dev/null +++ b/testsuite/tests/rename/should_compile/T14881.hs @@ -0,0 +1,5 @@ +module T14881 where + +import qualified T14881Aux as Aux (L(Cons), x, tail, adjust, length) + +x = Aux.Cons diff --git a/testsuite/tests/rename/should_compile/T14881.stderr b/testsuite/tests/rename/should_compile/T14881.stderr new file mode 100644 index 0000000000..bfb6ca913f --- /dev/null +++ b/testsuite/tests/rename/should_compile/T14881.stderr @@ -0,0 +1,6 @@ +[1 of 2] Compiling T14881Aux ( T14881Aux.hs, T14881Aux.o ) +[2 of 2] Compiling T14881 ( T14881.hs, T14881.o ) + +T14881.hs:3:1: warning: [-Wunused-imports (in -Wextra)] + The qualified import of ‘adjust, length, L(tail), L(x)’ + from module ‘T14881Aux’ is redundant diff --git a/testsuite/tests/rename/should_compile/T14881Aux.hs b/testsuite/tests/rename/should_compile/T14881Aux.hs new file mode 100644 index 0000000000..13b8f31d04 --- /dev/null +++ b/testsuite/tests/rename/should_compile/T14881Aux.hs @@ -0,0 +1,13 @@ +module T14881Aux where + +-- unambiguous function name. +adjust :: () +adjust = undefined + +-- ambiguous function name. +length :: () +length = undefined + +data L = Cons { x :: Int -- unambiguous field selector + , tail :: [Int] -- ambiguous field selector + } diff --git a/testsuite/tests/rename/should_compile/all.T b/testsuite/tests/rename/should_compile/all.T index 4eb584febe..80bcb092e0 100644 --- a/testsuite/tests/rename/should_compile/all.T +++ b/testsuite/tests/rename/should_compile/all.T @@ -153,3 +153,4 @@ test('T12548', normal, compile, ['']) test('T13132', normal, compile, ['']) test('T13646', normal, compile, ['']) test('LookupSub', [], multimod_compile, ['LookupSub', '-v0']) +test('T14881', [], multimod_compile, ['T14881', '-W']) diff --git a/testsuite/tests/rts/InternalCounters.stdout b/testsuite/tests/rts/InternalCounters.stdout new file mode 100644 index 0000000000..d764d7bc19 --- /dev/null +++ b/testsuite/tests/rts/InternalCounters.stdout @@ -0,0 +1 @@ +Internal Counters:
\ No newline at end of file diff --git a/testsuite/tests/rts/Makefile b/testsuite/tests/rts/Makefile index ded3be1b3b..630508542d 100644 --- a/testsuite/tests/rts/Makefile +++ b/testsuite/tests/rts/Makefile @@ -48,13 +48,13 @@ T5423: .PHONY: T9405 T9405: @'$(TEST_HC)' $(TEST_HC_OPTS) -ticky -rtsopts T9405.hs; \ - ./T9405 +RTS -rT9405.ticky & \ - sleep 0.2; \ - kill -2 $$!; \ - wait $$!; \ - [ -e T9405.ticky ] || echo "Error: Ticky profile doesn't exist"; \ - [ -s T9405.ticky ] || echo "Error: Ticky profile is empty"; \ - echo Ticky-Ticky; + ./T9405 +RTS -rT9405.ticky & \ + sleep 0.2; \ + kill -2 $$!; \ + wait $$!; \ + [ -e T9405.ticky ] || echo "Error: Ticky profile doesn't exist"; \ + [ -s T9405.ticky ] || echo "Error: Ticky profile is empty"; \ + echo Ticky-Ticky; # Naming convention: 'T5423_' obj-way '_' obj-src # obj-way ::= v | dyn @@ -178,3 +178,8 @@ T12497: .PHONY: T14695 T14695: echo ":quit" | LD_LIBRARY_PATH="foo:" "$(TEST_HC)" $(filter-out -rtsopts, $(TEST_HC_OPTS_INTERACTIVE)) + +.PHONY: InternalCounters +InternalCounters: + "$(TEST_HC)" +RTS -s --internal-counters -RTS 2>&1 | grep "Internal Counters" + -"$(TEST_HC)" +RTS -s -RTS 2>&1 | grep "Internal Counters" diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T index 5000a914db..ffbd05c745 100644 --- a/testsuite/tests/rts/all.T +++ b/testsuite/tests/rts/all.T @@ -390,3 +390,11 @@ test('T14702', [ ignore_stdout , compile_and_run, ['']) test('T14900', normal, compile_and_run, ['-package ghc-compact']) +test('InternalCounters', normal, run_command, + ['$MAKE -s --no-print-directory InternalCounters']) +test('alloccounter1', normal, compile_and_run, + [ + # avoid allocating stack chunks, which counts as + # allocation and messes up the results: + '-with-rtsopts=-k1m' + ]) diff --git a/testsuite/tests/rts/alloccounter1.hs b/testsuite/tests/rts/alloccounter1.hs new file mode 100644 index 0000000000..4b81896d2c --- /dev/null +++ b/testsuite/tests/rts/alloccounter1.hs @@ -0,0 +1,19 @@ +module Main where + +import Control.Exception +import Control.Monad +import Data.List +import System.Mem + +main = do + let + testAlloc n = do + let start = 999999 + setAllocationCounter start + evaluate (last [1..n]) + c <- getAllocationCounter + -- print (start - c) + return (start - c) + results <- forM [1..1000] testAlloc + print (sort results == results) + -- results better be in ascending order diff --git a/testsuite/tests/rts/alloccounter1.stdout b/testsuite/tests/rts/alloccounter1.stdout new file mode 100644 index 0000000000..0ca95142bb --- /dev/null +++ b/testsuite/tests/rts/alloccounter1.stdout @@ -0,0 +1 @@ +True diff --git a/testsuite/tests/rts/flags/Makefile b/testsuite/tests/rts/flags/Makefile index 61900477f9..9101fbd40a 100644 --- a/testsuite/tests/rts/flags/Makefile +++ b/testsuite/tests/rts/flags/Makefile @@ -1,6 +1,3 @@ TOP=../../.. include $(TOP)/mk/boilerplate.mk include $(TOP)/mk/test.mk - -T1791: - '$(TEST_HC)' T1791.hs -o T1791 -O -rtsopts diff --git a/testsuite/tests/rts/flags/T12870.hs b/testsuite/tests/rts/flags/T12870.hs index 8d536d58d6..69086cde2b 100644 --- a/testsuite/tests/rts/flags/T12870.hs +++ b/testsuite/tests/rts/flags/T12870.hs @@ -1,4 +1,5 @@ -module T12870 where +--We check if RTS arguments are properly filtered/passed along +--by outputting them to stdout. import System.Environment diff --git a/testsuite/tests/rts/flags/T12870g.hs b/testsuite/tests/rts/flags/T12870g.hs index e409349827..3efd633ddd 100644 --- a/testsuite/tests/rts/flags/T12870g.hs +++ b/testsuite/tests/rts/flags/T12870g.hs @@ -1,4 +1,5 @@ -module T12870g where +--We check the generation count as a way to verify an RTS argument +--was actually parsed and accepted by the RTS. import GHC.RTS.Flags (getGCFlags, generations) diff --git a/testsuite/tests/rts/flags/all.T b/testsuite/tests/rts/flags/all.T index 33a28e500a..548fcd8631 100644 --- a/testsuite/tests/rts/flags/all.T +++ b/testsuite/tests/rts/flags/all.T @@ -1,44 +1,51 @@ #Standard handling of RTS arguments test('T12870a', - [extra_run_opts('+RTS -G2 -RTS arg1 --RTS +RTS arg2'), extra_files(['T12870.hs'])], + [extra_run_opts('+RTS -G2 -RTS arg1 --RTS +RTS arg2'), extra_files(['T12870.hs']), + omit_ways(['ghci'])], multimod_compile_and_run, - ['T12870', '-rtsopts -main-is T12870']) + ['T12870', '-rtsopts']) test('T12870b', [extra_run_opts('+RTS -G2 -RTS arg1 --RTS +RTS arg2'), extra_files(['T12870.hs']), - exit_code(1), ignore_stderr], + exit_code(1), ignore_stderr, omit_ways(['ghci'])], multimod_compile_and_run, - ['T12870', '-rtsopts=none -main-is T12870']) + ['T12870', '-rtsopts=none']) test('T12870c', [extra_run_opts('+RTS -G2 -RTS arg1 --RTS +RTS arg2'), extra_files(['T12870.hs']), - exit_code(1)], + exit_code(1), omit_ways(['ghci'])], multimod_compile_and_run, - ['T12870', '-rtsopts=some -main-is T12870']) + ['T12870', '-rtsopts=some']) test('T12870d', - [extra_run_opts('+RTS -G2 -RTS arg1 --RTS +RTS arg2'), extra_files(['T12870.hs'])], + [extra_run_opts('+RTS -G2 -RTS arg1 --RTS +RTS arg2'), extra_files(['T12870.hs']), + omit_ways(['ghci'])], multimod_compile_and_run, - ['T12870', '-main-is T12870']) + ['T12870', '']) #RTS options should be passed along to the program test('T12870e', - [extra_run_opts('+RTS -G2 -RTS arg1 --RTS +RTS arg2'), extra_files(['T12870.hs'])], + [extra_run_opts('+RTS -G2 -RTS arg1 --RTS +RTS arg2'), extra_files(['T12870.hs']), + omit_ways(['ghci', 'threaded2'])], multimod_compile_and_run, - ['T12870', '-rtsopts=ignore -main-is T12870']) + ['T12870', '-rtsopts=ignore']) + test('T12870f', - [extra_run_opts('+RTS -G2 -RTS arg1 --RTS +RTS arg2'), extra_files(['T12870.hs'])], + [extra_run_opts('+RTS -G2 -RTS arg1 --RTS +RTS arg2'), extra_files(['T12870.hs']), + omit_ways(['ghci', 'threaded2'])], multimod_compile_and_run, - ['T12870', '-rtsopts=ignoreAll -main-is T12870']) + ['T12870', '-rtsopts=ignoreAll']) #Check handling of env variables test('T12870g', - [extra_files(['T12870g.hs']), cmd_prefix('GHCRTS=-G7 '), extra_files(['T12870g.hs'])], + [extra_files(['T12870g.hs']), cmd_prefix('GHCRTS=-G7 '), extra_files(['T12870g.hs']), + omit_ways(['ghci'])], multimod_compile_and_run, - ['T12870g', '-rtsopts -main-is T12870g -with-rtsopts="-G3"']) + ['T12870g', '-rtsopts -with-rtsopts="-G3"']) test('T12870h', - [extra_files(['T12870g.hs']), cmd_prefix('GHCRTS=-G7 '), extra_files(['T12870g.hs'])], + [extra_files(['T12870g.hs']), cmd_prefix('GHCRTS=-G7 '), extra_files(['T12870g.hs']), + omit_ways(['ghci'])], multimod_compile_and_run, - ['T12870g', '-rtsopts=ignoreAll -main-is T12870g -with-rtsopts="-G3"']) + ['T12870g', '-rtsopts=ignoreAll -with-rtsopts="-G3"']) diff --git a/testsuite/tests/th/T13776.hs b/testsuite/tests/th/T13776.hs new file mode 100644 index 0000000000..6082825592 --- /dev/null +++ b/testsuite/tests/th/T13776.hs @@ -0,0 +1,23 @@ +{-# LANGUAGE TemplateHaskell #-} + +module T13776 where + +import Language.Haskell.TH + +spliceTy1 :: $(conT ''(,) `appT` conT ''Int `appT` conT ''Int) +spliceTy1 = (1,2) + +spliceTy2 :: $(conT ''[] `appT` conT ''Int) +spliceTy2 = [] + +spliceExp1 :: (Int, Int) +spliceExp1 = $(conE '(,) `appE` litE (integerL 1) `appE` litE (integerL 1)) + +spliceExp2 :: [Int] +spliceExp2 = $(conE '[]) + +splicePat1 :: (Int, Int) -> () +splicePat1 $(conP '(,) [litP (integerL 1), litP (integerL 1)]) = () + +splicePat2 :: [Int] -> () +splicePat2 $(conP '[] []) = () diff --git a/testsuite/tests/th/T13776.stderr b/testsuite/tests/th/T13776.stderr new file mode 100644 index 0000000000..485dc64a28 --- /dev/null +++ b/testsuite/tests/th/T13776.stderr @@ -0,0 +1,14 @@ +T13776.hs:10:16-42: Splicing type + conT ''[] `appT` conT ''Int ======> [] Int +T13776.hs:7:16-61: Splicing type + conT ''(,) `appT` conT ''Int `appT` conT ''Int ======> (,) Int Int +T13776.hs:14:16-74: Splicing expression + conE '(,) `appE` litE (integerL 1) `appE` litE (integerL 1) + ======> + ((,) 1) 1 +T13776.hs:17:16-23: Splicing expression + conE '[] ======> [] +T13776.hs:20:14-61: Splicing pattern + conP '(,) [litP (integerL 1), litP (integerL 1)] ======> (,) 1 1 +T13776.hs:23:14-24: Splicing pattern + conP '[] [] ======> [] diff --git a/testsuite/tests/th/T3319.stderr b/testsuite/tests/th/T3319.stderr index 44ec90ffe7..b88b10f90f 100644 --- a/testsuite/tests/th/T3319.stderr +++ b/testsuite/tests/th/T3319.stderr @@ -4,4 +4,4 @@ T3319.hs:8:3-93: Splicing declarations (ImportF CCall Unsafe "&" (mkName "foo") (AppT (ConT ''Ptr) (ConT ''())))] ======> - foreign import ccall unsafe "&" foo :: Ptr GHC.Tuple.() + foreign import ccall unsafe "&" foo :: Ptr () diff --git a/testsuite/tests/th/T5700.stderr b/testsuite/tests/th/T5700.stderr index 729a36604f..3564b8cb2a 100644 --- a/testsuite/tests/th/T5700.stderr +++ b/testsuite/tests/th/T5700.stderr @@ -3,4 +3,4 @@ T5700.hs:8:3-9: Splicing declarations ======> instance C D where {-# INLINE inlinable #-} - inlinable _ = GHC.Tuple.() + inlinable _ = () diff --git a/testsuite/tests/th/TH_foreignInterruptible.stderr b/testsuite/tests/th/TH_foreignInterruptible.stderr index 7131eeee71..4afc38aab1 100644 --- a/testsuite/tests/th/TH_foreignInterruptible.stderr +++ b/testsuite/tests/th/TH_foreignInterruptible.stderr @@ -8,4 +8,4 @@ TH_foreignInterruptible.hs:8:3-100: Splicing declarations (mkName "foo") (AppT (ConT ''Ptr) (ConT ''())))] ======> - foreign import ccall interruptible "&" foo :: Ptr GHC.Tuple.() + foreign import ccall interruptible "&" foo :: Ptr () diff --git a/testsuite/tests/th/all.T b/testsuite/tests/th/all.T index e9f2838492..b51059ca1c 100644 --- a/testsuite/tests/th/all.T +++ b/testsuite/tests/th/all.T @@ -403,5 +403,6 @@ test('T14838', [], multimod_compile, ['T14838.hs', '-v0 -Wincomplete-patterns ' + config.ghc_th_way_flags]) test('T14817', normal, compile, ['-v0 -ddump-splices -dsuppress-uniques']) test('T14843', normal, compile, ['-v0']) +test('T13776', normal, compile, ['-ddump-splices -v0']) test('T14888', normal, compile, ['-v0 -ddump-splices -dsuppress-uniques ' + config.ghc_th_way_flags]) diff --git a/testsuite/tests/typecheck/should_compile/Makefile b/testsuite/tests/typecheck/should_compile/Makefile index c3065f3cd2..ac18b2f4b3 100644 --- a/testsuite/tests/typecheck/should_compile/Makefile +++ b/testsuite/tests/typecheck/should_compile/Makefile @@ -70,3 +70,8 @@ T13585: '$(TEST_HC)' $(TEST_HC_OPTS) -c T13585a.hs -O '$(TEST_HC)' $(TEST_HC_OPTS) -c T13585b.hs -O '$(TEST_HC)' $(TEST_HC_OPTS) -c T13585.hs -O + +T14934: + $(RM) -f T14934a.o T14934a.hi T14934.o T14934.hi + '$(TEST_HC)' $(TEST_HC_OPTS) -c T14934a.hs -O + '$(TEST_HC)' $(TEST_HC_OPTS) -c T14934.hs -O diff --git a/testsuite/tests/typecheck/should_compile/T14934.hs b/testsuite/tests/typecheck/should_compile/T14934.hs new file mode 100644 index 0000000000..581e93186e --- /dev/null +++ b/testsuite/tests/typecheck/should_compile/T14934.hs @@ -0,0 +1,9 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE TypeOperators #-} +module T14934 where + +import T14934a +import GHC.TypeLits + +g :: Foo (1 - 0) +g = f MkFoo1 diff --git a/testsuite/tests/typecheck/should_compile/T14934a.hs b/testsuite/tests/typecheck/should_compile/T14934a.hs new file mode 100644 index 0000000000..3ba59ff976 --- /dev/null +++ b/testsuite/tests/typecheck/should_compile/T14934a.hs @@ -0,0 +1,14 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE TypeOperators #-} +module T14934a where + +import GHC.TypeLits + +data Foo :: Nat -> * where + MkFoo0 :: Foo 0 + MkFoo1 :: Foo 1 + +f :: Foo (1 - 0) -> Foo 1 +f x = x diff --git a/testsuite/tests/typecheck/should_compile/all.T b/testsuite/tests/typecheck/should_compile/all.T index 2d3c3cd118..9a2ce73263 100644 --- a/testsuite/tests/typecheck/should_compile/all.T +++ b/testsuite/tests/typecheck/should_compile/all.T @@ -597,3 +597,5 @@ test('T14732', normal, compile, ['']) test('T14774', [], run_command, ['$MAKE -s --no-print-directory T14774']) test('T14763', normal, compile, ['']) test('T14811', normal, compile, ['']) +test('T14934', [extra_files(['T14934.hs', 'T14934a.hs'])], run_command, + ['$MAKE -s --no-print-directory T14934']) diff --git a/testsuite/tests/typecheck/should_fail/T14048a.hs b/testsuite/tests/typecheck/should_fail/T14048a.hs new file mode 100644 index 0000000000..c717127df8 --- /dev/null +++ b/testsuite/tests/typecheck/should_fail/T14048a.hs @@ -0,0 +1,6 @@ +{-# LANGUAGE ConstraintKinds #-} +module T14048a where + +import Data.Kind + +data Foo :: Constraint diff --git a/testsuite/tests/typecheck/should_fail/T14048a.stderr b/testsuite/tests/typecheck/should_fail/T14048a.stderr new file mode 100644 index 0000000000..48a91c7525 --- /dev/null +++ b/testsuite/tests/typecheck/should_fail/T14048a.stderr @@ -0,0 +1,5 @@ + +T14048a.hs:6:1: error: + • Kind signature on data type declaration has non-* return kind + Constraint + • In the data declaration for ‘Foo’ diff --git a/testsuite/tests/typecheck/should_fail/T14048b.hs b/testsuite/tests/typecheck/should_fail/T14048b.hs new file mode 100644 index 0000000000..d2f6f74583 --- /dev/null +++ b/testsuite/tests/typecheck/should_fail/T14048b.hs @@ -0,0 +1,7 @@ +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE TypeFamilies #-} +module T14048b where + +import Data.Kind + +data family Foo :: Constraint diff --git a/testsuite/tests/typecheck/should_fail/T14048b.stderr b/testsuite/tests/typecheck/should_fail/T14048b.stderr new file mode 100644 index 0000000000..fe78d9f7f5 --- /dev/null +++ b/testsuite/tests/typecheck/should_fail/T14048b.stderr @@ -0,0 +1,6 @@ + +T14048b.hs:7:1: error: + • Kind signature on data type declaration has non-* + and non-variable return kind + Constraint + • In the data family declaration for ‘Foo’ diff --git a/testsuite/tests/typecheck/should_fail/T14048c.hs b/testsuite/tests/typecheck/should_fail/T14048c.hs new file mode 100644 index 0000000000..e81e454d31 --- /dev/null +++ b/testsuite/tests/typecheck/should_fail/T14048c.hs @@ -0,0 +1,9 @@ +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE TypeFamilies #-} +module T14048c where + +import Data.Kind + +data family Foo :: k +data instance Foo :: Constraint diff --git a/testsuite/tests/typecheck/should_fail/T14048c.stderr b/testsuite/tests/typecheck/should_fail/T14048c.stderr new file mode 100644 index 0000000000..7e83d1924c --- /dev/null +++ b/testsuite/tests/typecheck/should_fail/T14048c.stderr @@ -0,0 +1,5 @@ + +T14048c.hs:9:1: error: + • Kind signature on data type declaration has non-* return kind + Constraint + • In the data instance declaration for ‘Foo’ diff --git a/testsuite/tests/typecheck/should_fail/all.T b/testsuite/tests/typecheck/should_fail/all.T index 5377fefeb7..f01cbe1da7 100644 --- a/testsuite/tests/typecheck/should_fail/all.T +++ b/testsuite/tests/typecheck/should_fail/all.T @@ -457,6 +457,9 @@ test('T14000', normal, compile_fail, ['']) test('T14055', normal, compile_fail, ['']) test('T13909', normal, compile_fail, ['']) test('T13929', normal, compile_fail, ['']) +test('T14048a', normal, compile_fail, ['']) +test('T14048b', normal, compile_fail, ['']) +test('T14048c', normal, compile_fail, ['']) test('T14232', normal, compile_fail, ['']) test('T14325', normal, compile_fail, ['']) test('T14350', normal, compile_fail, ['']) diff --git a/utils/llvm-targets/gen-data-layout.sh b/utils/llvm-targets/gen-data-layout.sh index 834a978809..4f0b24076a 100755 --- a/utils/llvm-targets/gen-data-layout.sh +++ b/utils/llvm-targets/gen-data-layout.sh @@ -16,17 +16,28 @@ # Add missing targets to the list below to have them included in # llvm-targets file. -# Target sets -WINDOWS_x86="i386-unknown-windows i686-unknown-windows x86_64-unknown-windows" -LINUX_ARM="arm-unknown-linux-gnueabihf armv6-unknown-linux-gnueabihf armv7-unknown-linux-gnueabihf aarch64-unknown-linux-gnu aarch64-unknown-linux armv7a-unknown-linux-gnueabi" -LINUX_x86="i386-unknown-linux-gnu i386-unknown-linux x86_64-unknown-linux-gnu x86_64-unknown-linux" -ANDROID="armv7-unknown-linux-androideabi aarch64-unknown-linux-android" -QNX="arm-unknown-nto-qnx-eabi" -MACOS="i386-apple-darwin x86_64-apple-darwin" -IOS="armv7-apple-ios arm64-apple-ios i386-apple-ios x86_64-apple-ios" +# Target sets for which to generate the llvm-targets file +TARGETS=( + # Windows x86 + "i386-unknown-windows" "i686-unknown-windows" "x86_64-unknown-windows" -# targets for which to generate the llvm-targets file -TARGETS="${WINDOWS_x86} ${LINUX_ARM} ${LINUX_x86} ${ANDROID} ${QNX} ${MACOS} ${IOS}" + # Linux ARM + "arm-unknown-linux-gnueabihf" "armv6-unknown-linux-gnueabihf" + "armv7-unknown-linux-gnueabihf" "armv7a-unknown-linux-gnueabi" + "aarch64-unknown-linux-gnu" "aarch64-unknown-linux" + # Linux x86 + "i386-unknown-linux-gnu" "i386-unknown-linux" "x86_64-unknown-linux-gnu" "x86_64-unknown-linux" + # Linux Android + "armv7-unknown-linux-androideabi" "aarch64-unknown-linux-android" + + # QNX + "arm-unknown-nto-qnx-eabi" + + # macOS + "i386-apple-darwin" "x86_64-apple-darwin" + # iOS + "armv7-apple-ios arm64-apple-ios" "i386-apple-ios x86_64-apple-ios" +) # given the call to clang -c11 that clang --target -v generates, # parse the -target-cpu <CPU> and -target-feature <feature> from @@ -61,7 +72,7 @@ FST=1 FILE=_____dummy.c touch $FILE -for target in $TARGETS; do +for target in "${TARGETS[@]}"; do # find the cpu and attributes emitte by clang for the given $target CPU="" ATTR=() |