diff options
71 files changed, 1528 insertions, 1720 deletions
diff --git a/ghc/compiler/HsVersions.h b/ghc/compiler/HsVersions.h index 1c07d2c5ae..dd80922e0b 100644 --- a/ghc/compiler/HsVersions.h +++ b/ghc/compiler/HsVersions.h @@ -90,11 +90,19 @@ name = Util.global (value) :: IORef (ty); \ -- when compiling FastString itself #ifndef COMPILING_FAST_STRING -- -import qualified FastString +import qualified FastString as FS #endif -#define SLIT(x) (FastString.mkLitString# (x#)) -#define FSLIT(x) (FastString.mkFastString# (x#)) +#define SLIT(x) (FS.mkLitString# (x#)) +#define FSLIT(x) (FS.mkFastString# (x#)) + +-- Useful for declaring arguments to be strict +#define STRICT1(f) f a b c | a `seq` False = undefined +#define STRICT2(f) f a b | a `seq` b `seq` False = undefined +#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined +#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined +#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined +#define STRICT6(f) f a b c d e f | a `seq` b `seq` c `seq` d `seq` e `seq` f `seq` False = undefined #endif /* HsVersions.h */ diff --git a/ghc/compiler/Makefile b/ghc/compiler/Makefile index 4920e16a0c..2b5252a5be 100644 --- a/ghc/compiler/Makefile +++ b/ghc/compiler/Makefile @@ -408,7 +408,7 @@ SRC_HC_OPTS += -DGHCI -package template-haskell PKG_DEPENDS += template-haskell # Use threaded RTS with GHCi, so threads don't get blocked at the prompt. -SRC_HC_OPTS += -threaded +# SRC_HC_OPTS += -threaded ALL_DIRS += ghci diff --git a/ghc/compiler/basicTypes/Id.lhs b/ghc/compiler/basicTypes/Id.lhs index 0d15b20f22..10d5a28e17 100644 --- a/ghc/compiler/basicTypes/Id.lhs +++ b/ghc/compiler/basicTypes/Id.lhs @@ -9,7 +9,7 @@ module Id ( -- Simple construction mkGlobalId, mkLocalId, mkLocalIdWithInfo, - mkSysLocal, mkSysLocalUnencoded, mkUserLocal, mkVanillaGlobal, + mkSysLocal, mkUserLocal, mkVanillaGlobal, mkTemplateLocals, mkTemplateLocalsNum, mkWildId, mkTemplateLocal, mkWorkerId, mkExportedLocalId, @@ -105,15 +105,15 @@ import qualified Demand ( Demand ) import DataCon ( DataCon, isUnboxedTupleCon ) import NewDemand ( Demand, StrictSig, topDmd, topSig, isBottomingSig ) import Name ( Name, OccName, nameIsLocalOrFrom, - mkSystemVarName, mkSystemVarNameEncoded, mkInternalName, - getOccName, getSrcLoc - ) + mkSystemVarName, mkInternalName, getOccName, + getSrcLoc ) import Module ( Module ) -import OccName ( EncodedFS, mkWorkerOcc ) +import OccName ( mkWorkerOcc ) import Maybes ( orElse ) import SrcLoc ( SrcLoc ) import Outputable import Unique ( Unique, mkBuiltinUnique ) +import FastString ( FastString ) import StaticFlags ( opt_NoStateHack ) -- infixl so you can say (id `set` a `set` b) @@ -162,15 +162,10 @@ mkLocalId name ty = mkLocalIdWithInfo name ty vanillaIdInfo -- SysLocal: for an Id being created by the compiler out of thin air... -- UserLocal: an Id with a name the user might recognize... mkUserLocal :: OccName -> Unique -> Type -> SrcLoc -> Id -mkSysLocal :: EncodedFS -> Unique -> Type -> Id +mkSysLocal :: FastString -> Unique -> Type -> Id mkVanillaGlobal :: Name -> Type -> IdInfo -> Id --- for SysLocal, we assume the base name is already encoded, to avoid --- re-encoding the same string over and over again. -mkSysLocal fs uniq ty = mkLocalId (mkSystemVarNameEncoded uniq fs) ty - --- version to use when the faststring needs to be encoded -mkSysLocalUnencoded fs uniq ty = mkLocalId (mkSystemVarName uniq fs) ty +mkSysLocal fs uniq ty = mkLocalId (mkSystemVarName uniq fs) ty mkUserLocal occ uniq ty loc = mkLocalId (mkInternalName uniq occ loc) ty mkVanillaGlobal = mkGlobalId VanillaGlobal diff --git a/ghc/compiler/basicTypes/Literal.lhs b/ghc/compiler/basicTypes/Literal.lhs index 9aab6ee7f1..e83ea9db74 100644 --- a/ghc/compiler/basicTypes/Literal.lhs +++ b/ghc/compiler/basicTypes/Literal.lhs @@ -35,7 +35,6 @@ import FastTypes import FastString import Binary -import UnicodeUtil ( stringToUtf8 ) import Ratio ( numerator ) import FastString ( uniqueOfFS, lengthFS ) import DATA_INT ( Int8, Int16, Int32 ) @@ -95,7 +94,11 @@ data Literal = ------------------ -- First the primitive guys MachChar Char -- Char# At least 31 bits - | MachStr FastString + + | MachStr FastString -- A string-literal: stored and emitted + -- UTF-8 encoded, we'll arrange to decode it + -- at runtime. Also emitted with a '\0' + -- terminator. | MachNullAddr -- the NULL pointer, the only pointer value -- that can be represented as a Literal. @@ -206,7 +209,7 @@ mkMachInt64 x = MachInt64 x mkMachWord64 x = MachWord64 x mkStringLit :: String -> Literal -mkStringLit s = MachStr (mkFastString (stringToUtf8 s)) +mkStringLit s = MachStr (mkFastString s) -- stored UTF-8 encoded inIntRange, inWordRange :: Integer -> Bool inIntRange x = x >= tARGET_MIN_INT && x <= tARGET_MAX_INT diff --git a/ghc/compiler/basicTypes/MkId.lhs b/ghc/compiler/basicTypes/MkId.lhs index 02d255959c..9d93a67008 100644 --- a/ghc/compiler/basicTypes/MkId.lhs +++ b/ghc/compiler/basicTypes/MkId.lhs @@ -59,7 +59,7 @@ import Class ( Class, classTyCon, classSelIds ) import Var ( Id, TyVar, Var ) import VarSet ( isEmptyVarSet, subVarSet, varSetElems ) import Name ( mkFCallName, mkWiredInName, Name, BuiltInSyntax(..) ) -import OccName ( mkOccFS, varName ) +import OccName ( mkOccNameFS, varName ) import PrimOp ( PrimOp, primOpSig, primOpOcc, primOpTag ) import ForeignCall ( ForeignCall ) import DataCon ( DataCon, DataConIds(..), dataConTyVars, @@ -847,7 +847,7 @@ another gun with which to shoot yourself in the foot. \begin{code} mkWiredInIdName mod fs uniq id - = mkWiredInName mod (mkOccFS varName fs) uniq Nothing (AnId id) UserSyntax + = mkWiredInName mod (mkOccNameFS varName fs) uniq Nothing (AnId id) UserSyntax unsafeCoerceName = mkWiredInIdName gHC_PRIM FSLIT("unsafeCoerce#") unsafeCoerceIdKey unsafeCoerceId nullAddrName = mkWiredInIdName gHC_PRIM FSLIT("nullAddr#") nullAddrIdKey nullAddrId diff --git a/ghc/compiler/basicTypes/Module.lhs b/ghc/compiler/basicTypes/Module.lhs index 9145b353f3..f4e413d263 100644 --- a/ghc/compiler/basicTypes/Module.lhs +++ b/ghc/compiler/basicTypes/Module.lhs @@ -4,7 +4,7 @@ Module ~~~~~~~~~~ -Simply the name of a module, represented as a Z-encoded FastString. +Simply the name of a module, represented as a FastString. These are Uniquable, hence we can build FiniteMaps with ModuleNames as the keys. @@ -17,13 +17,11 @@ module Module , ModLocation(..) , addBootSuffix, addBootSuffix_maybe, addBootSuffixLocn - , moduleString -- :: ModuleName -> EncodedString - , moduleUserString -- :: ModuleName -> UserString - , moduleFS -- :: ModuleName -> EncodedFS + , moduleString -- :: ModuleName -> String + , moduleFS -- :: ModuleName -> FastString - , mkModule -- :: UserString -> ModuleName - , mkModuleFS -- :: UserFS -> ModuleName - , mkSysModuleFS -- :: EncodedFS -> ModuleName + , mkModule -- :: String -> ModuleName + , mkModuleFS -- :: FastString -> ModuleName , ModuleEnv , elemModuleEnv, extendModuleEnv, extendModuleEnvList, plusModuleEnv_C @@ -108,7 +106,7 @@ addBootSuffixLocn locn %************************************************************************ \begin{code} -newtype Module = Module EncodedFS +newtype Module = Module FastString -- Haskell module names can include the quote character ', -- so the module names have the z-encoding applied to them @@ -131,30 +129,26 @@ instance Ord Module where instance Outputable Module where ppr = pprModule - pprModule :: Module -> SDoc -pprModule (Module nm) = pprEncodedFS nm +pprModule (Module nm) = + getPprStyle $ \ sty -> + if codeStyle sty + then ftext (zEncodeFS nm) + else ftext nm -moduleFS :: Module -> EncodedFS +moduleFS :: Module -> FastString moduleFS (Module mod) = mod -moduleString :: Module -> EncodedString +moduleString :: Module -> String moduleString (Module mod) = unpackFS mod -moduleUserString :: Module -> UserString -moduleUserString (Module mod) = decode (unpackFS mod) - -- used to be called mkSrcModule -mkModule :: UserString -> Module -mkModule s = Module (mkFastString (encode s)) +mkModule :: String -> Module +mkModule s = Module (mkFastString s) -- used to be called mkSrcModuleFS -mkModuleFS :: UserFS -> Module -mkModuleFS s = Module (encodeFS s) - --- used to be called mkSysModuleFS -mkSysModuleFS :: EncodedFS -> Module -mkSysModuleFS s = Module s +mkModuleFS :: FastString -> Module +mkModuleFS s = Module s \end{code} %************************************************************************ diff --git a/ghc/compiler/basicTypes/Name.lhs b/ghc/compiler/basicTypes/Name.lhs index fd8f2cfd48..3aeb03b8bf 100644 --- a/ghc/compiler/basicTypes/Name.lhs +++ b/ghc/compiler/basicTypes/Name.lhs @@ -12,7 +12,7 @@ module Name ( Name, -- Abstract BuiltInSyntax(..), mkInternalName, mkSystemName, - mkSystemVarName, mkSystemVarNameEncoded, mkSysTvName, + mkSystemVarName, mkSysTvName, mkFCallName, mkIPName, mkExternalName, mkWiredInName, @@ -38,10 +38,11 @@ module Name ( import {-# SOURCE #-} TypeRep( TyThing ) import OccName -- All of it -import Module ( Module ) +import Module ( Module, moduleFS ) import SrcLoc ( noSrcLoc, wiredInSrcLoc, SrcLoc ) import Unique ( Unique, Uniquable(..), getKey, pprUnique ) import Maybes ( orElse, isJust ) +import FastString ( FastString, zEncodeFS ) import Outputable \end{code} @@ -215,21 +216,16 @@ mkSystemName :: Unique -> OccName -> Name mkSystemName uniq occ = Name { n_uniq = uniq, n_sort = System, n_occ = occ, n_loc = noSrcLoc } -mkSystemVarName :: Unique -> UserFS -> Name -mkSystemVarName uniq fs = mkSystemName uniq (mkVarOcc fs) +mkSystemVarName :: Unique -> FastString -> Name +mkSystemVarName uniq fs = mkSystemName uniq (mkVarOccFS fs) --- Use this version when the string is already encoded. Avoids duplicating --- the string each time a new name is created. -mkSystemVarNameEncoded :: Unique -> EncodedFS -> Name -mkSystemVarNameEncoded uniq fs = mkSystemName uniq (mkSysOccFS varName fs) +mkSysTvName :: Unique -> FastString -> Name +mkSysTvName uniq fs = mkSystemName uniq (mkOccNameFS tvName fs) -mkSysTvName :: Unique -> EncodedFS -> Name -mkSysTvName uniq fs = mkSystemName uniq (mkSysOccFS tvName fs) - -mkFCallName :: Unique -> EncodedString -> Name +mkFCallName :: Unique -> String -> Name -- The encoded string completely describes the ccall mkFCallName uniq str = Name { n_uniq = uniq, n_sort = Internal, - n_occ = mkFCallOcc str, n_loc = noSrcLoc } + n_occ = mkVarOcc str, n_loc = noSrcLoc } mkIPName :: Unique -> OccName -> Name mkIPName uniq occ @@ -317,13 +313,13 @@ pprName (Name {n_sort = sort, n_uniq = uniq, n_occ = occ}) Internal -> pprInternal sty uniq occ pprExternal sty uniq mod occ is_wired is_builtin - | codeStyle sty = ppr mod <> char '_' <> ppr_occ_name occ + | codeStyle sty = ppr_z_module mod <> char '_' <> ppr_z_occ_name occ -- In code style, always qualify -- ToDo: maybe we could print all wired-in things unqualified -- in code style, to reduce symbol table bloat? | debugStyle sty = ppr mod <> dot <> ppr_occ_name occ <> braces (hsep [if is_wired then ptext SLIT("(w)") else empty, - text (briefOccNameFlavour occ), + pprNameSpaceBrief (occNameSpace occ), pprUnique uniq]) | BuiltInSyntax <- is_builtin = ppr_occ_name occ -- never qualify builtin syntax @@ -332,7 +328,7 @@ pprExternal sty uniq mod occ is_wired is_builtin pprInternal sty uniq occ | codeStyle sty = pprUnique uniq - | debugStyle sty = ppr_occ_name occ <> braces (hsep [text (briefOccNameFlavour occ), + | debugStyle sty = ppr_occ_name occ <> braces (hsep [pprNameSpaceBrief (occNameSpace occ), pprUnique uniq]) | dumpStyle sty = ppr_occ_name occ <> char '_' <> pprUnique uniq -- For debug dumps, we're not necessarily dumping @@ -343,15 +339,21 @@ pprInternal sty uniq occ pprSystem sty uniq occ | codeStyle sty = pprUnique uniq | debugStyle sty = ppr_occ_name occ <> char '_' <> pprUnique uniq - <> braces (text (briefOccNameFlavour occ)) + <> braces (pprNameSpaceBrief (occNameSpace occ)) | otherwise = ppr_occ_name occ <> char '_' <> pprUnique uniq -- If the tidy phase hasn't run, the OccName -- is unlikely to be informative (like 's'), -- so print the unique -ppr_occ_name occ = pprEncodedFS (occNameFS occ) +ppr_occ_name occ = ftext (occNameFS occ) -- Don't use pprOccName; instead, just print the string of the OccName; -- we print the namespace in the debug stuff above + +-- In code style, we Z-encode the strings. The results of Z-encoding each FastString are +-- cached behind the scenes in the FastString implementation. +ppr_z_occ_name occ = ftext (zEncodeFS (occNameFS occ)) +ppr_z_module mod = ftext (zEncodeFS (moduleFS mod)) + \end{code} %************************************************************************ diff --git a/ghc/compiler/basicTypes/OccName.lhs b/ghc/compiler/basicTypes/OccName.lhs index 756d6a955a..bd6d3f7060 100644 --- a/ghc/compiler/basicTypes/OccName.lhs +++ b/ghc/compiler/basicTypes/OccName.lhs @@ -7,62 +7,66 @@ \begin{code} module OccName ( - -- The NameSpace type; abstact + -- * The NameSpace type; abstact NameSpace, tcName, clsName, tcClsName, dataName, varName, - tvName, srcDataName, nameSpaceString, + tvName, srcDataName, - -- The OccName type + -- ** Printing + pprNameSpace, pprNonVarNameSpace, pprNameSpaceBrief, + + -- * The OccName type OccName, -- Abstract, instance of Outputable pprOccName, + -- ** Construction + mkOccName, mkOccNameFS, + mkVarOcc, mkVarOccFS, + mkTyVarOcc, + mkDFunOcc, + mkTupleOcc, + setOccNameSpace, + + -- ** Derived OccNames + mkDataConWrapperOcc, mkWorkerOcc, mkDefaultMethodOcc, mkDerivedTyConOcc, + mkClassTyConOcc, mkClassDataConOcc, mkDictOcc, mkIPOcc, + mkSpecOcc, mkForeignExportOcc, mkGenOcc1, mkGenOcc2, + mkDataTOcc, mkDataCOcc, mkDataConWorkerOcc, + mkSuperDictSelOcc, mkLocalOcc, mkMethodOcc, + + -- ** Deconstruction + occNameFS, occNameString, occNameSpace, + + isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc, + parenSymOcc, reportIfUnused, isTcClsName, isVarName, + + isTupleOcc_maybe, + -- The OccEnv type OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv, lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv, occEnvElts, foldOccEnv, plusOccEnv, plusOccEnv_C, extendOccEnv_C, - -- The OccSet type - OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList, + OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, + extendOccSetList, unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts, foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet, - mkOccName, mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS, - mkVarOcc, mkVarOccEncoded, mkTyVarOcc, - mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc, - mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc, - mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc, - mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc, - mkDataConWrapperOcc, mkDataConWorkerOcc, - - isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc, - parenSymOcc, reportIfUnused, isTcClsName, isVarName, - - occNameFS, occNameString, occNameUserString, occNameSpace, - occNameFlavour, briefOccNameFlavour, - setOccNameSpace, - - mkTupleOcc, isTupleOcc_maybe, - -- Tidying up TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv, - -- Encoding - EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS, - -- The basic form of names isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym, isLexVarId, isLexVarSym, - isLowerISO, isUpperISO, startsVarSym, startsVarId, startsConSym, startsConId ) where #include "HsVersions.h" -import Char ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt ) -import Util ( thenCmp ) -import Unique ( Unique, mkUnique, Uniquable(..) ) -import BasicTypes ( Boxity(..), Arity ) -import StaticFlags ( opt_PprStyle_Debug ) +import Util ( thenCmp ) +import Unique ( Unique, mkUnique, Uniquable(..) ) +import BasicTypes ( Boxity(..), Arity ) +import StaticFlags ( opt_PprStyle_Debug ) import UniqFM import UniqSet import FastString @@ -70,34 +74,16 @@ import Outputable import Binary import GLAEXTS -\end{code} - -We hold both module names and identifier names in a 'Z-encoded' form -that makes them acceptable both as a C identifier and as a Haskell -(prefix) identifier. - -They can always be decoded again when printing error messages -or anything else for the user, but it does make sense for it -to be represented here in encoded form, so that when generating -code the encoding operation is not performed on each occurrence. -These type synonyms help documentation. - -\begin{code} -type UserFS = FastString -- As the user typed it -type EncodedFS = FastString -- Encoded form +import Data.Char ( isUpper, isLower, ord ) -type UserString = String -- As the user typed it -type EncodedString = String -- Encoded form +-- Unicode TODO: put isSymbol in libcompat +#if __GLASGOW_HASKELL__ > 604 +import Data.Char ( isSymbol ) +#else +isSymbol = const False +#endif - -pprEncodedFS :: EncodedFS -> SDoc -pprEncodedFS fs - = getPprStyle $ \ sty -> - if userStyle sty || dumpStyle sty - -- ftext (decodeFS fs) would needlessly pack the string again - then text (decode (unpackFS fs)) - else ftext fs \end{code} %************************************************************************ @@ -155,12 +141,20 @@ isVarName TvName = True isVarName VarName = True isVarName other = False +pprNameSpace :: NameSpace -> SDoc +pprNameSpace DataName = ptext SLIT("data constructor") +pprNameSpace VarName = ptext SLIT("variable") +pprNameSpace TvName = ptext SLIT("type variable") +pprNameSpace TcClsName = ptext SLIT("type constructor or class") + +pprNonVarNameSpace :: NameSpace -> SDoc +pprNonVarNameSpace VarName = empty +pprNonVarNameSpace ns = pprNameSpace ns -nameSpaceString :: NameSpace -> String -nameSpaceString DataName = "data constructor" -nameSpaceString VarName = "variable" -nameSpaceString TvName = "type variable" -nameSpaceString TcClsName = "type constructor or class" +pprNameSpaceBrief DataName = char 'd' +pprNameSpaceBrief VarName = char 'v' +pprNameSpaceBrief TvName = ptext SLIT("tv") +pprNameSpaceBrief TcClsName = ptext SLIT("tc") \end{code} @@ -173,7 +167,7 @@ nameSpaceString TcClsName = "type constructor or class" \begin{code} data OccName = OccName { occNameSpace :: !NameSpace - , occNameFS :: !EncodedFS + , occNameFS :: !FastString } \end{code} @@ -201,9 +195,11 @@ instance Outputable OccName where pprOccName :: OccName -> SDoc pprOccName (OccName sp occ) = getPprStyle $ \ sty -> - pprEncodedFS occ <> if debugStyle sty then - braces (text (briefNameSpaceFlavour sp)) - else empty + if codeStyle sty + then ftext (zEncodeFS occ) + else ftext occ <> if debugStyle sty + then braces (pprNameSpaceBrief sp) + else empty \end{code} @@ -211,54 +207,24 @@ pprOccName (OccName sp occ) %* * \subsection{Construction} %* * -%*****p******************************************************************* - -*Sys* things do no encoding; the caller should ensure that the thing is -already encoded - -\begin{code} -mkSysOcc :: NameSpace -> EncodedString -> OccName -mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str ) - OccName occ_sp (mkFastString str) - -mkSysOccFS :: NameSpace -> EncodedFS -> OccName -mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs ) - OccName occ_sp fs - -mkFCallOcc :: EncodedString -> OccName --- This version of mkSysOcc doesn't check that the string is already encoded, --- because it will be something like "{__ccall f dyn Int# -> Int#}" --- This encodes a lot into something that then parses like an Id. --- But then alreadyEncoded complains about the braces! -mkFCallOcc str = OccName varName (mkFastString str) - --- Kind constructors get a special function. Uniquely, they are not encoded, --- so that they have names like '*'. This means that *even in interface files* --- we'll get kinds like (* -> (* -> *)). We can't use mkSysOcc because it --- has an ASSERT that doesn't hold. -mkKindOccFS :: NameSpace -> EncodedFS -> OccName -mkKindOccFS occ_sp fs = OccName occ_sp fs -\end{code} - -*Source-code* things are encoded. +%************************************************************************ \begin{code} -mkOccFS :: NameSpace -> UserFS -> OccName -mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs) - mkOccName :: NameSpace -> String -> OccName -mkOccName ns s = mkSysOcc ns (encode s) +mkOccName occ_sp str = OccName occ_sp (mkFastString str) -mkVarOcc :: UserFS -> OccName -mkVarOcc fs = mkSysOccFS varName (encodeFS fs) +mkOccNameFS :: NameSpace -> FastString -> OccName +mkOccNameFS occ_sp fs = OccName occ_sp fs -mkTyVarOcc :: UserFS -> OccName -mkTyVarOcc fs = mkSysOccFS tvName (encodeFS fs) +mkVarOcc :: String -> OccName +mkVarOcc s = mkOccName varName s -mkVarOccEncoded :: EncodedFS -> OccName -mkVarOccEncoded fs = mkSysOccFS varName fs -\end{code} +mkVarOccFS :: FastString -> OccName +mkVarOccFS fs = mkOccNameFS varName fs +mkTyVarOcc :: FastString -> OccName +mkTyVarOcc fs = mkOccNameFS tvName fs +\end{code} %************************************************************************ @@ -355,34 +321,13 @@ intersectsOccSet s1 s2 = not (isEmptyOccSet (s1 `intersectOccSet` s2)) %* * %************************************************************************ -\begin{code} -occNameString :: OccName -> EncodedString +\begin{code} +occNameString :: OccName -> String occNameString (OccName _ s) = unpackFS s -occNameUserString :: OccName -> UserString -occNameUserString occ = decode (occNameString occ) - setOccNameSpace :: NameSpace -> OccName -> OccName setOccNameSpace sp (OccName _ occ) = OccName sp occ --- occNameFlavour is used only to generate good error messages -occNameFlavour :: OccName -> SDoc -occNameFlavour (OccName DataName _) = ptext SLIT("data constructor") -occNameFlavour (OccName TvName _) = ptext SLIT("type variable") -occNameFlavour (OccName TcClsName _) = ptext SLIT("type constructor or class") -occNameFlavour (OccName VarName s) = empty - --- briefOccNameFlavour is used in debug-printing of names -briefOccNameFlavour :: OccName -> String -briefOccNameFlavour (OccName sp _) = briefNameSpaceFlavour sp - -briefNameSpaceFlavour DataName = "d" -briefNameSpaceFlavour VarName = "v" -briefNameSpaceFlavour TvName = "tv" -briefNameSpaceFlavour TcClsName = "tc" -\end{code} - -\begin{code} isVarOcc, isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool isVarOcc (OccName VarName _) = True @@ -400,19 +345,19 @@ isValOcc other = False -- Data constructor operator (starts with ':', or '[]') -- Pretty inefficient! -isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s) -isDataSymOcc (OccName VarName s) = isLexConSym (decodeFS s) +isDataSymOcc (OccName DataName s) = isLexConSym s +isDataSymOcc (OccName VarName s) = isLexConSym s isDataSymOcc other = False isDataOcc (OccName DataName _) = True -isDataOcc (OccName VarName s) = isLexCon (decodeFS s) +isDataOcc (OccName VarName s) = isLexCon s isDataOcc other = False -- Any operator (data constructor or variable) -- Pretty inefficient! -isSymOcc (OccName DataName s) = isLexConSym (decodeFS s) -isSymOcc (OccName TcClsName s) = isLexConSym (decodeFS s) -isSymOcc (OccName VarName s) = isLexSym (decodeFS s) +isSymOcc (OccName DataName s) = isLexConSym s +isSymOcc (OccName TcClsName s) = isLexConSym s +isSymOcc (OccName VarName s) = isLexSym s isSymOcc other = False parenSymOcc :: OccName -> SDoc -> SDoc @@ -426,13 +371,12 @@ parenSymOcc occ doc | isSymOcc occ = parens doc reportIfUnused :: OccName -> Bool -- Haskell 98 encourages compilers to suppress warnings about -- unused names in a pattern if they start with "_". -reportIfUnused occ = case occNameUserString occ of +reportIfUnused occ = case occNameString occ of ('_' : _) -> False - zz_other -> True + _other -> True \end{code} - %************************************************************************ %* * \subsection{Making system names} @@ -466,16 +410,17 @@ NB: The string must already be encoded! \begin{code} mk_deriv :: NameSpace -> String -- Distinguishes one sort of derived name from another - -> EncodedString -- Must be already encoded!! We don't want to encode it a - -- second time because encoding isn't idempotent + -> String -> OccName -mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str) +mk_deriv occ_sp sys_prefix str = mkOccName occ_sp (sys_prefix ++ str) \end{code} \begin{code} -mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc, - mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc +mkDataConWrapperOcc, mkWorkerOcc, mkDefaultMethodOcc, mkDerivedTyConOcc, + mkClassTyConOcc, mkClassDataConOcc, mkDictOcc, mkIPOcc, + mkSpecOcc, mkForeignExportOcc, mkGenOcc1, mkGenOcc2, + mkDataTOcc, mkDataCOcc, mkDataConWorkerOcc :: OccName -> OccName -- These derived variables have a prefix that no Haskell value could have @@ -503,7 +448,6 @@ mkDataCOcc = mk_simple_deriv varName "$c" mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ) - -- Data constructor workers are made by setting the name space -- of the data constructor OccName (which should be a DataName) -- to VarName @@ -528,7 +472,7 @@ mkLocalOcc uniq occ \begin{code} -mkDFunOcc :: EncodedString -- Typically the class and type glommed together e.g. "OrdMaybe" +mkDFunOcc :: String -- Typically the class and type glommed together e.g. "OrdMaybe" -- Only used in debug mode, for extra clarity -> Bool -- True <=> hs-boot instance dfun -> Int -- Unique index @@ -609,248 +553,37 @@ tidyOccName in_scope occ@(OccName occ_sp fs) Just n -> -- Already used: make a new guess, -- change the guess base, and try again tidyOccName (extendOccEnv in_scope occ (n+1)) - (mkSysOcc occ_sp (unpackFS fs ++ show n)) + (mkOccName occ_sp (unpackFS fs ++ show n)) \end{code} - -%************************************************************************ -%* * -\subsection{The 'Z' encoding} -%* * -%************************************************************************ - -This is the main name-encoding and decoding function. It encodes any -string into a string that is acceptable as a C name. This is the name -by which things are known right through the compiler. - -The basic encoding scheme is this. - -* Tuples (,,,) are coded as Z3T - -* Alphabetic characters (upper and lower) and digits - all translate to themselves; - except 'Z', which translates to 'ZZ' - and 'z', which translates to 'zz' - We need both so that we can preserve the variable/tycon distinction - -* Most other printable characters translate to 'zx' or 'Zx' for some - alphabetic character x - -* The others translate as 'znnnU' where 'nnn' is the decimal number - of the character - - Before After - -------------------------- - Trak Trak - foo_wib foozuwib - > zg - >1 zg1 - foo# foozh - foo## foozhzh - foo##1 foozhzh1 - fooZ fooZZ - :+ ZCzp - () Z0T 0-tuple - (,,,,) Z5T 5-tuple - (# #) Z1H unboxed 1-tuple (note the space) - (#,,,,#) Z5H unboxed 5-tuple - (NB: There is no Z1T nor Z0H.) - -\begin{code} --- alreadyEncoded is used in ASSERTs to check for encoded --- strings. It isn't fail-safe, of course, because, say 'zh' might --- be encoded or not. -alreadyEncoded :: String -> Bool -alreadyEncoded s = all ok s - where - ok ' ' = True - -- This is a bit of a lie; if we really wanted spaces - -- in names we'd have to encode them. But we do put - -- spaces in ccall "occurrences", and we don't want to - -- reject them here - ok ch = isAlphaNum ch - -alreadyEncodedFS :: FastString -> Bool -alreadyEncodedFS fs = alreadyEncoded (unpackFS fs) - -encode :: UserString -> EncodedString -encode cs = case maybe_tuple cs of - Just n -> n -- Tuples go to Z2T etc - Nothing -> go cs - where - go [] = [] - go (c:cs) = encode_ch c ++ go cs - -encodeFS :: UserFS -> EncodedFS -encodeFS fast_str | all unencodedChar str = fast_str - | otherwise = mkFastString (encode str) - where - str = unpackFS fast_str - -unencodedChar :: Char -> Bool -- True for chars that don't need encoding -unencodedChar 'Z' = False -unencodedChar 'z' = False -unencodedChar c = c >= 'a' && c <= 'z' - || c >= 'A' && c <= 'Z' - || c >= '0' && c <= '9' - -encode_ch :: Char -> EncodedString -encode_ch c | unencodedChar c = [c] -- Common case first - --- Constructors -encode_ch '(' = "ZL" -- Needed for things like (,), and (->) -encode_ch ')' = "ZR" -- For symmetry with ( -encode_ch '[' = "ZM" -encode_ch ']' = "ZN" -encode_ch ':' = "ZC" -encode_ch 'Z' = "ZZ" - --- Variables -encode_ch 'z' = "zz" -encode_ch '&' = "za" -encode_ch '|' = "zb" -encode_ch '^' = "zc" -encode_ch '$' = "zd" -encode_ch '=' = "ze" -encode_ch '>' = "zg" -encode_ch '#' = "zh" -encode_ch '.' = "zi" -encode_ch '<' = "zl" -encode_ch '-' = "zm" -encode_ch '!' = "zn" -encode_ch '+' = "zp" -encode_ch '\'' = "zq" -encode_ch '\\' = "zr" -encode_ch '/' = "zs" -encode_ch '*' = "zt" -encode_ch '_' = "zu" -encode_ch '%' = "zv" -encode_ch c = 'z' : shows (ord c) "U" -\end{code} - -Decode is used for user printing. - -\begin{code} -decodeFS :: FastString -> FastString -decodeFS fs = mkFastString (decode (unpackFS fs)) - -decode :: EncodedString -> UserString -decode [] = [] -decode ('Z' : d : rest) | isDigit d = decode_tuple d rest - | otherwise = decode_upper d : decode rest -decode ('z' : d : rest) | isDigit d = decode_num_esc d rest - | otherwise = decode_lower d : decode rest -decode (c : rest) = c : decode rest - -decode_upper, decode_lower :: Char -> Char - -decode_upper 'L' = '(' -decode_upper 'R' = ')' -decode_upper 'M' = '[' -decode_upper 'N' = ']' -decode_upper 'C' = ':' -decode_upper 'Z' = 'Z' -decode_upper ch = pprTrace "decode_upper" (char ch) ch - -decode_lower 'z' = 'z' -decode_lower 'a' = '&' -decode_lower 'b' = '|' -decode_lower 'c' = '^' -decode_lower 'd' = '$' -decode_lower 'e' = '=' -decode_lower 'g' = '>' -decode_lower 'h' = '#' -decode_lower 'i' = '.' -decode_lower 'l' = '<' -decode_lower 'm' = '-' -decode_lower 'n' = '!' -decode_lower 'p' = '+' -decode_lower 'q' = '\'' -decode_lower 'r' = '\\' -decode_lower 's' = '/' -decode_lower 't' = '*' -decode_lower 'u' = '_' -decode_lower 'v' = '%' -decode_lower ch = pprTrace "decode_lower" (char ch) ch - --- Characters not having a specific code are coded as z224U -decode_num_esc d rest - = go (digitToInt d) rest - where - go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest - go n ('U' : rest) = chr n : decode rest - go n other = pprPanic "decode_num_esc" (ppr n <+> text other) - -decode_tuple :: Char -> EncodedString -> UserString -decode_tuple d rest - = go (digitToInt d) rest - where - -- NB. recurse back to decode after decoding the tuple, because - -- the tuple might be embedded in a longer name. - go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest - go 0 ('T':rest) = "()" ++ decode rest - go n ('T':rest) = '(' : replicate (n-1) ',' ++ ")" ++ decode rest - go 1 ('H':rest) = "(# #)" ++ decode rest - go n ('H':rest) = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ decode rest - go n other = pprPanic "decode_tuple" (ppr n <+> text other) -\end{code} - - %************************************************************************ %* * Stuff for dealing with tuples %* * %************************************************************************ -Tuples are encoded as - Z3T or Z3H -for 3-tuples or unboxed 3-tuples respectively. No other encoding starts - Z<digit> - -* "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple) - There are no unboxed 0-tuples. - -* "()" is the tycon for a boxed 0-tuple. - There are no boxed 1-tuples. - - -\begin{code} -maybe_tuple :: UserString -> Maybe EncodedString - -maybe_tuple "(# #)" = Just("Z1H") -maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of - (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H") - other -> Nothing -maybe_tuple "()" = Just("Z0T") -maybe_tuple ('(' : cs) = case count_commas (0::Int) cs of - (n, ')' : cs) -> Just ('Z' : shows (n+1) "T") - other -> Nothing -maybe_tuple other = Nothing - -count_commas :: Int -> String -> (Int, String) -count_commas n (',' : cs) = count_commas (n+1) cs -count_commas n cs = (n,cs) -\end{code} - \begin{code} mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName -mkTupleOcc ns bx ar - = OccName ns (mkFastString ('Z' : (show ar ++ bx_char))) +mkTupleOcc ns bx ar = OccName ns (mkFastString str) where - bx_char = case bx of - Boxed -> "T" - Unboxed -> "H" + -- no need to cache these, the caching is done in the caller + -- (TysWiredIn.mk_tuple) + str = case bx of + Boxed -> '(' : commas ++ ")" + Unboxed -> '(' : '#' : commas ++ "#)" + + commas = take (ar-1) (repeat ',') isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity) -- Tuples are special, because there are so many of them! isTupleOcc_maybe (OccName ns fs) = case unpackFS fs of - ('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest) - other -> Nothing + '(':'#':',':rest -> Just (ns, Unboxed, 2 + count_commas rest) + '(':',':rest -> Just (ns, Boxed, 2 + count_commas rest) + _other -> Nothing where - decode_tup n "H" = (ns, Unboxed, n) - decode_tup n "T" = (ns, Boxed, n) - decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest + count_commas (',':rest) = 1 + count_commas rest + count_commas _ = 0 \end{code} %************************************************************************ @@ -875,37 +608,31 @@ isLexSym cs = isLexConSym cs || isLexVarSym cs ------------- isLexConId cs -- Prefix type or data constructors - | nullFastString cs = False -- e.g. "Foo", "[]", "(,)" + | nullFS cs = False -- e.g. "Foo", "[]", "(,)" | cs == FSLIT("[]") = True | otherwise = startsConId (headFS cs) isLexVarId cs -- Ordinary prefix identifiers - | nullFastString cs = False -- e.g. "x", "_x" + | nullFS cs = False -- e.g. "x", "_x" | otherwise = startsVarId (headFS cs) isLexConSym cs -- Infix type or data constructors - | nullFastString cs = False -- e.g. ":-:", ":", "->" + | nullFS cs = False -- e.g. ":-:", ":", "->" | cs == FSLIT("->") = True | otherwise = startsConSym (headFS cs) isLexVarSym cs -- Infix identifiers - | nullFastString cs = False -- e.g. "+" + | nullFS cs = False -- e.g. "+" | otherwise = startsVarSym (headFS cs) ------------- startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool -startsVarSym c = isSymbolASCII c || isSymbolISO c -- Infix Ids +startsVarSym c = isSymbolASCII c || (ord c > 0x7f && isSymbol c) -- Infix Ids startsConSym c = c == ':' -- Infix data constructors -startsVarId c = isLower c || isLowerISO c || c == '_' -- Ordinary Ids -startsConId c = isUpper c || isUpperISO c || c == '(' -- Ordinary type constructors and data constructors - +startsVarId c = isLower c || c == '_' -- Ordinary Ids +startsConId c = isUpper c || c == '(' -- Ordinary type constructors and data constructors isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-" -isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf]) -isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'# - --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c -isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'# - --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c \end{code} %************************************************************************ diff --git a/ghc/compiler/basicTypes/RdrName.lhs b/ghc/compiler/basicTypes/RdrName.lhs index 900717e610..030aa1f609 100644 --- a/ghc/compiler/basicTypes/RdrName.lhs +++ b/ghc/compiler/basicTypes/RdrName.lhs @@ -40,25 +40,17 @@ module RdrName ( #include "HsVersions.h" -import OccName ( NameSpace, varName, - OccName, UserFS, - setOccNameSpace, - mkOccFS, occNameFlavour, - isDataOcc, isTvOcc, isTcOcc, - OccEnv, emptyOccEnv, extendOccEnvList, lookupOccEnv, - elemOccEnv, plusOccEnv_C, extendOccEnv_C, foldOccEnv, - occEnvElts - ) +import OccName import Module ( Module, mkModuleFS ) import Name ( Name, NamedThing(getName), nameModule, nameParent_maybe, nameOccName, isExternalName, nameSrcLoc ) import Maybes ( mapCatMaybes ) import SrcLoc ( isGoodSrcLoc, SrcSpan ) +import FastString ( FastString ) import Outputable import Util ( thenCmp ) \end{code} - %************************************************************************ %* * \subsection{The main data type} @@ -147,14 +139,14 @@ mkDerivedRdrName parent mk_occ --------------- -- These two are used when parsing source files -- They do encode the module and occurrence names -mkUnqual :: NameSpace -> UserFS -> RdrName -mkUnqual sp n = Unqual (mkOccFS sp n) +mkUnqual :: NameSpace -> FastString -> RdrName +mkUnqual sp n = Unqual (mkOccNameFS sp n) -mkVarUnqual :: UserFS -> RdrName -mkVarUnqual n = Unqual (mkOccFS varName n) +mkVarUnqual :: FastString -> RdrName +mkVarUnqual n = Unqual (mkVarOccFS n) -mkQual :: NameSpace -> (UserFS, UserFS) -> RdrName -mkQual sp (m, n) = Qual (mkModuleFS m) (mkOccFS sp n) +mkQual :: NameSpace -> (FastString, FastString) -> RdrName +mkQual sp (m, n) = Qual (mkModuleFS m) (mkOccNameFS sp n) getRdrName :: NamedThing thing => thing -> RdrName getRdrName name = nameRdrName (getName name) @@ -213,7 +205,7 @@ instance Outputable RdrName where ppr (Qual mod occ) = ppr mod <> dot <> ppr occ <+> ppr_name_space occ ppr (Orig mod occ) = ppr mod <> dot <> ppr occ <+> ppr_name_space occ -ppr_name_space occ = ifPprDebug (parens (occNameFlavour occ)) +ppr_name_space occ = ifPprDebug (parens (pprNonVarNameSpace (occNameSpace occ))) instance OutputableBndr RdrName where pprBndr _ n diff --git a/ghc/compiler/cmm/CLabel.hs b/ghc/compiler/cmm/CLabel.hs index 296ad91ba8..fb13589cdc 100644 --- a/ghc/compiler/cmm/CLabel.hs +++ b/ghc/compiler/cmm/CLabel.hs @@ -740,10 +740,10 @@ pprCLbl (CC_Label cc) = ppr cc pprCLbl (CCS_Label ccs) = ppr ccs pprCLbl (ModuleInitLabel mod way _) - = ptext SLIT("__stginit_") <> ftext (moduleFS mod) + = ptext SLIT("__stginit_") <> ppr mod <> char '_' <> text way pprCLbl (PlainModuleInitLabel mod _) - = ptext SLIT("__stginit_") <> ftext (moduleFS mod) + = ptext SLIT("__stginit_") <> ppr mod ppIdFlavor :: IdLabelInfo -> SDoc ppIdFlavor x = pp_cSEP <> diff --git a/ghc/compiler/cmm/Cmm.hs b/ghc/compiler/cmm/Cmm.hs index 304ddb2b0f..13961c15d3 100644 --- a/ghc/compiler/cmm/Cmm.hs +++ b/ghc/compiler/cmm/Cmm.hs @@ -30,6 +30,7 @@ import CLabel ( CLabel ) import ForeignCall ( CCallConv ) import Unique ( Unique, Uniquable(..) ) import FastString ( FastString ) +import DATA_WORD ( Word8 ) ----------------------------------------------------------------------------- -- Cmm, CmmTop, CmmBasicBlock @@ -251,9 +252,8 @@ data CmmStatic -- align to next N-byte boundary (N must be a power of 2). | CmmDataLabel CLabel -- label the current position in this section. - | CmmString String + | CmmString [Word8] -- string of 8-bit values only, not zero terminated. - -- ToDo: might be more honest to use [Word8] here? ----------------------------------------------------------------------------- -- Global STG registers diff --git a/ghc/compiler/cmm/CmmLex.x b/ghc/compiler/cmm/CmmLex.x index 8515b3e399..c2efd17710 100644 --- a/ghc/compiler/cmm/CmmLex.x +++ b/ghc/compiler/cmm/CmmLex.x @@ -227,10 +227,10 @@ tok_decimal span buf len = return (L span (CmmT_Int $! parseInteger buf len 10 octDecDigit)) tok_octal span buf len - = return (L span (CmmT_Int $! parseInteger (stepOn buf) (len-1) 8 octDecDigit)) + = return (L span (CmmT_Int $! parseInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit)) tok_hexadecimal span buf len - = return (L span (CmmT_Int $! parseInteger (stepOnBy 2 buf) (len-2) 16 hexDigit)) + = return (L span (CmmT_Int $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit)) tok_float str = CmmT_Float $! readRational str diff --git a/ghc/compiler/cmm/CmmParse.y b/ghc/compiler/cmm/CmmParse.y index 45f411b942..cfb2a9d93c 100644 --- a/ghc/compiler/cmm/CmmParse.y +++ b/ghc/compiler/cmm/CmmParse.y @@ -48,6 +48,7 @@ import Constants ( wORD_SIZE ) import Outputable import Monad ( when ) +import Data.Char ( ord ) #include "HsVersions.h" } @@ -177,7 +178,7 @@ static :: { ExtFCode [CmmStatic] } return [CmmStaticLit (getLit e)] } | type ';' { return [CmmUninitialised (machRepByteWidth $1)] } - | 'bits8' '[' ']' STRING ';' { return [CmmString $4] } + | 'bits8' '[' ']' STRING ';' { return [mkString $4] } | 'bits8' '[' INT ']' ';' { return [CmmUninitialised (fromIntegral $3)] } | typenot8 '[' INT ']' ';' { return [CmmUninitialised @@ -427,6 +428,9 @@ section "rodata" = ReadOnlyData section "bss" = UninitialisedData section s = OtherSection s +mkString :: String -> CmmStatic +mkString s = CmmString (map (fromIntegral.ord) s) + -- mkMachOp infers the type of the MachOp from the type of its first -- argument. We assume that this is correct: for MachOps that don't have -- symmetrical args (e.g. shift ops), the first arg determines the type of diff --git a/ghc/compiler/cmm/PprC.hs b/ghc/compiler/cmm/PprC.hs index 7427f50ecc..9fece36a42 100644 --- a/ghc/compiler/cmm/PprC.hs +++ b/ghc/compiler/cmm/PprC.hs @@ -45,6 +45,7 @@ import Data.Bits ( shiftR ) import Char ( ord, chr ) import IO ( Handle ) import DATA_BITS +import Data.Word ( Word8 ) #ifdef DEBUG import PprCmm () -- instances only @@ -881,25 +882,21 @@ machRepSignedCType r | r == wordRep = ptext SLIT("I_") -- --------------------------------------------------------------------- -- print strings as valid C strings --- Assumes it contains only characters '\0'..'\xFF'! -pprFSInCStyle :: FastString -> SDoc -pprFSInCStyle fs = pprStringInCStyle (unpackFS fs) - -pprStringInCStyle :: String -> SDoc +pprStringInCStyle :: [Word8] -> SDoc pprStringInCStyle s = doubleQuotes (text (concatMap charToC s)) -charToC :: Char -> String -charToC '\"' = "\\\"" -charToC '\'' = "\\\'" -charToC '\\' = "\\\\" -charToC c | c >= ' ' && c <= '~' = [c] - | c > '\xFF' = panic ("charToC "++show c) - | otherwise = ['\\', +charToC :: Word8 -> String +charToC w = + case chr (fromIntegral w) of + '\"' -> "\\\"" + '\'' -> "\\\'" + '\\' -> "\\\\" + c | c >= ' ' && c <= '~' -> [c] + | otherwise -> ['\\', chr (ord '0' + ord c `div` 64), chr (ord '0' + ord c `div` 8 `mod` 8), chr (ord '0' + ord c `mod` 8)] - -- --------------------------------------------------------------------------- -- Initialising static objects with floating-point numbers. We can't -- just emit the floating point number, because C will cast it to an int diff --git a/ghc/compiler/cmm/PprCmm.hs b/ghc/compiler/cmm/PprCmm.hs index 3c3e9764a4..6e8367d662 100644 --- a/ghc/compiler/cmm/PprCmm.hs +++ b/ghc/compiler/cmm/PprCmm.hs @@ -51,6 +51,7 @@ import FastString ( mkFastString ) import Data.List ( intersperse, groupBy ) import IO ( Handle ) import Maybe ( isJust ) +import Data.Char ( chr ) pprCmms :: [Cmm] -> SDoc pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms)) @@ -391,7 +392,8 @@ pprStatic s = case s of CmmUninitialised i -> nest 4 $ text "I8" <> brackets (int i) CmmAlign i -> nest 4 $ text "align" <+> int i CmmDataLabel clbl -> pprCLabel clbl <> colon - CmmString s' -> nest 4 $ text "I8[]" <+> doubleQuotes (text s') + CmmString s' -> nest 4 $ text "I8[]" <+> + doubleQuotes (text (map (chr.fromIntegral) s')) -- -------------------------------------------------------------------------- -- Registers, whether local (temps) or global diff --git a/ghc/compiler/codeGen/CgProf.hs b/ghc/compiler/codeGen/CgProf.hs index aaab2fcb77..1488e34956 100644 --- a/ghc/compiler/codeGen/CgProf.hs +++ b/ghc/compiler/codeGen/CgProf.hs @@ -43,7 +43,7 @@ import MachOp import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr ) import CLabel ( mkCCLabel, mkCCSLabel, mkRtsDataLabel ) -import Module ( moduleUserString ) +import Module ( moduleString ) import Id ( Id ) import CostCentre import StgSyn ( GenStgExpr(..), StgExpr ) @@ -292,7 +292,7 @@ emitCostCentreDecl -> Code emitCostCentreDecl cc = do { label <- mkStringCLit (costCentreUserName cc) - ; modl <- mkStringCLit (moduleUserString (cc_mod cc)) + ; modl <- mkStringCLit (moduleString (cc_mod cc)) ; let lits = [ zero, -- StgInt ccID, label, -- char *label, diff --git a/ghc/compiler/codeGen/CgUtils.hs b/ghc/compiler/codeGen/CgUtils.hs index 68958d22a9..2f69927db0 100644 --- a/ghc/compiler/codeGen/CgUtils.hs +++ b/ghc/compiler/codeGen/CgUtils.hs @@ -54,11 +54,12 @@ import ListSetOps ( assocDefault ) import Util ( filterOut, sortLe ) import DynFlags ( DynFlags(..), HscTarget(..) ) import Packages ( HomeModules ) -import FastString ( LitString, FastString, unpackFS ) +import FastString ( LitString, FastString, bytesFS ) import Outputable import Char ( ord ) import DATA_BITS +import DATA_WORD ( Word8 ) import Maybe ( isNothing ) ------------------------------------------------------------------------- @@ -77,7 +78,8 @@ addIdReps ids = [(idCgRep id, id) | id <- ids] ------------------------------------------------------------------------- cgLit :: Literal -> FCode CmmLit -cgLit (MachStr s) = mkStringCLit (unpackFS s) +cgLit (MachStr s) = mkByteStringCLit (bytesFS s) + -- not unpackFS; we want the UTF-8 byte stream. cgLit other_lit = return (mkSimpleLit other_lit) mkSimpleLit :: Literal -> CmmLit @@ -308,10 +310,13 @@ emitRODataLits lbl lits mkStringCLit :: String -> FCode CmmLit -- Make a global definition for the string, -- and return its label -mkStringCLit str +mkStringCLit str = mkByteStringCLit (map (fromIntegral.ord) str) + +mkByteStringCLit :: [Word8] -> FCode CmmLit +mkByteStringCLit bytes = do { uniq <- newUnique ; let lbl = mkStringLitLabel uniq - ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString str] + ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString bytes] ; return (CmmLabel lbl) } ------------------------------------------------------------------------- diff --git a/ghc/compiler/codeGen/ClosureInfo.lhs b/ghc/compiler/codeGen/ClosureInfo.lhs index b0e9e232d7..a5362e60e0 100644 --- a/ghc/compiler/codeGen/ClosureInfo.lhs +++ b/ghc/compiler/codeGen/ClosureInfo.lhs @@ -69,7 +69,7 @@ import StaticFlags ( opt_SccProfilingOn, opt_OmitBlackHoling, import Id ( Id, idType, idArity, idName ) import DataCon ( DataCon, dataConTyCon, isNullaryRepDataCon, dataConName ) import Name ( Name, nameUnique, getOccName, getOccString ) -import OccName ( occNameUserString ) +import OccName ( occNameString ) import Type ( isUnLiftedType, Type, repType, splitTyConApp_maybe ) import TcType ( tcSplitSigmaTy ) import TyCon ( isFunTyCon, isAbstractTyCon ) @@ -930,12 +930,12 @@ closureValDescr, closureTypeDescr :: ClosureInfo -> String closureValDescr (ClosureInfo {closureDescr = descr}) = descr closureValDescr (ConInfo {closureCon = con}) - = occNameUserString (getOccName con) + = occNameString (getOccName con) closureTypeDescr (ClosureInfo { closureType = ty }) = getTyDescription ty closureTypeDescr (ConInfo { closureCon = data_con }) - = occNameUserString (getOccName (dataConTyCon data_con)) + = occNameString (getOccName (dataConTyCon data_con)) getTyDescription :: Type -> String getTyDescription ty diff --git a/ghc/compiler/deSugar/Check.lhs b/ghc/compiler/deSugar/Check.lhs index 309aab2f50..6d7784dc14 100644 --- a/ghc/compiler/deSugar/Check.lhs +++ b/ghc/compiler/deSugar/Check.lhs @@ -18,7 +18,8 @@ import DsUtils ( EquationInfo(..), MatchResult(..), import MatchLit ( tidyLitPat, tidyNPat ) import Id ( Id, idType ) import DataCon ( DataCon, dataConTyCon, dataConOrigArgTys, dataConFieldLabels ) -import Name ( Name, mkInternalName, getOccName, isDataSymOcc, getName, mkVarOcc ) +import Name ( Name, mkInternalName, getOccName, isDataSymOcc, + getName, mkVarOccFS ) import TysWiredIn import PrelNames ( unboundKey ) import TyCon ( tyConDataCons, tupleTyConBoxity, isTupleTyCon ) @@ -382,7 +383,7 @@ make_row_vars used_lits (_, EqnInfo { eqn_pats = pats}) new_var = hash_x hash_x = mkInternalName unboundKey {- doesn't matter much -} - (mkVarOcc FSLIT("#x")) + (mkVarOccFS FSLIT("#x")) noSrcLoc make_row_vars_for_constructor :: (EqnNo, EquationInfo) -> [WarningPat] diff --git a/ghc/compiler/deSugar/DsForeign.lhs b/ghc/compiler/deSugar/DsForeign.lhs index d784eb8612..52956a09ff 100644 --- a/ghc/compiler/deSugar/DsForeign.lhs +++ b/ghc/compiler/deSugar/DsForeign.lhs @@ -28,9 +28,8 @@ import SMRep ( argMachRep, typeCgRep ) import CoreUtils ( exprType, mkInlineMe ) import Id ( Id, idType, idName, mkSysLocal, setInlinePragma ) import Literal ( Literal(..), mkStringLit ) -import Module ( moduleString ) +import Module ( moduleFS ) import Name ( getOccString, NamedThing(..) ) -import OccName ( encodeFS ) import Type ( repType, coreEqType ) import TcType ( Type, mkFunTys, mkForAllTys, mkTyConApp, mkFunTy, tcSplitTyConApp_maybe, @@ -146,7 +145,7 @@ dsFImport id (CImport cconv safety header lib spec) = dsCImport id spec cconv safety no_hdrs `thenDs` \(ids, h, c) -> returnDs (ids, h, c, if no_hdrs then Nothing else Just header) where - no_hdrs = nullFastString header + no_hdrs = nullFS header -- FIXME: the `lib' field is needed for .NET ILX generation when invoking -- routines that are external to the .NET runtime, but GHC doesn't @@ -246,7 +245,7 @@ dsFCall fn_id fcall no_hdrs the_ccall_app = mkFCall ccall_uniq fcall val_args ccall_result_ty work_rhs = mkLams tvs (mkLams work_arg_ids the_ccall_app) work_id = setImpInline no_hdrs $ -- See comments with setImpInline - mkSysLocal (encodeFS FSLIT("$wccall")) work_uniq worker_ty + mkSysLocal FSLIT("$wccall") work_uniq worker_ty -- Build the wrapper work_app = mkApps (mkVarApps (Var work_id) tvs) val_args @@ -356,7 +355,7 @@ dsFExportDynamic id cconv getModuleDs `thenDs` \ mod_name -> let -- hack: need to get at the name of the C stub we're about to generate. - fe_nm = mkFastString (moduleString mod_name ++ "_" ++ toCName fe_id) + fe_nm = mkFastString (unpackFS (zEncodeFS (moduleFS mod_name)) ++ "_" ++ toCName fe_id) in newSysLocalDs arg_ty `thenDs` \ cback -> dsLookupGlobalId newStablePtrName `thenDs` \ newStablePtrId -> diff --git a/ghc/compiler/deSugar/DsMeta.hs b/ghc/compiler/deSugar/DsMeta.hs index e5e079ea61..fcbcc78347 100644 --- a/ghc/compiler/deSugar/DsMeta.hs +++ b/ghc/compiler/deSugar/DsMeta.hs @@ -30,16 +30,16 @@ import qualified Language.Haskell.TH as TH import HsSyn import Class (FunDep) import PrelNames ( rationalTyConName, integerTyConName, negateName ) -import OccName ( isDataOcc, isTvOcc, occNameUserString ) +import OccName ( isDataOcc, isTvOcc, occNameString ) -- To avoid clashes with DsMeta.varName we must make a local alias for OccName.varName -- we do this by removing varName from the import of OccName above, making -- a qualified instance of OccName and using OccNameAlias.varName where varName -- ws previously used in this file. import qualified OccName -import Module ( Module, mkModule, moduleUserString ) +import Module ( Module, mkModule, moduleString ) import Id ( Id, mkLocalId ) -import OccName ( mkOccFS ) +import OccName ( mkOccNameFS ) import Name ( Name, mkExternalName, localiseName, nameOccName, nameModule, isExternalName, getSrcLoc ) import NameEnv @@ -911,7 +911,7 @@ globalVar name ; MkC uni <- coreIntLit (getKey (getUnique name)) ; rep2 mkNameLName [occ,uni] } where - name_mod = moduleUserString (nameModule name) + name_mod = moduleString (nameModule name) name_occ = nameOccName name mk_varg | OccName.isDataOcc name_occ = mkNameG_dName | OccName.isVarOcc name_occ = mkNameG_vName @@ -963,7 +963,7 @@ wrapNongenSyms binds (MkC body) ; return (NonRec id var) } occNameLit :: Name -> DsM (Core String) -occNameLit n = coreStringLit (occNameUserString (nameOccName n)) +occNameLit n = coreStringLit (occNameString (nameOccName n)) -- %********************************************************************* @@ -1390,7 +1390,7 @@ thSyn = mkModule "Language.Haskell.TH.Syntax" thLib = mkModule "Language.Haskell.TH.Lib" mk_known_key_name mod space str uniq - = mkExternalName uniq mod (mkOccFS space str) + = mkExternalName uniq mod (mkOccNameFS space str) Nothing noSrcLoc libFun = mk_known_key_name thLib OccName.varName diff --git a/ghc/compiler/deSugar/DsUtils.lhs b/ghc/compiler/deSugar/DsUtils.lhs index b77bb967cf..1465554175 100644 --- a/ghc/compiler/deSugar/DsUtils.lhs +++ b/ghc/compiler/deSugar/DsUtils.lhs @@ -69,11 +69,12 @@ import PrelNames ( unpackCStringName, unpackCStringUtf8Name, plusIntegerName, timesIntegerName, smallIntegerDataConName, lengthPName, indexPName ) import Outputable -import UnicodeUtil ( intsToUtf8 ) import SrcLoc ( Located(..), unLoc ) import Util ( isSingleton, notNull, zipEqual, sortWith ) import ListSetOps ( assocDefault ) import FastString + +import Data.Char ( ord ) \end{code} @@ -469,7 +470,7 @@ mkSmallIntegerLit small_integer_data_con i = mkConApp small_integer_data_con [mk mkStringExpr str = mkStringExprFS (mkFastString str) mkStringExprFS str - | nullFastString str + | nullFS str = returnDs (mkNilExpr charTy) | lengthFS str == 1 @@ -478,17 +479,17 @@ mkStringExprFS str in returnDs (mkConsExpr charTy the_char (mkNilExpr charTy)) - | all safeChar int_chars + | all safeChar chars = dsLookupGlobalId unpackCStringName `thenDs` \ unpack_id -> returnDs (App (Var unpack_id) (Lit (MachStr str))) | otherwise = dsLookupGlobalId unpackCStringUtf8Name `thenDs` \ unpack_id -> - returnDs (App (Var unpack_id) (Lit (MachStr (mkFastString (intsToUtf8 int_chars))))) + returnDs (App (Var unpack_id) (Lit (MachStr str))) where - int_chars = unpackIntFS str - safeChar c = c >= 1 && c <= 0xFF + chars = unpackFS str + safeChar c = ord c >= 1 && ord c <= 0xFF \end{code} diff --git a/ghc/compiler/ghci/ByteCodeGen.lhs b/ghc/compiler/ghci/ByteCodeGen.lhs index 4d2fa73876..f526ed9907 100644 --- a/ghc/compiler/ghci/ByteCodeGen.lhs +++ b/ghc/compiler/ghci/ByteCodeGen.lhs @@ -53,7 +53,8 @@ import OrdList import Constants ( wORD_SIZE ) import Data.List ( intersperse, sortBy, zip4, zip5, partition ) -import Foreign ( Ptr, castPtr, mallocBytes, pokeByteOff, Word8 ) +import Foreign ( Ptr, castPtr, mallocBytes, pokeByteOff, Word8, + withForeignPtr ) import Foreign.C ( CInt ) import Control.Exception ( throwDyn ) @@ -1084,18 +1085,18 @@ pushAtom d p (AnnLit lit) pushStr s = let getMallocvilleAddr = case s of - FastString _ l ba -> - -- sigh, a string in the heap is no good to us. - -- We need a static C pointer, since the type of - -- a string literal is Addr#. So, copy the string - -- into C land and remember the pointer so we can - -- free it later. - let n = I# l - -- CAREFUL! Chars are 32 bits in ghc 4.09+ - in ioToBc (mallocBytes (n+1)) `thenBc` \ ptr -> + FastString _ n _ fp _ -> + -- we could grab the Ptr from the ForeignPtr, + -- but then we have no way to control its lifetime. + -- In reality it'll probably stay alive long enoungh + -- by virtue of the global FastString table, but + -- to be on the safe side we copy the string into + -- a malloc'd area of memory. + ioToBc (mallocBytes (n+1)) `thenBc` \ ptr -> recordMallocBc ptr `thenBc_` ioToBc ( - do memcpy ptr ba (fromIntegral n) + withForeignPtr fp $ \p -> do + memcpy ptr p (fromIntegral n) pokeByteOff ptr n (fromIntegral (ord '\0') :: Word8) return ptr ) @@ -1110,7 +1111,7 @@ pushAtom d p other (pprCoreExpr (deAnnotate (undefined, other))) foreign import ccall unsafe "memcpy" - memcpy :: Ptr a -> ByteArray# -> CInt -> IO () + memcpy :: Ptr a -> Ptr b -> CInt -> IO () -- ----------------------------------------------------------------------------- diff --git a/ghc/compiler/ghci/ByteCodeLink.lhs b/ghc/compiler/ghci/ByteCodeLink.lhs index ee64b8aa8e..875f1d6331 100644 --- a/ghc/compiler/ghci/ByteCodeLink.lhs +++ b/ghc/compiler/ghci/ByteCodeLink.lhs @@ -21,10 +21,10 @@ import ObjLink ( lookupSymbol ) import Name ( Name, nameModule, nameOccName, isExternalName ) import NameEnv -import OccName ( occNameString ) +import OccName ( occNameFS ) import PrimOp ( PrimOp, primOpOcc ) -import Module ( moduleString ) -import FastString ( FastString(..), unpackFS ) +import Module ( moduleFS ) +import FastString ( FastString(..), unpackFS, zEncodeFS ) import Outputable import Panic ( GhcException(..) ) @@ -256,12 +256,12 @@ linkFail who what -- HACKS!!! ToDo: cleaner nameToCLabel :: Name -> String{-suffix-} -> String nameToCLabel n suffix - = moduleString (nameModule n) - ++ '_':occNameString (nameOccName n) ++ '_':suffix + = unpackFS (zEncodeFS (moduleFS (nameModule n))) + ++ '_': unpackFS (zEncodeFS (occNameFS (nameOccName n))) ++ '_':suffix primopToCLabel :: PrimOp -> String{-suffix-} -> String primopToCLabel primop suffix - = let str = "GHCziPrimopWrappers_" ++ occNameString (primOpOcc primop) ++ '_':suffix + = let str = "GHCziPrimopWrappers_" ++ unpackFS (zEncodeFS (occNameFS (primOpOcc primop))) ++ '_':suffix in --trace ("primopToCLabel: " ++ str) str \end{code} diff --git a/ghc/compiler/ghci/InteractiveUI.hs b/ghc/compiler/ghci/InteractiveUI.hs index 0bf37dc400..8fee9ba19f 100644 --- a/ghc/compiler/ghci/InteractiveUI.hs +++ b/ghc/compiler/ghci/InteractiveUI.hs @@ -25,10 +25,10 @@ import PprTyThing import Outputable -- for createtags (should these come via GHC?) -import Module( moduleUserString ) -import Name( nameSrcLoc, nameModule, nameOccName ) -import OccName( pprOccName ) -import SrcLoc( isGoodSrcLoc, srcLocFile, srcLocLine, srcLocCol ) +import Module ( moduleString ) +import Name ( nameSrcLoc, nameModule, nameOccName ) +import OccName ( pprOccName ) +import SrcLoc ( isGoodSrcLoc, srcLocFile, srcLocLine, srcLocCol ) -- Other random utilities import Digraph ( flattenSCCs ) @@ -813,7 +813,7 @@ createTagsFile session tagskind tagFile = do is_interpreted <- GHC.moduleIsInterpreted session m -- should we just skip these? when (not is_interpreted) $ - throwDyn (CmdLineError ("module '" ++ moduleUserString m ++ "' is not interpreted")) + throwDyn (CmdLineError ("module '" ++ moduleString m ++ "' is not interpreted")) mbModInfo <- GHC.getModuleInfo session m let unqual diff --git a/ghc/compiler/hsSyn/Convert.lhs b/ghc/compiler/hsSyn/Convert.lhs index 96623bbd5c..9dddd29c21 100644 --- a/ghc/compiler/hsSyn/Convert.lhs +++ b/ghc/compiler/hsSyn/Convert.lhs @@ -20,7 +20,8 @@ import qualified Name ( Name, mkInternalName, getName ) import Module ( Module, mkModule ) import RdrHsSyn ( mkClassDecl, mkTyData ) import qualified OccName -import OccName ( startsVarId, startsVarSym, startsConId, startsConSym ) +import OccName ( startsVarId, startsVarSym, startsConId, startsConSym, + pprNameSpace ) import SrcLoc ( Located(..), SrcSpan ) import Type ( Type ) import TysWiredIn ( unitTyCon, tupleTyCon, tupleCon, trueDataCon, nilDataCon, consDataCon ) @@ -550,7 +551,7 @@ okOcc ns str@(c:_) badOcc :: OccName.NameSpace -> String -> SDoc badOcc ctxt_ns occ - = ptext SLIT("Illegal") <+> text (OccName.nameSpaceString ctxt_ns) + = ptext SLIT("Illegal") <+> pprNameSpace ctxt_ns <+> ptext SLIT("name:") <+> quotes (text occ) thRdrName :: OccName.NameSpace -> String -> TH.NameFlavour -> RdrName @@ -605,7 +606,7 @@ mk_uniq_occ ns occ uniq -- The packing and unpacking is rather turgid :-( mk_occ :: OccName.NameSpace -> String -> OccName.OccName -mk_occ ns occ = OccName.mkOccFS ns (mkFastString occ) +mk_occ ns occ = OccName.mkOccNameFS ns (mkFastString occ) mk_ghc_ns :: TH.NameSpace -> OccName.NameSpace mk_ghc_ns TH.DataName = OccName.dataName diff --git a/ghc/compiler/hsSyn/HsDecls.lhs b/ghc/compiler/hsSyn/HsDecls.lhs index c977496aa9..5253d11482 100644 --- a/ghc/compiler/hsSyn/HsDecls.lhs +++ b/ghc/compiler/hsSyn/HsDecls.lhs @@ -721,8 +721,8 @@ instance Outputable ForeignImport where ptext SLIT("dynamic") pprCEntity _ _ (CWrapper) = ptext SLIT("wrapper") -- - pprLib lib | nullFastString lib = empty - | otherwise = char '[' <> ppr lib <> char ']' + pprLib lib | nullFS lib = empty + | otherwise = char '[' <> ppr lib <> char ']' instance Outputable ForeignExport where ppr (CExport (CExportStatic lbl cconv)) = diff --git a/ghc/compiler/hsSyn/HsUtils.lhs b/ghc/compiler/hsSyn/HsUtils.lhs index 612e57a931..f8efa6cfb9 100644 --- a/ghc/compiler/hsSyn/HsUtils.lhs +++ b/ghc/compiler/hsSyn/HsUtils.lhs @@ -27,7 +27,7 @@ import RdrName ( RdrName, getRdrName, mkRdrUnqual ) import Var ( Id ) import Type ( Type ) import DataCon ( DataCon, dataConWrapId, dataConSourceArity ) -import OccName ( mkVarOcc ) +import OccName ( mkVarOccFS ) import Name ( Name ) import BasicTypes ( RecFlag(..) ) import SrcLoc @@ -136,7 +136,7 @@ mkHsOpApp e1 op e2 = OpApp e1 (noLoc (HsVar op)) (error "mkOpApp:fixity") e2 mkHsSplice e = HsSplice unqualSplice e -unqualSplice = mkRdrUnqual (mkVarOcc FSLIT("splice")) +unqualSplice = mkRdrUnqual (mkVarOccFS FSLIT("splice")) -- A name (uniquified later) to -- identify the splice diff --git a/ghc/compiler/iface/LoadIface.lhs b/ghc/compiler/iface/LoadIface.lhs index 356cf224bd..8c496f76ef 100644 --- a/ghc/compiler/iface/LoadIface.lhs +++ b/ghc/compiler/iface/LoadIface.lhs @@ -45,7 +45,7 @@ import NameEnv import MkId ( seqId ) import Module ( Module, ModLocation(ml_hi_file), emptyModuleEnv, addBootSuffix_maybe, - extendModuleEnv, lookupModuleEnv, moduleUserString + extendModuleEnv, lookupModuleEnv, moduleString ) import OccName ( OccName, mkOccEnv, lookupOccEnv, mkClassTyConOcc, mkClassDataConOcc, mkSuperDictSelOcc, mkDataConWrapperOcc, mkDataConWorkerOcc ) @@ -312,7 +312,7 @@ loadDecl ignore_prags mod (_version, decl) -- imported name, to fix the module correctly in the cache mk_new_bndr mod mb_parent occ = newGlobalBinder mod occ mb_parent - (importedSrcLoc (moduleUserString mod)) + (importedSrcLoc (moduleString mod)) doc = ptext SLIT("Declaration for") <+> ppr (ifName decl) diff --git a/ghc/compiler/iface/MkIface.lhs b/ghc/compiler/iface/MkIface.lhs index 5be56bfa25..2f15ee3773 100644 --- a/ghc/compiler/iface/MkIface.lhs +++ b/ghc/compiler/iface/MkIface.lhs @@ -214,7 +214,7 @@ import OccName ( OccName, OccEnv, mkOccEnv, lookupOccEnv, emptyOccEnv, isEmptyOccSet, intersectOccSet, intersectsOccSet, occNameFS, isTcOcc ) import Module ( Module, moduleFS, - ModLocation(..), mkSysModuleFS, moduleUserString, + ModLocation(..), mkModuleFS, moduleString, ModuleEnv, emptyModuleEnv, lookupModuleEnv, extendModuleEnv_C ) @@ -726,7 +726,7 @@ mkIfaceExports :: NameSet -> [(Module, [GenAvailInfo OccName])] -- Group by module and sort by occurrence -- This keeps the list in canonical order mkIfaceExports exports - = [ (mkSysModuleFS fs, eltsFM avails) + = [ (mkModuleFS fs, eltsFM avails) | (fs, avails) <- fmToList groupFM ] where @@ -768,7 +768,7 @@ checkOldIface :: HscEnv checkOldIface hsc_env mod_summary source_unchanged maybe_iface = do { showPass (hsc_dflags hsc_env) - ("Checking old interface for " ++ moduleUserString (ms_mod mod_summary)) ; + ("Checking old interface for " ++ moduleString (ms_mod mod_summary)) ; ; initIfaceCheck hsc_env $ check_old_iface mod_summary source_unchanged maybe_iface diff --git a/ghc/compiler/main/DriverMkDepend.hs b/ghc/compiler/main/DriverMkDepend.hs index fe2d8f3785..80d906c4a7 100644 --- a/ghc/compiler/main/DriverMkDepend.hs +++ b/ghc/compiler/main/DriverMkDepend.hs @@ -20,7 +20,7 @@ import HscTypes ( HscEnv, IsBootInterface, msObjFilePath, msHsFilePath ) import Packages ( PackageIdH(..) ) import SysTools ( newTempName ) import qualified SysTools -import Module ( Module, ModLocation(..), mkModule, moduleUserString, +import Module ( Module, ModLocation(..), mkModule, addBootSuffix_maybe ) import Digraph ( SCC(..) ) import Finder ( findModule, FindResult(..) ) diff --git a/ghc/compiler/main/DriverPipeline.hs b/ghc/compiler/main/DriverPipeline.hs index 85099e89d5..171cecf2a6 100644 --- a/ghc/compiler/main/DriverPipeline.hs +++ b/ghc/compiler/main/DriverPipeline.hs @@ -1396,9 +1396,9 @@ getOptionsFromSource file | otherwise -> return [] getOptionsFromStringBuffer :: StringBuffer -> FilePath -> [(Int,String)] -getOptionsFromStringBuffer buffer@(StringBuffer _ len# _) fn = +getOptionsFromStringBuffer buffer@(StringBuffer _ len _) fn = let - ls = lines (lexemeToString buffer (I# len#)) -- lazy, so it's ok + ls = lines (lexemeToString buffer len) -- lazy, so it's ok in look 1 ls where diff --git a/ghc/compiler/main/Finder.lhs b/ghc/compiler/main/Finder.lhs index 81dedb83d9..fbde40f6ea 100644 --- a/ghc/compiler/main/Finder.lhs +++ b/ghc/compiler/main/Finder.lhs @@ -267,7 +267,7 @@ searchPathExts paths mod exts return result where - basename = dots_to_slashes (moduleUserString mod) + basename = dots_to_slashes (moduleString mod) to_search :: [(FilePath, IO FinderCacheEntry)] to_search = [ (file, fn path basename) @@ -347,7 +347,7 @@ mkHomeModLocation2 :: DynFlags -> String -- Suffix -> IO ModLocation mkHomeModLocation2 dflags mod src_basename ext = do - let mod_basename = dots_to_slashes (moduleUserString mod) + let mod_basename = dots_to_slashes (moduleString mod) obj_fn <- mkObjPath dflags src_basename mod_basename hi_fn <- mkHiPath dflags src_basename mod_basename @@ -420,7 +420,7 @@ mkStubPaths dflags mod location = let stubdir = stubDir dflags - mod_basename = dots_to_slashes (moduleUserString mod) + mod_basename = dots_to_slashes (moduleString mod) src_basename = basenameOf (expectJust "mkStubPaths" (ml_hs_file location)) diff --git a/ghc/compiler/main/GHC.hs b/ghc/compiler/main/GHC.hs index 77cd9d45b0..7e0ec2ffed 100644 --- a/ghc/compiler/main/GHC.hs +++ b/ghc/compiler/main/GHC.hs @@ -628,7 +628,7 @@ load2 s@(Session ref) how_much mod_graph = do when (ghci_mode == BatchCompile && isJust ofile && not do_linking) $ debugTraceMsg dflags 1 (text ("Warning: output was redirected with -o, " ++ "but no output will be generated\n" ++ - "because there is no " ++ moduleUserString main_mod ++ " module.")) + "because there is no " ++ moduleString main_mod ++ " module.")) -- link everything together linkresult <- link ghci_mode dflags do_linking (hsc_HPT hsc_env1) diff --git a/ghc/compiler/main/HscTypes.lhs b/ghc/compiler/main/HscTypes.lhs index 20e84ab9f9..48041c055e 100644 --- a/ghc/compiler/main/HscTypes.lhs +++ b/ghc/compiler/main/HscTypes.lhs @@ -996,7 +996,7 @@ showModMsg use_object mod_summary char ')']) where mod = ms_mod mod_summary - mod_str = moduleUserString mod ++ hscSourceString (ms_hsc_src mod_summary) + mod_str = moduleString mod ++ hscSourceString (ms_hsc_src mod_summary) \end{code} diff --git a/ghc/compiler/nativeGen/PprMach.hs b/ghc/compiler/nativeGen/PprMach.hs index 69d6573049..4392ae737b 100644 --- a/ghc/compiler/nativeGen/PprMach.hs +++ b/ghc/compiler/nativeGen/PprMach.hs @@ -697,17 +697,11 @@ pprLabel :: CLabel -> Doc pprLabel lbl = pprGloblDecl lbl $$ (pprCLabel_asm lbl <> char ':') --- Assume we want to backslash-convert the string pprASCII str - = vcat (map do1 (str ++ [chr 0])) + = vcat (map do1 str) $$ do1 0 where - do1 :: Char -> Doc - do1 c = ptext SLIT("\t.byte\t0x") <> hshow (ord c) - - hshow :: Int -> Doc - hshow n | n >= 0 && n <= 255 - = char (tab !! (n `div` 16)) <> char (tab !! (n `mod` 16)) - tab = "0123456789ABCDEF" + do1 :: Word8 -> Doc + do1 w = ptext SLIT("\t.byte\t") <> int (fromIntegral w) pprAlign bytes = IF_ARCH_alpha(ptextSLIT(".align ") <> int pow2, diff --git a/ghc/compiler/ndpFlatten/FlattenMonad.hs b/ghc/compiler/ndpFlatten/FlattenMonad.hs index 43e804ccee..45405088fc 100644 --- a/ghc/compiler/ndpFlatten/FlattenMonad.hs +++ b/ghc/compiler/ndpFlatten/FlattenMonad.hs @@ -67,7 +67,6 @@ import Monad (mplus) import Panic (panic) import Outputable (Outputable(ppr), pprPanic) import UniqSupply (UniqSupply, splitUniqSupply, uniqFromSupply) -import OccName (UserFS) import Var (Var, idType) import Id (Id, mkSysLocal) import Name (Name) @@ -86,6 +85,7 @@ import PrimOp ( PrimOp(..) ) import PrelInfo ( primOpId ) import CoreSyn (Expr(..), Bind(..), CoreBndr, CoreExpr, CoreBind, mkApps) import CoreUtils (exprType) +import FastString (FastString) -- friends import NDPCoreUtils (parrElemTy) @@ -176,7 +176,7 @@ runFlatten hsc_env eps us m -- generate a new local variable whose name is based on the given lexeme and -- whose type is as specified in the second argument (EXPORTED) -- -newVar :: UserFS -> Type -> Flatten Var +newVar :: FastString -> Type -> Flatten Var newVar lexeme ty = Flatten $ \state -> let (us1, us2) = splitUniqSupply (us state) @@ -187,7 +187,7 @@ newVar lexeme ty = Flatten $ \state -> -- generate a non-recursive binding using a new binder whose name is derived -- from the given lexeme (EXPORTED) -- -mkBind :: UserFS -> CoreExpr -> Flatten (CoreBndr, CoreBind) +mkBind :: FastString -> CoreExpr -> Flatten (CoreBndr, CoreBind) mkBind lexeme e = do v <- newVar lexeme (exprType e) diff --git a/ghc/compiler/parser/Ctype.lhs b/ghc/compiler/parser/Ctype.lhs index dfdb94a0c0..dbe4e9f1b0 100644 --- a/ghc/compiler/parser/Ctype.lhs +++ b/ghc/compiler/parser/Ctype.lhs @@ -9,6 +9,7 @@ module Ctype , is_lower -- Char# -> Bool , is_upper -- Char# -> Bool , is_digit -- Char# -> Bool + , is_alphanum -- Char# -> Bool , is_hexdigit, is_octdigit , hexDigit, octDecDigit @@ -50,6 +51,7 @@ is_space = is_ctype cSpace is_lower = is_ctype cLower is_upper = is_ctype cUpper is_digit = is_ctype cDigit +is_alphanum = is_ctype (cLower+cUpper+cDigit) \end{code} Utils @@ -241,99 +243,99 @@ charType c = case c of '\158' -> 0 -- \236 '\159' -> 0 -- \237 '\160' -> cSpace -- - '\161' -> cAny + cSymbol -- ¡ - '\162' -> cAny + cSymbol -- ¢ - '\163' -> cAny + cSymbol -- £ - '\164' -> cAny + cSymbol -- ¤ - '\165' -> cAny + cSymbol -- ¥ - '\166' -> cAny + cSymbol -- ¦ - '\167' -> cAny + cSymbol -- § - '\168' -> cAny + cSymbol -- ¨ - '\169' -> cAny + cSymbol -- © - '\170' -> cAny + cSymbol -- ª - '\171' -> cAny + cSymbol -- « - '\172' -> cAny + cSymbol -- ¬ - '\173' -> cAny + cSymbol -- - '\174' -> cAny + cSymbol -- ® - '\175' -> cAny + cSymbol -- ¯ - '\176' -> cAny + cSymbol -- ° - '\177' -> cAny + cSymbol -- ± - '\178' -> cAny + cSymbol -- ² - '\179' -> cAny + cSymbol -- ³ - '\180' -> cAny + cSymbol -- ´ - '\181' -> cAny + cSymbol -- µ - '\182' -> cAny + cSymbol -- ¶ - '\183' -> cAny + cSymbol -- · - '\184' -> cAny + cSymbol -- ¸ - '\185' -> cAny + cSymbol -- ¹ - '\186' -> cAny + cSymbol -- º - '\187' -> cAny + cSymbol -- » - '\188' -> cAny + cSymbol -- ¼ - '\189' -> cAny + cSymbol -- ½ - '\190' -> cAny + cSymbol -- ¾ - '\191' -> cAny + cSymbol -- ¿ - '\192' -> cAny + cIdent + cUpper -- À - '\193' -> cAny + cIdent + cUpper -- Á - '\194' -> cAny + cIdent + cUpper --  - '\195' -> cAny + cIdent + cUpper -- à - '\196' -> cAny + cIdent + cUpper -- Ä - '\197' -> cAny + cIdent + cUpper -- Å - '\198' -> cAny + cIdent + cUpper -- Æ - '\199' -> cAny + cIdent + cUpper -- Ç - '\200' -> cAny + cIdent + cUpper -- È - '\201' -> cAny + cIdent + cUpper -- É - '\202' -> cAny + cIdent + cUpper -- Ê - '\203' -> cAny + cIdent + cUpper -- Ë - '\204' -> cAny + cIdent + cUpper -- Ì - '\205' -> cAny + cIdent + cUpper -- Í - '\206' -> cAny + cIdent + cUpper -- Î - '\207' -> cAny + cIdent + cUpper -- Ï - '\208' -> cAny + cIdent + cUpper -- Ð - '\209' -> cAny + cIdent + cUpper -- Ñ - '\210' -> cAny + cIdent + cUpper -- Ò - '\211' -> cAny + cIdent + cUpper -- Ó - '\212' -> cAny + cIdent + cUpper -- Ô - '\213' -> cAny + cIdent + cUpper -- Õ - '\214' -> cAny + cIdent + cUpper -- Ö - '\215' -> cAny + cSymbol + cLower -- × - '\216' -> cAny + cIdent + cUpper -- Ø - '\217' -> cAny + cIdent + cUpper -- Ù - '\218' -> cAny + cIdent + cUpper -- Ú - '\219' -> cAny + cIdent + cUpper -- Û - '\220' -> cAny + cIdent + cUpper -- Ü - '\221' -> cAny + cIdent + cUpper -- Ý - '\222' -> cAny + cIdent + cUpper -- Þ - '\223' -> cAny + cIdent -- ß - '\224' -> cAny + cIdent + cLower -- à - '\225' -> cAny + cIdent + cLower -- á - '\226' -> cAny + cIdent + cLower -- â - '\227' -> cAny + cIdent + cLower -- ã - '\228' -> cAny + cIdent + cLower -- ä - '\229' -> cAny + cIdent + cLower -- å - '\230' -> cAny + cIdent + cLower -- æ - '\231' -> cAny + cIdent + cLower -- ç - '\232' -> cAny + cIdent + cLower -- è - '\233' -> cAny + cIdent + cLower -- é - '\234' -> cAny + cIdent + cLower -- ê - '\235' -> cAny + cIdent + cLower -- ë - '\236' -> cAny + cIdent + cLower -- ì - '\237' -> cAny + cIdent + cLower -- í - '\238' -> cAny + cIdent + cLower -- î - '\239' -> cAny + cIdent + cLower -- ï - '\240' -> cAny + cIdent + cLower -- ð - '\241' -> cAny + cIdent + cLower -- ñ - '\242' -> cAny + cIdent + cLower -- ò - '\243' -> cAny + cIdent + cLower -- ó - '\244' -> cAny + cIdent + cLower -- ô - '\245' -> cAny + cIdent + cLower -- õ - '\246' -> cAny + cIdent + cLower -- ö - '\247' -> cAny + cSymbol -- ÷ - '\248' -> cAny + cIdent -- ø - '\249' -> cAny + cIdent + cLower -- ù - '\250' -> cAny + cIdent + cLower -- ú - '\251' -> cAny + cIdent + cLower -- û - '\252' -> cAny + cIdent + cLower -- ü - '\253' -> cAny + cIdent + cLower -- ý - '\254' -> cAny + cIdent + cLower -- þ - '\255' -> cAny + cIdent + cLower -- ÿ + '\161' -> cAny + cSymbol -- ¡ + '\162' -> cAny + cSymbol -- ¢ + '\163' -> cAny + cSymbol -- £ + '\164' -> cAny + cSymbol -- ¤ + '\165' -> cAny + cSymbol -- Â¥ + '\166' -> cAny + cSymbol -- ¦ + '\167' -> cAny + cSymbol -- § + '\168' -> cAny + cSymbol -- ¨ + '\169' -> cAny + cSymbol -- © + '\170' -> cAny + cSymbol -- ª + '\171' -> cAny + cSymbol -- « + '\172' -> cAny + cSymbol -- ¬ + '\173' -> cAny + cSymbol --  + '\174' -> cAny + cSymbol -- ® + '\175' -> cAny + cSymbol -- ¯ + '\176' -> cAny + cSymbol -- ° + '\177' -> cAny + cSymbol -- ± + '\178' -> cAny + cSymbol -- ² + '\179' -> cAny + cSymbol -- ³ + '\180' -> cAny + cSymbol -- ´ + '\181' -> cAny + cSymbol -- µ + '\182' -> cAny + cSymbol -- ¶ + '\183' -> cAny + cSymbol -- · + '\184' -> cAny + cSymbol -- ¸ + '\185' -> cAny + cSymbol -- ¹ + '\186' -> cAny + cSymbol -- º + '\187' -> cAny + cSymbol -- » + '\188' -> cAny + cSymbol -- ¼ + '\189' -> cAny + cSymbol -- ½ + '\190' -> cAny + cSymbol -- ¾ + '\191' -> cAny + cSymbol -- ¿ + '\192' -> cAny + cIdent + cUpper -- À + '\193' -> cAny + cIdent + cUpper -- à + '\194' -> cAny + cIdent + cUpper --  + '\195' -> cAny + cIdent + cUpper -- à + '\196' -> cAny + cIdent + cUpper -- Ä + '\197' -> cAny + cIdent + cUpper -- Ã… + '\198' -> cAny + cIdent + cUpper -- Æ + '\199' -> cAny + cIdent + cUpper -- Ç + '\200' -> cAny + cIdent + cUpper -- È + '\201' -> cAny + cIdent + cUpper -- É + '\202' -> cAny + cIdent + cUpper -- Ê + '\203' -> cAny + cIdent + cUpper -- Ë + '\204' -> cAny + cIdent + cUpper -- ÃŒ + '\205' -> cAny + cIdent + cUpper -- à + '\206' -> cAny + cIdent + cUpper -- ÃŽ + '\207' -> cAny + cIdent + cUpper -- à + '\208' -> cAny + cIdent + cUpper -- à + '\209' -> cAny + cIdent + cUpper -- Ñ + '\210' -> cAny + cIdent + cUpper -- Ã’ + '\211' -> cAny + cIdent + cUpper -- Ó + '\212' -> cAny + cIdent + cUpper -- Ô + '\213' -> cAny + cIdent + cUpper -- Õ + '\214' -> cAny + cIdent + cUpper -- Ö + '\215' -> cAny + cSymbol + cLower -- × + '\216' -> cAny + cIdent + cUpper -- Ø + '\217' -> cAny + cIdent + cUpper -- Ù + '\218' -> cAny + cIdent + cUpper -- Ú + '\219' -> cAny + cIdent + cUpper -- Û + '\220' -> cAny + cIdent + cUpper -- Ü + '\221' -> cAny + cIdent + cUpper -- à + '\222' -> cAny + cIdent + cUpper -- Þ + '\223' -> cAny + cIdent -- ß + '\224' -> cAny + cIdent + cLower -- à + '\225' -> cAny + cIdent + cLower -- á + '\226' -> cAny + cIdent + cLower -- â + '\227' -> cAny + cIdent + cLower -- ã + '\228' -> cAny + cIdent + cLower -- ä + '\229' -> cAny + cIdent + cLower -- Ã¥ + '\230' -> cAny + cIdent + cLower -- æ + '\231' -> cAny + cIdent + cLower -- ç + '\232' -> cAny + cIdent + cLower -- è + '\233' -> cAny + cIdent + cLower -- é + '\234' -> cAny + cIdent + cLower -- ê + '\235' -> cAny + cIdent + cLower -- ë + '\236' -> cAny + cIdent + cLower -- ì + '\237' -> cAny + cIdent + cLower -- à + '\238' -> cAny + cIdent + cLower -- î + '\239' -> cAny + cIdent + cLower -- ï + '\240' -> cAny + cIdent + cLower -- ð + '\241' -> cAny + cIdent + cLower -- ñ + '\242' -> cAny + cIdent + cLower -- ò + '\243' -> cAny + cIdent + cLower -- ó + '\244' -> cAny + cIdent + cLower -- ô + '\245' -> cAny + cIdent + cLower -- õ + '\246' -> cAny + cIdent + cLower -- ö + '\247' -> cAny + cSymbol -- ÷ + '\248' -> cAny + cIdent -- ø + '\249' -> cAny + cIdent + cLower -- ù + '\250' -> cAny + cIdent + cLower -- ú + '\251' -> cAny + cIdent + cLower -- û + '\252' -> cAny + cIdent + cLower -- ü + '\253' -> cAny + cIdent + cLower -- ý + '\254' -> cAny + cIdent + cLower -- þ + '\255' -> cAny + cIdent + cLower -- ÿ \end{code} diff --git a/ghc/compiler/parser/Lexer.x b/ghc/compiler/parser/Lexer.x index eb00e90613..38908a0a85 100644 --- a/ghc/compiler/parser/Lexer.x +++ b/ghc/compiler/parser/Lexer.x @@ -1,5 +1,5 @@ ----------------------------------------------------------------------------- --- (c) The University of Glasgow, 2003 +-- (c) The University of Glasgow, 2006 -- -- GHC's lexer. -- @@ -43,35 +43,38 @@ import Ctype import Util ( maybePrefixMatch, readRational ) import DATA_BITS -import Char +import Data.Char import Ratio --import TRACE } -$whitechar = [\ \t\n\r\f\v\xa0] +$unispace = \x05 +$whitechar = [\ \t\n\r\f\v\xa0 $unispace] $white_no_nl = $whitechar # \n $ascdigit = 0-9 -$unidigit = \x01 +$unidigit = \x03 +$decdigit = $ascdigit -- for now, should really be $digit (ToDo) $digit = [$ascdigit $unidigit] $special = [\(\)\,\;\[\]\`\{\}] $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~] -$unisymbol = \x02 +$unisymbol = \x04 $symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\'] -$unilarge = \x03 +$unilarge = \x01 $asclarge = [A-Z \xc0-\xd6 \xd8-\xde] $large = [$asclarge $unilarge] -$unismall = \x04 +$unismall = \x02 $ascsmall = [a-z \xdf-\xf6 \xf8-\xff] $small = [$ascsmall $unismall \_] -$graphic = [$small $large $symbol $digit $special \:\"\'] +$unigraphic = \x06 +$graphic = [$small $large $symbol $digit $special $unigraphic \:\"\'] $octit = 0-7 -$hexit = [$digit A-F a-f] +$hexit = [$decdigit A-F a-f] $symchar = [$symbol \:] $nl = [\n\r] $idchar = [$small $large $digit \'] @@ -82,7 +85,7 @@ $idchar = [$small $large $digit \'] @varsym = $symbol $symchar* @consym = \: $symchar* -@decimal = $digit+ +@decimal = $decdigit+ @octal = $octit+ @hexadecimal = $hexit+ @exponent = [eE] [\-\+]? @decimal @@ -154,13 +157,13 @@ $white_no_nl+ ; -- single-line line pragmas, of the form -- # <line> "<file>" <extra-stuff> \n -<line_prag1> $digit+ { setLine line_prag1a } +<line_prag1> $decdigit+ { setLine line_prag1a } <line_prag1a> \" [$graphic \ ]* \" { setFile line_prag1b } <line_prag1b> .* { pop } -- Haskell-style line pragmas, of the form -- {-# LINE <line> "<file>" #-} -<line_prag2> $digit+ { setLine line_prag2a } +<line_prag2> $decdigit+ { setLine line_prag2a } <line_prag2a> \" [$graphic \ ]* \" { setFile line_prag2b } <line_prag2b> "#-}"|"-}" { pop } -- NOTE: accept -} at the end of a LINE pragma, for compatibility @@ -554,6 +557,13 @@ reservedSymsFM = listToUFM $ ,(">-", ITrarrowtail, bit arrowsBit) ,("-<<", ITLarrowtail, bit arrowsBit) ,(">>-", ITRarrowtail, bit arrowsBit) + +#if __GLASGOW_HASKELL__ >= 605 + ,("∀", ITforall, bit tvBit) + ,("→", ITrarrow, 0) + ,("â†", ITlarrow, 0) + ,("⋯", ITdotdot, 0) +#endif ] -- ----------------------------------------------------------------------------- @@ -670,23 +680,29 @@ splitQualName :: StringBuffer -> Int -> (FastString,FastString) -- takes a StringBuffer and a length, and returns the module name -- and identifier parts of a qualified name. Splits at the *last* dot, -- because of hierarchical module names. -splitQualName orig_buf len = split orig_buf 0 0 +splitQualName orig_buf len = split orig_buf orig_buf where - split buf dot_off n - | n == len = done dot_off - | lookAhead buf n == '.' = split2 buf n (n+1) - | otherwise = split buf dot_off (n+1) + split buf dot_buf + | orig_buf `byteDiff` buf >= len = done dot_buf + | c == '.' = found_dot buf' + | otherwise = split buf' dot_buf + where + (c,buf') = nextChar buf -- careful, we might get names like M.... -- so, if the character after the dot is not upper-case, this is -- the end of the qualifier part. - split2 buf dot_off n - | isUpper (lookAhead buf n) = split buf dot_off (n+1) - | otherwise = done dot_off - - done dot_off = - (lexemeToFastString orig_buf dot_off, - lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1)) + found_dot buf -- buf points after the '.' + | isUpper c = split buf' buf + | otherwise = done buf + where + (c,buf') = nextChar buf + + done dot_buf = + (lexemeToFastString orig_buf (qual_size - 1), + lexemeToFastString dot_buf (len - qual_size)) + where + qual_size = orig_buf `byteDiff` dot_buf varid span buf len = case lookupUFM reservedWordsFM fs of @@ -726,19 +742,19 @@ tok_decimal span buf len = return (L span (ITinteger $! parseInteger buf len 10 octDecDigit)) tok_octal span buf len - = return (L span (ITinteger $! parseInteger (stepOnBy 2 buf) (len-2) 8 octDecDigit)) + = return (L span (ITinteger $! parseInteger (offsetBytes 2 buf) (len-2) 8 octDecDigit)) tok_hexadecimal span buf len - = return (L span (ITinteger $! parseInteger (stepOnBy 2 buf) (len-2) 16 hexDigit)) + = return (L span (ITinteger $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit)) prim_decimal span buf len = return (L span (ITprimint $! parseInteger buf (len-1) 10 octDecDigit)) prim_octal span buf len - = return (L span (ITprimint $! parseInteger (stepOnBy 2 buf) (len-3) 8 octDecDigit)) + = return (L span (ITprimint $! parseInteger (offsetBytes 2 buf) (len-3) 8 octDecDigit)) prim_hexadecimal span buf len - = return (L span (ITprimint $! parseInteger (stepOnBy 2 buf) (len-3) 16 hexDigit)) + = return (L span (ITprimint $! parseInteger (offsetBytes 2 buf) (len-3) 16 hexDigit)) tok_float str = ITrational $! readRational str prim_float str = ITprimfloat $! readRational str @@ -839,7 +855,7 @@ lex_string_tok span buf len = do lex_string :: String -> P Token lex_string s = do i <- getInput - case alexGetChar i of + case alexGetChar' i of Nothing -> lit_error Just ('"',i) -> do @@ -848,14 +864,15 @@ lex_string s = do if glaexts then do i <- getInput - case alexGetChar i of + case alexGetChar' i of Just ('#',i) -> do setInput i if any (> '\xFF') s then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'" - else let s' = mkFastStringNarrow (reverse s) in - -- always a narrow string/byte array + else let s' = mkZFastString (reverse s) in return (ITprimstring s') + -- mkZFastString is a hack to avoid encoding the + -- string in UTF-8. We just want the exact bytes. _other -> return (ITstring (mkFastString (reverse s))) else @@ -866,11 +883,11 @@ lex_string s = do setInput i; lex_string s | Just (c,i) <- next, is_space c -> do setInput i; lex_stringgap s - where next = alexGetChar i + where next = alexGetChar' i - Just _ -> do - c <- lex_char - lex_string (c:s) + Just (c, i) -> do + c' <- lex_char c i + lex_string (c':s) lex_stringgap s = do c <- getCharOrFail @@ -890,7 +907,7 @@ lex_char_tok :: Action lex_char_tok span buf len = do -- We've seen ' i1 <- getInput -- Look ahead to first character let loc = srcSpanStart span - case alexGetChar i1 of + case alexGetChar' i1 of Nothing -> lit_error Just ('\'', i2@(AI end2 _ _)) -> do -- We've seen '' @@ -905,14 +922,15 @@ lex_char_tok span buf len = do -- We've seen ' lit_ch <- lex_escape mc <- getCharOrFail -- Trailing quote if mc == '\'' then finish_char_tok loc lit_ch - else lit_error + else do setInput i2; lit_error - Just (c, i2@(AI end2 _ _)) | not (is_any c) -> lit_error - | otherwise -> + Just (c, i2@(AI end2 _ _)) + | not (isAny c) -> lit_error + | otherwise -> -- We've seen 'x, where x is a valid character -- (i.e. not newline etc) but not a quote or backslash - case alexGetChar i2 of -- Look ahead one more character + case alexGetChar' i2 of -- Look ahead one more character Nothing -> lit_error Just ('\'', i3) -> do -- We've seen 'x' setInput i3 @@ -922,7 +940,7 @@ lex_char_tok span buf len = do -- We've seen ' th_exts <- extension thEnabled let (AI end _ _) = i1 if th_exts then return (L (mkSrcSpan loc end) ITvarQuote) - else lit_error + else do setInput i2; lit_error finish_char_tok :: SrcLoc -> Char -> P (Located Token) finish_char_tok loc ch -- We've already seen the closing quote @@ -930,7 +948,7 @@ finish_char_tok loc ch -- We've already seen the closing quote = do glaexts <- extension glaExtsEnabled i@(AI end _ _) <- getInput if glaexts then do - case alexGetChar i of + case alexGetChar' i of Just ('#',i@(AI end _ _)) -> do setInput i return (L (mkSrcSpan loc end) (ITprimchar ch)) @@ -939,14 +957,16 @@ finish_char_tok loc ch -- We've already seen the closing quote else do return (L (mkSrcSpan loc end) (ITchar ch)) -lex_char :: P Char -lex_char = do - mc <- getCharOrFail - case mc of - '\\' -> lex_escape - c | is_any c -> return c +lex_char :: Char -> AlexInput -> P Char +lex_char c inp = do + case c of + '\\' -> do setInput inp; lex_escape + c | isAny c -> do setInput inp; return c _other -> lit_error +isAny c | c > '\xff' = isPrint c + | otherwise = is_any c + lex_escape :: P Char lex_escape = do c <- getCharOrFail @@ -972,11 +992,11 @@ lex_escape = do c1 -> do i <- getInput - case alexGetChar i of + case alexGetChar' i of Nothing -> lit_error Just (c2,i2) -> - case alexGetChar i2 of - Nothing -> lit_error + case alexGetChar' i2 of + Nothing -> do setInput i2; lit_error Just (c3,i3) -> let str = [c1,c2,c3] in case [ (c,rest) | (p,c) <- silly_escape_chars, @@ -991,22 +1011,22 @@ lex_escape = do readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char readNum is_digit base conv = do + i <- getInput c <- getCharOrFail if is_digit c then readNum2 is_digit base conv (conv c) - else lit_error + else do setInput i; lit_error readNum2 is_digit base conv i = do input <- getInput read i input where read i input = do - case alexGetChar input of + case alexGetChar' input of Just (c,input') | is_digit c -> do read (i*base + conv c) input' _other -> do - setInput input if i >= 0 && i <= 0x10FFFF - then return (chr i) + then do setInput input; return (chr i) else lit_error silly_escape_chars = [ @@ -1046,12 +1066,16 @@ silly_escape_chars = [ ("DEL", '\DEL') ] +-- before calling lit_error, ensure that the current input is pointing to +-- the position of the error in the buffer. This is so that we can report +-- a correct location to the user, but also so we can detect UTF-8 decoding +-- errors if they occur. lit_error = lexError "lexical error in string/character literal" getCharOrFail :: P Char getCharOrFail = do i <- getInput - case alexGetChar i of + case alexGetChar' i of Nothing -> lexError "unexpected end-of-file in string/character literal" Just (c,i) -> do setInput i; return c @@ -1134,21 +1158,74 @@ setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } () data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer alexInputPrevChar :: AlexInput -> Char -alexInputPrevChar (AI _ _ s) = prevChar s '\n' +alexInputPrevChar (AI _ _ buf) = prevChar buf '\n' alexGetChar :: AlexInput -> Maybe (Char,AlexInput) alexGetChar (AI loc ofs s) | atEnd s = Nothing - | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` Just (c, (AI loc' ofs' s')) - where c = currentChar s - loc' = advanceSrcLoc loc c - ofs' = advanceOffs c ofs - s' = stepOn s + | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq` + Just (adj_c, (AI loc' ofs' s')) + where (c,s') = nextChar s + loc' = advanceSrcLoc loc c + ofs' = advanceOffs c ofs + + non_graphic = '\x0' + upper = '\x1' + lower = '\x2' + digit = '\x3' + symbol = '\x4' + space = '\x5' + other_graphic = '\x6' + + adj_c +#if __GLASGOW_HASKELL__ < 605 + = c -- no Unicode support +#else + | c <= '\x04' = non_graphic + | c <= '\xff' = c + | otherwise = + case generalCategory c of + UppercaseLetter -> upper + LowercaseLetter -> lower + TitlecaseLetter -> upper + ModifierLetter -> other_graphic + OtherLetter -> other_graphic + NonSpacingMark -> other_graphic + SpacingCombiningMark -> other_graphic + EnclosingMark -> other_graphic + DecimalNumber -> digit + LetterNumber -> other_graphic + OtherNumber -> other_graphic + ConnectorPunctuation -> other_graphic + DashPunctuation -> other_graphic + OpenPunctuation -> other_graphic + ClosePunctuation -> other_graphic + InitialQuote -> other_graphic + FinalQuote -> other_graphic + OtherPunctuation -> other_graphic + MathSymbol -> symbol + CurrencySymbol -> symbol + ModifierSymbol -> symbol + OtherSymbol -> symbol + Space -> space + _other -> non_graphic +#endif + +-- This version does not squash unicode characters, it is used when +-- lexing strings. +alexGetChar' :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar' (AI loc ofs s) + | atEnd s = Nothing + | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` + Just (c, (AI loc' ofs' s')) + where (c,s') = nextChar s + loc' = advanceSrcLoc loc c + ofs' = advanceOffs c ofs - advanceOffs :: Char -> Int -> Int - advanceOffs '\n' offs = 0 - advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8 - advanceOffs _ offs = offs + 1 +advanceOffs :: Char -> Int -> Int +advanceOffs '\n' offs = 0 +advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8 +advanceOffs _ offs = offs + 1 getInput :: P AlexInput getInput = P $ \s@PState{ loc=l, last_offs=o, buffer=b } -> POk s (AI l o b) @@ -1255,7 +1332,7 @@ srcParseErr buf len else hcat [ptext SLIT("parse error on input "), char '`', text token, char '\''] ] - where token = lexemeToString (stepOnBy (-len) buf) len + where token = lexemeToString (offsetBytes (-len) buf) len -- Report a parse failure, giving the span of the previous token as -- the location of the error. This is the entry point for errors @@ -1266,14 +1343,12 @@ srcParseFail = P $ \PState{ buffer = buf, last_len = len, PFailed last_loc (srcParseErr buf len) -- A lexical error is reported at a particular position in the source file, --- not over a token range. TODO: this is slightly wrong, because we record --- the error at the character position following the one which caused the --- error. We should somehow back up by one character. +-- not over a token range. lexError :: String -> P a lexError str = do loc <- getSrcLoc - i@(AI end _ _) <- getInput - failLocMsgP loc end str + i@(AI end _ buf) <- getInput + reportLexError loc end buf False str -- ----------------------------------------------------------------------------- -- This is the top-level function: called from the parser each time a @@ -1282,7 +1357,7 @@ lexError str = do lexer :: (Located Token -> P a) -> P a lexer cont = do tok@(L _ tok__) <- lexToken - -- trace ("token: " ++ show tok__) $ do + --trace ("token: " ++ show tok__) $ do cont tok lexToken :: P (Located Token) @@ -1294,13 +1369,24 @@ lexToken = do AlexEOF -> do let span = mkSrcSpan loc1 loc1 setLastToken span 0 return (L span ITeof) - AlexError (AI loc2 _ _) -> do failLocMsgP loc1 loc2 "lexical error" + AlexError (AI loc2 _ buf) -> do + reportLexError loc1 loc2 buf True "lexical error" AlexSkip inp2 _ -> do setInput inp2 lexToken AlexToken inp2@(AI end _ buf2) len t -> do setInput inp2 let span = mkSrcSpan loc1 end - span `seq` setLastToken span len - t span buf len + let bytes = byteDiff buf buf2 + span `seq` setLastToken span bytes + t span buf bytes + +reportLexError loc1 loc2 buf is_prev str = + let + c | is_prev = prevChar buf '\0' + | otherwise = fst (nextChar buf) + in + if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar# + then failLocMsgP loc2 loc2 "UTF-8 decoding error" + else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c) } diff --git a/ghc/compiler/parser/Parser.y.pp b/ghc/compiler/parser/Parser.y.pp index 844cc8670e..b4acb890eb 100644 --- a/ghc/compiler/parser/Parser.y.pp +++ b/ghc/compiler/parser/Parser.y.pp @@ -25,7 +25,7 @@ import Type ( funTyCon ) import ForeignCall ( Safety(..), CExportSpec(..), CLabelString, CCallConv(..), CCallTarget(..), defaultCCallConv ) -import OccName ( UserFS, varName, dataName, tcClsName, tvName ) +import OccName ( varName, dataName, tcClsName, tvName ) import DataCon ( DataCon, dataConName ) import SrcLoc ( Located(..), unLoc, getLoc, noLoc, combineSrcSpans, SrcSpan, combineLocs, srcLocFile, @@ -1469,7 +1469,7 @@ varsym_no_minus :: { Located RdrName } -- varsym not including '-' -- These special_ids are treated as keywords in various places, -- but as ordinary ids elsewhere. 'special_id' collects all these -- except 'unsafe' and 'forall' whose treatment differs depending on context -special_id :: { Located UserFS } +special_id :: { Located FastString } special_id : 'as' { L1 FSLIT("as") } | 'qualified' { L1 FSLIT("qualified") } @@ -1480,7 +1480,7 @@ special_id | 'stdcall' { L1 FSLIT("stdcall") } | 'ccall' { L1 FSLIT("ccall") } -special_sym :: { Located UserFS } +special_sym :: { Located FastString } special_sym : '!' { L1 FSLIT("!") } | '.' { L1 FSLIT(".") } | '*' { L1 FSLIT("*") } diff --git a/ghc/compiler/parser/ParserCore.y b/ghc/compiler/parser/ParserCore.y index d8fceebf6a..3210583f96 100644 --- a/ghc/compiler/parser/ParserCore.y +++ b/ghc/compiler/parser/ParserCore.y @@ -72,7 +72,7 @@ module :: { HsExtCore RdrName } : '%module' modid tdefs vdefgs { HsExtCore $2 $3 $4 } modid :: { Module } - : CNAME { mkSysModuleFS (mkFastString $1) } + : CNAME { mkModuleFS (mkFastString $1) } ------------------------------------------------------------- -- Type and newtype declarations are in HsSyn syntax @@ -262,25 +262,25 @@ lit :: { Literal } | '(' STRING '::' aty ')' { MachStr (mkFastString $2) } tv_occ :: { OccName } - : NAME { mkSysOcc tvName $1 } + : NAME { mkOccName tvName $1 } var_occ :: { OccName } - : NAME { mkSysOcc varName $1 } + : NAME { mkVarOcc $1 } -- Type constructor q_tc_name :: { IfaceExtName } - : modid '.' CNAME { ExtPkg $1 (mkSysOcc tcName $3) } + : modid '.' CNAME { ExtPkg $1 (mkOccName tcName $3) } -- Data constructor in a pattern or data type declaration; use the dataName, -- because that's what we expect in Core case patterns d_pat_occ :: { OccName } - : CNAME { mkSysOcc dataName $1 } + : CNAME { mkOccName dataName $1 } -- Data constructor occurrence in an expression; -- use the varName because that's the worker Id d_occ :: { OccName } - : CNAME { mkSysOcc varName $1 } + : CNAME { mkVarOcc $1 } { diff --git a/ghc/compiler/parser/RdrHsSyn.lhs b/ghc/compiler/parser/RdrHsSyn.lhs index 2d18d6d5df..6ff15e772b 100644 --- a/ghc/compiler/parser/RdrHsSyn.lhs +++ b/ghc/compiler/parser/RdrHsSyn.lhs @@ -59,7 +59,7 @@ import TysWiredIn ( unitTyCon ) import ForeignCall ( CCallConv, Safety, CCallTarget(..), CExportSpec(..), DNCallSpec(..), DNKind(..), CLabelString ) import OccName ( srcDataName, varName, isDataOcc, isTcOcc, - occNameUserString ) + occNameString ) import SrcLoc import OrdList ( OrdList, fromOL ) import Bag ( Bag, emptyBag, snocBag, consBag, foldrBag ) @@ -800,8 +800,8 @@ mkExport :: CallConv mkExport (CCall cconv) (L loc entity, v, ty) = return $ ForD (ForeignExport v ty (CExport (CExportStatic entity' cconv)) False) where - entity' | nullFastString entity = mkExtName (unLoc v) - | otherwise = entity + entity' | nullFS entity = mkExtName (unLoc v) + | otherwise = entity mkExport DNCall (L loc entity, v, ty) = parseError (getLoc v){-TODO: not quite right-} "Foreign export is not yet supported for .NET" @@ -811,10 +811,9 @@ mkExport DNCall (L loc entity, v, ty) = -- of the Haskell name is then performed, so if you foreign export (++), -- it's external name will be "++". Too bad; it's important because we don't -- want z-encoding (e.g. names with z's in them shouldn't be doubled) --- (This is why we use occNameUserString.) -- mkExtName :: RdrName -> CLabelString -mkExtName rdrNm = mkFastString (occNameUserString (rdrNameOcc rdrNm)) +mkExtName rdrNm = mkFastString (occNameString (rdrNameOcc rdrNm)) \end{code} diff --git a/ghc/compiler/prelude/PrelNames.lhs b/ghc/compiler/prelude/PrelNames.lhs index 0d99121719..eb26d3404a 100644 --- a/ghc/compiler/prelude/PrelNames.lhs +++ b/ghc/compiler/prelude/PrelNames.lhs @@ -50,9 +50,8 @@ module PrelNames ( #include "HsVersions.h" import Module ( Module, mkModule ) -import OccName ( dataName, tcName, clsName, varName, mkOccFS - ) - +import OccName ( dataName, tcName, clsName, varName, mkOccNameFS, + mkVarOccFS ) import RdrName ( RdrName, nameRdrName, mkOrig, rdrNameOcc, mkUnqual ) import Unique ( Unique, Uniquable(..), hasKey, mkPreludeMiscIdUnique, mkPreludeDataConUnique, @@ -75,7 +74,7 @@ import FastString This *local* name is used by the interactive stuff \begin{code} -itName uniq = mkInternalName uniq (mkOccFS varName FSLIT("it")) noSrcLoc +itName uniq = mkInternalName uniq (mkOccNameFS varName FSLIT("it")) noSrcLoc \end{code} \begin{code} @@ -415,10 +414,10 @@ inrDataCon_RDR = dataQual_RDR pREL_BASE FSLIT("Inr") genUnitDataCon_RDR = dataQual_RDR pREL_BASE FSLIT("Unit") ---------------------- -varQual_RDR mod str = mkOrig mod (mkOccFS varName str) -tcQual_RDR mod str = mkOrig mod (mkOccFS tcName str) -clsQual_RDR mod str = mkOrig mod (mkOccFS clsName str) -dataQual_RDR mod str = mkOrig mod (mkOccFS dataName str) +varQual_RDR mod str = mkOrig mod (mkOccNameFS varName str) +tcQual_RDR mod str = mkOrig mod (mkOccNameFS tcName str) +clsQual_RDR mod str = mkOrig mod (mkOccNameFS clsName str) +dataQual_RDR mod str = mkOrig mod (mkOccNameFS dataName str) \end{code} %************************************************************************ @@ -656,17 +655,17 @@ tcQual = mk_known_key_name tcName clsQual = mk_known_key_name clsName mk_known_key_name space mod str uniq - = mkExternalName uniq mod (mkOccFS space str) + = mkExternalName uniq mod (mkOccNameFS space str) Nothing noSrcLoc conName :: Name -> FastString -> Unique -> Name conName tycon occ uniq - = mkExternalName uniq (nameModule tycon) (mkOccFS dataName occ) + = mkExternalName uniq (nameModule tycon) (mkOccNameFS dataName occ) (Just tycon) noSrcLoc methName :: Name -> FastString -> Unique -> Name methName cls occ uniq - = mkExternalName uniq (nameModule cls) (mkOccFS varName occ) + = mkExternalName uniq (nameModule cls) (mkVarOccFS occ) (Just cls) noSrcLoc \end{code} diff --git a/ghc/compiler/prelude/PrelRules.lhs b/ghc/compiler/prelude/PrelRules.lhs index e0b234782e..9cdddc9065 100644 --- a/ghc/compiler/prelude/PrelRules.lhs +++ b/ghc/compiler/prelude/PrelRules.lhs @@ -36,7 +36,7 @@ import TyCon ( tyConDataCons_maybe, isEnumerationTyCon, isNewTyCon ) import DataCon ( dataConTag, dataConTyCon, dataConWorkId, fIRST_TAG ) import CoreUtils ( cheapEqExpr, exprIsConApp_maybe ) import Type ( tyConAppTyCon, coreEqType ) -import OccName ( occNameUserString) +import OccName ( occNameFS ) import PrelNames ( unpackCStringFoldrName, unpackCStringFoldrIdKey, hasKey, eqStringName, unpackCStringIdKey ) import Maybes ( orElse ) @@ -58,7 +58,7 @@ import DATA_WORD ( Word64 ) primOpRules :: PrimOp -> Name -> [CoreRule] primOpRules op op_name = primop_rule op where - rule_name = mkFastString (occNameUserString (primOpOcc op)) + rule_name = occNameFS (primOpOcc op) rule_name_case = rule_name `appendFS` FSLIT("->case") -- A useful shorthand diff --git a/ghc/compiler/prelude/PrimOp.lhs b/ghc/compiler/prelude/PrimOp.lhs index e99eb9d17c..a650352280 100644 --- a/ghc/compiler/prelude/PrimOp.lhs +++ b/ghc/compiler/prelude/PrimOp.lhs @@ -22,7 +22,7 @@ import TysWiredIn import NewDemand import Var ( TyVar ) -import OccName ( OccName, pprOccName, mkVarOcc ) +import OccName ( OccName, pprOccName, mkVarOccFS ) import TyCon ( TyCon, isPrimTyCon, tyConPrimRep, PrimRep(..) ) import Type ( Type, mkForAllTys, mkFunTy, mkFunTys, tyConAppTyCon, typePrimRep ) @@ -113,10 +113,10 @@ data PrimOpInfo [Type] Type -mkDyadic str ty = Dyadic (mkVarOcc str) ty -mkMonadic str ty = Monadic (mkVarOcc str) ty -mkCompare str ty = Compare (mkVarOcc str) ty -mkGenPrimOp str tvs tys ty = GenPrimOp (mkVarOcc str) tvs tys ty +mkDyadic str ty = Dyadic (mkVarOccFS str) ty +mkMonadic str ty = Monadic (mkVarOccFS str) ty +mkCompare str ty = Compare (mkVarOccFS str) ty +mkGenPrimOp str tvs tys ty = GenPrimOp (mkVarOccFS str) tvs tys ty \end{code} %************************************************************************ diff --git a/ghc/compiler/prelude/TysPrim.lhs b/ghc/compiler/prelude/TysPrim.lhs index 7d397d6ed6..2f6168bafb 100644 --- a/ghc/compiler/prelude/TysPrim.lhs +++ b/ghc/compiler/prelude/TysPrim.lhs @@ -46,7 +46,7 @@ module TysPrim( import Var ( TyVar, mkTyVar ) import Name ( Name, BuiltInSyntax(..), mkInternalName, mkWiredInName ) -import OccName ( mkOccFS, tcName, mkTyVarOcc ) +import OccName ( mkOccNameFS, tcName, mkTyVarOcc ) import TyCon ( TyCon, ArgVrcs, mkPrimTyCon, mkLiftedPrimTyCon, PrimRep(..) ) import Type ( mkTyConApp, mkTyConTy, mkTyVarTys, mkTyVarTy, @@ -100,7 +100,7 @@ primTyCons mkPrimTc :: FastString -> Unique -> TyCon -> Name mkPrimTc fs uniq tycon - = mkWiredInName gHC_PRIM (mkOccFS tcName fs) + = mkWiredInName gHC_PRIM (mkOccNameFS tcName fs) uniq Nothing -- No parent object (ATyCon tycon) -- Relevant TyCon diff --git a/ghc/compiler/prelude/TysWiredIn.lhs b/ghc/compiler/prelude/TysWiredIn.lhs index e7dea60711..ceb4df550a 100644 --- a/ghc/compiler/prelude/TysWiredIn.lhs +++ b/ghc/compiler/prelude/TysWiredIn.lhs @@ -62,16 +62,18 @@ import Module ( Module ) import RdrName ( nameRdrName ) import Name ( Name, BuiltInSyntax(..), nameUnique, nameOccName, nameModule, mkWiredInName ) -import OccName ( mkOccFS, tcName, dataName, mkTupleOcc, mkDataConWorkerOcc ) +import OccName ( mkOccNameFS, tcName, dataName, mkTupleOcc, + mkDataConWorkerOcc ) import DataCon ( DataCon, mkDataCon, dataConWorkId, dataConSourceArity ) import Var ( TyVar, tyVarKind ) import TyCon ( TyCon, AlgTyConRhs(DataTyCon), tyConDataCons, - mkTupleTyCon, mkAlgTyCon, tyConName - ) + mkTupleTyCon, mkAlgTyCon, tyConName ) -import BasicTypes ( Arity, RecFlag(..), Boxity(..), isBoxed, StrictnessMark(..) ) +import BasicTypes ( Arity, RecFlag(..), Boxity(..), isBoxed, + StrictnessMark(..) ) -import Type ( Type, mkTyConTy, mkTyConApp, mkTyVarTy, mkTyVarTys, TyThing(..) ) +import Type ( Type, mkTyConTy, mkTyConApp, mkTyVarTy, mkTyVarTys, + TyThing(..) ) import Kind ( mkArrowKinds, liftedTypeKind, ubxTupleKind ) import Unique ( incrUnique, mkTupleTyConUnique, mkTupleDataConUnique, mkPArrDataConUnique ) @@ -114,14 +116,14 @@ wiredInTyCons = [ unitTyCon -- Not treated like other tuples, because \begin{code} mkWiredInTyConName :: BuiltInSyntax -> Module -> FastString -> Unique -> TyCon -> Name mkWiredInTyConName built_in mod fs uniq tycon - = mkWiredInName mod (mkOccFS tcName fs) uniq + = mkWiredInName mod (mkOccNameFS tcName fs) uniq Nothing -- No parent object (ATyCon tycon) -- Relevant TyCon built_in mkWiredInDataConName :: BuiltInSyntax -> Module -> FastString -> Unique -> DataCon -> Name -> Name mkWiredInDataConName built_in mod fs uniq datacon parent - = mkWiredInName mod (mkOccFS dataName fs) uniq + = mkWiredInName mod (mkOccNameFS dataName fs) uniq (Just parent) -- Name of parent TyCon (ADataCon datacon) -- Relevant DataCon built_in @@ -535,7 +537,7 @@ mkPArrFakeCon arity = data_con tyvar = head alphaTyVars tyvarTys = replicate arity $ mkTyVarTy tyvar nameStr = mkFastString ("MkPArr" ++ show arity) - name = mkWiredInName pREL_PARR (mkOccFS dataName nameStr) uniq + name = mkWiredInName pREL_PARR (mkOccNameFS dataName nameStr) uniq Nothing (ADataCon data_con) UserSyntax uniq = mkPArrDataConUnique arity diff --git a/ghc/compiler/profiling/CostCentre.lhs b/ghc/compiler/profiling/CostCentre.lhs index 3616ccbe30..f4a6ba951d 100644 --- a/ghc/compiler/profiling/CostCentre.lhs +++ b/ghc/compiler/profiling/CostCentre.lhs @@ -32,9 +32,7 @@ module CostCentre ( #include "HsVersions.h" import Var ( Id ) -import Name ( UserFS, EncodedFS, encodeFS, decode, - getOccName, occNameFS - ) +import Name ( getOccName, occNameFS ) import Module ( Module ) import Outputable import FastTypes @@ -120,7 +118,7 @@ data CostCentre cc_mod :: Module -- Name of module defining this CC. } -type CcName = EncodedFS +type CcName = FastString data IsDupdCC = OriginalCC -- This says how the CC is *used*. Saying that @@ -200,9 +198,9 @@ maybeSingletonCCS _ = Nothing Building cost centres \begin{code} -mkUserCC :: UserFS -> Module -> CostCentre +mkUserCC :: FastString -> Module -> CostCentre mkUserCC cc_name mod - = NormalCC { cc_name = encodeFS cc_name, cc_mod = mod, + = NormalCC { cc_name = cc_name, cc_mod = mod, cc_is_dupd = OriginalCC, cc_is_caf = NotCafCC {-might be changed-} } @@ -370,5 +368,5 @@ ppCostCentreLbl (NormalCC {cc_name = n, cc_mod = m, cc_is_caf = is_caf}) costCentreUserName (NoCostCentre) = "NO_CC" costCentreUserName (AllCafsCC {}) = "CAF" costCentreUserName cc@(NormalCC {cc_name = name, cc_is_caf = is_caf}) - = case is_caf of { CafCC -> "CAF:"; _ -> "" } ++ decode (unpackFS name) + = case is_caf of { CafCC -> "CAF:"; _ -> "" } ++ unpackFS name \end{code} diff --git a/ghc/compiler/rename/RnEnv.lhs b/ghc/compiler/rename/RnEnv.lhs index 99d6a3414c..2be3bfd5c0 100644 --- a/ghc/compiler/rename/RnEnv.lhs +++ b/ghc/compiler/rename/RnEnv.lhs @@ -50,7 +50,8 @@ import TcRnMonad import Name ( Name, nameIsLocalOrFrom, mkInternalName, isWiredInName, nameSrcLoc, nameOccName, nameModule, nameParent, isExternalName ) import NameSet -import OccName ( tcName, isDataOcc, occNameFlavour, reportIfUnused ) +import OccName ( tcName, isDataOcc, pprNonVarNameSpace, occNameSpace, + reportIfUnused ) import Module ( Module ) import PrelNames ( mkUnboundName, rOOT_MAIN, iNTERACTIVE, consDataConKey, hasKey ) import UniqSupply @@ -747,7 +748,8 @@ warnUnusedName :: (Name, Maybe Provenance) -> RnM () warnUnusedName (name, prov) = addWarnAt loc $ sep [msg <> colon, - nest 2 $ occNameFlavour (nameOccName name) <+> quotes (ppr name)] + nest 2 $ pprNonVarNameSpace (occNameSpace (nameOccName name)) + <+> quotes (ppr name)] -- TODO should be a proper span where (loc,msg) = case prov of @@ -778,7 +780,8 @@ shadowedNameWarn doc shadow unknownNameErr rdr_name = sep [ptext SLIT("Not in scope:"), - nest 2 $ occNameFlavour (rdrNameOcc rdr_name) <+> quotes (ppr rdr_name)] + nest 2 $ pprNonVarNameSpace (occNameSpace (rdrNameOcc rdr_name)) + <+> quotes (ppr rdr_name)] unknownInstBndrErr cls op = quotes (ppr op) <+> ptext SLIT("is not a (visible) method of class") <+> quotes (ppr cls) diff --git a/ghc/compiler/rename/RnExpr.lhs b/ghc/compiler/rename/RnExpr.lhs index 53a412f765..95d7b8307e 100644 --- a/ghc/compiler/rename/RnExpr.lhs +++ b/ghc/compiler/rename/RnExpr.lhs @@ -37,7 +37,6 @@ import Name ( Name, nameOccName, nameIsLocalOrFrom ) import NameSet import RdrName ( RdrName, emptyGlobalRdrEnv, extendLocalRdrEnv, lookupLocalRdrEnv ) import LoadIface ( loadHomeInterface ) -import UnicodeUtil ( stringToUtf8 ) import UniqFM ( isNullUFM ) import UniqSet ( emptyUniqSet ) import List ( nub ) @@ -932,7 +931,7 @@ mkAssertErrorExpr = getSrcSpanM `thenM` \ sloc -> let expr = HsApp (L sloc (HsVar assertErrorName)) (L sloc (HsLit msg)) - msg = HsStringPrim (mkFastString (stringToUtf8 (showSDoc (ppr sloc)))) + msg = HsStringPrim (mkFastString (showSDoc (ppr sloc))) in returnM (expr, emptyFVs) \end{code} diff --git a/ghc/compiler/rename/RnNames.lhs b/ghc/compiler/rename/RnNames.lhs index bf6e54a4f5..4cdb241d8e 100644 --- a/ghc/compiler/rename/RnNames.lhs +++ b/ghc/compiler/rename/RnNames.lhs @@ -26,15 +26,17 @@ import TcRnMonad import FiniteMap import PrelNames ( pRELUDE, isUnboundName, main_RDR_Unqual ) -import Module ( Module, moduleUserString, unitModuleEnv, +import Module ( Module, moduleString, unitModuleEnv, lookupModuleEnv, moduleEnvElts, foldModuleEnv ) import Name ( Name, nameSrcLoc, nameOccName, nameModule, isWiredInName, nameParent, nameParent_maybe, isExternalName, isBuiltInSyntax ) import NameSet import NameEnv -import OccName ( srcDataName, isTcOcc, occNameFlavour, OccEnv, - mkOccEnv, lookupOccEnv, emptyOccEnv, extendOccEnv ) +import OccName ( srcDataName, isTcOcc, pprNonVarNameSpace, + occNameSpace, + OccEnv, mkOccEnv, lookupOccEnv, emptyOccEnv, + extendOccEnv ) import HscTypes ( GenAvailInfo(..), AvailInfo, HomePackageTable, PackageIfaceTable, unQualInScope, @@ -683,7 +685,7 @@ reportDeprecations tcg_env , Just deprec_txt <- lookupDeprec hpt pit name = setSrcSpan (importSpecLoc imp_spec) $ addWarn (sep [ptext SLIT("Deprecated use of") <+> - occNameFlavour (nameOccName name) <+> + pprNonVarNameSpace (occNameSpace (nameOccName name)) <+> quotes (ppr name), (parens imp_msg) <> colon, (ppr deprec_txt) ]) @@ -958,7 +960,7 @@ printMinimalImports imps (vcat (map ppr_mod_ie mod_ies)) }) } where - mkFilename this_mod = moduleUserString this_mod ++ ".imports" + mkFilename this_mod = moduleString this_mod ++ ".imports" ppr_mod_ie (mod_name, ies) | mod_name == pRELUDE = empty diff --git a/ghc/compiler/simplCore/SetLevels.lhs b/ghc/compiler/simplCore/SetLevels.lhs index 4b1c01dfec..f8ab29dcd5 100644 --- a/ghc/compiler/simplCore/SetLevels.lhs +++ b/ghc/compiler/simplCore/SetLevels.lhs @@ -60,8 +60,8 @@ import CoreUtils ( exprType, exprIsTrivial, exprIsCheap, mkPiTypes ) import CoreFVs -- all of it import CoreSubst ( Subst, emptySubst, extendInScope, extendIdSubst, cloneIdBndr, cloneRecIdBndrs ) -import Id ( Id, idType, mkSysLocalUnencoded, - isOneShotLambda, zapDemandIdInfo, +import Id ( Id, idType, mkSysLocal, isOneShotLambda, + zapDemandIdInfo, idSpecialisation, idWorkerInfo, setIdInfo ) import IdInfo ( workerExists, vanillaIdInfo, isEmptySpecInfo ) @@ -69,7 +69,7 @@ import Var ( Var ) import VarSet import VarEnv import Name ( getOccName ) -import OccName ( occNameUserString ) +import OccName ( occNameString ) import Type ( isUnLiftedType, Type ) import BasicTypes ( TopLevelFlag(..) ) import UniqSupply @@ -796,9 +796,9 @@ newPolyBndrs dest_lvl env abs_vars bndrs in returnLvl (extendPolyLvlEnv dest_lvl env abs_vars (bndrs `zip` new_bndrs), new_bndrs) where - mk_poly_bndr bndr uniq = mkSysLocalUnencoded (mkFastString str) uniq poly_ty + mk_poly_bndr bndr uniq = mkSysLocal (mkFastString str) uniq poly_ty where - str = "poly_" ++ occNameUserString (getOccName bndr) + str = "poly_" ++ occNameString (getOccName bndr) poly_ty = mkPiTypes abs_vars (idType bndr) @@ -807,7 +807,7 @@ newLvlVar :: String -> LvlM Id newLvlVar str vars body_ty = getUniqueUs `thenLvl` \ uniq -> - returnUs (mkSysLocalUnencoded (mkFastString str) uniq (mkPiTypes vars body_ty)) + returnUs (mkSysLocal (mkFastString str) uniq (mkPiTypes vars body_ty)) -- The deeply tiresome thing is that we have to apply the substitution -- to the rules inside each Id. Grr. But it matters. diff --git a/ghc/compiler/simplCore/SimplMonad.lhs b/ghc/compiler/simplCore/SimplMonad.lhs index b82562e668..bc09e1128c 100644 --- a/ghc/compiler/simplCore/SimplMonad.lhs +++ b/ghc/compiler/simplCore/SimplMonad.lhs @@ -34,7 +34,6 @@ import UniqSupply ( uniqsFromSupply, uniqFromSupply, splitUniqSupply, ) import DynFlags ( SimplifierSwitch(..), DynFlags, DynFlag(..), dopt ) import StaticFlags ( opt_PprStyle_Debug, opt_HistorySize ) -import OccName ( EncodedFS ) import Unique ( Unique ) import Maybes ( expectJust ) import FiniteMap ( FiniteMap, emptyFM, isEmptyFM, lookupFM, addToFM, plusFM_C, fmToList ) @@ -160,7 +159,7 @@ getDOptsSmpl :: SimplM DynFlags getDOptsSmpl = SM (\dflags us sc -> (dflags, us, sc)) -newId :: EncodedFS -> Type -> SimplM Id +newId :: FastString -> Type -> SimplM Id newId fs ty = getUniqueSmpl `thenSmpl` \ uniq -> returnSmpl (mkSysLocal fs uniq ty) \end{code} diff --git a/ghc/compiler/simplCore/Simplify.lhs b/ghc/compiler/simplCore/Simplify.lhs index 885914063c..17a7969e74 100644 --- a/ghc/compiler/simplCore/Simplify.lhs +++ b/ghc/compiler/simplCore/Simplify.lhs @@ -28,7 +28,6 @@ import Id ( Id, idType, idInfo, idArity, isDataConWorkId, ) import MkId ( eRROR_ID ) import Literal ( mkStringLit ) -import OccName ( encodeFS ) import IdInfo ( OccInfo(..), isLoopBreaker, setArityInfo, zapDemandInfo, setUnfoldingInfo, @@ -1875,7 +1874,7 @@ mkDupableAlt env case_bndr' cont alt ) `thenSmpl` \ (final_bndrs', final_args) -> -- See comment about "$j" name above - newId (encodeFS FSLIT("$j")) (mkPiTypes final_bndrs' rhs_ty') `thenSmpl` \ join_bndr -> + newId FSLIT("$j") (mkPiTypes final_bndrs' rhs_ty') `thenSmpl` \ join_bndr -> -- Notice the funky mkPiTypes. If the contructor has existentials -- it's possible that the join point will be abstracted over -- type varaibles as well as term variables. diff --git a/ghc/compiler/stgSyn/CoreToStg.lhs b/ghc/compiler/stgSyn/CoreToStg.lhs index 9ae0d2741b..824cabaacb 100644 --- a/ghc/compiler/stgSyn/CoreToStg.lhs +++ b/ghc/compiler/stgSyn/CoreToStg.lhs @@ -30,7 +30,7 @@ import VarSet import VarEnv import Maybes ( maybeToBool ) import Name ( getOccName, isExternalName, nameOccName ) -import OccName ( occNameUserString, occNameFS ) +import OccName ( occNameString, occNameFS ) import BasicTypes ( Arity ) import Packages ( HomeModules ) import StaticFlags ( opt_RuntimeTypes ) @@ -688,7 +688,7 @@ coreToStgLet let_no_escape bind body is_join_var :: Id -> Bool -- A hack (used only for compiler debuggging) to tell if -- a variable started life as a join point ($j) -is_join_var j = occNameUserString (getOccName j) == "$j" +is_join_var j = occNameString (getOccName j) == "$j" \end{code} \begin{code} diff --git a/ghc/compiler/typecheck/Inst.lhs b/ghc/compiler/typecheck/Inst.lhs index 71d3e8472b..d6cf344b90 100644 --- a/ghc/compiler/typecheck/Inst.lhs +++ b/ghc/compiler/typecheck/Inst.lhs @@ -79,7 +79,7 @@ import DataCon ( DataCon, dataConTyVars, dataConStupidTheta, dataConName, dataCo import Id ( Id, idName, idType, mkUserLocal, mkLocalId ) import PrelInfo ( isNoDictClass ) import Name ( Name, mkMethodOcc, getOccName, getSrcLoc, nameModule, - isInternalName, setNameUnique, mkSystemVarNameEncoded ) + isInternalName, setNameUnique, mkSystemVarName ) import NameSet ( addOneToNameSet ) import Literal ( inIntRange ) import Var ( TyVar, tyVarKind, setIdType ) @@ -398,9 +398,7 @@ newLitInst orig lit expected_ty -- Make a LitInst = do { loc <- getInstLoc orig ; new_uniq <- newUnique ; let - lit_nm = mkSystemVarNameEncoded new_uniq FSLIT("lit") - -- The "encoded" bit means that we don't need to - -- z-encode the string every time we call this! + lit_nm = mkSystemVarName new_uniq FSLIT("lit") lit_inst = LitInst lit_nm lit expected_ty loc ; extendLIE lit_inst ; return (HsVar (instToId lit_inst)) } diff --git a/ghc/compiler/typecheck/TcClassDcl.lhs b/ghc/compiler/typecheck/TcClassDcl.lhs index b382af94df..fbb450a199 100644 --- a/ghc/compiler/typecheck/TcClassDcl.lhs +++ b/ghc/compiler/typecheck/TcClassDcl.lhs @@ -52,7 +52,6 @@ import RdrName ( RdrName, mkDerivedRdrName ) import Outputable import PrelNames ( genericTyConNames ) import DynFlags -import UnicodeUtil ( stringToUtf8 ) import ErrUtils ( dumpIfSet_dyn ) import Util ( count, lengthIs, isSingleton, lengthExceeds ) import Unique ( Uniquable(..) ) @@ -487,7 +486,7 @@ mkDefMethRhs origin clas inst_tys sel_id loc NoDefMeth where error_rhs = noLoc $ HsLam (mkMatchGroup [mkSimpleMatch wild_pats simple_rhs]) simple_rhs = nlHsApp (nlHsVar (getName nO_METHOD_BINDING_ERROR_ID)) - (nlHsLit (HsStringPrim (mkFastString (stringToUtf8 error_msg)))) + (nlHsLit (HsStringPrim (mkFastString error_msg))) error_msg = showSDoc (hcat [ppr loc, text "|", ppr sel_id ]) -- When the type is of form t1 -> t2 -> t3 diff --git a/ghc/compiler/typecheck/TcGenDeriv.lhs b/ghc/compiler/typecheck/TcGenDeriv.lhs index faa32ec18b..94bb152850 100644 --- a/ghc/compiler/typecheck/TcGenDeriv.lhs +++ b/ghc/compiler/typecheck/TcGenDeriv.lhs @@ -809,7 +809,7 @@ gen_Read_binds get_fixity tycon ident_pat s = nlConPat ident_RDR [nlLitPat (mkHsString s)] -- Ident "foo" symbol_pat s = nlConPat symbol_RDR [nlLitPat (mkHsString s)] -- Symbol ">>" - data_con_str con = occNameUserString (getOccName con) + data_con_str con = occNameString (getOccName con) read_punc c = bindLex (punc_pat c) read_arg a ty @@ -832,7 +832,7 @@ gen_Read_binds get_fixity tycon | otherwise = [bindLex (ident_pat lbl_str)] where - lbl_str = occNameUserString (getOccName lbl) + lbl_str = occNameString (getOccName lbl) \end{code} @@ -899,7 +899,7 @@ gen_Show_binds get_fixity tycon dc_nm = getName data_con dc_occ_nm = getOccName data_con - con_str = occNameUserString dc_occ_nm + con_str = occNameString dc_occ_nm op_con_str = wrapOpParens con_str backquote_str = wrapOpBackquotes con_str @@ -916,7 +916,7 @@ gen_Show_binds get_fixity tycon -- it seems tidier to have them both sides. where occ_nm = getOccName l - nm = wrapOpParens (occNameUserString occ_nm) + nm = wrapOpParens (occNameString occ_nm) show_args = zipWith show_arg bs_needed arg_tys (show_arg1:show_arg2:_) = show_args @@ -1128,7 +1128,7 @@ gen_Data_binds fix_env tycon constr_args dc = [ -- nlHsIntLit (toInteger (dataConTag dc)), -- Tag nlHsVar data_type_name, -- DataType - nlHsLit (mkHsString (occNameUserString dc_occ)), -- String name + nlHsLit (mkHsString (occNameString dc_occ)), -- String name nlList labels, -- Field labels nlHsVar fixity] -- Fixity where @@ -1458,7 +1458,7 @@ mk_tc_deriv_name tycon str = mkDerivedRdrName tc_name mk_occ where tc_name = tyConName tycon - mk_occ tc_occ = mkOccFS varName (mkFastString new_str) + mk_occ tc_occ = mkVarOccFS (mkFastString new_str) where new_str = str ++ occNameString tc_occ ++ "#" \end{code} diff --git a/ghc/compiler/typecheck/TcInstDcls.lhs b/ghc/compiler/typecheck/TcInstDcls.lhs index 432d3c8cae..04fbafb5b1 100644 --- a/ghc/compiler/typecheck/TcInstDcls.lhs +++ b/ghc/compiler/typecheck/TcInstDcls.lhs @@ -35,7 +35,6 @@ import Var ( Id, idName, idType ) import MkId ( mkDictFunId, rUNTIME_ERROR_ID ) import FunDeps ( checkInstFDs ) import Name ( Name, getSrcLoc ) -import UnicodeUtil ( stringToUtf8 ) import Maybe ( catMaybes ) import SrcLoc ( srcLocSpan, unLoc, noLoc, Located(..), srcSpanStart ) import ListSetOps ( minusList ) @@ -405,7 +404,7 @@ tcInstDecl2 (InstInfo { iSpec = ispec, iBinds = binds }) -- Hardly beautiful, but only three extra lines. nlHsApp (noLoc $ TyApp (nlHsVar rUNTIME_ERROR_ID) [idType this_dict_id]) - (nlHsLit (HsStringPrim (mkFastString (stringToUtf8 msg)))) + (nlHsLit (HsStringPrim (mkFastString msg))) | otherwise -- The common case = mkHsConApp dict_constr inst_tys' (map HsVar scs_and_meths) diff --git a/ghc/compiler/typecheck/TcRnDriver.lhs b/ghc/compiler/typecheck/TcRnDriver.lhs index b2e665fdb6..7e3aae2506 100644 --- a/ghc/compiler/typecheck/TcRnDriver.lhs +++ b/ghc/compiler/typecheck/TcRnDriver.lhs @@ -63,7 +63,7 @@ import ErrUtils ( Messages, mkDumpDoc, showPass ) import Id ( Id, mkExportedLocalId, isLocalId, idName, idType ) import Var ( Var ) import Module ( Module, ModuleEnv, moduleEnvElts, elemModuleEnv ) -import OccName ( mkVarOcc, mkOccFS, varName ) +import OccName ( mkVarOccFS ) import Name ( Name, NamedThing(..), isExternalName, getSrcLoc, isWiredInName, mkExternalName ) import NameSet @@ -734,7 +734,7 @@ checkMain dflags <- getDOpts ; let { main_mod = mainModIs dflags ; main_fn = case mainFunIs dflags of { - Just fn -> mkRdrUnqual (mkVarOcc (mkFastString fn)) ; + Just fn -> mkRdrUnqual (mkVarOccFS (mkFastString fn)) ; Nothing -> main_RDR_Unqual } } ; check_main ghci_mode tcg_env main_mod main_fn @@ -776,7 +776,7 @@ check_main ghci_mode tcg_env main_mod main_fn -- for 'main' in the interface file! ; let { root_main_name = mkExternalName rootMainKey rOOT_MAIN - (mkOccFS varName FSLIT("main")) + (mkVarOccFS FSLIT("main")) (Just main_name) (getSrcLoc main_name) ; root_main_id = mkExportedLocalId root_main_name ty ; main_bind = noLoc (VarBind root_main_id main_expr) } diff --git a/ghc/compiler/typecheck/TcSplice.lhs b/ghc/compiler/typecheck/TcSplice.lhs index 2844ab42a7..578c96b105 100644 --- a/ghc/compiler/typecheck/TcSplice.lhs +++ b/ghc/compiler/typecheck/TcSplice.lhs @@ -41,7 +41,7 @@ import NameEnv ( lookupNameEnv ) import HscTypes ( lookupType, ExternalPackageState(..), emptyModDetails ) import OccName import Var ( Id, TyVar, idType ) -import Module ( moduleUserString ) +import Module ( moduleString ) import TcRnMonad import IfaceEnv ( lookupOrig ) import Class ( Class, classExtraBigSig ) @@ -416,7 +416,7 @@ instance TH.Quasi (IOEnv (Env TcGblEnv TcLclEnv)) where qReport True msg = addErr (text msg) qReport False msg = addReport (text msg) - qCurrentModule = do { m <- getModule; return (moduleUserString m) } + qCurrentModule = do { m <- getModule; return (moduleString m) } qReify v = reify v qRecover = recoverM @@ -659,8 +659,8 @@ reifyName thing -- have free variables, we may need to generate NameL's for them. where name = getName thing - mod = moduleUserString (nameModule name) - occ_str = occNameUserString occ + mod = moduleString (nameModule name) + occ_str = occNameString occ occ = nameOccName name mk_varg | OccName.isDataOcc occ = TH.mkNameG_d | OccName.isVarOcc occ = TH.mkNameG_v diff --git a/ghc/compiler/types/TypeRep.lhs b/ghc/compiler/types/TypeRep.lhs index dc5344569b..7bb863a210 100644 --- a/ghc/compiler/types/TypeRep.lhs +++ b/ghc/compiler/types/TypeRep.lhs @@ -33,7 +33,7 @@ import Kind import Var ( Var, Id, TyVar, tyVarKind ) import VarSet ( TyVarSet ) import Name ( Name, NamedThing(..), BuiltInSyntax(..), mkWiredInName ) -import OccName ( mkOccFS, tcName, parenSymOcc ) +import OccName ( mkOccNameFS, tcName, parenSymOcc ) import BasicTypes ( IPName, tupleParens ) import TyCon ( TyCon, mkFunTyCon, tyConArity, tupleTyConBoxity, isTupleTyCon, isRecursiveTyCon, isNewTyCon ) import Class ( Class ) @@ -273,7 +273,7 @@ funTyCon = mkFunTyCon funTyConName (mkArrowKinds [argTypeKind, openTypeKind] lif -- a prefix way, thus: (->) Int# Int#. And this is unusual. funTyConName = mkWiredInName gHC_PRIM - (mkOccFS tcName FSLIT("(->)")) + (mkOccNameFS tcName FSLIT("(->)")) funTyConKey Nothing -- No parent object (ATyCon funTyCon) -- Relevant TyCon diff --git a/ghc/compiler/utils/Binary.hs b/ghc/compiler/utils/Binary.hs index 1902ff1f66..7b40bd279d 100644 --- a/ghc/compiler/utils/Binary.hs +++ b/ghc/compiler/utils/Binary.hs @@ -58,26 +58,7 @@ import UniqFM import FastMutInt import PackageConfig ( PackageId, packageIdFS, fsToPackageId ) -#if __GLASGOW_HASKELL__ < 503 -import DATA_IOREF -import DATA_BITS -import DATA_INT -import DATA_WORD -import Char -import Monad -import Exception -import GlaExts hiding (ByteArray, newByteArray, freezeByteArray) -import Array -import IO -import PrelIOBase ( IOError(..), IOErrorType(..) -#if __GLASGOW_HASKELL__ > 411 - , IOException(..) -#endif - ) -import PrelReal ( Ratio(..) ) -import PrelIOBase ( IO(..) ) -import IOExts ( openFileEx, IOModeEx(..) ) -#else +import Foreign import Data.Array.IO import Data.Array import Data.Bits @@ -102,44 +83,12 @@ import GHC.Handle ( openFileEx, IOModeEx(..) ) #else import System.IO ( openBinaryFile ) #endif -#endif #if __GLASGOW_HASKELL__ < 601 openBinaryFile f mode = openFileEx f (BinaryMode mode) #endif -#if __GLASGOW_HASKELL__ < 503 -type BinArray = MutableByteArray RealWorld Int -newArray_ bounds = stToIO (newCharArray bounds) -unsafeWrite arr ix e = stToIO (writeWord8Array arr ix e) -unsafeRead arr ix = stToIO (readWord8Array arr ix) -#if __GLASGOW_HASKELL__ < 411 -newByteArray# = newCharArray# -#endif -hPutArray h arr sz = hPutBufBAFull h arr sz -hGetArray h sz = hGetBufBAFull h sz - -mkIOError :: IOErrorType -> String -> Maybe Handle -> Maybe FilePath -> Exception -mkIOError t location maybe_hdl maybe_filename - = IOException (IOError maybe_hdl t location "" -#if __GLASGOW_HASKELL__ > 411 - maybe_filename -#endif - ) - -eofErrorType = EOF - -#ifndef SIZEOF_HSINT -#define SIZEOF_HSINT INT_SIZE_IN_BYTES -#endif - -#ifndef SIZEOF_HSWORD -#define SIZEOF_HSWORD WORD_SIZE_IN_BYTES -#endif - -#else type BinArray = IOUArray Int Word8 -#endif --------------------------------------------------------------- -- BinHandle @@ -741,13 +690,17 @@ constructDictionary j fm = array (0,j-1) (eltsUFM fm) -- Reading and writing FastStrings --------------------------------------------------------- -putFS bh (FastString id l ba) = do - put_ bh (I# l) - putByteArray bh ba l -putFS bh s = error ("Binary.put_(FastString): " ++ unpackFS s) - -- Note: the length of the FastString is *not* the same as - -- the size of the ByteArray: the latter is rounded up to a - -- multiple of the word size. +putFS bh (FastString id l _ buf _) = do + put_ bh l + withForeignPtr buf $ \ptr -> + let + go n | n == l = return () + | otherwise = do + b <- peekElemOff ptr n + putByte bh b + go (n+1) + in + go 0 {- -- possible faster version, not quite there yet: getFS bh@BinMem{} = do @@ -757,16 +710,24 @@ getFS bh@BinMem{} = do return $! (mkFastSubStringBA# arr off l) -} getFS bh = do - (I# l) <- get bh - (BA ba) <- getByteArray bh (I# l) - return $! (mkFastSubStringBA# ba 0# l) + l <- get bh + fp <- mallocForeignPtrBytes l + withForeignPtr fp $ \ptr -> do + let + go n | n == l = mkFastStringForeignPtr ptr fp l + | otherwise = do + b <- getByte bh + pokeElemOff ptr n b + go (n+1) + -- + go 0 instance Binary PackageId where put_ bh pid = put_ bh (packageIdFS pid) get bh = do { fs <- get bh; return (fsToPackageId fs) } instance Binary FastString where - put_ bh f@(FastString id l ba) = + put_ bh f@(FastString id l _ fp _) = case getUserData bh of { UserData { ud_next = j_r, ud_map = out_r, ud_dict = dict} -> do out <- readIORef out_r diff --git a/ghc/compiler/utils/BufWrite.hs b/ghc/compiler/utils/BufWrite.hs index 6d00e46634..b15089ead3 100644 --- a/ghc/compiler/utils/BufWrite.hs +++ b/ghc/compiler/utils/BufWrite.hs @@ -31,17 +31,11 @@ import Char ( ord ) import Foreign import IO -#if __GLASGOW_HASKELL__ < 503 -import PrelIOBase ( IO(..) ) -import IOExts ( hPutBufFull ) -#else import GHC.IOBase ( IO(..) ) import System.IO ( hPutBuf ) -#endif - -import GLAEXTS ( touch#, byteArrayContents#, Int(..), Int#, Addr# ) +import GHC.Ptr ( Ptr(..) ) -import PrimPacked ( Ptr(..) ) +import GLAEXTS ( Int(..), Int#, Addr# ) -- ----------------------------------------------------------------------------- @@ -88,22 +82,17 @@ bPutStr b@(BufHandle buf r hdl) str = do loop cs (i+1) bPutFS :: BufHandle -> FastString -> IO () -bPutFS b@(BufHandle buf r hdl) fs@(FastString _ len# arr#) = do - let len = I# len# +bPutFS b@(BufHandle buf r hdl) fs@(FastString _ len _ fp _) = + withForeignPtr fp $ \ptr -> do i <- readFastMutInt r if (i + len) >= buf_size then do hPutBuf hdl buf i writeFastMutInt r 0 if (len >= buf_size) - then do - let a# = byteArrayContents# arr# - hPutBuf hdl (Ptr a#) len - touch fs + then hPutBuf hdl ptr len else bPutFS b fs else do - let a# = byteArrayContents# arr# - copyBytes (buf `plusPtr` i) (Ptr a#) len - touch fs + copyBytes (buf `plusPtr` i) ptr len writeFastMutInt r (i+len) bPutFS _ _ = panic "bPutFS" @@ -128,8 +117,6 @@ bFlush b@(BufHandle buf r hdl) = do free buf return () -touch r = IO $ \s -> case touch# r s of s -> (# s, () #) - #if 0 myPutBuf s hdl buf i = modifyIOError (\e -> ioeSetErrorString e (ioeGetErrorString e ++ ':':s ++ " (" ++ show buf ++ "," ++ show i ++ ")")) $ diff --git a/ghc/compiler/utils/Encoding.hs b/ghc/compiler/utils/Encoding.hs new file mode 100644 index 0000000000..d15c0216ae --- /dev/null +++ b/ghc/compiler/utils/Encoding.hs @@ -0,0 +1,386 @@ +{-# OPTIONS_GHC -O #-} +-- ----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow, 1997-2003 +-- +-- Character encodings +-- +-- ----------------------------------------------------------------------------- + +module Encoding ( + -- * UTF-8 + utf8DecodeChar#, + utf8PrevChar, + utf8CharStart, + utf8DecodeChar, + utf8DecodeString, + utf8EncodeChar, + utf8EncodeString, + utf8EncodedLength, + countUTF8Chars, + + -- * Latin-1 + latin1DecodeChar, + latin1EncodeChar, + + -- * Z-encoding + zEncodeString, + zDecodeString + ) where + +#define COMPILING_FAST_STRING +#include "HsVersions.h" +import Foreign +import Data.Char ( ord, chr, isDigit, digitToInt, isHexDigit ) +import Numeric ( showHex ) + +import GHC.Ptr ( Ptr(..) ) +import GHC.Base + +-- ----------------------------------------------------------------------------- +-- Latin-1 + +latin1DecodeChar ptr = do + w <- peek ptr + return (unsafeChr (fromIntegral w), ptr `plusPtr` 1) + +latin1EncodeChar c ptr = do + poke ptr (fromIntegral (ord c)) + return (ptr `plusPtr` 1) + +-- ----------------------------------------------------------------------------- +-- UTF-8 + +-- We can't write the decoder as efficiently as we'd like without +-- resorting to unboxed extensions, unfortunately. I tried to write +-- an IO version of this function, but GHC can't eliminate boxed +-- results from an IO-returning function. +-- +-- We assume we can ignore overflow when parsing a multibyte character here. +-- To make this safe, we add extra sentinel bytes to unparsed UTF-8 sequences +-- before decoding them (see StringBuffer.hs). + +{-# INLINE utf8DecodeChar# #-} +utf8DecodeChar# :: Addr# -> (# Char#, Addr# #) +utf8DecodeChar# a# = + let ch0 = word2Int# (indexWord8OffAddr# a# 0#) in + case () of + _ | ch0 <=# 0x7F# -> (# chr# ch0, a# `plusAddr#` 1# #) + + | ch0 >=# 0xC0# && ch0 <=# 0xDF# -> + let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in + if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else + (# chr# (((ch0 -# 0xC0#) `uncheckedIShiftL#` 6#) +# + (ch1 -# 0x80#)), + a# `plusAddr#` 2# #) + + | ch0 >=# 0xE0# && ch0 <=# 0xEF# -> + let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in + if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else + let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in + if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else + (# chr# (((ch0 -# 0xE0#) `uncheckedIShiftL#` 12#) +# + ((ch1 -# 0x80#) `uncheckedIShiftL#` 6#) +# + (ch2 -# 0x80#)), + a# `plusAddr#` 3# #) + + | ch0 >=# 0xF0# && ch0 <=# 0xF8# -> + let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in + if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else + let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in + if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else + let ch3 = word2Int# (indexWord8OffAddr# a# 3#) in + if ch3 <# 0x80# || ch3 >=# 0xC0# then fail 3# else + (# chr# (((ch0 -# 0xF0#) `uncheckedIShiftL#` 18#) +# + ((ch1 -# 0x80#) `uncheckedIShiftL#` 12#) +# + ((ch2 -# 0x80#) `uncheckedIShiftL#` 6#) +# + (ch3 -# 0x80#)), + a# `plusAddr#` 4# #) + + | otherwise -> fail 1# + where + -- all invalid sequences end up here: + fail n = (# '\0'#, a# `plusAddr#` n #) + -- '\xFFFD' would be the usual replacement character, but + -- that's a valid symbol in Haskell, so will result in a + -- confusing parse error later on. Instead we use '\0' which + -- will signal a lexer error immediately. + +utf8DecodeChar :: Ptr Word8 -> (Char, Ptr Word8) +utf8DecodeChar (Ptr a#) = ( C# c#, Ptr b# ) + where (# c#, b# #) = utf8DecodeChar# a# + +-- UTF-8 is cleverly designed so that we can always figure out where +-- the start of the current character is, given any position in a +-- stream. This function finds the start of the previous character, +-- assuming there *is* a previous character. +utf8PrevChar :: Ptr Word8 -> IO (Ptr Word8) +utf8PrevChar p = utf8CharStart (p `plusPtr` (-1)) + +utf8CharStart :: Ptr Word8 -> IO (Ptr Word8) +utf8CharStart p = go p + where go p = do w <- peek p + if (w .&. 0xC0) == 0x80 + then go (p `plusPtr` (-1)) + else return p + +utf8DecodeString :: Ptr Word8 -> Int -> IO [Char] +STRICT2(utf8DecodeString) +utf8DecodeString (Ptr a#) (I# len#) + = unpack a# + where + end# = addr2Int# (a# `plusAddr#` len#) + + unpack p# + | addr2Int# p# >=# end# = return [] + | otherwise = + case utf8DecodeChar# p# of + (# c#, q# #) -> do + chs <- unpack q# + return (C# c# : chs) + +countUTF8Chars :: Ptr Word8 -> Int -> IO Int +countUTF8Chars ptr bytes = go ptr 0 + where + end = ptr `plusPtr` bytes + + STRICT2(go) + go ptr n + | ptr >= end = return n + | otherwise = do + case utf8DecodeChar# (unPtr ptr) of + (# c, a #) -> go (Ptr a) (n+1) + +unPtr (Ptr a) = a + +utf8EncodeChar c ptr = + let x = ord c in + case () of + _ | x > 0 && x <= 0x007f -> do + poke ptr (fromIntegral x) + return (ptr `plusPtr` 1) + -- NB. '\0' is encoded as '\xC0\x80', not '\0'. This is so that we + -- can have 0-terminated UTF-8 strings (see GHC.Base.unpackCStringUtf8). + | x <= 0x07ff -> do + poke ptr (fromIntegral (0xC0 .|. ((x `shiftR` 6) .&. 0x1F))) + pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x .&. 0x3F))) + return (ptr `plusPtr` 2) + | x <= 0xffff -> do + poke ptr (fromIntegral (0xE0 .|. (x `shiftR` 12) .&. 0x0F)) + pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x `shiftR` 6) .&. 0x3F)) + pokeElemOff ptr 2 (fromIntegral (0x80 .|. (x .&. 0x3F))) + return (ptr `plusPtr` 3) + | otherwise -> do + poke ptr (fromIntegral (0xF0 .|. (x `shiftR` 18))) + pokeElemOff ptr 1 (fromIntegral (0x80 .|. ((x `shiftR` 12) .&. 0x3F))) + pokeElemOff ptr 2 (fromIntegral (0x80 .|. ((x `shiftR` 6) .&. 0x3F))) + pokeElemOff ptr 3 (fromIntegral (0x80 .|. (x .&. 0x3F))) + return (ptr `plusPtr` 4) + +utf8EncodeString :: Ptr Word8 -> String -> IO () +utf8EncodeString ptr str = go ptr str + where STRICT2(go) + go ptr [] = return () + go ptr (c:cs) = do + ptr' <- utf8EncodeChar c ptr + go ptr' cs + +utf8EncodedLength :: String -> Int +utf8EncodedLength str = go 0 str + where STRICT2(go) + go n [] = n + go n (c:cs) + | ord c > 0 && ord c <= 0x007f = go (n+1) cs + | ord c <= 0x07ff = go (n+2) cs + | ord c <= 0xffff = go (n+3) cs + | otherwise = go (n+4) cs + +-- ----------------------------------------------------------------------------- +-- The Z-encoding + +{- +This is the main name-encoding and decoding function. It encodes any +string into a string that is acceptable as a C name. This is the name +by which things are known right through the compiler. + +The basic encoding scheme is this. + +* Tuples (,,,) are coded as Z3T + +* Alphabetic characters (upper and lower) and digits + all translate to themselves; + except 'Z', which translates to 'ZZ' + and 'z', which translates to 'zz' + We need both so that we can preserve the variable/tycon distinction + +* Most other printable characters translate to 'zx' or 'Zx' for some + alphabetic character x + +* The others translate as 'znnnU' where 'nnn' is the decimal number + of the character + + Before After + -------------------------- + Trak Trak + foo_wib foozuwib + > zg + >1 zg1 + foo# foozh + foo## foozhzh + foo##1 foozhzh1 + fooZ fooZZ + :+ ZCzp + () Z0T 0-tuple + (,,,,) Z5T 5-tuple + (# #) Z1H unboxed 1-tuple (note the space) + (#,,,,#) Z5H unboxed 5-tuple + (NB: There is no Z1T nor Z0H.) +-} + +type UserString = String -- As the user typed it +type EncodedString = String -- Encoded form + + +zEncodeString :: UserString -> EncodedString +zEncodeString cs = case maybe_tuple cs of + Just n -> n -- Tuples go to Z2T etc + Nothing -> go cs + where + go [] = [] + go (c:cs) = encode_ch c ++ go cs + +unencodedChar :: Char -> Bool -- True for chars that don't need encoding +unencodedChar 'Z' = False +unencodedChar 'z' = False +unencodedChar c = c >= 'a' && c <= 'z' + || c >= 'A' && c <= 'Z' + || c >= '0' && c <= '9' + +encode_ch :: Char -> EncodedString +encode_ch c | unencodedChar c = [c] -- Common case first + +-- Constructors +encode_ch '(' = "ZL" -- Needed for things like (,), and (->) +encode_ch ')' = "ZR" -- For symmetry with ( +encode_ch '[' = "ZM" +encode_ch ']' = "ZN" +encode_ch ':' = "ZC" +encode_ch 'Z' = "ZZ" + +-- Variables +encode_ch 'z' = "zz" +encode_ch '&' = "za" +encode_ch '|' = "zb" +encode_ch '^' = "zc" +encode_ch '$' = "zd" +encode_ch '=' = "ze" +encode_ch '>' = "zg" +encode_ch '#' = "zh" +encode_ch '.' = "zi" +encode_ch '<' = "zl" +encode_ch '-' = "zm" +encode_ch '!' = "zn" +encode_ch '+' = "zp" +encode_ch '\'' = "zq" +encode_ch '\\' = "zr" +encode_ch '/' = "zs" +encode_ch '*' = "zt" +encode_ch '_' = "zu" +encode_ch '%' = "zv" +encode_ch c = 'z' : if isDigit (head hex_str) then hex_str + else '0':hex_str + where hex_str = showHex (ord c) "U" + -- ToDo: we could improve the encoding here in various ways. + -- eg. strings of unicode characters come out as 'z1234Uz5678U', we + -- could remove the 'U' in the middle (the 'z' works as a separator). + +zDecodeString :: EncodedString -> UserString +zDecodeString [] = [] +zDecodeString ('Z' : d : rest) + | isDigit d = decode_tuple d rest + | otherwise = decode_upper d : zDecodeString rest +zDecodeString ('z' : d : rest) + | isDigit d = decode_num_esc d rest + | otherwise = decode_lower d : zDecodeString rest +zDecodeString (c : rest) = c : zDecodeString rest + +decode_upper, decode_lower :: Char -> Char + +decode_upper 'L' = '(' +decode_upper 'R' = ')' +decode_upper 'M' = '[' +decode_upper 'N' = ']' +decode_upper 'C' = ':' +decode_upper 'Z' = 'Z' +decode_upper ch = {-pprTrace "decode_upper" (char ch)-} ch + +decode_lower 'z' = 'z' +decode_lower 'a' = '&' +decode_lower 'b' = '|' +decode_lower 'c' = '^' +decode_lower 'd' = '$' +decode_lower 'e' = '=' +decode_lower 'g' = '>' +decode_lower 'h' = '#' +decode_lower 'i' = '.' +decode_lower 'l' = '<' +decode_lower 'm' = '-' +decode_lower 'n' = '!' +decode_lower 'p' = '+' +decode_lower 'q' = '\'' +decode_lower 'r' = '\\' +decode_lower 's' = '/' +decode_lower 't' = '*' +decode_lower 'u' = '_' +decode_lower 'v' = '%' +decode_lower ch = {-pprTrace "decode_lower" (char ch)-} ch + +-- Characters not having a specific code are coded as z224U (in hex) +decode_num_esc d rest + = go (digitToInt d) rest + where + go n (c : rest) | isHexDigit c = go (16*n + digitToInt c) rest + go n ('U' : rest) = chr n : zDecodeString rest + go n other = error ("decode_num_esc: " ++ show n ++ ' ':other) + +decode_tuple :: Char -> EncodedString -> UserString +decode_tuple d rest + = go (digitToInt d) rest + where + -- NB. recurse back to zDecodeString after decoding the tuple, because + -- the tuple might be embedded in a longer name. + go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest + go 0 ('T':rest) = "()" ++ zDecodeString rest + go n ('T':rest) = '(' : replicate (n-1) ',' ++ ")" ++ zDecodeString rest + go 1 ('H':rest) = "(# #)" ++ zDecodeString rest + go n ('H':rest) = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ zDecodeString rest + go n other = error ("decode_tuple: " ++ show n ++ ' ':other) + +{- +Tuples are encoded as + Z3T or Z3H +for 3-tuples or unboxed 3-tuples respectively. No other encoding starts + Z<digit> + +* "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple) + There are no unboxed 0-tuples. + +* "()" is the tycon for a boxed 0-tuple. + There are no boxed 1-tuples. +-} + +maybe_tuple :: UserString -> Maybe EncodedString + +maybe_tuple "(# #)" = Just("Z1H") +maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of + (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H") + other -> Nothing +maybe_tuple "()" = Just("Z0T") +maybe_tuple ('(' : cs) = case count_commas (0::Int) cs of + (n, ')' : cs) -> Just ('Z' : shows (n+1) "T") + other -> Nothing +maybe_tuple other = Nothing + +count_commas :: Int -> String -> (Int, String) +count_commas n (',' : cs) = count_commas (n+1) cs +count_commas n cs = (n,cs) diff --git a/ghc/compiler/utils/FastString.lhs b/ghc/compiler/utils/FastString.lhs index 52512d3e20..2558c5630a 100644 --- a/ghc/compiler/utils/FastString.lhs +++ b/ghc/compiler/utils/FastString.lhs @@ -1,8 +1,10 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998 +% (c) The University of Glasgow, 1997-2006 % -\section{Fast strings} +\begin{code} +{-# OPTIONS -fglasgow-exts -O #-} +{- FastString: A compact, hash-consed, representation of character strings. Comparison is O(1), and you can get a Unique from them. Generated by the FSLIT macro @@ -15,40 +17,46 @@ LitString: Just a wrapper for the Addr# of a C string (Ptr CChar). Turn into SDoc with Outputable.ptext Use LitString unless you want the facilities of FastString - -\begin{code} +-} module FastString ( + -- * FastStrings FastString(..), -- not abstract, for now. - mkFastString, -- :: String -> FastString - mkFastStringNarrow, -- :: String -> FastString - mkFastSubString, -- :: Addr -> Int -> Int -> FastString - - mkFastString#, -- :: Addr# -> FastString - mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString - - mkFastStringInt, -- :: [Int] -> FastString - - uniqueOfFS, -- :: FastString -> Int# - lengthFS, -- :: FastString -> Int - nullFastString, -- :: FastString -> Bool + -- ** Construction + mkFastString, + mkFastStringBytes, + mkFastStringForeignPtr, + mkFastString#, + mkZFastString, + mkZFastStringBytes, + -- ** Deconstruction unpackFS, -- :: FastString -> String - unpackIntFS, -- :: FastString -> [Int] - appendFS, -- :: FastString -> FastString -> FastString - headFS, -- :: FastString -> Char - headIntFS, -- :: FastString -> Int - tailFS, -- :: FastString -> FastString - concatFS, -- :: [FastString] -> FastString - consFS, -- :: Char -> FastString -> FastString - indexFS, -- :: FastString -> Int -> Char - nilFS, -- :: FastString - - hPutFS, -- :: Handle -> FastString -> IO () - + bytesFS, -- :: FastString -> [Word8] + + -- ** Encoding + isZEncoded, + zEncodeFS, + + -- ** Operations + uniqueOfFS, + lengthFS, + nullFS, + appendFS, + headFS, + tailFS, + concatFS, + consFS, + nilFS, + + -- ** Outputing + hPutFS, + + -- * LitStrings LitString, - mkLitString# -- :: Addr# -> LitString + mkLitString#, + strLength ) where -- This #define suppresses the "import FastString" that @@ -56,64 +64,49 @@ module FastString #define COMPILING_FAST_STRING #include "HsVersions.h" -#if __GLASGOW_HASKELL__ < 503 -import PrelIOBase ( IO(..) ) -#else -import GHC.IOBase ( IO(..) ) -#endif +import Encoding -import PrimPacked +import Foreign +import Foreign.C import GLAEXTS import UNSAFE_IO ( unsafePerformIO ) import MONAD_ST ( stToIO ) import DATA_IOREF ( IORef, newIORef, readIORef, writeIORef ) +import System.IO ( hPutBuf ) -#if __GLASGOW_HASKELL__ < 503 -import PrelArr ( STArray(..), newSTArray ) -#else import GHC.Arr ( STArray(..), newSTArray ) -#endif - -#if __GLASGOW_HASKELL__ >= 504 -import GHC.IOBase -import GHC.Handle -import Foreign.C -#else -import IOExts ( hPutBufBAFull ) -#endif +import GHC.IOBase ( IO(..) ) import IO -import Char ( chr, ord ) #define hASH_TBL_SIZE 4091 -\end{code} -@FastString@s are packed representations of strings -with a unique id for fast comparisons. The unique id -is assigned when creating the @FastString@, using -a hash table to map from the character string representation -to the unique ID. -\begin{code} -data FastString - = FastString -- packed repr. on the heap. - Int# -- unique id - -- 0 => string literal, comparison - -- will - Int# -- length - ByteArray# -- stuff - - | UnicodeStr -- if contains characters outside '\1'..'\xFF' - Int# -- unique id - [Int] -- character numbers +{-| +A 'FastString' is an array of bytes, hashed to support fast O(1) +comparison. It is also associated with a character encoding, so that +we know how to convert a 'FastString' to the local encoding, or to the +Z-encoding used by the compiler internally. -instance Eq FastString where - -- shortcut for real FastStrings - (FastString u1 _ _) == (FastString u2 _ _) = u1 ==# u2 - a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False } +'FastString's support a memoized conversion to the Z-encoding via zEncodeFS. +-} + +data FastString = FastString { + uniq :: {-# UNPACK #-} !Int, -- unique id + n_bytes :: {-# UNPACK #-} !Int, -- number of bytes + n_chars :: {-# UNPACK #-} !Int, -- number of chars + buf :: {-# UNPACK #-} !(ForeignPtr Word8), + enc :: FSEncoding + } + +data FSEncoding + = ZEncoded + -- including strings that don't need any encoding + | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString)) + -- A UTF-8 string with a memoized Z-encoding - (FastString u1 _ _) /= (FastString u2 _ _) = u1 /=# u2 - a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True } +instance Eq FastString where + f1 == f2 = uniq f1 == uniq f2 instance Ord FastString where -- Compares lexicographically, not by unique @@ -130,360 +123,311 @@ instance Ord FastString where instance Show FastString where show fs = show (unpackFS fs) -lengthFS :: FastString -> Int -lengthFS (FastString _ l# _) = I# l# -lengthFS (UnicodeStr _ s) = length s - -nullFastString :: FastString -> Bool -nullFastString (FastString _ l# _) = l# ==# 0# -nullFastString (UnicodeStr _ []) = True -nullFastString (UnicodeStr _ (_:_)) = False - -unpackFS :: FastString -> String -unpackFS (FastString _ l# ba#) = unpackNBytesBA (BA ba#) (I# l#) -unpackFS (UnicodeStr _ s) = map chr s - -unpackIntFS :: FastString -> [Int] -unpackIntFS (UnicodeStr _ s) = s -unpackIntFS fs = map ord (unpackFS fs) - -appendFS :: FastString -> FastString -> FastString -appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2) - -concatFS :: [FastString] -> FastString -concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better - -headFS :: FastString -> Char -headFS (FastString _ l# ba#) = - if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS") -headFS (UnicodeStr _ (c:_)) = chr c -headFS (UnicodeStr _ []) = error ("headFS: empty FS") - -headIntFS :: FastString -> Int -headIntFS (UnicodeStr _ (c:_)) = c -headIntFS fs = ord (headFS fs) - -indexFS :: FastString -> Int -> Char -indexFS f i@(I# i#) = - case f of - FastString _ l# ba# - | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#) - | otherwise -> error (msg (I# l#)) - UnicodeStr _ s -> chr (s!!i) - where - msg l = "indexFS: out of range: " ++ show (l,i) - -tailFS :: FastString -> FastString -tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#) -tailFS fs = mkFastStringInt (tail (unpackIntFS fs)) - -consFS :: Char -> FastString -> FastString -consFS c fs = mkFastStringInt (ord c : unpackIntFS fs) +cmpFS :: FastString -> FastString -> Ordering +cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) = + if u1 == u2 then EQ else + let l = if l1 <= l2 then l1 else l2 in + inlinePerformIO $ + withForeignPtr buf1 $ \p1 -> + withForeignPtr buf2 $ \p2 -> do + res <- memcmp p1 p2 l + case () of + _ | res < 0 -> return LT + | res == 0 -> if l1 == l2 then return EQ + else if l1 < l2 then return LT + else return GT + | otherwise -> return GT -uniqueOfFS :: FastString -> Int# -uniqueOfFS (FastString u# _ _) = u# -uniqueOfFS (UnicodeStr u# _) = u# +#ifndef __HADDOCK__ +foreign import ccall unsafe "ghc_memcmp" + memcmp :: Ptr a -> Ptr b -> Int -> IO Int +#endif -nilFS = mkFastString "" -\end{code} +-- ----------------------------------------------------------------------------- +-- Construction +{- Internally, the compiler will maintain a fast string symbol table, providing sharing and fast comparison. Creation of new @FastString@s then covertly does a lookup, re-using the @FastString@ if there was a hit. +-} -Caution: mkFastStringUnicode assumes that if the string is in the -table, it sits under the UnicodeStr constructor. Other mkFastString -variants analogously assume the FastString constructor. - -\begin{code} data FastStringTable = FastStringTable - Int# + {-# UNPACK #-} !Int (MutableArray# RealWorld [FastString]) -type FastStringTableVar = IORef FastStringTable - -string_table :: FastStringTableVar +string_table :: IORef FastStringTable string_table = - unsafePerformIO ( - stToIO (newSTArray (0::Int,hASH_TBL_SIZE) []) - >>= \ (STArray _ _ arr#) -> - newIORef (FastStringTable 0# arr#)) - -lookupTbl :: FastStringTable -> Int# -> IO [FastString] -lookupTbl (FastStringTable _ arr#) i# = - IO ( \ s# -> - readArray# arr# i# s#) - -updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO () -updTbl fs_table_var (FastStringTable uid# arr#) i# ls = - IO (\ s# -> case writeArray# arr# i# ls s# of { s2# -> - (# s2#, () #) }) >> - writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#) + unsafePerformIO $ do + (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) []) + newIORef (FastStringTable 0 arr#) + +lookupTbl :: FastStringTable -> Int -> IO [FastString] +lookupTbl (FastStringTable _ arr#) (I# i#) = + IO $ \ s# -> readArray# arr# i# s# + +updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO () +updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do + (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) }) + writeIORef fs_table_var (FastStringTable (uid+1) arr#) mkFastString# :: Addr# -> FastString -mkFastString# a# = - case strLength (Ptr a#) of { (I# len#) -> mkFastStringLen# a# len# } +mkFastString# a# = mkFastStringBytes ptr (strLength ptr) + where ptr = Ptr a# -mkFastStringLen# :: Addr# -> Int# -> FastString -mkFastStringLen# a# len# = - unsafePerformIO ( - readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) -> +mkFastStringBytes :: Ptr Word8 -> Int -> FastString +mkFastStringBytes ptr len = unsafePerformIO $ do + ft@(FastStringTable uid tbl#) <- readIORef string_table let - h = hashStr a# len# - in --- _trace ("hashed: "++show (I# h)) $ - lookupTbl ft h >>= \ lookup_result -> + h = hashStr ptr len + add_it ls = do + fs <- copyNewFastString uid ptr len + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h case lookup_result of - [] -> - -- no match, add it to table by copying out the - -- the string into a ByteArray - -- _trace "empty bucket" $ - case copyPrefixStr a# (I# len#) of - BA barr# -> - let f_str = FastString uid# len# barr# in - updTbl string_table ft h [f_str] >> - ({- _trace ("new: " ++ show f_str) $ -} return f_str) - ls -> - -- non-empty `bucket', scan the list looking - -- entry with same length and compare byte by byte. - -- _trace ("non-empty bucket"++show ls) $ - case bucket_match ls len# a# of - Nothing -> - case copyPrefixStr a# (I# len#) of - BA barr# -> - let f_str = FastString uid# len# barr# in - updTbl string_table ft h (f_str:ls) >> - ( {- _trace ("new: " ++ show f_str) $ -} return f_str) - Just v -> {- _trace ("re-use: "++show v) $ -} return v) - where - bucket_match [] _ _ = Nothing - bucket_match (v@(FastString _ l# ba#):ls) len# a# = - if len# ==# l# && eqStrPrefix a# ba# l# then - Just v - else - bucket_match ls len# a# - bucket_match (UnicodeStr _ _ : ls) len# a# = - bucket_match ls len# a# - -mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString -mkFastSubStringBA# barr# start# len# = - unsafePerformIO ( - readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) -> + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v + +mkZFastStringBytes :: Ptr Word8 -> Int -> FastString +mkZFastStringBytes ptr len = unsafePerformIO $ do + ft@(FastStringTable uid tbl#) <- readIORef string_table let - h = hashSubStrBA barr# start# len# - in --- _trace ("hashed(b): "++show (I# h)) $ - lookupTbl ft h >>= \ lookup_result -> + h = hashStr ptr len + add_it ls = do + fs <- copyNewZFastString uid ptr len + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h case lookup_result of - [] -> - -- no match, add it to table by copying out the - -- the string into a ByteArray - -- _trace "empty bucket(b)" $ - case copySubStrBA (BA barr#) (I# start#) (I# len#) of - BA ba# -> - let f_str = FastString uid# len# ba# in - updTbl string_table ft h [f_str] >> - -- _trace ("new(b): " ++ show f_str) $ - return f_str - ls -> - -- non-empty `bucket', scan the list looking - -- entry with same length and compare byte by byte. - -- _trace ("non-empty bucket(b)"++show ls) $ - case bucket_match ls start# len# barr# of - Nothing -> - case copySubStrBA (BA barr#) (I# start#) (I# len#) of - BA ba# -> - let f_str = FastString uid# len# ba# in - updTbl string_table ft h (f_str:ls) >> - -- _trace ("new(b): " ++ show f_str) $ - return f_str - Just v -> - -- _trace ("re-use(b): "++show v) $ - return v - ) - where - bucket_match [] _ _ _ = Nothing - bucket_match (v:ls) start# len# ba# = - case v of - FastString _ l# barr# -> - if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then - Just v - else - bucket_match ls start# len# ba# - UnicodeStr _ _ -> bucket_match ls start# len# ba# - -mkFastStringUnicode :: [Int] -> FastString -mkFastStringUnicode s = - unsafePerformIO ( - readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) -> + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v + +-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference +-- between this and 'mkFastStringBytes' is that we don't have to copy +-- the bytes if the string is new to the table. +mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString +mkFastStringForeignPtr ptr fp len = do + ft@(FastStringTable uid tbl#) <- readIORef string_table +-- _trace ("hashed: "++show (I# h)) $ let - h = hashUnicode s 0# - in --- _trace ("hashed(b): "++show (I# h)) $ - lookupTbl ft h >>= \ lookup_result -> + h = hashStr ptr len + add_it ls = do + fs <- mkNewFastString uid ptr fp len + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h + case lookup_result of + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v + +mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString +mkZFastStringForeignPtr ptr fp len = do + ft@(FastStringTable uid tbl#) <- readIORef string_table +-- _trace ("hashed: "++show (I# h)) $ + let + h = hashStr ptr len + add_it ls = do + fs <- mkNewZFastString uid ptr fp len + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h case lookup_result of - [] -> - -- no match, add it to table by copying out the - -- the string into a [Int] - let f_str = UnicodeStr uid# s in - updTbl string_table ft h [f_str] >> - -- _trace ("new(b): " ++ show f_str) $ - return f_str - ls -> - -- non-empty `bucket', scan the list looking - -- entry with same length and compare byte by byte. - -- _trace ("non-empty bucket(b)"++show ls) $ - case bucket_match ls of - Nothing -> - let f_str = UnicodeStr uid# s in - updTbl string_table ft h (f_str:ls) >> - -- _trace ("new(b): " ++ show f_str) $ - return f_str - Just v -> - -- _trace ("re-use(b): "++show v) $ - return v - ) - where - bucket_match [] = Nothing - bucket_match (v@(UnicodeStr _ s'):ls) = - if s' == s then Just v else bucket_match ls - bucket_match (FastString _ _ _ : ls) = bucket_match ls - -mkFastStringNarrow :: String -> FastString -mkFastStringNarrow str = - case packString str of { (I# len#, BA frozen#) -> - mkFastSubStringBA# frozen# 0# len# - } - {- 0-indexed array, len# == index to one beyond end of string, - i.e., (0,1) => empty string. -} + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v -mkFastString :: String -> FastString -mkFastString str = if all good str - then mkFastStringNarrow str - else mkFastStringUnicode (map ord str) - where - good c = c >= '\1' && c <= '\xFF' - -mkFastStringInt :: [Int] -> FastString -mkFastStringInt str = if all good str - then mkFastStringNarrow (map chr str) - else mkFastStringUnicode str - where - good c = c >= 1 && c <= 0xFF - -mkFastSubString :: Addr# -> Int -> Int -> FastString -mkFastSubString a# (I# start#) (I# len#) = - mkFastStringLen# (a# `plusAddr#` start#) len# -\end{code} -\begin{code} -hashStr :: Addr# -> Int# -> Int# +-- | Creates a UTF-8 encoded 'FastString' from a 'String' +mkFastString :: String -> FastString +mkFastString str = + inlinePerformIO $ do + let l = utf8EncodedLength str + buf <- mallocForeignPtrBytes l + withForeignPtr buf $ \ptr -> do + utf8EncodeString ptr str + mkFastStringForeignPtr ptr buf l + + +-- | Creates a Z-encoded 'FastString' from a 'String' +mkZFastString :: String -> FastString +mkZFastString str = + inlinePerformIO $ do + let l = Prelude.length str + buf <- mallocForeignPtrBytes l + withForeignPtr buf $ \ptr -> do + pokeCAString (castPtr ptr) str + mkZFastStringForeignPtr ptr buf l + +bucket_match [] _ _ = return Nothing +bucket_match (v@(FastString _ l _ buf _):ls) len ptr + | len == l = do + b <- cmpStringPrefix ptr buf len + if b then return (Just v) + else bucket_match ls len ptr + | otherwise = + bucket_match ls len ptr + +mkNewFastString uid ptr fp len = do + ref <- newIORef Nothing + n_chars <- countUTF8Chars ptr len + return (FastString uid len n_chars fp (UTF8Encoded ref)) + +mkNewZFastString uid ptr fp len = do + return (FastString uid len len fp ZEncoded) + + +copyNewFastString uid ptr len = do + fp <- copyBytesToForeignPtr ptr len + ref <- newIORef Nothing + n_chars <- countUTF8Chars ptr len + return (FastString uid len n_chars fp (UTF8Encoded ref)) + +copyNewZFastString uid ptr len = do + fp <- copyBytesToForeignPtr ptr len + return (FastString uid len len fp ZEncoded) + + +copyBytesToForeignPtr ptr len = do + fp <- mallocForeignPtrBytes len + withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len + return fp + +cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool +cmpStringPrefix ptr fp len = + withForeignPtr fp $ \ptr' -> do + r <- memcmp ptr ptr' len + return (r == 0) + + +hashStr :: Ptr Word8 -> Int -> Int -- use the Addr to produce a hash value between 0 & m (inclusive) -hashStr a# len# = loop 0# 0# +hashStr (Ptr a#) (I# len#) = loop 0# 0# where - loop h n | n ==# len# = h + loop h n | n ==# len# = I# h | otherwise = loop h2 (n +# 1#) where c = ord# (indexCharOffAddr# a# n) h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE# -hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int# - -- use the byte array to produce a hash value between 0 & m (inclusive) -hashSubStrBA ba# start# len# = loop 0# 0# - where - loop h n | n ==# len# = h - | otherwise = loop h2 (n +# 1#) - where c = ord# (indexCharArray# ba# (start# +# n)) - h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE# +-- ----------------------------------------------------------------------------- +-- Operations -hashUnicode :: [Int] -> Int# -> Int# -hashUnicode [] h = h -hashUnicode (I# c : cs) h = hashUnicode cs ((c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#) -\end{code} +-- | Returns the length of the 'FastString' in characters +lengthFS :: FastString -> Int +lengthFS f = n_chars f -\begin{code} -cmpFS :: FastString -> FastString -> Ordering -cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ - else compare s1 s2 -cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2) -cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2 -cmpFS (FastString u1# l1# b1#) (FastString u2# l2# b2#) = - if u1# ==# u2# then EQ else - let l# = if l1# <=# l2# then l1# else l2# in - unsafePerformIO ( - memcmp b1# b2# l# >>= \ (I# res) -> - return ( - if res <# 0# then LT - else if res ==# 0# then - if l1# ==# l2# then EQ - else if l1# <# l2# then LT else GT - else GT - )) +-- | Returns 'True' if the 'FastString' is Z-encoded +isZEncoded :: FastString -> Bool +isZEncoded fs | ZEncoded <- enc fs = True + | otherwise = False -#ifndef __HADDOCK__ -foreign import ccall unsafe "ghc_memcmp" - memcmp :: ByteArray# -> ByteArray# -> Int# -> IO Int -#endif +-- | Returns 'True' if the 'FastString' is empty +nullFS :: FastString -> Bool +nullFS f = n_bytes f == 0 + +-- | unpacks and decodes the FastString +unpackFS :: FastString -> String +unpackFS (FastString _ n_bytes _ buf enc) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> + case enc of + ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes) + UTF8Encoded _ -> utf8DecodeString ptr n_bytes + +bytesFS :: FastString -> [Word8] +bytesFS (FastString _ n_bytes _ buf enc) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> + peekArray n_bytes ptr + +-- | returns a Z-encoded version of a 'FastString'. This might be the +-- original, if it was already Z-encoded. The first time this +-- function is applied to a particular 'FastString', the results are +-- memoized. +-- +zEncodeFS :: FastString -> FastString +zEncodeFS fs@(FastString uid n_bytes _ fp enc) = + case enc of + ZEncoded -> fs + UTF8Encoded ref -> + inlinePerformIO $ do + m <- readIORef ref + case m of + Just fs -> return fs + Nothing -> do + let efs = mkZFastString (zEncodeString (unpackFS fs)) + writeIORef ref (Just efs) + return efs + +appendFS :: FastString -> FastString -> FastString +appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2) + +concatFS :: [FastString] -> FastString +concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better + +headFS :: FastString -> Char +headFS (FastString _ n_bytes _ buf enc) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> do + case enc of + ZEncoded -> do + w <- peek (castPtr ptr) + return (castCCharToChar w) + UTF8Encoded _ -> + return (fst (utf8DecodeChar ptr)) + +tailFS :: FastString -> FastString +tailFS (FastString _ n_bytes _ buf enc) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> do + case enc of + ZEncoded -> do + return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1) + UTF8Encoded _ -> do + let (_,ptr') = utf8DecodeChar ptr + let off = ptr' `minusPtr` ptr + return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off) + +consFS :: Char -> FastString -> FastString +consFS c fs = mkFastString (c : unpackFS fs) + +uniqueOfFS :: FastString -> Int# +uniqueOfFS (FastString (I# u#) _ _ _ _) = u# + +nilFS = mkFastString "" -- ----------------------------------------------------------------------------- -- Outputting 'FastString's -#if __GLASGOW_HASKELL__ >= 504 - --- this is our own version of hPutBuf for FastStrings, because in --- 5.04+ we don't have mutable byte arrays and therefore hPutBufBA. --- The closest is hPutArray in Data.Array.IO, but that does some extra --- range checks that we want to avoid here. - -foreign import ccall unsafe "__hscore_memcpy_dst_off" - memcpy_baoff_ba :: RawBuffer -> Int -> RawBuffer -> CSize -> IO (Ptr ()) - -hPutFS handle (FastString _ l# ba#) - | l# ==# 0# = return () - | otherwise - = do wantWritableHandle "hPutFS" handle $ - \ handle_@Handle__{ haFD=fd, haBuffer=ref, haIsStream=stream } -> do - - old_buf@Buffer{ bufBuf=old_raw, bufRPtr=r, bufWPtr=w, bufSize=size } - <- readIORef ref - - let count = I# l# - raw = unsafeCoerce# ba# :: MutableByteArray# RealWorld - - -- enough room in handle buffer? - if (size - w > count) - -- There's enough room in the buffer: - -- just copy the data in and update bufWPtr. - then do memcpy_baoff_ba old_raw w raw (fromIntegral count) - writeIORef ref old_buf{ bufWPtr = w + count } - return () - - -- else, we have to flush - else do flushed_buf <- flushWriteBuffer fd stream old_buf - writeIORef ref flushed_buf - let this_buf = - Buffer{ bufBuf=raw, bufState=WriteBuffer, - bufRPtr=0, bufWPtr=count, bufSize=count } - flushWriteBuffer fd stream this_buf - return () - -#else - -hPutFS :: Handle -> FastString -> IO () -hPutFS handle (FastString _ l# ba#) - | l# ==# 0# = return () - | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#) - hPutBufBAFull handle mba (I# l#) - where - bot = error "hPutFS.ba" - -#endif +-- |Outputs a 'FastString' with /no decoding at all/, that is, you +-- get the actual bytes in the 'FastString' written to the 'Handle'. +hPutFS handle (FastString _ len _ fp _) + | len == 0 = return () + | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len --- ONLY here for debugging the NCG (so -ddump-stix works for string --- literals); no idea if this is really necessary. JRS, 010131 -hPutFS handle (UnicodeStr _ is) - = hPutStr handle ("(UnicodeStr " ++ show is ++ ")") +-- ToDo: we'll probably want an hPutFSLocal, or something, to output +-- in the current locale's encoding (for error messages and suchlike). -- ----------------------------------------------------------------------------- -- LitStrings, here for convenience only. @@ -492,4 +436,24 @@ type LitString = Ptr () mkLitString# :: Addr# -> LitString mkLitString# a# = Ptr a# + +foreign import ccall unsafe "ghc_strlen" + strLength :: Ptr () -> Int + +-- ----------------------------------------------------------------------------- +-- under the carpet + +-- Just like unsafePerformIO, but we inline it. +{-# INLINE inlinePerformIO #-} +inlinePerformIO :: IO a -> a +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r + +pokeCAString :: Ptr CChar -> String -> IO () +pokeCAString ptr str = + let + go [] n = pokeElemOff ptr n 0 + go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1) + in + go str 0 + \end{code} diff --git a/ghc/compiler/utils/FastTypes.lhs b/ghc/compiler/utils/FastTypes.lhs index 9f9d9038f1..bb92c8c02f 100644 --- a/ghc/compiler/utils/FastTypes.lhs +++ b/ghc/compiler/utils/FastTypes.lhs @@ -9,7 +9,7 @@ module FastTypes ( (+#), (-#), (*#), quotFastInt, negateFastInt, (==#), (<#), (<=#), (>=#), (>#), - FastBool, fastBool, isFastTrue, fastOr + FastBool, fastBool, isFastTrue, fastOr, fastAnd ) where #include "HsVersions.h" diff --git a/ghc/compiler/utils/Pretty.lhs b/ghc/compiler/utils/Pretty.lhs index 916755e902..ec8f1e75ad 100644 --- a/ghc/compiler/utils/Pretty.lhs +++ b/ghc/compiler/utils/Pretty.lhs @@ -179,26 +179,16 @@ module Pretty ( import BufWrite import FastString -import PrimPacked ( strLength ) import GLAEXTS import Numeric (fromRat) import IO -#if __GLASGOW_HASKELL__ < 503 -import IOExts ( hPutBufFull ) -#else import System.IO ( hPutBuf ) -#endif -#if __GLASGOW_HASKELL__ < 503 -import PrelBase ( unpackCString# ) -#else import GHC.Base ( unpackCString# ) -#endif - -import PrimPacked ( Ptr(..) ) +import GHC.Ptr ( Ptr(..) ) -- Don't import Util( assertPanic ) because it makes a loop in the module structure diff --git a/ghc/compiler/utils/PrimPacked.lhs b/ghc/compiler/utils/PrimPacked.lhs deleted file mode 100644 index f2d034dcee..0000000000 --- a/ghc/compiler/utils/PrimPacked.lhs +++ /dev/null @@ -1,265 +0,0 @@ -% -% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998 -% -\section{Basic ops on packed representations} - -Some basic operations for working on packed representations of series -of bytes (character strings). Used by the interface lexer input -subsystem, mostly. - -\begin{code} -{-# OPTIONS -optc-DNON_POSIX_SOURCE #-} - -module PrimPacked ( - Ptr(..), nullPtr, plusAddr#, - BA(..), - packString, -- :: String -> (Int, BA) - unpackNBytesBA, -- :: BA -> Int -> [Char] - strLength, -- :: Ptr CChar -> Int - copyPrefixStr, -- :: Addr# -> Int -> BA - copySubStrBA, -- :: BA -> Int -> Int -> BA - eqStrPrefix, -- :: Addr# -> ByteArray# -> Int# -> Bool - eqStrPrefixBA, -- :: ByteArray# -> ByteArray# -> Int# -> Int# -> Bool - ) where - --- This #define suppresses the "import FastString" that --- HsVersions otherwise produces -#define COMPILING_FAST_STRING -#include "HsVersions.h" - -import GLAEXTS -import UNSAFE_IO ( unsafePerformIO ) - -import MONAD_ST -import Foreign - -#if __GLASGOW_HASKELL__ < 503 -import PrelST -#else -import GHC.ST -#endif - -#if __GLASGOW_HASKELL__ >= 504 -import GHC.Ptr ( Ptr(..) ) -#elif __GLASGOW_HASKELL__ >= 500 -import Ptr ( Ptr(..) ) -#endif - -#if __GLASGOW_HASKELL__ < 504 -import PrelIOBase ( IO(..) ) -#else -import GHC.IOBase ( IO(..) ) -#endif -\end{code} - -Compatibility: 4.08 didn't have the Ptr type. - -\begin{code} -#if __GLASGOW_HASKELL__ <= 408 -data Ptr a = Ptr Addr# deriving (Eq, Ord) - -nullPtr :: Ptr a -nullPtr = Ptr (int2Addr# 0#) -#endif - -#if __GLASGOW_HASKELL__ <= 500 --- plusAddr# is a primop in GHC > 5.00 -plusAddr# :: Addr# -> Int# -> Addr# -plusAddr# a# i# = int2Addr# (addr2Int# a# +# i#) -#endif -\end{code} - -Wrapper types for bytearrays - -\begin{code} -data BA = BA ByteArray# -data MBA s = MBA (MutableByteArray# s) -\end{code} - -\begin{code} -packString :: String -> (Int, BA) -packString str = (l, arr) - where - l@(I# length#) = length str - - arr = runST (do - ch_array <- new_ps_array length# - -- fill in packed string from "str" - fill_in ch_array 0# str - -- freeze the puppy: - freeze_ps_array ch_array length# - ) - - fill_in :: MBA s -> Int# -> [Char] -> ST s () - fill_in arr_in# idx [] = - return () - fill_in arr_in# idx (C# c : cs) = - write_ps_array arr_in# idx c >> - fill_in arr_in# (idx +# 1#) cs -\end{code} - -Unpacking a string - -\begin{code} -unpackNBytesBA :: BA -> Int -> [Char] -unpackNBytesBA (BA bytes) (I# len) - = unpack 0# - where - unpack nh - | nh >=# len = [] - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharArray# bytes nh -\end{code} - -Copying a char string prefix into a byte array. - -\begin{code} -copyPrefixStr :: Addr# -> Int -> BA -copyPrefixStr a# len@(I# length#) = copy' length# - where - copy' length# = runST (do - {- allocate an array that will hold the string - -} - ch_array <- new_ps_array length# - {- Revert back to Haskell-only solution for the moment. - _ccall_ memcpy ch_array (A# a) len >>= \ () -> - write_ps_array ch_array length# (chr# 0#) >> - -} - -- fill in packed string from "addr" - fill_in ch_array 0# - -- freeze the puppy: - freeze_ps_array ch_array length# - ) - - fill_in :: MBA s -> Int# -> ST s () - fill_in arr_in# idx - | idx ==# length# - = return () - | otherwise - = case (indexCharOffAddr# a# idx) of { ch -> - write_ps_array arr_in# idx ch >> - fill_in arr_in# (idx +# 1#) } -\end{code} - -Copying out a substring, assume a 0-indexed string: -(and positive lengths, thank you). - -\begin{code} -#ifdef UNUSED -copySubStr :: Addr# -> Int -> Int -> BA -copySubStr a# (I# start#) length = - copyPrefixStr (a# `plusAddr#` start#) length -#endif - -copySubStrBA :: BA -> Int -> Int -> BA -copySubStrBA (BA barr#) (I# start#) len@(I# length#) = ba - where - ba = runST (do - -- allocate an array that will hold the string - ch_array <- new_ps_array length# - -- fill in packed string from "addr" - fill_in ch_array 0# - -- freeze the puppy: - freeze_ps_array ch_array length# - ) - - fill_in :: MBA s -> Int# -> ST s () - fill_in arr_in# idx - | idx ==# length# - = return () - | otherwise - = case (indexCharArray# barr# (start# +# idx)) of { ch -> - write_ps_array arr_in# idx ch >> - fill_in arr_in# (idx +# 1#) } -\end{code} - -(Very :-) ``Specialised'' versions of some CharArray things... -[Copied from PackBase; no real reason -- UGH] - -\begin{code} -new_ps_array :: Int# -> ST s (MBA s) -write_ps_array :: MBA s -> Int# -> Char# -> ST s () -freeze_ps_array :: MBA s -> Int# -> ST s BA - -#if __GLASGOW_HASKELL__ < 411 -#define NEW_BYTE_ARRAY newCharArray# -#else -#define NEW_BYTE_ARRAY newPinnedByteArray# -#endif - -new_ps_array size = ST $ \ s -> - case (NEW_BYTE_ARRAY size s) of { (# s2#, barr# #) -> - (# s2#, MBA barr# #) } - -write_ps_array (MBA barr#) n ch = ST $ \ s# -> - case writeCharArray# barr# n ch s# of { s2# -> - (# s2#, () #) } - --- same as unsafeFreezeByteArray -freeze_ps_array (MBA arr#) len# = ST $ \ s# -> - case unsafeFreezeByteArray# arr# s# of { (# s2#, frozen# #) -> - (# s2#, BA frozen# #) } -\end{code} - - -Compare two equal-length strings for equality: - -\begin{code} -eqStrPrefix :: Addr# -> ByteArray# -> Int# -> Bool -eqStrPrefix a# barr# len# = - inlinePerformIO $ do - x <- memcmp_ba a# barr# (I# len#) - return (x == 0) - -#ifdef UNUSED -eqCharStrPrefix :: Addr# -> Addr# -> Int# -> Bool -eqCharStrPrefix a1# a2# len# = - inlinePerformIO $ do - x <- memcmp a1# a2# (I# len#) - return (x == 0) -#endif - -eqStrPrefixBA :: ByteArray# -> ByteArray# -> Int# -> Int# -> Bool -eqStrPrefixBA b1# b2# start# len# = - inlinePerformIO $ do - x <- memcmp_baoff_ba b2# (I# start#) b1# (I# len#) - return (x == 0) - -#ifdef UNUSED -eqCharStrPrefixBA :: Addr# -> ByteArray# -> Int# -> Int# -> Bool -eqCharStrPrefixBA a# b2# start# len# = - inlinePerformIO $ do - x <- memcmp_baoff b2# (I# start#) a# (I# len#) - return (x == 0) -#endif -\end{code} - -\begin{code} --- Just like unsafePerformIO, but we inline it. This is safe when --- there are no side effects, and improves performance. -{-# INLINE inlinePerformIO #-} -inlinePerformIO :: IO a -> a -inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r - -#if __GLASGOW_HASKELL__ <= 408 -strLength (Ptr a#) = ghc_strlen a# -foreign import ccall unsafe "ghc_strlen" - ghc_strlen :: Addr# -> Int -#else -foreign import ccall unsafe "ghc_strlen" - strLength :: Ptr () -> Int -#endif - -foreign import ccall unsafe "ghc_memcmp" - memcmp :: Addr# -> Addr# -> Int -> IO Int - -foreign import ccall unsafe "ghc_memcmp" - memcmp_ba :: Addr# -> ByteArray# -> Int -> IO Int - -foreign import ccall unsafe "ghc_memcmp_off" - memcmp_baoff :: ByteArray# -> Int -> Addr# -> Int -> IO Int - -foreign import ccall unsafe "ghc_memcmp_off" - memcmp_baoff_ba :: ByteArray# -> Int -> ByteArray# -> Int -> IO Int -\end{code} diff --git a/ghc/compiler/utils/StringBuffer.lhs b/ghc/compiler/utils/StringBuffer.lhs index e53dbc89ce..e2eed889f2 100644 --- a/ghc/compiler/utils/StringBuffer.lhs +++ b/ghc/compiler/utils/StringBuffer.lhs @@ -6,27 +6,32 @@ Buffers for scanning string input stored in external arrays. \begin{code} +{-# OPTIONS_GHC -O #-} +-- always optimise this module, it's critical + module StringBuffer ( StringBuffer(..), -- non-abstract for vs\/HaskellService -- * Creation\/destruction - hGetStringBuffer, -- :: FilePath -> IO StringBuffer - stringToStringBuffer, -- :: String -> IO StringBuffer + hGetStringBuffer, + stringToStringBuffer, - -- * Lookup - currentChar, -- :: StringBuffer -> Char - prevChar, -- :: StringBuffer -> Char -> Char - lookAhead, -- :: StringBuffer -> Int -> Char - atEnd, -- :: StringBuffer -> Bool + -- * Inspection + nextChar, + currentChar, + prevChar, + atEnd, - -- * Moving - stepOn, stepOnBy, + -- * Moving and comparison + stepOn, + offsetBytes, + byteDiff, -- * Conversion - lexemeToString, -- :: StringBuffer -> Int -> String - lexemeToFastString, -- :: StringBuffer -> Int -> FastString + lexemeToString, + lexemeToFastString, -- * Parsing integers parseInteger, @@ -34,22 +39,19 @@ module StringBuffer #include "HsVersions.h" -import FastString -import Panic +import Encoding +import FastString (FastString,mkFastString,mkFastStringBytes) import GLAEXTS import Foreign -#if __GLASGOW_HASKELL__ < 503 -import PrelIOBase -import PrelHandle -#else -import GHC.IOBase -import GHC.IO ( slurpFile ) -#endif +import GHC.IOBase ( IO(..) ) +import GHC.Base ( unsafeChr ) + +import System.IO ( hGetBuf ) -import IO ( openFile, hFileSize, IOMode(ReadMode), +import IO ( hFileSize, IOMode(ReadMode), hClose ) #if __GLASGOW_HASKELL__ >= 601 import System.IO ( openBinaryFile ) @@ -57,37 +59,35 @@ import System.IO ( openBinaryFile ) import IOExts ( openFileEx, IOModeEx(..) ) #endif -#if __GLASGOW_HASKELL__ < 503 -import IArray ( listArray ) -import ArrayBase ( UArray(..) ) -import MutableArray -import IOExts ( hGetBufBA ) -#else -import Data.Array.IArray ( listArray ) -import Data.Array.MArray ( unsafeFreeze, newArray_ ) -import Data.Array.Base ( UArray(..) ) -import Data.Array.IO ( IOArray, hGetArray ) -#endif - -import Char ( ord ) - #if __GLASGOW_HASKELL__ < 601 openBinaryFile fp mode = openFileEx fp (BinaryMode mode) #endif + -- ----------------------------------------------------------------------------- -- The StringBuffer type --- A StringBuffer is a ByteArray# with a pointer into it. We also cache --- the length of the ByteArray# for speed. - +-- |A StringBuffer is an internal pointer to a sized chunk of bytes. +-- The bytes are intended to be *immutable*. There are pure +-- operations to read the contents of a StringBuffer. +-- +-- A StringBuffer may have a finalizer, depending on how it was +-- obtained. +-- data StringBuffer - = StringBuffer - ByteArray# - Int# -- length - Int# -- current pos + = StringBuffer { + buf :: {-# UNPACK #-} !(ForeignPtr Word8), + len :: {-# UNPACK #-} !Int, -- length + cur :: {-# UNPACK #-} !Int -- current pos + } + -- The buffer is assumed to be UTF-8 encoded, and furthermore + -- we add three '\0' bytes to the end as sentinels so that the + -- decoder doesn't have to check for overflow at every single byte + -- of a multibyte sequence. instance Show StringBuffer where - showsPrec _ s = showString "<stringbuffer>" + showsPrec _ s = showString "<stringbuffer(" + . shows (len s) . showString "," . shows (cur s) + . showString ">" -- ----------------------------------------------------------------------------- -- Creation / Destruction @@ -95,97 +95,108 @@ instance Show StringBuffer where hGetStringBuffer :: FilePath -> IO StringBuffer hGetStringBuffer fname = do h <- openBinaryFile fname ReadMode - size <- hFileSize h - let size_i@(I# sz#) = fromIntegral size -#if __GLASGOW_HASKELL__ < 503 - arr <- stToIO (newCharArray (0,size_i-1)) - r <- hGetBufBA h arr size_i -#else - arr <- newArray_ (0,size_i-1) - r <- if size_i == 0 then return 0 else hGetArray h arr size_i -#endif - hClose h - if (r /= size_i) + size_i <- hFileSize h + let size = fromIntegral size_i + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + r <- if size == 0 then return 0 else hGetBuf h ptr size + hClose h + if (r /= size) then ioError (userError "short read of file") else do -#if __GLASGOW_HASKELL__ < 503 - frozen <- stToIO (unsafeFreezeByteArray arr) - case frozen of - ByteArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) -#else - frozen <- unsafeFreeze arr - case frozen of - UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) -#endif + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return (StringBuffer buf size 0) -#if __GLASGOW_HASKELL__ >= 502 +stringToStringBuffer :: String -> IO StringBuffer stringToStringBuffer str = do - let size@(I# sz#) = length str - arr = listArray (0,size-1) (map (fromIntegral.ord) str) - :: UArray Int Word8 - case arr of - UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#) -#else -stringToStringBuffer = panic "stringToStringBuffer: not implemented" -#endif + let size = utf8EncodedLength str + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + utf8EncodeString ptr str + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return (StringBuffer buf size 0) -- ----------------------------------------------------------------------------- --- Lookup - -currentChar :: StringBuffer -> Char -currentChar (StringBuffer arr# l# current#) = - ASSERT(current# <# l#) - C# (indexCharArray# arr# current#) +-- Grab a character + +-- Getting our fingers dirty a little here, but this is performance-critical +{-# INLINE nextChar #-} +nextChar :: StringBuffer -> (Char,StringBuffer) +nextChar (StringBuffer buf len (I# cur#)) = + inlinePerformIO $ do + withForeignPtr buf $ \(Ptr a#) -> do + case utf8DecodeChar# (a# `plusAddr#` cur#) of + (# c#, b# #) -> + let cur' = I# (b# `minusAddr#` a#) in + return (C# c#, StringBuffer buf len cur') + +currentChar :: StringBuffer -> Char +currentChar = fst . nextChar prevChar :: StringBuffer -> Char -> Char -prevChar (StringBuffer _ _ 0#) deflt = deflt -prevChar s deflt = lookAhead s (-1) - -lookAhead :: StringBuffer -> Int -> Char -lookAhead (StringBuffer arr# l# c#) (I# i#) = - ASSERT(off <# l# && off >=# 0#) - C# (indexCharArray# arr# off) - where - off = c# +# i# +prevChar (StringBuffer buf len 0) deflt = deflt +prevChar (StringBuffer buf len cur) deflt = + inlinePerformIO $ do + withForeignPtr buf $ \p -> do + p' <- utf8PrevChar (p `plusPtr` cur) + return (fst (utf8DecodeChar p')) -- ----------------------------------------------------------------------------- -- Moving stepOn :: StringBuffer -> StringBuffer -stepOn s = stepOnBy 1 s +stepOn s = snd (nextChar s) + +offsetBytes :: Int -> StringBuffer -> StringBuffer +offsetBytes i s = s { cur = cur s + i } -stepOnBy :: Int -> StringBuffer -> StringBuffer -stepOnBy (I# i#) (StringBuffer fo# l# c#) = StringBuffer fo# l# (c# +# i#) +byteDiff :: StringBuffer -> StringBuffer -> Int +byteDiff s1 s2 = cur s2 - cur s1 atEnd :: StringBuffer -> Bool -atEnd (StringBuffer _ l# c#) = l# ==# c# +atEnd (StringBuffer _ l c) = l == c -- ----------------------------------------------------------------------------- -- Conversion -lexemeToString :: StringBuffer -> Int -> String +lexemeToString :: StringBuffer -> Int {-bytes-} -> String lexemeToString _ 0 = "" -lexemeToString (StringBuffer arr# _ current#) (I# len#) = unpack current# - where - end = current# +# len# +lexemeToString (StringBuffer buf _ cur) bytes = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + utf8DecodeString (ptr `plusPtr` cur) bytes - unpack nh - | nh >=# end = [] - | otherwise = C# ch : unpack (nh +# 1#) - where - ch = indexCharArray# arr# nh - -lexemeToFastString :: StringBuffer -> Int -> FastString +lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString lexemeToFastString _ 0 = mkFastString "" -lexemeToFastString (StringBuffer fo _ current#) (I# len) = - mkFastSubStringBA# fo current# len +lexemeToFastString (StringBuffer buf _ cur) len = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + return $! mkFastStringBytes (ptr `plusPtr` cur) len -- ----------------------------------------------------------------------------- -- Parsing integer strings in various bases +byteOff :: StringBuffer -> Int -> Char +byteOff (StringBuffer buf _ cur) i = + inlinePerformIO $ withForeignPtr buf $ \ptr -> do + w <- peek (ptr `plusPtr` (cur+i)) + return (unsafeChr (fromIntegral (w::Word8))) + +-- | XXX assumes ASCII digits only parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer parseInteger buf len radix to_int = go 0 0 where go i x | i == len = x - | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i))) + | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i))) + +-- ----------------------------------------------------------------------------- +-- under the carpet + +-- Just like unsafePerformIO, but we inline it. +{-# INLINE inlinePerformIO #-} +inlinePerformIO :: IO a -> a +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r + \end{code} diff --git a/ghc/compiler/utils/UnicodeUtil.lhs b/ghc/compiler/utils/UnicodeUtil.lhs deleted file mode 100644 index 56e95a5434..0000000000 --- a/ghc/compiler/utils/UnicodeUtil.lhs +++ /dev/null @@ -1,36 +0,0 @@ -Various Unicode-related utilities. - -\begin{code} -module UnicodeUtil( - stringToUtf8, intsToUtf8 - ) where - -#include "HsVersions.h" - -import Panic ( panic ) -import Char ( chr, ord ) -\end{code} - -\begin{code} -stringToUtf8 :: String -> String -stringToUtf8 s = intsToUtf8 (map ord s) - -intsToUtf8 :: [Int] -> String -intsToUtf8 [] = "" -intsToUtf8 (c:s) - | c >= 1 && c <= 0x7F = chr c : intsToUtf8 s - | c < 0 = panic ("charToUtf8 ("++show c++")") - | c <= 0x7FF = chr (0xC0 + c `div` 0x40 ) : - chr (0x80 + c `mod` 0x40) : - intsToUtf8 s - | c <= 0xFFFF = chr (0xE0 + c `div` 0x1000 ) : - chr (0x80 + c `div` 0x40 `mod` 0x40) : - chr (0x80 + c `mod` 0x40) : - intsToUtf8 s - | c <= 0x10FFFF = chr (0xF0 + c `div` 0x40000 ) : - chr (0x80 + c `div` 0x1000 `mod` 0x40) : - chr (0x80 + c `div` 0x40 `mod` 0x40) : - chr (0x80 + c `mod` 0x40) : - intsToUtf8 s - | otherwise = panic ("charToUtf8 "++show c) -\end{code} |