71 files changed, 1528 insertions, 1720 deletions
diff --git a/ghc/compiler/HsVersions.h b/ghc/compiler/HsVersions.h
index 1c07d2c5ae..dd80922e0b 100644
--- a/ghc/compiler/HsVersions.h
+++ b/ghc/compiler/HsVersions.h
@@ -90,11 +90,19 @@ name = Util.global (value) :: IORef (ty); \
 -- when compiling FastString itself
 #ifndef COMPILING_FAST_STRING
 -- 
-import qualified FastString 
+import qualified FastString as FS 
 #endif
 
-#define SLIT(x)	 (FastString.mkLitString# (x#))
-#define FSLIT(x) (FastString.mkFastString# (x#))
+#define SLIT(x)	 (FS.mkLitString# (x#))
+#define FSLIT(x) (FS.mkFastString# (x#))
+
+-- Useful for declaring arguments to be strict
+#define STRICT1(f) f a b c | a `seq` False = undefined
+#define STRICT2(f) f a b | a `seq` b `seq` False = undefined
+#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
+#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
+#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
+#define STRICT6(f) f a b c d e f | a `seq` b `seq` c `seq` d `seq` e `seq` f `seq` False = undefined
 
 #endif /* HsVersions.h */
 
diff --git a/ghc/compiler/Makefile b/ghc/compiler/Makefile
index 4920e16a0c..2b5252a5be 100644
--- a/ghc/compiler/Makefile
+++ b/ghc/compiler/Makefile
@@ -408,7 +408,7 @@ SRC_HC_OPTS += -DGHCI -package template-haskell
 PKG_DEPENDS += template-haskell
 
 # Use threaded RTS with GHCi, so threads don't get blocked at the prompt.
-SRC_HC_OPTS += -threaded
+# SRC_HC_OPTS += -threaded
 
 ALL_DIRS += ghci
 
diff --git a/ghc/compiler/basicTypes/Id.lhs b/ghc/compiler/basicTypes/Id.lhs
index 0d15b20f22..10d5a28e17 100644
--- a/ghc/compiler/basicTypes/Id.lhs
+++ b/ghc/compiler/basicTypes/Id.lhs
@@ -9,7 +9,7 @@ module Id (
 
 	-- Simple construction
 	mkGlobalId, mkLocalId, mkLocalIdWithInfo, 
-	mkSysLocal, mkSysLocalUnencoded, mkUserLocal, mkVanillaGlobal,
+	mkSysLocal, mkUserLocal, mkVanillaGlobal,
 	mkTemplateLocals, mkTemplateLocalsNum, mkWildId, mkTemplateLocal,
 	mkWorkerId, mkExportedLocalId,
 
@@ -105,15 +105,15 @@ import qualified Demand	( Demand )
 import DataCon		( DataCon, isUnboxedTupleCon )
 import NewDemand	( Demand, StrictSig, topDmd, topSig, isBottomingSig )
 import Name	 	( Name, OccName, nameIsLocalOrFrom, 
-			  mkSystemVarName, mkSystemVarNameEncoded, mkInternalName,
-			  getOccName, getSrcLoc
-			) 
+			  mkSystemVarName, mkInternalName, getOccName,
+			  getSrcLoc ) 
 import Module		( Module )
-import OccName		( EncodedFS, mkWorkerOcc )
+import OccName		( mkWorkerOcc )
 import Maybes		( orElse )
 import SrcLoc		( SrcLoc )
 import Outputable
 import Unique		( Unique, mkBuiltinUnique )
+import FastString	( FastString )
 import StaticFlags	( opt_NoStateHack )
 
 -- infixl so you can say (id `set` a `set` b)
@@ -162,15 +162,10 @@ mkLocalId name ty = mkLocalIdWithInfo name ty vanillaIdInfo
 -- SysLocal: for an Id being created by the compiler out of thin air...
 -- UserLocal: an Id with a name the user might recognize...
 mkUserLocal :: OccName -> Unique -> Type -> SrcLoc -> Id
-mkSysLocal  :: EncodedFS  -> Unique -> Type -> Id
+mkSysLocal  :: FastString  -> Unique -> Type -> Id
 mkVanillaGlobal :: Name -> Type -> IdInfo -> Id
 
--- for SysLocal, we assume the base name is already encoded, to avoid
--- re-encoding the same string over and over again.
-mkSysLocal fs uniq ty = mkLocalId (mkSystemVarNameEncoded uniq fs) ty
-
--- version to use when the faststring needs to be encoded
-mkSysLocalUnencoded fs uniq ty = mkLocalId (mkSystemVarName uniq fs)  ty
+mkSysLocal fs uniq ty = mkLocalId (mkSystemVarName uniq fs) ty
 
 mkUserLocal occ uniq ty loc = mkLocalId (mkInternalName    uniq occ loc) ty
 mkVanillaGlobal 	    = mkGlobalId VanillaGlobal
diff --git a/ghc/compiler/basicTypes/Literal.lhs b/ghc/compiler/basicTypes/Literal.lhs
index 9aab6ee7f1..e83ea9db74 100644
--- a/ghc/compiler/basicTypes/Literal.lhs
+++ b/ghc/compiler/basicTypes/Literal.lhs
@@ -35,7 +35,6 @@ import FastTypes
 import FastString
 import Binary
 
-import UnicodeUtil	( stringToUtf8 )
 import Ratio 		( numerator )
 import FastString	( uniqueOfFS, lengthFS )
 import DATA_INT		( Int8,  Int16,  Int32 )
@@ -95,7 +94,11 @@ data Literal
   =	------------------
 	-- First the primitive guys
     MachChar	Char             -- Char#        At least 31 bits
-  | MachStr	FastString
+
+  | MachStr	FastString	-- A string-literal: stored and emitted
+				-- UTF-8 encoded, we'll arrange to decode it
+				-- at runtime.  Also emitted with a '\0'
+				-- terminator.
 
   | MachNullAddr                -- the NULL pointer, the only pointer value
                                 -- that can be represented as a Literal.
@@ -206,7 +209,7 @@ mkMachInt64  x = MachInt64 x
 mkMachWord64 x = MachWord64 x
 
 mkStringLit :: String -> Literal
-mkStringLit s = MachStr (mkFastString (stringToUtf8 s))
+mkStringLit s = MachStr (mkFastString s) -- stored UTF-8 encoded
 
 inIntRange, inWordRange :: Integer -> Bool
 inIntRange  x = x >= tARGET_MIN_INT && x <= tARGET_MAX_INT
diff --git a/ghc/compiler/basicTypes/MkId.lhs b/ghc/compiler/basicTypes/MkId.lhs
index 02d255959c..9d93a67008 100644
--- a/ghc/compiler/basicTypes/MkId.lhs
+++ b/ghc/compiler/basicTypes/MkId.lhs
@@ -59,7 +59,7 @@ import Class		( Class, classTyCon, classSelIds )
 import Var		( Id, TyVar, Var )
 import VarSet		( isEmptyVarSet, subVarSet, varSetElems )
 import Name		( mkFCallName, mkWiredInName, Name, BuiltInSyntax(..) )
-import OccName		( mkOccFS, varName )
+import OccName		( mkOccNameFS, varName )
 import PrimOp		( PrimOp, primOpSig, primOpOcc, primOpTag )
 import ForeignCall	( ForeignCall )
 import DataCon		( DataCon, DataConIds(..), dataConTyVars,
@@ -847,7 +847,7 @@ another gun with which to shoot yourself in the foot.
 
 \begin{code}
 mkWiredInIdName mod fs uniq id
- = mkWiredInName mod (mkOccFS varName fs) uniq Nothing (AnId id) UserSyntax
+ = mkWiredInName mod (mkOccNameFS varName fs) uniq Nothing (AnId id) UserSyntax
 
 unsafeCoerceName = mkWiredInIdName gHC_PRIM FSLIT("unsafeCoerce#") unsafeCoerceIdKey  unsafeCoerceId
 nullAddrName     = mkWiredInIdName gHC_PRIM FSLIT("nullAddr#")	   nullAddrIdKey      nullAddrId
diff --git a/ghc/compiler/basicTypes/Module.lhs b/ghc/compiler/basicTypes/Module.lhs
index 9145b353f3..f4e413d263 100644
--- a/ghc/compiler/basicTypes/Module.lhs
+++ b/ghc/compiler/basicTypes/Module.lhs
@@ -4,7 +4,7 @@
 
 Module
 ~~~~~~~~~~
-Simply the name of a module, represented as a Z-encoded FastString.
+Simply the name of a module, represented as a FastString.
 These are Uniquable, hence we can build FiniteMaps with ModuleNames as
 the keys.
 
@@ -17,13 +17,11 @@ module Module
     , ModLocation(..)
     , addBootSuffix, addBootSuffix_maybe, addBootSuffixLocn
 
-    , moduleString		-- :: ModuleName -> EncodedString
-    , moduleUserString		-- :: ModuleName -> UserString
-    , moduleFS			-- :: ModuleName -> EncodedFS
+    , moduleString		-- :: ModuleName -> String
+    , moduleFS			-- :: ModuleName -> FastString
 
-    , mkModule			-- :: UserString -> ModuleName
-    , mkModuleFS		-- :: UserFS    -> ModuleName
-    , mkSysModuleFS		-- :: EncodedFS -> ModuleName
+    , mkModule			-- :: String -> ModuleName
+    , mkModuleFS		-- :: FastString -> ModuleName
  
     , ModuleEnv
     , elemModuleEnv, extendModuleEnv, extendModuleEnvList, plusModuleEnv_C
@@ -108,7 +106,7 @@ addBootSuffixLocn locn
 %************************************************************************
 
 \begin{code}
-newtype Module = Module EncodedFS
+newtype Module = Module FastString
 	-- Haskell module names can include the quote character ',
 	-- so the module names have the z-encoding applied to them
 
@@ -131,30 +129,26 @@ instance Ord Module where
 instance Outputable Module where
   ppr = pprModule
 
-
 pprModule :: Module -> SDoc
-pprModule (Module nm) = pprEncodedFS nm
+pprModule (Module nm) = 
+    getPprStyle $ \ sty ->
+    if codeStyle sty 
+	then ftext (zEncodeFS nm)
+	else ftext nm
 
-moduleFS :: Module -> EncodedFS
+moduleFS :: Module -> FastString
 moduleFS (Module mod) = mod
 
-moduleString :: Module -> EncodedString
+moduleString :: Module -> String
 moduleString (Module mod) = unpackFS mod
 
-moduleUserString :: Module -> UserString
-moduleUserString (Module mod) = decode (unpackFS mod)
-
 -- used to be called mkSrcModule
-mkModule :: UserString -> Module
-mkModule s = Module (mkFastString (encode s))
+mkModule :: String -> Module
+mkModule s = Module (mkFastString s)
 
 -- used to be called mkSrcModuleFS
-mkModuleFS :: UserFS -> Module
-mkModuleFS s = Module (encodeFS s)
-
--- used to be called mkSysModuleFS
-mkSysModuleFS :: EncodedFS -> Module
-mkSysModuleFS s = Module s 
+mkModuleFS :: FastString -> Module
+mkModuleFS s = Module s
 \end{code}
 
 %************************************************************************
diff --git a/ghc/compiler/basicTypes/Name.lhs b/ghc/compiler/basicTypes/Name.lhs
index fd8f2cfd48..3aeb03b8bf 100644
--- a/ghc/compiler/basicTypes/Name.lhs
+++ b/ghc/compiler/basicTypes/Name.lhs
@@ -12,7 +12,7 @@ module Name (
 	Name,					-- Abstract
 	BuiltInSyntax(..), 
 	mkInternalName, mkSystemName,
-	mkSystemVarName, mkSystemVarNameEncoded, mkSysTvName, 
+	mkSystemVarName, mkSysTvName, 
 	mkFCallName, mkIPName,
 	mkExternalName, mkWiredInName,
 
@@ -38,10 +38,11 @@ module Name (
 import {-# SOURCE #-} TypeRep( TyThing )
 
 import OccName		-- All of it
-import Module		( Module )
+import Module		( Module, moduleFS )
 import SrcLoc		( noSrcLoc, wiredInSrcLoc, SrcLoc )
 import Unique		( Unique, Uniquable(..), getKey, pprUnique )
 import Maybes		( orElse, isJust )
+import FastString	( FastString, zEncodeFS )
 import Outputable
 \end{code}
 
@@ -215,21 +216,16 @@ mkSystemName :: Unique -> OccName -> Name
 mkSystemName uniq occ = Name { n_uniq = uniq, n_sort = System, 
 			       n_occ = occ, n_loc = noSrcLoc }
 
-mkSystemVarName :: Unique -> UserFS -> Name
-mkSystemVarName uniq fs = mkSystemName uniq (mkVarOcc fs)
+mkSystemVarName :: Unique -> FastString -> Name
+mkSystemVarName uniq fs = mkSystemName uniq (mkVarOccFS fs)
 
--- Use this version when the string is already encoded.  Avoids duplicating
--- the string each time a new name is created.
-mkSystemVarNameEncoded :: Unique -> EncodedFS -> Name
-mkSystemVarNameEncoded uniq fs = mkSystemName uniq (mkSysOccFS varName fs) 
+mkSysTvName :: Unique -> FastString -> Name
+mkSysTvName uniq fs = mkSystemName uniq (mkOccNameFS tvName fs) 
 
-mkSysTvName :: Unique -> EncodedFS -> Name
-mkSysTvName uniq fs = mkSystemName uniq (mkSysOccFS tvName fs) 
-
-mkFCallName :: Unique -> EncodedString -> Name
+mkFCallName :: Unique -> String -> Name
 	-- The encoded string completely describes the ccall
 mkFCallName uniq str =  Name { n_uniq = uniq, n_sort = Internal, 
-			       n_occ = mkFCallOcc str, n_loc = noSrcLoc }
+			       n_occ = mkVarOcc str, n_loc = noSrcLoc }
 
 mkIPName :: Unique -> OccName -> Name
 mkIPName uniq occ
@@ -317,13 +313,13 @@ pprName (Name {n_sort = sort, n_uniq = uniq, n_occ = occ})
       Internal    	      -> pprInternal sty uniq occ
 
 pprExternal sty uniq mod occ is_wired is_builtin
-  | codeStyle sty        = ppr mod <> char '_' <> ppr_occ_name occ
+  | codeStyle sty        = ppr_z_module mod <> char '_' <> ppr_z_occ_name occ
 	-- In code style, always qualify
 	-- ToDo: maybe we could print all wired-in things unqualified
 	-- 	 in code style, to reduce symbol table bloat?
   | debugStyle sty       = ppr mod <> dot <> ppr_occ_name occ
 			   <> braces (hsep [if is_wired then ptext SLIT("(w)") else empty,
-					    text (briefOccNameFlavour occ), 
+					    pprNameSpaceBrief (occNameSpace occ), 
 		 			    pprUnique uniq])
   | BuiltInSyntax <- is_builtin  = ppr_occ_name occ
 	-- never qualify builtin syntax
@@ -332,7 +328,7 @@ pprExternal sty uniq mod occ is_wired is_builtin
 
 pprInternal sty uniq occ
   | codeStyle sty  = pprUnique uniq
-  | debugStyle sty = ppr_occ_name occ <> braces (hsep [text (briefOccNameFlavour occ), 
+  | debugStyle sty = ppr_occ_name occ <> braces (hsep [pprNameSpaceBrief (occNameSpace occ), 
 				 		       pprUnique uniq])
   | dumpStyle sty  = ppr_occ_name occ <> char '_' <> pprUnique uniq
 			-- For debug dumps, we're not necessarily dumping
@@ -343,15 +339,21 @@ pprInternal sty uniq occ
 pprSystem sty uniq occ
   | codeStyle sty  = pprUnique uniq
   | debugStyle sty = ppr_occ_name occ <> char '_' <> pprUnique uniq
-		     <> braces (text (briefOccNameFlavour occ))
+		     <> braces (pprNameSpaceBrief (occNameSpace occ))
   | otherwise	   = ppr_occ_name occ <> char '_' <> pprUnique uniq
 				-- If the tidy phase hasn't run, the OccName
 				-- is unlikely to be informative (like 's'),
 				-- so print the unique
 
-ppr_occ_name occ = pprEncodedFS (occNameFS occ)
+ppr_occ_name occ = ftext (occNameFS occ)
 	-- Don't use pprOccName; instead, just print the string of the OccName; 
 	-- we print the namespace in the debug stuff above
+
+-- In code style, we Z-encode the strings.  The results of Z-encoding each FastString are
+-- cached behind the scenes in the FastString implementation.
+ppr_z_occ_name occ = ftext (zEncodeFS (occNameFS occ))
+ppr_z_module   mod = ftext (zEncodeFS (moduleFS mod))
+
 \end{code}
 
 %************************************************************************
diff --git a/ghc/compiler/basicTypes/OccName.lhs b/ghc/compiler/basicTypes/OccName.lhs
index 756d6a955a..bd6d3f7060 100644
--- a/ghc/compiler/basicTypes/OccName.lhs
+++ b/ghc/compiler/basicTypes/OccName.lhs
@@ -7,62 +7,66 @@
 
 \begin{code}
 module OccName (
-	-- The NameSpace type; abstact
+	-- * The NameSpace type; abstact
 	NameSpace, tcName, clsName, tcClsName, dataName, varName, 
-	tvName, srcDataName, nameSpaceString, 
+	tvName, srcDataName,
 
-	-- The OccName type
+	-- ** Printing
+	pprNameSpace, pprNonVarNameSpace, pprNameSpaceBrief,
+
+	-- * The OccName type
 	OccName, 	-- Abstract, instance of Outputable
 	pprOccName, 
 
+	-- ** Construction	
+	mkOccName, mkOccNameFS, 
+	mkVarOcc, mkVarOccFS,
+	mkTyVarOcc,
+	mkDFunOcc,
+	mkTupleOcc, 
+	setOccNameSpace,
+
+	-- ** Derived OccNames
+	mkDataConWrapperOcc, mkWorkerOcc, mkDefaultMethodOcc, mkDerivedTyConOcc,
+  	mkClassTyConOcc, mkClassDataConOcc, mkDictOcc, mkIPOcc, 
+ 	mkSpecOcc, mkForeignExportOcc, mkGenOcc1, mkGenOcc2,
+	mkDataTOcc, mkDataCOcc, mkDataConWorkerOcc,
+	mkSuperDictSelOcc, mkLocalOcc, mkMethodOcc,
+
+	-- ** Deconstruction
+	occNameFS, occNameString, occNameSpace, 
+
+	isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
+	parenSymOcc, reportIfUnused, isTcClsName, isVarName,
+
+	isTupleOcc_maybe,
+
 	-- The OccEnv type
 	OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv,
 	lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv,
 	occEnvElts, foldOccEnv, plusOccEnv, plusOccEnv_C, extendOccEnv_C,
 
-
 	-- The OccSet type
-	OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList,
+	OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, 
+	extendOccSetList,
 	unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts, 
 	foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet,
 
-	mkOccName, mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
-	mkVarOcc, mkVarOccEncoded, mkTyVarOcc,
-	mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
-	mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
- 	mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
-	mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
-	mkDataConWrapperOcc, mkDataConWorkerOcc,
-	
-	isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
-	parenSymOcc, reportIfUnused, isTcClsName, isVarName,
-
-	occNameFS, occNameString, occNameUserString, occNameSpace, 
-	occNameFlavour, briefOccNameFlavour,
-	setOccNameSpace,
-
-	mkTupleOcc, isTupleOcc_maybe,
-
 	-- Tidying up
 	TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
 
-	-- Encoding
-	EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
-
 	-- The basic form of names
 	isLexCon, isLexVar, isLexId, isLexSym,
 	isLexConId, isLexConSym, isLexVarId, isLexVarSym,
-	isLowerISO, isUpperISO,
 	startsVarSym, startsVarId, startsConSym, startsConId
     ) where
 
 #include "HsVersions.h"
 
-import Char	( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
-import Util	( thenCmp )
-import Unique	( Unique, mkUnique, Uniquable(..) )
-import BasicTypes ( Boxity(..), Arity )
-import StaticFlags ( opt_PprStyle_Debug )
+import Util		( thenCmp )
+import Unique		( Unique, mkUnique, Uniquable(..) )
+import BasicTypes 	( Boxity(..), Arity )
+import StaticFlags 	( opt_PprStyle_Debug )
 import UniqFM
 import UniqSet
 import FastString
@@ -70,34 +74,16 @@ import Outputable
 import Binary
 
 import GLAEXTS
-\end{code}
-
-We hold both module names and identifier names in a 'Z-encoded' form
-that makes them acceptable both as a C identifier and as a Haskell
-(prefix) identifier. 
-
-They can always be decoded again when printing error messages
-or anything else for the user, but it does make sense for it
-to be represented here in encoded form, so that when generating
-code the encoding operation is not performed on each occurrence.
 
-These type synonyms help documentation.
-
-\begin{code}
-type UserFS    = FastString	-- As the user typed it
-type EncodedFS = FastString	-- Encoded form
+import Data.Char	( isUpper, isLower, ord )
 
-type UserString = String	-- As the user typed it
-type EncodedString = String	-- Encoded form
+-- Unicode TODO: put isSymbol in libcompat
+#if __GLASGOW_HASKELL__ > 604
+import Data.Char	( isSymbol )
+#else
+isSymbol = const False
+#endif
 
-
-pprEncodedFS :: EncodedFS -> SDoc
-pprEncodedFS fs
-  = getPprStyle 	$ \ sty ->
-    if userStyle sty || dumpStyle sty
-	-- ftext (decodeFS fs) would needlessly pack the string again
-	then text (decode (unpackFS fs))
-        else ftext fs
 \end{code}
 
 %************************************************************************
@@ -155,12 +141,20 @@ isVarName TvName  = True
 isVarName VarName = True
 isVarName other   = False
 
+pprNameSpace :: NameSpace -> SDoc
+pprNameSpace DataName  = ptext SLIT("data constructor")
+pprNameSpace VarName   = ptext SLIT("variable")
+pprNameSpace TvName    = ptext SLIT("type variable")
+pprNameSpace TcClsName = ptext SLIT("type constructor or class")
+
+pprNonVarNameSpace :: NameSpace -> SDoc
+pprNonVarNameSpace VarName = empty
+pprNonVarNameSpace ns = pprNameSpace ns
 
-nameSpaceString :: NameSpace -> String
-nameSpaceString DataName  = "data constructor"
-nameSpaceString VarName   = "variable"
-nameSpaceString TvName    = "type variable"
-nameSpaceString TcClsName = "type constructor or class"
+pprNameSpaceBrief DataName  = char 'd'
+pprNameSpaceBrief VarName   = char 'v'
+pprNameSpaceBrief TvName    = ptext SLIT("tv")
+pprNameSpaceBrief TcClsName = ptext SLIT("tc")
 \end{code}
 
 
@@ -173,7 +167,7 @@ nameSpaceString TcClsName = "type constructor or class"
 \begin{code}
 data OccName = OccName 
     { occNameSpace  :: !NameSpace
-    , occNameFS     :: !EncodedFS
+    , occNameFS     :: !FastString
     }
 \end{code}
 
@@ -201,9 +195,11 @@ instance Outputable OccName where
 pprOccName :: OccName -> SDoc
 pprOccName (OccName sp occ) 
   = getPprStyle $ \ sty ->
-    pprEncodedFS occ <> if debugStyle sty then
-			   braces (text (briefNameSpaceFlavour sp))
-			else empty
+    if codeStyle sty 
+	then ftext (zEncodeFS occ)
+	else ftext occ <> if debugStyle sty 
+			    then braces (pprNameSpaceBrief sp)
+			    else empty
 \end{code}
 
 
@@ -211,54 +207,24 @@ pprOccName (OccName sp occ)
 %*									*
 \subsection{Construction}
 %*									*
-%*****p*******************************************************************
-
-*Sys* things do no encoding; the caller should ensure that the thing is
-already encoded
-
-\begin{code}
-mkSysOcc :: NameSpace -> EncodedString -> OccName
-mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
-		      OccName occ_sp (mkFastString str)
-
-mkSysOccFS :: NameSpace -> EncodedFS -> OccName
-mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
-		       OccName occ_sp fs
-
-mkFCallOcc :: EncodedString -> OccName
--- This version of mkSysOcc doesn't check that the string is already encoded,
--- because it will be something like "{__ccall f dyn Int# -> Int#}" 
--- This encodes a lot into something that then parses like an Id.
--- But then alreadyEncoded complains about the braces!
-mkFCallOcc str = OccName varName (mkFastString str)
-
--- Kind constructors get a special function.  Uniquely, they are not encoded,
--- so that they have names like '*'.  This means that *even in interface files*
--- we'll get kinds like (* -> (* -> *)).  We can't use mkSysOcc because it
--- has an ASSERT that doesn't hold.
-mkKindOccFS :: NameSpace -> EncodedFS -> OccName
-mkKindOccFS occ_sp fs = OccName occ_sp fs
-\end{code}
-
-*Source-code* things are encoded.
+%************************************************************************
 
 \begin{code}
-mkOccFS :: NameSpace -> UserFS -> OccName
-mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
-
 mkOccName :: NameSpace -> String -> OccName
-mkOccName ns s = mkSysOcc ns (encode s)
+mkOccName occ_sp str = OccName occ_sp (mkFastString str)
 
-mkVarOcc :: UserFS -> OccName
-mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
+mkOccNameFS :: NameSpace -> FastString -> OccName
+mkOccNameFS occ_sp fs = OccName occ_sp fs
 
-mkTyVarOcc :: UserFS -> OccName
-mkTyVarOcc fs = mkSysOccFS tvName (encodeFS fs)
+mkVarOcc :: String -> OccName
+mkVarOcc s = mkOccName varName s
 
-mkVarOccEncoded :: EncodedFS -> OccName
-mkVarOccEncoded fs = mkSysOccFS varName fs
-\end{code}
+mkVarOccFS :: FastString -> OccName
+mkVarOccFS fs = mkOccNameFS varName fs
 
+mkTyVarOcc :: FastString -> OccName
+mkTyVarOcc fs = mkOccNameFS tvName fs
+\end{code}
 
 
 %************************************************************************
@@ -355,34 +321,13 @@ intersectsOccSet s1 s2 = not (isEmptyOccSet (s1 `intersectOccSet` s2))
 %*									*
 %************************************************************************
 
-\begin{code} 
-occNameString :: OccName -> EncodedString
+\begin{code}
+occNameString :: OccName -> String
 occNameString (OccName _ s) = unpackFS s
 
-occNameUserString :: OccName -> UserString
-occNameUserString occ = decode (occNameString occ)
-
 setOccNameSpace :: NameSpace -> OccName -> OccName
 setOccNameSpace sp (OccName _ occ) = OccName sp occ
 
--- occNameFlavour is used only to generate good error messages
-occNameFlavour :: OccName -> SDoc
-occNameFlavour (OccName DataName _)  = ptext SLIT("data constructor")
-occNameFlavour (OccName TvName _)    = ptext SLIT("type variable")
-occNameFlavour (OccName TcClsName _) = ptext SLIT("type constructor or class")
-occNameFlavour (OccName VarName s)   = empty
-
--- briefOccNameFlavour is used in debug-printing of names
-briefOccNameFlavour :: OccName -> String
-briefOccNameFlavour (OccName sp _) = briefNameSpaceFlavour sp
-
-briefNameSpaceFlavour DataName  = "d"
-briefNameSpaceFlavour VarName   = "v"
-briefNameSpaceFlavour TvName    = "tv"
-briefNameSpaceFlavour TcClsName = "tc"
-\end{code}
-
-\begin{code}
 isVarOcc, isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
 
 isVarOcc (OccName VarName _) = True
@@ -400,19 +345,19 @@ isValOcc other		      = False
 
 -- Data constructor operator (starts with ':', or '[]')
 -- Pretty inefficient!
-isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
-isDataSymOcc (OccName VarName s)  = isLexConSym (decodeFS s)
+isDataSymOcc (OccName DataName s) = isLexConSym s
+isDataSymOcc (OccName VarName s)  = isLexConSym s
 isDataSymOcc other		  = False
 
 isDataOcc (OccName DataName _) = True
-isDataOcc (OccName VarName s)  = isLexCon (decodeFS s)
+isDataOcc (OccName VarName s)  = isLexCon s
 isDataOcc other		       = False
 
 -- Any operator (data constructor or variable)
 -- Pretty inefficient!
-isSymOcc (OccName DataName s)  = isLexConSym (decodeFS s)
-isSymOcc (OccName TcClsName s) = isLexConSym (decodeFS s)
-isSymOcc (OccName VarName s)   = isLexSym (decodeFS s)
+isSymOcc (OccName DataName s)  = isLexConSym s
+isSymOcc (OccName TcClsName s) = isLexConSym s
+isSymOcc (OccName VarName s)   = isLexSym s
 isSymOcc other		       = False
 
 parenSymOcc :: OccName -> SDoc -> SDoc
@@ -426,13 +371,12 @@ parenSymOcc occ doc | isSymOcc occ = parens doc
 reportIfUnused :: OccName -> Bool
   -- Haskell 98 encourages compilers to suppress warnings about
   -- unused names in a pattern if they start with "_".
-reportIfUnused occ = case occNameUserString occ of
+reportIfUnused occ = case occNameString occ of
 			('_' : _) -> False
-			zz_other  -> True
+			_other    -> True
 \end{code}
 
 
-
 %************************************************************************
 %*									*
 \subsection{Making system names}
@@ -466,16 +410,17 @@ NB: The string must already be encoded!
 \begin{code}
 mk_deriv :: NameSpace 
 	 -> String		-- Distinguishes one sort of derived name from another
-	 -> EncodedString	-- Must be already encoded!!  We don't want to encode it a 
-				-- second time because encoding isn't idempotent
+	 -> String
 	 -> OccName
 
-mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
+mk_deriv occ_sp sys_prefix str = mkOccName occ_sp (sys_prefix ++ str)
 \end{code}
 
 \begin{code}
-mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
- 	   mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
+mkDataConWrapperOcc, mkWorkerOcc, mkDefaultMethodOcc, mkDerivedTyConOcc,
+  	mkClassTyConOcc, mkClassDataConOcc, mkDictOcc, mkIPOcc, 
+ 	mkSpecOcc, mkForeignExportOcc, mkGenOcc1, mkGenOcc2,
+	mkDataTOcc, mkDataCOcc, mkDataConWorkerOcc
    :: OccName -> OccName
 
 -- These derived variables have a prefix that no Haskell value could have
@@ -503,7 +448,6 @@ mkDataCOcc = mk_simple_deriv varName  "$c"
 
 mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
 
-
 -- Data constructor workers are made by setting the name space
 -- of the data constructor OccName (which should be a DataName)
 -- to VarName
@@ -528,7 +472,7 @@ mkLocalOcc uniq occ
 
 
 \begin{code}
-mkDFunOcc :: EncodedString	-- Typically the class and type glommed together e.g. "OrdMaybe"
+mkDFunOcc :: String		-- Typically the class and type glommed together e.g. "OrdMaybe"
 				-- Only used in debug mode, for extra clarity
 	  -> Bool		-- True <=> hs-boot instance dfun
 	  -> Int		-- Unique index
@@ -609,248 +553,37 @@ tidyOccName in_scope occ@(OccName occ_sp fs)
 	Just n  -> 	-- Already used: make a new guess, 
 			-- change the guess base, and try again
 		   tidyOccName  (extendOccEnv in_scope occ (n+1))
-				(mkSysOcc occ_sp (unpackFS fs ++ show n))
+				(mkOccName occ_sp (unpackFS fs ++ show n))
 \end{code}
 
-
-%************************************************************************
-%*									*
-\subsection{The 'Z' encoding}
-%*									*
-%************************************************************************
-
-This is the main name-encoding and decoding function.  It encodes any
-string into a string that is acceptable as a C name.  This is the name
-by which things are known right through the compiler.
-
-The basic encoding scheme is this.  
-
-* Tuples (,,,) are coded as Z3T
-
-* Alphabetic characters (upper and lower) and digits
-	all translate to themselves; 
-	except 'Z', which translates to 'ZZ'
-	and    'z', which translates to 'zz'
-  We need both so that we can preserve the variable/tycon distinction
-
-* Most other printable characters translate to 'zx' or 'Zx' for some
-	alphabetic character x
-
-* The others translate as 'znnnU' where 'nnn' is the decimal number
-        of the character
-
-	Before		After
-	--------------------------
-	Trak		Trak
-	foo_wib		foozuwib
-	>		zg
-	>1		zg1
-	foo#		foozh
-	foo##		foozhzh
-	foo##1		foozhzh1
-	fooZ		fooZZ	
-	:+		ZCzp
-	()		Z0T	0-tuple
-	(,,,,)		Z5T	5-tuple  
-	(# #)           Z1H     unboxed 1-tuple	(note the space)
-	(#,,,,#)	Z5H	unboxed 5-tuple
-		(NB: There is no Z1T nor Z0H.)
-
-\begin{code}
--- alreadyEncoded is used in ASSERTs to check for encoded
--- strings.  It isn't fail-safe, of course, because, say 'zh' might
--- be encoded or not.
-alreadyEncoded :: String -> Bool
-alreadyEncoded s = all ok s
-		 where
-		   ok ' ' = True
-			-- This is a bit of a lie; if we really wanted spaces
-			-- in names we'd have to encode them.  But we do put
-			-- spaces in ccall "occurrences", and we don't want to
-			-- reject them here
-		   ok ch  = isAlphaNum ch
-
-alreadyEncodedFS :: FastString -> Bool
-alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
-
-encode :: UserString -> EncodedString
-encode cs = case maybe_tuple cs of
-		Just n  -> n		-- Tuples go to Z2T etc
-		Nothing -> go cs
-	  where
-		go []     = []
-		go (c:cs) = encode_ch c ++ go cs
-
-encodeFS :: UserFS -> EncodedFS
-encodeFS fast_str  | all unencodedChar str = fast_str
-		   | otherwise	           = mkFastString (encode str)
-		   where
-		     str = unpackFS fast_str
-
-unencodedChar :: Char -> Bool	-- True for chars that don't need encoding
-unencodedChar 'Z' = False
-unencodedChar 'z' = False
-unencodedChar c   =  c >= 'a' && c <= 'z'
-	          || c >= 'A' && c <= 'Z'
-		  || c >= '0' && c <= '9'
-
-encode_ch :: Char -> EncodedString
-encode_ch c | unencodedChar c = [c]	-- Common case first
-
--- Constructors
-encode_ch '('  = "ZL"	-- Needed for things like (,), and (->)
-encode_ch ')'  = "ZR"	-- For symmetry with (
-encode_ch '['  = "ZM"
-encode_ch ']'  = "ZN"
-encode_ch ':'  = "ZC"
-encode_ch 'Z'  = "ZZ"
-
--- Variables
-encode_ch 'z'  = "zz"
-encode_ch '&'  = "za"
-encode_ch '|'  = "zb"
-encode_ch '^'  = "zc"
-encode_ch '$'  = "zd"
-encode_ch '='  = "ze"
-encode_ch '>'  = "zg"
-encode_ch '#'  = "zh"
-encode_ch '.'  = "zi"
-encode_ch '<'  = "zl"
-encode_ch '-'  = "zm"
-encode_ch '!'  = "zn"
-encode_ch '+'  = "zp"
-encode_ch '\'' = "zq"
-encode_ch '\\' = "zr"
-encode_ch '/'  = "zs"
-encode_ch '*'  = "zt"
-encode_ch '_'  = "zu"
-encode_ch '%'  = "zv"
-encode_ch c    = 'z' : shows (ord c) "U"
-\end{code}
-
-Decode is used for user printing.
-
-\begin{code}
-decodeFS :: FastString -> FastString
-decodeFS fs = mkFastString (decode (unpackFS fs))
-
-decode :: EncodedString -> UserString
-decode [] = []
-decode ('Z' : d : rest) | isDigit d = decode_tuple   d rest
-			| otherwise = decode_upper   d : decode rest
-decode ('z' : d : rest) | isDigit d = decode_num_esc d rest
-			| otherwise = decode_lower   d : decode rest
-decode (c   : rest) = c : decode rest
-
-decode_upper, decode_lower :: Char -> Char
-
-decode_upper 'L' = '('
-decode_upper 'R' = ')'
-decode_upper 'M' = '['
-decode_upper 'N' = ']'
-decode_upper 'C' = ':'
-decode_upper 'Z' = 'Z'
-decode_upper ch  = pprTrace "decode_upper" (char ch) ch
-	     	
-decode_lower 'z' = 'z'
-decode_lower 'a' = '&'
-decode_lower 'b' = '|'
-decode_lower 'c' = '^'
-decode_lower 'd' = '$'
-decode_lower 'e' = '='
-decode_lower 'g' = '>'
-decode_lower 'h' = '#'
-decode_lower 'i' = '.'
-decode_lower 'l' = '<'
-decode_lower 'm' = '-'
-decode_lower 'n' = '!'
-decode_lower 'p' = '+'
-decode_lower 'q' = '\''
-decode_lower 'r' = '\\'
-decode_lower 's' = '/'
-decode_lower 't' = '*'
-decode_lower 'u' = '_'
-decode_lower 'v' = '%'
-decode_lower ch  = pprTrace "decode_lower" (char ch) ch
-
--- Characters not having a specific code are coded as z224U
-decode_num_esc d rest
-  = go (digitToInt d) rest
-  where
-    go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
-    go n ('U' : rest)           = chr n : decode rest
-    go n other = pprPanic "decode_num_esc" (ppr n <+> text other)
-
-decode_tuple :: Char -> EncodedString -> UserString
-decode_tuple d rest
-  = go (digitToInt d) rest
-  where
-	-- NB. recurse back to decode after decoding the tuple, because
-	-- the tuple might be embedded in a longer name.
-    go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
-    go 0 ('T':rest)	= "()" ++ decode rest
-    go n ('T':rest)	= '(' : replicate (n-1) ',' ++ ")" ++ decode rest
-    go 1 ('H':rest)	= "(# #)" ++ decode rest
-    go n ('H':rest)	= '(' : '#' : replicate (n-1) ',' ++ "#)" ++ decode rest
-    go n other = pprPanic "decode_tuple" (ppr n <+> text other)
-\end{code}
-
-
 %************************************************************************
 %*									*
 		Stuff for dealing with tuples
 %*									*
 %************************************************************************
 
-Tuples are encoded as
-	Z3T or Z3H
-for 3-tuples or unboxed 3-tuples respectively.  No other encoding starts 
-	Z<digit>
-
-* "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
-  There are no unboxed 0-tuples.  
-
-* "()" is the tycon for a boxed 0-tuple.
-  There are no boxed 1-tuples.
-
-
-\begin{code}
-maybe_tuple :: UserString -> Maybe EncodedString
-
-maybe_tuple "(# #)" = Just("Z1H")
-maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
-				 (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
-				 other		     -> Nothing
-maybe_tuple "()" = Just("Z0T")
-maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
-				 (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
-				 other	       -> Nothing
-maybe_tuple other    	     = Nothing
-
-count_commas :: Int -> String -> (Int, String)
-count_commas n (',' : cs) = count_commas (n+1) cs
-count_commas n cs	  = (n,cs)
-\end{code}
-
 \begin{code}
 mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName
-mkTupleOcc ns bx ar
-  = OccName ns (mkFastString ('Z' : (show ar ++ bx_char)))
+mkTupleOcc ns bx ar = OccName ns (mkFastString str)
   where
-    bx_char = case bx of
-		Boxed   -> "T"
-		Unboxed -> "H"
+ 	-- no need to cache these, the caching is done in the caller
+	-- (TysWiredIn.mk_tuple)
+    str = case bx of
+		Boxed   -> '(' : commas ++ ")"
+		Unboxed -> '(' : '#' : commas ++ "#)"
+
+    commas = take (ar-1) (repeat ',')
 
 isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity)
 -- Tuples are special, because there are so many of them!
 isTupleOcc_maybe (OccName ns fs)
   = case unpackFS fs of
-	('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest)
-	other			 -> Nothing
+	'(':'#':',':rest -> Just (ns, Unboxed, 2 + count_commas rest)
+	'(':',':rest     -> Just (ns, Boxed,   2 + count_commas rest)
+	_other           -> Nothing
   where
-    decode_tup n "H" 	  = (ns, Unboxed, n)
-    decode_tup n "T" 	  = (ns, Boxed, n)
-    decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest
+    count_commas (',':rest) = 1 + count_commas rest
+    count_commas _          = 0
 \end{code}
 
 %************************************************************************
@@ -875,37 +608,31 @@ isLexSym cs = isLexConSym cs || isLexVarSym cs
 -------------
 
 isLexConId cs				-- Prefix type or data constructors
-  | nullFastString cs = False		-- 	e.g. "Foo", "[]", "(,)" 
+  | nullFS cs	      = False		-- 	e.g. "Foo", "[]", "(,)" 
   | cs == FSLIT("[]") = True
   | otherwise	      = startsConId (headFS cs)
 
 isLexVarId cs				-- Ordinary prefix identifiers
-  | nullFastString cs = False		-- 	e.g. "x", "_x"
+  | nullFS cs	      = False		-- 	e.g. "x", "_x"
   | otherwise         = startsVarId (headFS cs)
 
 isLexConSym cs				-- Infix type or data constructors
-  | nullFastString cs = False		--	e.g. ":-:", ":", "->"
+  | nullFS cs	      = False		--	e.g. ":-:", ":", "->"
   | cs == FSLIT("->") = True
   | otherwise	      = startsConSym (headFS cs)
 
 isLexVarSym cs				-- Infix identifiers
-  | nullFastString cs = False		-- 	e.g. "+"
+  | nullFS cs	      = False		-- 	e.g. "+"
   | otherwise         = startsVarSym (headFS cs)
 
 -------------
 startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
-startsVarSym c = isSymbolASCII c || isSymbolISO c	-- Infix Ids
+startsVarSym c = isSymbolASCII c || (ord c > 0x7f && isSymbol c) -- Infix Ids
 startsConSym c = c == ':'				-- Infix data constructors
-startsVarId c  = isLower c || isLowerISO c || c == '_'	-- Ordinary Ids
-startsConId c  = isUpper c || isUpperISO c || c == '('	-- Ordinary type constructors and data constructors
-
+startsVarId c  = isLower c || c == '_'	-- Ordinary Ids
+startsConId c  = isUpper c || c == '('	-- Ordinary type constructors and data constructors
 
 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
-isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
-isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
-	--0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
-isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
-	--0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
 \end{code}
 
 %************************************************************************
diff --git a/ghc/compiler/basicTypes/RdrName.lhs b/ghc/compiler/basicTypes/RdrName.lhs
index 900717e610..030aa1f609 100644
--- a/ghc/compiler/basicTypes/RdrName.lhs
+++ b/ghc/compiler/basicTypes/RdrName.lhs
@@ -40,25 +40,17 @@ module RdrName (
 
 #include "HsVersions.h"
 
-import OccName	( NameSpace, varName,
-		  OccName, UserFS, 
-		  setOccNameSpace,
-		  mkOccFS, occNameFlavour,
-		  isDataOcc, isTvOcc, isTcOcc,
-		  OccEnv, emptyOccEnv, extendOccEnvList, lookupOccEnv, 
-		  elemOccEnv, plusOccEnv_C, extendOccEnv_C, foldOccEnv,
-		  occEnvElts
-		)
+import OccName
 import Module   ( Module, mkModuleFS )
 import Name	( Name, NamedThing(getName), nameModule, nameParent_maybe,
 		  nameOccName, isExternalName, nameSrcLoc )
 import Maybes	( mapCatMaybes )
 import SrcLoc	( isGoodSrcLoc, SrcSpan )
+import FastString ( FastString )
 import Outputable
 import Util	( thenCmp )
 \end{code}
 
-
 %************************************************************************
 %*									*
 \subsection{The main data type}
@@ -147,14 +139,14 @@ mkDerivedRdrName parent mk_occ
 ---------------
 	-- These two are used when parsing source files
 	-- They do encode the module and occurrence names
-mkUnqual :: NameSpace -> UserFS -> RdrName
-mkUnqual sp n = Unqual (mkOccFS sp n)
+mkUnqual :: NameSpace -> FastString -> RdrName
+mkUnqual sp n = Unqual (mkOccNameFS sp n)
 
-mkVarUnqual :: UserFS -> RdrName
-mkVarUnqual n = Unqual (mkOccFS varName n)
+mkVarUnqual :: FastString -> RdrName
+mkVarUnqual n = Unqual (mkVarOccFS n)
 
-mkQual :: NameSpace -> (UserFS, UserFS) -> RdrName
-mkQual sp (m, n) = Qual (mkModuleFS m) (mkOccFS sp n)
+mkQual :: NameSpace -> (FastString, FastString) -> RdrName
+mkQual sp (m, n) = Qual (mkModuleFS m) (mkOccNameFS sp n)
 
 getRdrName :: NamedThing thing => thing -> RdrName
 getRdrName name = nameRdrName (getName name)
@@ -213,7 +205,7 @@ instance Outputable RdrName where
     ppr (Qual mod occ) = ppr mod <> dot <> ppr occ <+> ppr_name_space occ
     ppr (Orig mod occ) = ppr mod <> dot <> ppr occ <+> ppr_name_space occ
 
-ppr_name_space occ = ifPprDebug (parens (occNameFlavour occ))
+ppr_name_space occ = ifPprDebug (parens (pprNonVarNameSpace (occNameSpace occ)))
 
 instance OutputableBndr RdrName where
     pprBndr _ n 
diff --git a/ghc/compiler/cmm/CLabel.hs b/ghc/compiler/cmm/CLabel.hs
index 296ad91ba8..fb13589cdc 100644
--- a/ghc/compiler/cmm/CLabel.hs
+++ b/ghc/compiler/cmm/CLabel.hs
@@ -740,10 +740,10 @@ pprCLbl (CC_Label cc) 		= ppr cc
 pprCLbl (CCS_Label ccs) 	= ppr ccs
 
 pprCLbl (ModuleInitLabel mod way _)	
-   = ptext SLIT("__stginit_") <> ftext (moduleFS mod)
+   = ptext SLIT("__stginit_") <> ppr mod
 	<> char '_' <> text way
 pprCLbl (PlainModuleInitLabel mod _)	
-   = ptext SLIT("__stginit_") <> ftext (moduleFS mod)
+   = ptext SLIT("__stginit_") <> ppr mod
 
 ppIdFlavor :: IdLabelInfo -> SDoc
 ppIdFlavor x = pp_cSEP <>
diff --git a/ghc/compiler/cmm/Cmm.hs b/ghc/compiler/cmm/Cmm.hs
index 304ddb2b0f..13961c15d3 100644
--- a/ghc/compiler/cmm/Cmm.hs
+++ b/ghc/compiler/cmm/Cmm.hs
@@ -30,6 +30,7 @@ import CLabel		( CLabel )
 import ForeignCall	( CCallConv )
 import Unique		( Unique, Uniquable(..) )
 import FastString	( FastString )
+import DATA_WORD	( Word8 )
 
 -----------------------------------------------------------------------------
 --		Cmm, CmmTop, CmmBasicBlock
@@ -251,9 +252,8 @@ data CmmStatic
 	-- align to next N-byte boundary (N must be a power of 2).
   | CmmDataLabel CLabel
 	-- label the current position in this section.
-  | CmmString String
+  | CmmString [Word8]
 	-- string of 8-bit values only, not zero terminated.
-	-- ToDo: might be more honest to use [Word8] here?
 
 -----------------------------------------------------------------------------
 --		Global STG registers
diff --git a/ghc/compiler/cmm/CmmLex.x b/ghc/compiler/cmm/CmmLex.x
index 8515b3e399..c2efd17710 100644
--- a/ghc/compiler/cmm/CmmLex.x
+++ b/ghc/compiler/cmm/CmmLex.x
@@ -227,10 +227,10 @@ tok_decimal span buf len
   = return (L span (CmmT_Int  $! parseInteger buf len 10 octDecDigit))
 
 tok_octal span buf len 
-  = return (L span (CmmT_Int  $! parseInteger (stepOn buf) (len-1) 8 octDecDigit))
+  = return (L span (CmmT_Int  $! parseInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit))
 
 tok_hexadecimal span buf len 
-  = return (L span (CmmT_Int  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hexDigit))
+  = return (L span (CmmT_Int  $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
 
 tok_float str = CmmT_Float $! readRational str
 
diff --git a/ghc/compiler/cmm/CmmParse.y b/ghc/compiler/cmm/CmmParse.y
index 45f411b942..cfb2a9d93c 100644
--- a/ghc/compiler/cmm/CmmParse.y
+++ b/ghc/compiler/cmm/CmmParse.y
@@ -48,6 +48,7 @@ import Constants	( wORD_SIZE )
 import Outputable
 
 import Monad		( when )
+import Data.Char	( ord )
 
 #include "HsVersions.h"
 }
@@ -177,7 +178,7 @@ static 	:: { ExtFCode [CmmStatic] }
 			     return [CmmStaticLit (getLit e)] }
 	| type ';'			{ return [CmmUninitialised
 							(machRepByteWidth $1)] }
-        | 'bits8' '[' ']' STRING ';'	{ return [CmmString $4] }
+        | 'bits8' '[' ']' STRING ';'	{ return [mkString $4] }
         | 'bits8' '[' INT ']' ';'	{ return [CmmUninitialised 
 							(fromIntegral $3)] }
         | typenot8 '[' INT ']' ';'	{ return [CmmUninitialised 
@@ -427,6 +428,9 @@ section "rodata" = ReadOnlyData
 section "bss"	 = UninitialisedData
 section s	 = OtherSection s
 
+mkString :: String -> CmmStatic
+mkString s = CmmString (map (fromIntegral.ord) s)
+
 -- mkMachOp infers the type of the MachOp from the type of its first
 -- argument.  We assume that this is correct: for MachOps that don't have
 -- symmetrical args (e.g. shift ops), the first arg determines the type of
diff --git a/ghc/compiler/cmm/PprC.hs b/ghc/compiler/cmm/PprC.hs
index 7427f50ecc..9fece36a42 100644
--- a/ghc/compiler/cmm/PprC.hs
+++ b/ghc/compiler/cmm/PprC.hs
@@ -45,6 +45,7 @@ import Data.Bits        ( shiftR )
 import Char             ( ord, chr )
 import IO               ( Handle )
 import DATA_BITS
+import Data.Word	( Word8 )
 
 #ifdef DEBUG
 import PprCmm		() -- instances only
@@ -881,25 +882,21 @@ machRepSignedCType r | r == wordRep = ptext SLIT("I_")
 -- ---------------------------------------------------------------------
 -- print strings as valid C strings
 
--- Assumes it contains only characters '\0'..'\xFF'!
-pprFSInCStyle :: FastString -> SDoc
-pprFSInCStyle fs = pprStringInCStyle (unpackFS fs)
-
-pprStringInCStyle :: String -> SDoc
+pprStringInCStyle :: [Word8] -> SDoc
 pprStringInCStyle s = doubleQuotes (text (concatMap charToC s))
 
-charToC :: Char -> String
-charToC '\"' = "\\\""
-charToC '\'' = "\\\'"
-charToC '\\' = "\\\\"
-charToC c | c >= ' ' && c <= '~' = [c]
-          | c > '\xFF' = panic ("charToC "++show c)
-          | otherwise = ['\\',
+charToC :: Word8 -> String
+charToC w = 
+  case chr (fromIntegral w) of
+	'\"' -> "\\\""
+	'\'' -> "\\\'"
+	'\\' -> "\\\\"
+	c | c >= ' ' && c <= '~' -> [c]
+          | otherwise -> ['\\',
                          chr (ord '0' + ord c `div` 64),
                          chr (ord '0' + ord c `div` 8 `mod` 8),
                          chr (ord '0' + ord c         `mod` 8)]
 
-
 -- ---------------------------------------------------------------------------
 -- Initialising static objects with floating-point numbers.  We can't
 -- just emit the floating point number, because C will cast it to an int
diff --git a/ghc/compiler/cmm/PprCmm.hs b/ghc/compiler/cmm/PprCmm.hs
index 3c3e9764a4..6e8367d662 100644
--- a/ghc/compiler/cmm/PprCmm.hs
+++ b/ghc/compiler/cmm/PprCmm.hs
@@ -51,6 +51,7 @@ import FastString   ( mkFastString )
 import Data.List    ( intersperse, groupBy )
 import IO           ( Handle )
 import Maybe	    ( isJust )
+import Data.Char    ( chr )
 
 pprCmms :: [Cmm] -> SDoc
 pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms))
@@ -391,7 +392,8 @@ pprStatic s = case s of
     CmmUninitialised i -> nest 4 $ text "I8" <> brackets (int i)
     CmmAlign i         -> nest 4 $ text "align" <+> int i
     CmmDataLabel clbl  -> pprCLabel clbl <> colon
-    CmmString s'       -> nest 4 $ text "I8[]" <+> doubleQuotes (text s')
+    CmmString s'       -> nest 4 $ text "I8[]" <+> 
+			   doubleQuotes (text (map (chr.fromIntegral) s'))
 
 -- --------------------------------------------------------------------------
 -- Registers, whether local (temps) or global
diff --git a/ghc/compiler/codeGen/CgProf.hs b/ghc/compiler/codeGen/CgProf.hs
index aaab2fcb77..1488e34956 100644
--- a/ghc/compiler/codeGen/CgProf.hs
+++ b/ghc/compiler/codeGen/CgProf.hs
@@ -43,7 +43,7 @@ import MachOp
 import CmmUtils		( zeroCLit, mkIntCLit, mkLblExpr )
 import CLabel		( mkCCLabel, mkCCSLabel, mkRtsDataLabel )
 
-import Module		( moduleUserString )
+import Module		( moduleString )
 import Id		( Id )
 import CostCentre
 import StgSyn		( GenStgExpr(..), StgExpr )
@@ -292,7 +292,7 @@ emitCostCentreDecl
    -> Code
 emitCostCentreDecl cc = do 
   { label <- mkStringCLit (costCentreUserName cc)
-  ; modl  <- mkStringCLit (moduleUserString (cc_mod cc))
+  ; modl  <- mkStringCLit (moduleString (cc_mod cc))
   ; let
      lits = [ zero,   	-- StgInt ccID,
 	      label,	-- char *label,
diff --git a/ghc/compiler/codeGen/CgUtils.hs b/ghc/compiler/codeGen/CgUtils.hs
index 68958d22a9..2f69927db0 100644
--- a/ghc/compiler/codeGen/CgUtils.hs
+++ b/ghc/compiler/codeGen/CgUtils.hs
@@ -54,11 +54,12 @@ import ListSetOps	( assocDefault )
 import Util		( filterOut, sortLe )
 import DynFlags		( DynFlags(..), HscTarget(..) )
 import Packages		( HomeModules )
-import FastString	( LitString, FastString, unpackFS )
+import FastString	( LitString, FastString, bytesFS )
 import Outputable
 
 import Char		( ord )
 import DATA_BITS
+import DATA_WORD	( Word8 )
 import Maybe		( isNothing )
 
 -------------------------------------------------------------------------
@@ -77,7 +78,8 @@ addIdReps ids = [(idCgRep id, id) | id <- ids]
 -------------------------------------------------------------------------
 
 cgLit :: Literal -> FCode CmmLit
-cgLit (MachStr s) = mkStringCLit (unpackFS s)
+cgLit (MachStr s) = mkByteStringCLit (bytesFS s)
+ -- not unpackFS; we want the UTF-8 byte stream.
 cgLit other_lit   = return (mkSimpleLit other_lit)
 
 mkSimpleLit :: Literal -> CmmLit
@@ -308,10 +310,13 @@ emitRODataLits lbl lits
 mkStringCLit :: String -> FCode CmmLit
 -- Make a global definition for the string,
 -- and return its label
-mkStringCLit str 
+mkStringCLit str = mkByteStringCLit (map (fromIntegral.ord) str)
+
+mkByteStringCLit :: [Word8] -> FCode CmmLit
+mkByteStringCLit bytes
   = do 	{ uniq <- newUnique
 	; let lbl = mkStringLitLabel uniq
-	; emitData ReadOnlyData [CmmDataLabel lbl, CmmString str]
+	; emitData ReadOnlyData [CmmDataLabel lbl, CmmString bytes]
 	; return (CmmLabel lbl) }
 
 -------------------------------------------------------------------------
diff --git a/ghc/compiler/codeGen/ClosureInfo.lhs b/ghc/compiler/codeGen/ClosureInfo.lhs
index b0e9e232d7..a5362e60e0 100644
--- a/ghc/compiler/codeGen/ClosureInfo.lhs
+++ b/ghc/compiler/codeGen/ClosureInfo.lhs
@@ -69,7 +69,7 @@ import StaticFlags	( opt_SccProfilingOn, opt_OmitBlackHoling,
 import Id		( Id, idType, idArity, idName )
 import DataCon		( DataCon, dataConTyCon, isNullaryRepDataCon, dataConName )
 import Name		( Name, nameUnique, getOccName, getOccString )
-import OccName		( occNameUserString )
+import OccName		( occNameString )
 import Type		( isUnLiftedType, Type, repType, splitTyConApp_maybe )
 import TcType		( tcSplitSigmaTy )
 import TyCon		( isFunTyCon, isAbstractTyCon )
@@ -930,12 +930,12 @@ closureValDescr, closureTypeDescr :: ClosureInfo -> String
 closureValDescr (ClosureInfo {closureDescr = descr}) 
   = descr
 closureValDescr (ConInfo {closureCon = con})
-  = occNameUserString (getOccName con)
+  = occNameString (getOccName con)
 
 closureTypeDescr (ClosureInfo { closureType = ty })
   = getTyDescription ty
 closureTypeDescr (ConInfo { closureCon = data_con })
-  = occNameUserString (getOccName (dataConTyCon data_con))
+  = occNameString (getOccName (dataConTyCon data_con))
 
 getTyDescription :: Type -> String
 getTyDescription ty
diff --git a/ghc/compiler/deSugar/Check.lhs b/ghc/compiler/deSugar/Check.lhs
index 309aab2f50..6d7784dc14 100644
--- a/ghc/compiler/deSugar/Check.lhs
+++ b/ghc/compiler/deSugar/Check.lhs
@@ -18,7 +18,8 @@ import DsUtils		( EquationInfo(..), MatchResult(..),
 import MatchLit		( tidyLitPat, tidyNPat )
 import Id		( Id, idType )
 import DataCon		( DataCon, dataConTyCon, dataConOrigArgTys, dataConFieldLabels )
-import Name             ( Name, mkInternalName, getOccName, isDataSymOcc, getName, mkVarOcc )
+import Name             ( Name, mkInternalName, getOccName, isDataSymOcc,
+			  getName, mkVarOccFS )
 import TysWiredIn
 import PrelNames	( unboundKey )
 import TyCon            ( tyConDataCons, tupleTyConBoxity, isTupleTyCon )
@@ -382,7 +383,7 @@ make_row_vars used_lits (_, EqnInfo { eqn_pats = pats})
      new_var = hash_x
 
 hash_x = mkInternalName unboundKey {- doesn't matter much -}
-		     (mkVarOcc FSLIT("#x"))
+		     (mkVarOccFS FSLIT("#x"))
 		     noSrcLoc
 
 make_row_vars_for_constructor :: (EqnNo, EquationInfo) -> [WarningPat]
diff --git a/ghc/compiler/deSugar/DsForeign.lhs b/ghc/compiler/deSugar/DsForeign.lhs
index d784eb8612..52956a09ff 100644
--- a/ghc/compiler/deSugar/DsForeign.lhs
+++ b/ghc/compiler/deSugar/DsForeign.lhs
@@ -28,9 +28,8 @@ import SMRep		( argMachRep, typeCgRep )
 import CoreUtils	( exprType, mkInlineMe )
 import Id		( Id, idType, idName, mkSysLocal, setInlinePragma )
 import Literal		( Literal(..), mkStringLit )
-import Module		( moduleString )
+import Module		( moduleFS )
 import Name		( getOccString, NamedThing(..) )
-import OccName		( encodeFS )
 import Type		( repType, coreEqType )
 import TcType		( Type, mkFunTys, mkForAllTys, mkTyConApp,
 			  mkFunTy, tcSplitTyConApp_maybe, 
@@ -146,7 +145,7 @@ dsFImport id (CImport cconv safety header lib spec)
   = dsCImport id spec cconv safety no_hdrs	  `thenDs` \(ids, h, c) ->
     returnDs (ids, h, c, if no_hdrs then Nothing else Just header)
   where
-    no_hdrs = nullFastString header
+    no_hdrs = nullFS header
 
   -- FIXME: the `lib' field is needed for .NET ILX generation when invoking
   --	    routines that are external to the .NET runtime, but GHC doesn't
@@ -246,7 +245,7 @@ dsFCall fn_id fcall no_hdrs
  	the_ccall_app = mkFCall ccall_uniq fcall val_args ccall_result_ty
 	work_rhs      = mkLams tvs (mkLams work_arg_ids the_ccall_app)
 	work_id       = setImpInline no_hdrs $	-- See comments with setImpInline
-			mkSysLocal (encodeFS FSLIT("$wccall")) work_uniq worker_ty
+			mkSysLocal FSLIT("$wccall") work_uniq worker_ty
 
 	-- Build the wrapper
 	work_app     = mkApps (mkVarApps (Var work_id) tvs) val_args
@@ -356,7 +355,7 @@ dsFExportDynamic id cconv
      getModuleDs				`thenDs` \ mod_name -> 
      let 
         -- hack: need to get at the name of the C stub we're about to generate.
-       fe_nm	   = mkFastString (moduleString mod_name ++ "_" ++ toCName fe_id)
+       fe_nm	   = mkFastString (unpackFS (zEncodeFS (moduleFS mod_name)) ++ "_" ++ toCName fe_id)
      in
      newSysLocalDs arg_ty			`thenDs` \ cback ->
      dsLookupGlobalId newStablePtrName		`thenDs` \ newStablePtrId ->
diff --git a/ghc/compiler/deSugar/DsMeta.hs b/ghc/compiler/deSugar/DsMeta.hs
index e5e079ea61..fcbcc78347 100644
--- a/ghc/compiler/deSugar/DsMeta.hs
+++ b/ghc/compiler/deSugar/DsMeta.hs
@@ -30,16 +30,16 @@ import qualified Language.Haskell.TH as TH
 import HsSyn
 import Class (FunDep)
 import PrelNames  ( rationalTyConName, integerTyConName, negateName )
-import OccName	  ( isDataOcc, isTvOcc, occNameUserString )
+import OccName	  ( isDataOcc, isTvOcc, occNameString )
 -- To avoid clashes with DsMeta.varName we must make a local alias for OccName.varName
 -- we do this by removing varName from the import of OccName above, making
 -- a qualified instance of OccName and using OccNameAlias.varName where varName
 -- ws previously used in this file.
 import qualified OccName
 
-import Module	  ( Module, mkModule, moduleUserString )
+import Module	  ( Module, mkModule, moduleString )
 import Id         ( Id, mkLocalId )
-import OccName	  ( mkOccFS )
+import OccName	  ( mkOccNameFS )
 import Name       ( Name, mkExternalName, localiseName, nameOccName, nameModule, 
 		    isExternalName, getSrcLoc )
 import NameEnv
@@ -911,7 +911,7 @@ globalVar name
 	; MkC uni <- coreIntLit (getKey (getUnique name))
 	; rep2 mkNameLName [occ,uni] }
   where
-      name_mod = moduleUserString (nameModule name)
+      name_mod = moduleString (nameModule name)
       name_occ = nameOccName name
       mk_varg | OccName.isDataOcc name_occ = mkNameG_dName
 	      | OccName.isVarOcc  name_occ = mkNameG_vName
@@ -963,7 +963,7 @@ wrapNongenSyms binds (MkC body)
 	     ; return (NonRec id var) }
 
 occNameLit :: Name -> DsM (Core String)
-occNameLit n = coreStringLit (occNameUserString (nameOccName n))
+occNameLit n = coreStringLit (occNameString (nameOccName n))
 
 
 -- %*********************************************************************
@@ -1390,7 +1390,7 @@ thSyn = mkModule "Language.Haskell.TH.Syntax"
 thLib = mkModule "Language.Haskell.TH.Lib"
 
 mk_known_key_name mod space str uniq 
-  = mkExternalName uniq mod (mkOccFS space str) 
+  = mkExternalName uniq mod (mkOccNameFS space str) 
 	           Nothing noSrcLoc
 
 libFun = mk_known_key_name thLib OccName.varName
diff --git a/ghc/compiler/deSugar/DsUtils.lhs b/ghc/compiler/deSugar/DsUtils.lhs
index b77bb967cf..1465554175 100644
--- a/ghc/compiler/deSugar/DsUtils.lhs
+++ b/ghc/compiler/deSugar/DsUtils.lhs
@@ -69,11 +69,12 @@ import PrelNames	( unpackCStringName, unpackCStringUtf8Name,
 			  plusIntegerName, timesIntegerName, smallIntegerDataConName, 
 			  lengthPName, indexPName )
 import Outputable
-import UnicodeUtil      ( intsToUtf8 )
 import SrcLoc		( Located(..), unLoc )
 import Util             ( isSingleton, notNull, zipEqual, sortWith )
 import ListSetOps	( assocDefault )
 import FastString
+
+import Data.Char	( ord )
 \end{code}
 
 
@@ -469,7 +470,7 @@ mkSmallIntegerLit small_integer_data_con i = mkConApp small_integer_data_con [mk
 mkStringExpr str = mkStringExprFS (mkFastString str)
 
 mkStringExprFS str
-  | nullFastString str
+  | nullFS str
   = returnDs (mkNilExpr charTy)
 
   | lengthFS str == 1
@@ -478,17 +479,17 @@ mkStringExprFS str
     in
     returnDs (mkConsExpr charTy the_char (mkNilExpr charTy))
 
-  | all safeChar int_chars
+  | all safeChar chars
   = dsLookupGlobalId unpackCStringName	`thenDs` \ unpack_id ->
     returnDs (App (Var unpack_id) (Lit (MachStr str)))
 
   | otherwise
   = dsLookupGlobalId unpackCStringUtf8Name	`thenDs` \ unpack_id ->
-    returnDs (App (Var unpack_id) (Lit (MachStr (mkFastString (intsToUtf8 int_chars)))))
+    returnDs (App (Var unpack_id) (Lit (MachStr str)))
 
   where
-    int_chars = unpackIntFS str
-    safeChar c = c >= 1 && c <= 0xFF
+    chars = unpackFS str
+    safeChar c = ord c >= 1 && ord c <= 0xFF
 \end{code}
 
 
diff --git a/ghc/compiler/ghci/ByteCodeGen.lhs b/ghc/compiler/ghci/ByteCodeGen.lhs
index 4d2fa73876..f526ed9907 100644
--- a/ghc/compiler/ghci/ByteCodeGen.lhs
+++ b/ghc/compiler/ghci/ByteCodeGen.lhs
@@ -53,7 +53,8 @@ import OrdList
 import Constants	( wORD_SIZE )
 
 import Data.List	( intersperse, sortBy, zip4, zip5, partition )
-import Foreign		( Ptr, castPtr, mallocBytes, pokeByteOff, Word8 )
+import Foreign		( Ptr, castPtr, mallocBytes, pokeByteOff, Word8,
+			  withForeignPtr )
 import Foreign.C	( CInt )
 import Control.Exception	( throwDyn )
 
@@ -1084,18 +1085,18 @@ pushAtom d p (AnnLit lit)
         pushStr s 
            = let getMallocvilleAddr
                     = case s of
-                         FastString _ l ba -> 
-                            -- sigh, a string in the heap is no good to us.
-                            -- We need a static C pointer, since the type of 
-                            -- a string literal is Addr#.  So, copy the string 
-                            -- into C land and remember the pointer so we can
-			    -- free it later.
-                            let n = I# l
-                            -- CAREFUL!  Chars are 32 bits in ghc 4.09+
-                            in  ioToBc (mallocBytes (n+1)) `thenBc` \ ptr ->
+                         FastString _ n _ fp _ -> 
+			    -- we could grab the Ptr from the ForeignPtr,
+			    -- but then we have no way to control its lifetime.
+			    -- In reality it'll probably stay alive long enoungh
+			    -- by virtue of the global FastString table, but
+			    -- to be on the safe side we copy the string into
+			    -- a malloc'd area of memory.
+                                ioToBc (mallocBytes (n+1)) `thenBc` \ ptr ->
                                 recordMallocBc ptr         `thenBc_`
                                 ioToBc (
-                                   do memcpy ptr ba (fromIntegral n)
+                                   withForeignPtr fp $ \p -> do
+				      memcpy ptr p (fromIntegral n)
 				      pokeByteOff ptr n (fromIntegral (ord '\0') :: Word8)
                                       return ptr
                                    )
@@ -1110,7 +1111,7 @@ pushAtom d p other
               (pprCoreExpr (deAnnotate (undefined, other)))
 
 foreign import ccall unsafe "memcpy"
- memcpy :: Ptr a -> ByteArray# -> CInt -> IO ()
+ memcpy :: Ptr a -> Ptr b -> CInt -> IO ()
 
 
 -- -----------------------------------------------------------------------------
diff --git a/ghc/compiler/ghci/ByteCodeLink.lhs b/ghc/compiler/ghci/ByteCodeLink.lhs
index ee64b8aa8e..875f1d6331 100644
--- a/ghc/compiler/ghci/ByteCodeLink.lhs
+++ b/ghc/compiler/ghci/ByteCodeLink.lhs
@@ -21,10 +21,10 @@ import ObjLink		( lookupSymbol )
 
 import Name		( Name,  nameModule, nameOccName, isExternalName )
 import NameEnv
-import OccName		( occNameString )
+import OccName		( occNameFS )
 import PrimOp		( PrimOp, primOpOcc )
-import Module		( moduleString  )
-import FastString	( FastString(..), unpackFS )
+import Module		( moduleFS )
+import FastString	( FastString(..), unpackFS, zEncodeFS )
 import Outputable
 import Panic            ( GhcException(..) )
 
@@ -256,12 +256,12 @@ linkFail who what
 -- HACKS!!!  ToDo: cleaner
 nameToCLabel :: Name -> String{-suffix-} -> String
 nameToCLabel n suffix
-   = moduleString (nameModule n)
-     ++ '_':occNameString (nameOccName n) ++ '_':suffix
+   = unpackFS (zEncodeFS (moduleFS (nameModule n)))
+     ++ '_': unpackFS (zEncodeFS (occNameFS (nameOccName n))) ++ '_':suffix
 
 primopToCLabel :: PrimOp -> String{-suffix-} -> String
 primopToCLabel primop suffix
-   = let str = "GHCziPrimopWrappers_" ++ occNameString (primOpOcc primop) ++ '_':suffix
+   = let str = "GHCziPrimopWrappers_" ++ unpackFS (zEncodeFS (occNameFS (primOpOcc primop))) ++ '_':suffix
      in --trace ("primopToCLabel: " ++ str)
         str
 \end{code}
diff --git a/ghc/compiler/ghci/InteractiveUI.hs b/ghc/compiler/ghci/InteractiveUI.hs
index 0bf37dc400..8fee9ba19f 100644
--- a/ghc/compiler/ghci/InteractiveUI.hs
+++ b/ghc/compiler/ghci/InteractiveUI.hs
@@ -25,10 +25,10 @@ import PprTyThing
 import Outputable
 
 -- for createtags (should these come via GHC?)
-import Module( moduleUserString )
-import Name( nameSrcLoc, nameModule, nameOccName )
-import OccName( pprOccName )
-import SrcLoc( isGoodSrcLoc, srcLocFile, srcLocLine, srcLocCol )
+import Module		( moduleString )
+import Name		( nameSrcLoc, nameModule, nameOccName )
+import OccName		( pprOccName )
+import SrcLoc		( isGoodSrcLoc, srcLocFile, srcLocLine, srcLocCol )
 
 -- Other random utilities
 import Digraph		( flattenSCCs )
@@ -813,7 +813,7 @@ createTagsFile session tagskind tagFile = do
         is_interpreted <- GHC.moduleIsInterpreted session m
         -- should we just skip these?
         when (not is_interpreted) $
-          throwDyn (CmdLineError ("module '" ++ moduleUserString m ++ "' is not interpreted"))
+          throwDyn (CmdLineError ("module '" ++ moduleString m ++ "' is not interpreted"))
 
         mbModInfo <- GHC.getModuleInfo session m
         let unqual 
diff --git a/ghc/compiler/hsSyn/Convert.lhs b/ghc/compiler/hsSyn/Convert.lhs
index 96623bbd5c..9dddd29c21 100644
--- a/ghc/compiler/hsSyn/Convert.lhs
+++ b/ghc/compiler/hsSyn/Convert.lhs
@@ -20,7 +20,8 @@ import qualified Name	( Name, mkInternalName, getName )
 import Module   ( Module, mkModule )
 import RdrHsSyn	( mkClassDecl, mkTyData )
 import qualified OccName
-import OccName	( startsVarId, startsVarSym, startsConId, startsConSym )
+import OccName	( startsVarId, startsVarSym, startsConId, startsConSym,
+		  pprNameSpace )
 import SrcLoc	( Located(..), SrcSpan )
 import Type	( Type )
 import TysWiredIn ( unitTyCon, tupleTyCon, tupleCon, trueDataCon, nilDataCon, consDataCon )
@@ -550,7 +551,7 @@ okOcc ns str@(c:_)
 
 badOcc :: OccName.NameSpace -> String -> SDoc
 badOcc ctxt_ns occ 
-  = ptext SLIT("Illegal") <+> text (OccName.nameSpaceString ctxt_ns) 
+  = ptext SLIT("Illegal") <+> pprNameSpace ctxt_ns
 	<+> ptext SLIT("name:") <+> quotes (text occ)
 
 thRdrName :: OccName.NameSpace -> String -> TH.NameFlavour -> RdrName
@@ -605,7 +606,7 @@ mk_uniq_occ ns occ uniq
 
 -- The packing and unpacking is rather turgid :-(
 mk_occ :: OccName.NameSpace -> String -> OccName.OccName
-mk_occ ns occ = OccName.mkOccFS ns (mkFastString occ)
+mk_occ ns occ = OccName.mkOccNameFS ns (mkFastString occ)
 
 mk_ghc_ns :: TH.NameSpace -> OccName.NameSpace
 mk_ghc_ns TH.DataName  = OccName.dataName
diff --git a/ghc/compiler/hsSyn/HsDecls.lhs b/ghc/compiler/hsSyn/HsDecls.lhs
index c977496aa9..5253d11482 100644
--- a/ghc/compiler/hsSyn/HsDecls.lhs
+++ b/ghc/compiler/hsSyn/HsDecls.lhs
@@ -721,8 +721,8 @@ instance Outputable ForeignImport where
         ptext SLIT("dynamic")
       pprCEntity _      _   (CWrapper) = ptext SLIT("wrapper")
       --
-      pprLib lib | nullFastString lib = empty
-		 | otherwise	      = char '[' <> ppr lib <> char ']'
+      pprLib lib | nullFS lib = empty
+		 | otherwise  = char '[' <> ppr lib <> char ']'
 
 instance Outputable ForeignExport where
   ppr (CExport  (CExportStatic lbl cconv)) = 
diff --git a/ghc/compiler/hsSyn/HsUtils.lhs b/ghc/compiler/hsSyn/HsUtils.lhs
index 612e57a931..f8efa6cfb9 100644
--- a/ghc/compiler/hsSyn/HsUtils.lhs
+++ b/ghc/compiler/hsSyn/HsUtils.lhs
@@ -27,7 +27,7 @@ import RdrName		( RdrName, getRdrName, mkRdrUnqual )
 import Var		( Id )
 import Type		( Type )
 import DataCon		( DataCon, dataConWrapId, dataConSourceArity )
-import OccName		( mkVarOcc )
+import OccName		( mkVarOccFS )
 import Name		( Name )
 import BasicTypes	( RecFlag(..) )
 import SrcLoc
@@ -136,7 +136,7 @@ mkHsOpApp e1 op e2 = OpApp e1 (noLoc (HsVar op)) (error "mkOpApp:fixity") e2
 
 mkHsSplice e = HsSplice unqualSplice e
 
-unqualSplice = mkRdrUnqual (mkVarOcc FSLIT("splice"))
+unqualSplice = mkRdrUnqual (mkVarOccFS FSLIT("splice"))
 		-- A name (uniquified later) to
 		-- identify the splice
 
diff --git a/ghc/compiler/iface/LoadIface.lhs b/ghc/compiler/iface/LoadIface.lhs
index 356cf224bd..8c496f76ef 100644
--- a/ghc/compiler/iface/LoadIface.lhs
+++ b/ghc/compiler/iface/LoadIface.lhs
@@ -45,7 +45,7 @@ import NameEnv
 import MkId		( seqId )
 import Module		( Module, ModLocation(ml_hi_file), emptyModuleEnv, 
 			  addBootSuffix_maybe,
-			  extendModuleEnv, lookupModuleEnv, moduleUserString
+			  extendModuleEnv, lookupModuleEnv, moduleString
 			)
 import OccName		( OccName, mkOccEnv, lookupOccEnv, mkClassTyConOcc, mkClassDataConOcc,
 			  mkSuperDictSelOcc, mkDataConWrapperOcc, mkDataConWorkerOcc )
@@ -312,7 +312,7 @@ loadDecl ignore_prags mod (_version, decl)
 	-- imported name, to fix the module correctly in the cache
     mk_new_bndr mod mb_parent occ 
 	= newGlobalBinder mod occ mb_parent 
-			  (importedSrcLoc (moduleUserString mod))
+			  (importedSrcLoc (moduleString mod))
 
     doc = ptext SLIT("Declaration for") <+> ppr (ifName decl)
 
diff --git a/ghc/compiler/iface/MkIface.lhs b/ghc/compiler/iface/MkIface.lhs
index 5be56bfa25..2f15ee3773 100644
--- a/ghc/compiler/iface/MkIface.lhs
+++ b/ghc/compiler/iface/MkIface.lhs
@@ -214,7 +214,7 @@ import OccName		( OccName, OccEnv, mkOccEnv, lookupOccEnv, emptyOccEnv,
 			  isEmptyOccSet, intersectOccSet, intersectsOccSet,
 			  occNameFS, isTcOcc )
 import Module		( Module, moduleFS,
-			  ModLocation(..), mkSysModuleFS, moduleUserString,
+			  ModLocation(..), mkModuleFS, moduleString,
 			  ModuleEnv, emptyModuleEnv, lookupModuleEnv,
 			  extendModuleEnv_C
 			)
@@ -726,7 +726,7 @@ mkIfaceExports :: NameSet -> [(Module, [GenAvailInfo OccName])]
   -- Group by module and sort by occurrence
   -- This keeps the list in canonical order
 mkIfaceExports exports 
-  = [ (mkSysModuleFS fs, eltsFM avails)
+  = [ (mkModuleFS fs, eltsFM avails)
     | (fs, avails) <- fmToList groupFM
     ]
   where
@@ -768,7 +768,7 @@ checkOldIface :: HscEnv
 
 checkOldIface hsc_env mod_summary source_unchanged maybe_iface
   = do	{ showPass (hsc_dflags hsc_env) 
-	           ("Checking old interface for " ++ moduleUserString (ms_mod mod_summary)) ;
+	           ("Checking old interface for " ++ moduleString (ms_mod mod_summary)) ;
 
 	; initIfaceCheck hsc_env $
 	  check_old_iface mod_summary source_unchanged maybe_iface
diff --git a/ghc/compiler/main/DriverMkDepend.hs b/ghc/compiler/main/DriverMkDepend.hs
index fe2d8f3785..80d906c4a7 100644
--- a/ghc/compiler/main/DriverMkDepend.hs
+++ b/ghc/compiler/main/DriverMkDepend.hs
@@ -20,7 +20,7 @@ import HscTypes		( HscEnv, IsBootInterface, msObjFilePath, msHsFilePath )
 import Packages		( PackageIdH(..) )
 import SysTools		( newTempName )
 import qualified SysTools
-import Module		( Module, ModLocation(..), mkModule, moduleUserString,
+import Module		( Module, ModLocation(..), mkModule, 
 			  addBootSuffix_maybe )
 import Digraph		( SCC(..) )
 import Finder		( findModule, FindResult(..) )
diff --git a/ghc/compiler/main/DriverPipeline.hs b/ghc/compiler/main/DriverPipeline.hs
index 85099e89d5..171cecf2a6 100644
--- a/ghc/compiler/main/DriverPipeline.hs
+++ b/ghc/compiler/main/DriverPipeline.hs
@@ -1396,9 +1396,9 @@ getOptionsFromSource file
 		       | otherwise -> return []
 
 getOptionsFromStringBuffer :: StringBuffer -> FilePath -> [(Int,String)]
-getOptionsFromStringBuffer buffer@(StringBuffer _ len# _) fn = 
+getOptionsFromStringBuffer buffer@(StringBuffer _ len _) fn = 
   let 
-	ls = lines (lexemeToString buffer (I# len#))  -- lazy, so it's ok
+	ls = lines (lexemeToString buffer len)  -- lazy, so it's ok
   in
   look 1 ls
   where
diff --git a/ghc/compiler/main/Finder.lhs b/ghc/compiler/main/Finder.lhs
index 81dedb83d9..fbde40f6ea 100644
--- a/ghc/compiler/main/Finder.lhs
+++ b/ghc/compiler/main/Finder.lhs
@@ -267,7 +267,7 @@ searchPathExts paths mod exts
 	return result
 
   where
-    basename = dots_to_slashes (moduleUserString mod)
+    basename = dots_to_slashes (moduleString mod)
 
     to_search :: [(FilePath, IO FinderCacheEntry)]
     to_search = [ (file, fn path basename)
@@ -347,7 +347,7 @@ mkHomeModLocation2 :: DynFlags
 		   -> String 	-- Suffix
 		   -> IO ModLocation
 mkHomeModLocation2 dflags mod src_basename ext = do
-   let mod_basename = dots_to_slashes (moduleUserString mod)
+   let mod_basename = dots_to_slashes (moduleString mod)
 
    obj_fn  <- mkObjPath  dflags src_basename mod_basename
    hi_fn   <- mkHiPath   dflags src_basename mod_basename
@@ -420,7 +420,7 @@ mkStubPaths dflags mod location
   = let
 		stubdir = stubDir dflags
 
-		mod_basename = dots_to_slashes (moduleUserString mod)
+		mod_basename = dots_to_slashes (moduleString mod)
 		src_basename = basenameOf (expectJust "mkStubPaths" 
 						(ml_hs_file location))
 
diff --git a/ghc/compiler/main/GHC.hs b/ghc/compiler/main/GHC.hs
index 77cd9d45b0..7e0ec2ffed 100644
--- a/ghc/compiler/main/GHC.hs
+++ b/ghc/compiler/main/GHC.hs
@@ -628,7 +628,7 @@ load2 s@(Session ref) how_much mod_graph = do
 	      when (ghci_mode == BatchCompile && isJust ofile && not do_linking) $
 	        debugTraceMsg dflags 1 (text ("Warning: output was redirected with -o, " ++
 				              "but no output will be generated\n" ++
-				              "because there is no " ++ moduleUserString main_mod ++ " module."))
+				              "because there is no " ++ moduleString main_mod ++ " module."))
 
 	      -- link everything together
               linkresult <- link ghci_mode dflags do_linking (hsc_HPT hsc_env1)
diff --git a/ghc/compiler/main/HscTypes.lhs b/ghc/compiler/main/HscTypes.lhs
index 20e84ab9f9..48041c055e 100644
--- a/ghc/compiler/main/HscTypes.lhs
+++ b/ghc/compiler/main/HscTypes.lhs
@@ -996,7 +996,7 @@ showModMsg use_object mod_summary
 		    char ')'])
  where 
     mod     = ms_mod mod_summary 
-    mod_str = moduleUserString mod ++ hscSourceString (ms_hsc_src mod_summary)
+    mod_str = moduleString mod ++ hscSourceString (ms_hsc_src mod_summary)
 \end{code}
 
 
diff --git a/ghc/compiler/nativeGen/PprMach.hs b/ghc/compiler/nativeGen/PprMach.hs
index 69d6573049..4392ae737b 100644
--- a/ghc/compiler/nativeGen/PprMach.hs
+++ b/ghc/compiler/nativeGen/PprMach.hs
@@ -697,17 +697,11 @@ pprLabel :: CLabel -> Doc
 pprLabel lbl = pprGloblDecl lbl $$ (pprCLabel_asm lbl <> char ':')
 
 
--- Assume we want to backslash-convert the string
 pprASCII str
-  = vcat (map do1 (str ++ [chr 0]))
+  = vcat (map do1 str) $$ do1 0
     where
-       do1 :: Char -> Doc
-       do1 c = ptext SLIT("\t.byte\t0x") <> hshow (ord c)
-
-       hshow :: Int -> Doc
-       hshow n | n >= 0 && n <= 255
-               = char (tab !! (n `div` 16)) <> char (tab !! (n `mod` 16))
-       tab = "0123456789ABCDEF"
+       do1 :: Word8 -> Doc
+       do1 w = ptext SLIT("\t.byte\t") <> int (fromIntegral w)
 
 pprAlign bytes =
 	IF_ARCH_alpha(ptextSLIT(".align ") <> int pow2,
diff --git a/ghc/compiler/ndpFlatten/FlattenMonad.hs b/ghc/compiler/ndpFlatten/FlattenMonad.hs
index 43e804ccee..45405088fc 100644
--- a/ghc/compiler/ndpFlatten/FlattenMonad.hs
+++ b/ghc/compiler/ndpFlatten/FlattenMonad.hs
@@ -67,7 +67,6 @@ import Monad	    (mplus)
 import Panic        (panic)
 import Outputable   (Outputable(ppr), pprPanic)
 import UniqSupply   (UniqSupply, splitUniqSupply, uniqFromSupply)
-import OccName	    (UserFS)
 import Var          (Var, idType)
 import Id	    (Id, mkSysLocal)
 import Name	    (Name)
@@ -86,6 +85,7 @@ import PrimOp	    ( PrimOp(..) )
 import PrelInfo	    ( primOpId )
 import CoreSyn      (Expr(..), Bind(..), CoreBndr, CoreExpr, CoreBind, mkApps)
 import CoreUtils    (exprType)
+import FastString   (FastString)
 
 -- friends
 import NDPCoreUtils (parrElemTy)
@@ -176,7 +176,7 @@ runFlatten hsc_env eps us m
 -- generate a new local variable whose name is based on the given lexeme and
 -- whose type is as specified in the second argument (EXPORTED)
 --
-newVar           :: UserFS -> Type -> Flatten Var
+newVar           :: FastString -> Type -> Flatten Var
 newVar lexeme ty  = Flatten $ \state ->
   let
     (us1, us2) = splitUniqSupply (us state)
@@ -187,7 +187,7 @@ newVar lexeme ty  = Flatten $ \state ->
 -- generate a non-recursive binding using a new binder whose name is derived
 -- from the given lexeme (EXPORTED)
 --
-mkBind          :: UserFS -> CoreExpr -> Flatten (CoreBndr, CoreBind)
+mkBind          :: FastString -> CoreExpr -> Flatten (CoreBndr, CoreBind)
 mkBind lexeme e  =
   do
     v <- newVar lexeme (exprType e)
diff --git a/ghc/compiler/parser/Ctype.lhs b/ghc/compiler/parser/Ctype.lhs
index dfdb94a0c0..dbe4e9f1b0 100644
--- a/ghc/compiler/parser/Ctype.lhs
+++ b/ghc/compiler/parser/Ctype.lhs
@@ -9,6 +9,7 @@ module Ctype
 	, is_lower	-- Char# -> Bool
 	, is_upper	-- Char# -> Bool
 	, is_digit	-- Char# -> Bool
+	, is_alphanum   -- Char# -> Bool
 
 	, is_hexdigit, is_octdigit
 	, hexDigit, octDecDigit
@@ -50,6 +51,7 @@ is_space  = is_ctype cSpace
 is_lower  = is_ctype cLower
 is_upper  = is_ctype cUpper
 is_digit  = is_ctype cDigit
+is_alphanum = is_ctype (cLower+cUpper+cDigit)
 \end{code}
 
 Utils
@@ -241,99 +243,99 @@ charType c = case c of
    '\158' -> 0                         -- \236
    '\159' -> 0                         -- \237
    '\160' -> cSpace                    --
-   '\161' -> cAny + cSymbol            -- �
-   '\162' -> cAny + cSymbol            -- �
-   '\163' -> cAny + cSymbol            -- �
-   '\164' -> cAny + cSymbol            -- �
-   '\165' -> cAny + cSymbol            -- �
-   '\166' -> cAny + cSymbol            -- �
-   '\167' -> cAny + cSymbol            -- �
-   '\168' -> cAny + cSymbol            -- �
-   '\169' -> cAny + cSymbol            -- �
-   '\170' -> cAny + cSymbol            -- �
-   '\171' -> cAny + cSymbol            -- �
-   '\172' -> cAny + cSymbol            -- �
-   '\173' -> cAny + cSymbol            -- �
-   '\174' -> cAny + cSymbol            -- �
-   '\175' -> cAny + cSymbol            -- �
-   '\176' -> cAny + cSymbol            -- �
-   '\177' -> cAny + cSymbol            -- �
-   '\178' -> cAny + cSymbol            -- �
-   '\179' -> cAny + cSymbol            -- �
-   '\180' -> cAny + cSymbol            -- �
-   '\181' -> cAny + cSymbol            -- �
-   '\182' -> cAny + cSymbol            -- �
-   '\183' -> cAny + cSymbol            -- �
-   '\184' -> cAny + cSymbol            -- �
-   '\185' -> cAny + cSymbol            -- �
-   '\186' -> cAny + cSymbol            -- �
-   '\187' -> cAny + cSymbol            -- �
-   '\188' -> cAny + cSymbol            -- �
-   '\189' -> cAny + cSymbol            -- �
-   '\190' -> cAny + cSymbol            -- �
-   '\191' -> cAny + cSymbol            -- �
-   '\192' -> cAny + cIdent  + cUpper   -- �
-   '\193' -> cAny + cIdent  + cUpper   -- �
-   '\194' -> cAny + cIdent  + cUpper   -- �
-   '\195' -> cAny + cIdent  + cUpper   -- �
-   '\196' -> cAny + cIdent  + cUpper   -- �
-   '\197' -> cAny + cIdent  + cUpper   -- �
-   '\198' -> cAny + cIdent  + cUpper   -- �
-   '\199' -> cAny + cIdent  + cUpper   -- �
-   '\200' -> cAny + cIdent  + cUpper   -- �
-   '\201' -> cAny + cIdent  + cUpper   -- �
-   '\202' -> cAny + cIdent  + cUpper   -- �
-   '\203' -> cAny + cIdent  + cUpper   -- �
-   '\204' -> cAny + cIdent  + cUpper   -- �
-   '\205' -> cAny + cIdent  + cUpper   -- �
-   '\206' -> cAny + cIdent  + cUpper   -- �
-   '\207' -> cAny + cIdent  + cUpper   -- �
-   '\208' -> cAny + cIdent  + cUpper   -- �
-   '\209' -> cAny + cIdent  + cUpper   -- �
-   '\210' -> cAny + cIdent  + cUpper   -- �
-   '\211' -> cAny + cIdent  + cUpper   -- �
-   '\212' -> cAny + cIdent  + cUpper   -- �
-   '\213' -> cAny + cIdent  + cUpper   -- �
-   '\214' -> cAny + cIdent  + cUpper   -- �
-   '\215' -> cAny + cSymbol + cLower   -- �
-   '\216' -> cAny + cIdent  + cUpper   -- �
-   '\217' -> cAny + cIdent  + cUpper   -- �
-   '\218' -> cAny + cIdent  + cUpper   -- �
-   '\219' -> cAny + cIdent  + cUpper   -- �
-   '\220' -> cAny + cIdent  + cUpper   -- �
-   '\221' -> cAny + cIdent  + cUpper   -- �
-   '\222' -> cAny + cIdent  + cUpper   -- �
-   '\223' -> cAny + cIdent             -- �
-   '\224' -> cAny + cIdent  + cLower   -- �
-   '\225' -> cAny + cIdent  + cLower   -- �
-   '\226' -> cAny + cIdent  + cLower   -- �
-   '\227' -> cAny + cIdent  + cLower   -- �
-   '\228' -> cAny + cIdent  + cLower   -- �
-   '\229' -> cAny + cIdent  + cLower   -- �
-   '\230' -> cAny + cIdent  + cLower   -- �
-   '\231' -> cAny + cIdent  + cLower   -- �
-   '\232' -> cAny + cIdent  + cLower   -- �
-   '\233' -> cAny + cIdent  + cLower   -- �
-   '\234' -> cAny + cIdent  + cLower   -- �
-   '\235' -> cAny + cIdent  + cLower   -- �
-   '\236' -> cAny + cIdent  + cLower   -- �
-   '\237' -> cAny + cIdent  + cLower   -- �
-   '\238' -> cAny + cIdent  + cLower   -- �
-   '\239' -> cAny + cIdent  + cLower   -- �
-   '\240' -> cAny + cIdent  + cLower   -- �
-   '\241' -> cAny + cIdent  + cLower   -- �
-   '\242' -> cAny + cIdent  + cLower   -- �
-   '\243' -> cAny + cIdent  + cLower   -- �
-   '\244' -> cAny + cIdent  + cLower   -- �
-   '\245' -> cAny + cIdent  + cLower   -- �
-   '\246' -> cAny + cIdent  + cLower   -- �
-   '\247' -> cAny + cSymbol            -- �
-   '\248' -> cAny + cIdent             -- �
-   '\249' -> cAny + cIdent  + cLower   -- �
-   '\250' -> cAny + cIdent  + cLower   -- �
-   '\251' -> cAny + cIdent  + cLower   -- �
-   '\252' -> cAny + cIdent  + cLower   -- �
-   '\253' -> cAny + cIdent  + cLower   -- �
-   '\254' -> cAny + cIdent  + cLower   -- �
-   '\255' -> cAny + cIdent  + cLower   -- �
+   '\161' -> cAny + cSymbol            -- ¡
+   '\162' -> cAny + cSymbol            -- ¢
+   '\163' -> cAny + cSymbol            -- £
+   '\164' -> cAny + cSymbol            -- ¤
+   '\165' -> cAny + cSymbol            -- ¥
+   '\166' -> cAny + cSymbol            -- ¦
+   '\167' -> cAny + cSymbol            -- §
+   '\168' -> cAny + cSymbol            -- ¨
+   '\169' -> cAny + cSymbol            -- ©
+   '\170' -> cAny + cSymbol            -- ª
+   '\171' -> cAny + cSymbol            -- «
+   '\172' -> cAny + cSymbol            -- ¬
+   '\173' -> cAny + cSymbol            -- 
+   '\174' -> cAny + cSymbol            -- ®
+   '\175' -> cAny + cSymbol            -- ¯
+   '\176' -> cAny + cSymbol            -- °
+   '\177' -> cAny + cSymbol            -- ±
+   '\178' -> cAny + cSymbol            -- ²
+   '\179' -> cAny + cSymbol            -- ³
+   '\180' -> cAny + cSymbol            -- ´
+   '\181' -> cAny + cSymbol            -- µ
+   '\182' -> cAny + cSymbol            -- ¶
+   '\183' -> cAny + cSymbol            -- ·
+   '\184' -> cAny + cSymbol            -- ¸
+   '\185' -> cAny + cSymbol            -- ¹
+   '\186' -> cAny + cSymbol            -- º
+   '\187' -> cAny + cSymbol            -- »
+   '\188' -> cAny + cSymbol            -- ¼
+   '\189' -> cAny + cSymbol            -- ½
+   '\190' -> cAny + cSymbol            -- ¾
+   '\191' -> cAny + cSymbol            -- ¿
+   '\192' -> cAny + cIdent  + cUpper   -- À
+   '\193' -> cAny + cIdent  + cUpper   -- Á
+   '\194' -> cAny + cIdent  + cUpper   -- Â
+   '\195' -> cAny + cIdent  + cUpper   -- Ã
+   '\196' -> cAny + cIdent  + cUpper   -- Ä
+   '\197' -> cAny + cIdent  + cUpper   -- Å
+   '\198' -> cAny + cIdent  + cUpper   -- Æ
+   '\199' -> cAny + cIdent  + cUpper   -- Ç
+   '\200' -> cAny + cIdent  + cUpper   -- È
+   '\201' -> cAny + cIdent  + cUpper   -- É
+   '\202' -> cAny + cIdent  + cUpper   -- Ê
+   '\203' -> cAny + cIdent  + cUpper   -- Ë
+   '\204' -> cAny + cIdent  + cUpper   -- Ì
+   '\205' -> cAny + cIdent  + cUpper   -- Í
+   '\206' -> cAny + cIdent  + cUpper   -- Î
+   '\207' -> cAny + cIdent  + cUpper   -- Ï
+   '\208' -> cAny + cIdent  + cUpper   -- Ð
+   '\209' -> cAny + cIdent  + cUpper   -- Ñ
+   '\210' -> cAny + cIdent  + cUpper   -- Ò
+   '\211' -> cAny + cIdent  + cUpper   -- Ó
+   '\212' -> cAny + cIdent  + cUpper   -- Ô
+   '\213' -> cAny + cIdent  + cUpper   -- Õ
+   '\214' -> cAny + cIdent  + cUpper   -- Ö
+   '\215' -> cAny + cSymbol + cLower   -- ×
+   '\216' -> cAny + cIdent  + cUpper   -- Ø
+   '\217' -> cAny + cIdent  + cUpper   -- Ù
+   '\218' -> cAny + cIdent  + cUpper   -- Ú
+   '\219' -> cAny + cIdent  + cUpper   -- Û
+   '\220' -> cAny + cIdent  + cUpper   -- Ü
+   '\221' -> cAny + cIdent  + cUpper   -- Ý
+   '\222' -> cAny + cIdent  + cUpper   -- Þ
+   '\223' -> cAny + cIdent             -- ß
+   '\224' -> cAny + cIdent  + cLower   -- à
+   '\225' -> cAny + cIdent  + cLower   -- á
+   '\226' -> cAny + cIdent  + cLower   -- â
+   '\227' -> cAny + cIdent  + cLower   -- ã
+   '\228' -> cAny + cIdent  + cLower   -- ä
+   '\229' -> cAny + cIdent  + cLower   -- å
+   '\230' -> cAny + cIdent  + cLower   -- æ
+   '\231' -> cAny + cIdent  + cLower   -- ç
+   '\232' -> cAny + cIdent  + cLower   -- è
+   '\233' -> cAny + cIdent  + cLower   -- é
+   '\234' -> cAny + cIdent  + cLower   -- ê
+   '\235' -> cAny + cIdent  + cLower   -- ë
+   '\236' -> cAny + cIdent  + cLower   -- ì
+   '\237' -> cAny + cIdent  + cLower   -- í
+   '\238' -> cAny + cIdent  + cLower   -- î
+   '\239' -> cAny + cIdent  + cLower   -- ï
+   '\240' -> cAny + cIdent  + cLower   -- ð
+   '\241' -> cAny + cIdent  + cLower   -- ñ
+   '\242' -> cAny + cIdent  + cLower   -- ò
+   '\243' -> cAny + cIdent  + cLower   -- ó
+   '\244' -> cAny + cIdent  + cLower   -- ô
+   '\245' -> cAny + cIdent  + cLower   -- õ
+   '\246' -> cAny + cIdent  + cLower   -- ö
+   '\247' -> cAny + cSymbol            -- ÷
+   '\248' -> cAny + cIdent             -- ø
+   '\249' -> cAny + cIdent  + cLower   -- ù
+   '\250' -> cAny + cIdent  + cLower   -- ú
+   '\251' -> cAny + cIdent  + cLower   -- û
+   '\252' -> cAny + cIdent  + cLower   -- ü
+   '\253' -> cAny + cIdent  + cLower   -- ý
+   '\254' -> cAny + cIdent  + cLower   -- þ
+   '\255' -> cAny + cIdent  + cLower   -- ÿ
 \end{code}
diff --git a/ghc/compiler/parser/Lexer.x b/ghc/compiler/parser/Lexer.x
index eb00e90613..38908a0a85 100644
--- a/ghc/compiler/parser/Lexer.x
+++ b/ghc/compiler/parser/Lexer.x
@@ -1,5 +1,5 @@
 -----------------------------------------------------------------------------
--- (c) The University of Glasgow, 2003
+-- (c) The University of Glasgow, 2006
 --
 -- GHC's lexer.
 --
@@ -43,35 +43,38 @@ import Ctype
 import Util		( maybePrefixMatch, readRational )
 
 import DATA_BITS
-import Char
+import Data.Char
 import Ratio
 --import TRACE
 }
 
-$whitechar   = [\ \t\n\r\f\v\xa0]
+$unispace    = \x05
+$whitechar   = [\ \t\n\r\f\v\xa0 $unispace]
 $white_no_nl = $whitechar # \n
 
 $ascdigit  = 0-9
-$unidigit  = \x01
+$unidigit  = \x03
+$decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
 $digit     = [$ascdigit $unidigit]
 
 $special   = [\(\)\,\;\[\]\`\{\}]
 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
-$unisymbol = \x02
+$unisymbol = \x04
 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
 
-$unilarge  = \x03
+$unilarge  = \x01
 $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
 $large     = [$asclarge $unilarge]
 
-$unismall  = \x04
+$unismall  = \x02
 $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
 $small     = [$ascsmall $unismall \_]
 
-$graphic   = [$small $large $symbol $digit $special \:\"\']
+$unigraphic = \x06
+$graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
 
 $octit	   = 0-7
-$hexit     = [$digit A-F a-f]
+$hexit     = [$decdigit A-F a-f]
 $symchar   = [$symbol \:]
 $nl        = [\n\r]
 $idchar    = [$small $large $digit \']
@@ -82,7 +85,7 @@ $idchar    = [$small $large $digit \']
 @varsym    = $symbol $symchar*
 @consym    = \: $symchar*
 
-@decimal     = $digit+
+@decimal     = $decdigit+
 @octal       = $octit+
 @hexadecimal = $hexit+
 @exponent    = [eE] [\-\+]? @decimal
@@ -154,13 +157,13 @@ $white_no_nl+ 				;
 
 -- single-line line pragmas, of the form
 --    # <line> "<file>" <extra-stuff> \n
-<line_prag1> $digit+			{ setLine line_prag1a }
+<line_prag1> $decdigit+			{ setLine line_prag1a }
 <line_prag1a> \" [$graphic \ ]* \"	{ setFile line_prag1b }
 <line_prag1b> .*			{ pop }
 
 -- Haskell-style line pragmas, of the form
 --    {-# LINE <line> "<file>" #-}
-<line_prag2> $digit+			{ setLine line_prag2a }
+<line_prag2> $decdigit+			{ setLine line_prag2a }
 <line_prag2a> \" [$graphic \ ]* \"	{ setFile line_prag2b }
 <line_prag2b> "#-}"|"-}"		{ pop }
    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
@@ -554,6 +557,13 @@ reservedSymsFM = listToUFM $
        ,(">-",	ITrarrowtail,	bit arrowsBit)
        ,("-<<",	ITLarrowtail,	bit arrowsBit)
        ,(">>-",	ITRarrowtail,	bit arrowsBit)
+
+#if __GLASGOW_HASKELL__ >= 605
+       ,("∀",	ITforall,	bit tvBit)
+       ,("→",   ITrarrow,	0)
+       ,("←",   ITlarrow,	0)
+       ,("⋯", 	ITdotdot,	0)
+#endif
        ]
 
 -- -----------------------------------------------------------------------------
@@ -670,23 +680,29 @@ splitQualName :: StringBuffer -> Int -> (FastString,FastString)
 -- takes a StringBuffer and a length, and returns the module name
 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 -- because of hierarchical module names.
-splitQualName orig_buf len = split orig_buf 0 0
+splitQualName orig_buf len = split orig_buf orig_buf
   where
-    split buf dot_off n
-	| n == len		  = done dot_off
-	| lookAhead buf n == '.'  = split2 buf n (n+1)
-	| otherwise 		  = split buf dot_off (n+1)	
+    split buf dot_buf
+	| orig_buf `byteDiff` buf >= len  = done dot_buf
+	| c == '.'                	  = found_dot buf'
+	| otherwise               	  = split buf' dot_buf
+      where
+       (c,buf') = nextChar buf
   
     -- careful, we might get names like M....
     -- so, if the character after the dot is not upper-case, this is
     -- the end of the qualifier part.
-    split2 buf dot_off n
-	| isUpper (lookAhead buf n) = split buf dot_off (n+1)
-	| otherwise 		    = done dot_off
-
-    done dot_off =
-	(lexemeToFastString orig_buf dot_off, 
-	 lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
+    found_dot buf -- buf points after the '.'
+	| isUpper c    = split buf' buf
+	| otherwise    = done buf
+      where
+       (c,buf') = nextChar buf
+
+    done dot_buf =
+	(lexemeToFastString orig_buf (qual_size - 1),
+	 lexemeToFastString dot_buf (len - qual_size))
+      where
+	qual_size = orig_buf `byteDiff` dot_buf
 
 varid span buf len = 
   case lookupUFM reservedWordsFM fs of
@@ -726,19 +742,19 @@ tok_decimal span buf len
   = return (L span (ITinteger  $! parseInteger buf len 10 octDecDigit))
 
 tok_octal span buf len 
-  = return (L span (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 8 octDecDigit))
+  = return (L span (ITinteger  $! parseInteger (offsetBytes 2 buf) (len-2) 8 octDecDigit))
 
 tok_hexadecimal span buf len 
-  = return (L span (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hexDigit))
+  = return (L span (ITinteger  $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
 
 prim_decimal span buf len 
   = return (L span (ITprimint  $! parseInteger buf (len-1) 10 octDecDigit))
 
 prim_octal span buf len 
-  = return (L span (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 8 octDecDigit))
+  = return (L span (ITprimint  $! parseInteger (offsetBytes 2 buf) (len-3) 8 octDecDigit))
 
 prim_hexadecimal span buf len 
-  = return (L span (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 16 hexDigit))
+  = return (L span (ITprimint  $! parseInteger (offsetBytes 2 buf) (len-3) 16 hexDigit))
 
 tok_float        str = ITrational   $! readRational str
 prim_float       str = ITprimfloat  $! readRational str
@@ -839,7 +855,7 @@ lex_string_tok span buf len = do
 lex_string :: String -> P Token
 lex_string s = do
   i <- getInput
-  case alexGetChar i of
+  case alexGetChar' i of
     Nothing -> lit_error
 
     Just ('"',i)  -> do
@@ -848,14 +864,15 @@ lex_string s = do
 	if glaexts
 	  then do
 	    i <- getInput
-	    case alexGetChar i of
+	    case alexGetChar' i of
 	      Just ('#',i) -> do
 		   setInput i
 		   if any (> '\xFF') s
                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
-                    else let s' = mkFastStringNarrow (reverse s) in
-			 -- always a narrow string/byte array
+                    else let s' = mkZFastString (reverse s) in
 			 return (ITprimstring s')
+			-- mkZFastString is a hack to avoid encoding the
+			-- string in UTF-8.  We just want the exact bytes.
 	      _other ->
 		return (ITstring (mkFastString (reverse s)))
 	  else
@@ -866,11 +883,11 @@ lex_string s = do
 		setInput i; lex_string s
 	| Just (c,i) <- next, is_space c -> do 
 		setInput i; lex_stringgap s
-	where next = alexGetChar i
+	where next = alexGetChar' i
 
-    Just _ -> do
-	c <- lex_char
-	lex_string (c:s)
+    Just (c, i) -> do
+	c' <- lex_char c i
+	lex_string (c':s)
 
 lex_stringgap s = do
   c <- getCharOrFail
@@ -890,7 +907,7 @@ lex_char_tok :: Action
 lex_char_tok span buf len = do	-- We've seen '
    i1 <- getInput	-- Look ahead to first character
    let loc = srcSpanStart span
-   case alexGetChar i1 of
+   case alexGetChar' i1 of
 	Nothing -> lit_error 
 
 	Just ('\'', i2@(AI end2 _ _)) -> do 	-- We've seen ''
@@ -905,14 +922,15 @@ lex_char_tok span buf len = do	-- We've seen '
 		  lit_ch <- lex_escape
 		  mc <- getCharOrFail	-- Trailing quote
 		  if mc == '\'' then finish_char_tok loc lit_ch
-			        else lit_error 
+			        else do setInput i2; lit_error 
 
-        Just (c, i2@(AI end2 _ _)) | not (is_any c) -> lit_error
-			      | otherwise      ->
+        Just (c, i2@(AI end2 _ _)) 
+		| not (isAny c) -> lit_error
+		| otherwise ->
 
 		-- We've seen 'x, where x is a valid character
 		--  (i.e. not newline etc) but not a quote or backslash
-	   case alexGetChar i2 of	-- Look ahead one more character
+	   case alexGetChar' i2 of	-- Look ahead one more character
 		Nothing -> lit_error
 		Just ('\'', i3) -> do 	-- We've seen 'x'
 			setInput i3 
@@ -922,7 +940,7 @@ lex_char_tok span buf len = do	-- We've seen '
 			th_exts <- extension thEnabled	
 			let (AI end _ _) = i1
 			if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
-				   else lit_error
+				   else do setInput i2; lit_error
 
 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
 finish_char_tok loc ch	-- We've already seen the closing quote
@@ -930,7 +948,7 @@ finish_char_tok loc ch	-- We've already seen the closing quote
   = do	glaexts <- extension glaExtsEnabled
 	i@(AI end _ _) <- getInput
 	if glaexts then do
-		case alexGetChar i of
+		case alexGetChar' i of
 			Just ('#',i@(AI end _ _)) -> do
 				setInput i
 				return (L (mkSrcSpan loc end) (ITprimchar ch))
@@ -939,14 +957,16 @@ finish_char_tok loc ch	-- We've already seen the closing quote
 	        else do
 		   return (L (mkSrcSpan loc end) (ITchar ch))
 
-lex_char :: P Char
-lex_char = do
-  mc <- getCharOrFail
-  case mc of
-      '\\' -> lex_escape
-      c | is_any c -> return c
+lex_char :: Char -> AlexInput -> P Char
+lex_char c inp = do
+  case c of
+      '\\' -> do setInput inp; lex_escape
+      c | isAny c -> do setInput inp; return c
       _other -> lit_error
 
+isAny c | c > '\xff' = isPrint c
+	| otherwise  = is_any c
+
 lex_escape :: P Char
 lex_escape = do
   c <- getCharOrFail
@@ -972,11 +992,11 @@ lex_escape = do
 
 	c1 ->  do
 	   i <- getInput
-	   case alexGetChar i of
+	   case alexGetChar' i of
 	    Nothing -> lit_error
 	    Just (c2,i2) -> 
-              case alexGetChar i2 of
-		Nothing	-> lit_error
+              case alexGetChar' i2 of
+		Nothing	-> do setInput i2; lit_error
 		Just (c3,i3) -> 
 		   let str = [c1,c2,c3] in
 		   case [ (c,rest) | (p,c) <- silly_escape_chars,
@@ -991,22 +1011,22 @@ lex_escape = do
 
 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
 readNum is_digit base conv = do
+  i <- getInput
   c <- getCharOrFail
   if is_digit c 
 	then readNum2 is_digit base conv (conv c)
-	else lit_error
+	else do setInput i; lit_error
 
 readNum2 is_digit base conv i = do
   input <- getInput
   read i input
   where read i input = do
-	  case alexGetChar input of
+	  case alexGetChar' input of
 	    Just (c,input') | is_digit c -> do
 		read (i*base + conv c) input'
 	    _other -> do
-		setInput input
 		if i >= 0 && i <= 0x10FFFF
-		   then return (chr i)
+		   then do setInput input; return (chr i)
 		   else lit_error
 
 silly_escape_chars = [
@@ -1046,12 +1066,16 @@ silly_escape_chars = [
 	("DEL", '\DEL')
 	]
 
+-- before calling lit_error, ensure that the current input is pointing to
+-- the position of the error in the buffer.  This is so that we can report
+-- a correct location to the user, but also so we can detect UTF-8 decoding
+-- errors if they occur.
 lit_error = lexError "lexical error in string/character literal"
 
 getCharOrFail :: P Char
 getCharOrFail =  do
   i <- getInput
-  case alexGetChar i of
+  case alexGetChar' i of
 	Nothing -> lexError "unexpected end-of-file in string/character literal"
 	Just (c,i)  -> do setInput i; return c
 
@@ -1134,21 +1158,74 @@ setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
 data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer
 
 alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (AI _ _ s) = prevChar s '\n'
+alexInputPrevChar (AI _ _ buf) = prevChar buf '\n'
 
 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
 alexGetChar (AI loc ofs s) 
   | atEnd s   = Nothing
-  | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` Just (c, (AI loc' ofs' s'))
-  where c    = currentChar s
-        loc' = advanceSrcLoc loc c
-        ofs' = advanceOffs c ofs
-	s'   = stepOn s
+  | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq` 
+		Just (adj_c, (AI loc' ofs' s'))
+  where (c,s') = nextChar s
+        loc'   = advanceSrcLoc loc c
+        ofs'   = advanceOffs c ofs
+
+	non_graphic     = '\x0'
+	upper 		= '\x1'
+	lower		= '\x2'
+	digit		= '\x3'
+	symbol		= '\x4'
+	space		= '\x5'
+	other_graphic	= '\x6'
+
+	adj_c 
+#if __GLASGOW_HASKELL__ < 605
+	  = c  -- no Unicode support
+#else
+	  | c <= '\x04' = non_graphic
+	  | c <= '\xff' = c
+	  | otherwise = 
+		case generalCategory c of
+		  UppercaseLetter       -> upper
+		  LowercaseLetter       -> lower
+		  TitlecaseLetter       -> upper
+		  ModifierLetter        -> other_graphic
+		  OtherLetter           -> other_graphic
+		  NonSpacingMark        -> other_graphic
+		  SpacingCombiningMark  -> other_graphic
+		  EnclosingMark         -> other_graphic
+		  DecimalNumber         -> digit
+		  LetterNumber          -> other_graphic
+		  OtherNumber           -> other_graphic
+		  ConnectorPunctuation  -> other_graphic
+		  DashPunctuation       -> other_graphic
+		  OpenPunctuation       -> other_graphic
+		  ClosePunctuation      -> other_graphic
+		  InitialQuote          -> other_graphic
+		  FinalQuote            -> other_graphic
+		  OtherPunctuation      -> other_graphic
+		  MathSymbol            -> symbol
+		  CurrencySymbol        -> symbol
+		  ModifierSymbol        -> symbol
+		  OtherSymbol           -> symbol
+		  Space                 -> space
+		  _other		-> non_graphic
+#endif
+
+-- This version does not squash unicode characters, it is used when
+-- lexing strings.
+alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
+alexGetChar' (AI loc ofs s) 
+  | atEnd s   = Nothing
+  | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` 
+		Just (c, (AI loc' ofs' s'))
+  where (c,s') = nextChar s
+        loc'   = advanceSrcLoc loc c
+        ofs'   = advanceOffs c ofs
 
-	advanceOffs :: Char -> Int -> Int
-	advanceOffs '\n' offs = 0
-	advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
-	advanceOffs _    offs = offs + 1
+advanceOffs :: Char -> Int -> Int
+advanceOffs '\n' offs = 0
+advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
+advanceOffs _    offs = offs + 1
 
 getInput :: P AlexInput
 getInput = P $ \s@PState{ loc=l, last_offs=o, buffer=b } -> POk s (AI l o b)
@@ -1255,7 +1332,7 @@ srcParseErr buf len
 	     else hcat [ptext SLIT("parse error on input "),
           	  	char '`', text token, char '\'']
     ]
-  where token = lexemeToString (stepOnBy (-len) buf) len
+  where token = lexemeToString (offsetBytes (-len) buf) len
 
 -- Report a parse failure, giving the span of the previous token as
 -- the location of the error.  This is the entry point for errors
@@ -1266,14 +1343,12 @@ srcParseFail = P $ \PState{ buffer = buf, last_len = len,
     PFailed last_loc (srcParseErr buf len)
 
 -- A lexical error is reported at a particular position in the source file,
--- not over a token range.  TODO: this is slightly wrong, because we record
--- the error at the character position following the one which caused the
--- error.  We should somehow back up by one character.
+-- not over a token range.
 lexError :: String -> P a
 lexError str = do
   loc <- getSrcLoc
-  i@(AI end _ _) <- getInput
-  failLocMsgP loc end str
+  i@(AI end _ buf) <- getInput
+  reportLexError loc end buf False str
 
 -- -----------------------------------------------------------------------------
 -- This is the top-level function: called from the parser each time a
@@ -1282,7 +1357,7 @@ lexError str = do
 lexer :: (Located Token -> P a) -> P a
 lexer cont = do
   tok@(L _ tok__) <- lexToken
-  -- trace ("token: " ++ show tok__) $ do
+  --trace ("token: " ++ show tok__) $ do
   cont tok
 
 lexToken :: P (Located Token)
@@ -1294,13 +1369,24 @@ lexToken = do
     AlexEOF -> do let span = mkSrcSpan loc1 loc1
 		  setLastToken span 0
 		  return (L span ITeof)
-    AlexError (AI loc2 _ _) -> do failLocMsgP loc1 loc2 "lexical error"
+    AlexError (AI loc2 _ buf) -> do 
+	reportLexError loc1 loc2 buf True "lexical error"
     AlexSkip inp2 _ -> do
 	setInput inp2
 	lexToken
     AlexToken inp2@(AI end _ buf2) len t -> do
 	setInput inp2
 	let span = mkSrcSpan loc1 end
-	span `seq` setLastToken span len
-	t span buf len
+	let bytes = byteDiff buf buf2
+	span `seq` setLastToken span bytes
+	t span buf bytes
+
+reportLexError loc1 loc2 buf is_prev str = 
+  let 
+	c | is_prev   = prevChar buf '\0'
+	  | otherwise = fst (nextChar buf)
+  in
+  if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
+    then failLocMsgP loc2 loc2 "UTF-8 decoding error"
+    else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
 }
diff --git a/ghc/compiler/parser/Parser.y.pp b/ghc/compiler/parser/Parser.y.pp
index 844cc8670e..b4acb890eb 100644
--- a/ghc/compiler/parser/Parser.y.pp
+++ b/ghc/compiler/parser/Parser.y.pp
@@ -25,7 +25,7 @@ import Type		( funTyCon )
 import ForeignCall	( Safety(..), CExportSpec(..), CLabelString,
 			  CCallConv(..), CCallTarget(..), defaultCCallConv
 			)
-import OccName		( UserFS, varName, dataName, tcClsName, tvName )
+import OccName		( varName, dataName, tcClsName, tvName )
 import DataCon		( DataCon, dataConName )
 import SrcLoc		( Located(..), unLoc, getLoc, noLoc, combineSrcSpans,
 			  SrcSpan, combineLocs, srcLocFile, 
@@ -1469,7 +1469,7 @@ varsym_no_minus :: { Located RdrName } -- varsym not including '-'
 -- These special_ids are treated as keywords in various places, 
 -- but as ordinary ids elsewhere.   'special_id' collects all these
 -- except 'unsafe' and 'forall' whose treatment differs depending on context
-special_id :: { Located UserFS }
+special_id :: { Located FastString }
 special_id
 	: 'as'			{ L1 FSLIT("as") }
 	| 'qualified'		{ L1 FSLIT("qualified") }
@@ -1480,7 +1480,7 @@ special_id
 	| 'stdcall'             { L1 FSLIT("stdcall") }
 	| 'ccall'               { L1 FSLIT("ccall") }
 
-special_sym :: { Located UserFS }
+special_sym :: { Located FastString }
 special_sym : '!'	{ L1 FSLIT("!") }
 	    | '.' 	{ L1 FSLIT(".") }
  	    | '*' 	{ L1 FSLIT("*") }
diff --git a/ghc/compiler/parser/ParserCore.y b/ghc/compiler/parser/ParserCore.y
index d8fceebf6a..3210583f96 100644
--- a/ghc/compiler/parser/ParserCore.y
+++ b/ghc/compiler/parser/ParserCore.y
@@ -72,7 +72,7 @@ module	:: { HsExtCore RdrName }
          : '%module' modid tdefs vdefgs { HsExtCore $2 $3 $4 }
 
 modid	:: { Module }
-	: CNAME	                 { mkSysModuleFS (mkFastString $1) }
+	: CNAME	                 { mkModuleFS (mkFastString $1) }
 
 -------------------------------------------------------------
 --     Type and newtype declarations are in HsSyn syntax
@@ -262,25 +262,25 @@ lit	:: { Literal }
 	| '(' STRING '::' aty ')'	{ MachStr (mkFastString $2) }
 
 tv_occ	:: { OccName }
-	: NAME	{ mkSysOcc tvName $1 }
+	: NAME	{ mkOccName tvName $1 }
 
 var_occ	:: { OccName }
-	: NAME	{ mkSysOcc varName $1 }
+	: NAME	{ mkVarOcc $1 }
 
 
 -- Type constructor
 q_tc_name	:: { IfaceExtName }
-        : modid '.' CNAME	{ ExtPkg $1 (mkSysOcc tcName $3) }
+        : modid '.' CNAME	{ ExtPkg $1 (mkOccName tcName $3) }
 
 -- Data constructor in a pattern or data type declaration; use the dataName, 
 -- because that's what we expect in Core case patterns
 d_pat_occ :: { OccName }
-        : CNAME      { mkSysOcc dataName $1 }
+        : CNAME      { mkOccName dataName $1 }
 
 -- Data constructor occurrence in an expression;
 -- use the varName because that's the worker Id
 d_occ :: { OccName }
-       : CNAME { mkSysOcc varName $1 }
+       : CNAME { mkVarOcc $1 }
 
 {
 
diff --git a/ghc/compiler/parser/RdrHsSyn.lhs b/ghc/compiler/parser/RdrHsSyn.lhs
index 2d18d6d5df..6ff15e772b 100644
--- a/ghc/compiler/parser/RdrHsSyn.lhs
+++ b/ghc/compiler/parser/RdrHsSyn.lhs
@@ -59,7 +59,7 @@ import TysWiredIn	( unitTyCon )
 import ForeignCall	( CCallConv, Safety, CCallTarget(..), CExportSpec(..),
 			  DNCallSpec(..), DNKind(..), CLabelString )
 import OccName  	( srcDataName, varName, isDataOcc, isTcOcc, 
-			  occNameUserString )
+			  occNameString )
 import SrcLoc
 import OrdList		( OrdList, fromOL )
 import Bag		( Bag, emptyBag, snocBag, consBag, foldrBag )
@@ -800,8 +800,8 @@ mkExport :: CallConv
 mkExport (CCall  cconv) (L loc entity, v, ty) = return $ 
   ForD (ForeignExport v ty (CExport (CExportStatic entity' cconv)) False)
   where
-    entity' | nullFastString entity = mkExtName (unLoc v)
-	    | otherwise		    = entity
+    entity' | nullFS entity = mkExtName (unLoc v)
+	    | otherwise     = entity
 mkExport DNCall (L loc entity, v, ty) =
   parseError (getLoc v){-TODO: not quite right-}
 	"Foreign export is not yet supported for .NET"
@@ -811,10 +811,9 @@ mkExport DNCall (L loc entity, v, ty) =
 -- of the Haskell name is then performed, so if you foreign export (++),
 -- it's external name will be "++". Too bad; it's important because we don't
 -- want z-encoding (e.g. names with z's in them shouldn't be doubled)
--- (This is why we use occNameUserString.)
 --
 mkExtName :: RdrName -> CLabelString
-mkExtName rdrNm = mkFastString (occNameUserString (rdrNameOcc rdrNm))
+mkExtName rdrNm = mkFastString (occNameString (rdrNameOcc rdrNm))
 \end{code}
 
 
diff --git a/ghc/compiler/prelude/PrelNames.lhs b/ghc/compiler/prelude/PrelNames.lhs
index 0d99121719..eb26d3404a 100644
--- a/ghc/compiler/prelude/PrelNames.lhs
+++ b/ghc/compiler/prelude/PrelNames.lhs
@@ -50,9 +50,8 @@ module PrelNames (
 #include "HsVersions.h"
 
 import Module	  ( Module, mkModule )
-import OccName	  ( dataName, tcName, clsName, varName, mkOccFS
-		  )
-		  
+import OccName	  ( dataName, tcName, clsName, varName, mkOccNameFS,
+		    mkVarOccFS )
 import RdrName	  ( RdrName, nameRdrName, mkOrig, rdrNameOcc, mkUnqual )
 import Unique	  ( Unique, Uniquable(..), hasKey,
 		    mkPreludeMiscIdUnique, mkPreludeDataConUnique,
@@ -75,7 +74,7 @@ import FastString
 This *local* name is used by the interactive stuff
 
 \begin{code}
-itName uniq = mkInternalName uniq (mkOccFS varName FSLIT("it")) noSrcLoc
+itName uniq = mkInternalName uniq (mkOccNameFS varName FSLIT("it")) noSrcLoc
 \end{code}
 
 \begin{code}
@@ -415,10 +414,10 @@ inrDataCon_RDR     = dataQual_RDR pREL_BASE FSLIT("Inr")
 genUnitDataCon_RDR = dataQual_RDR pREL_BASE FSLIT("Unit")
 
 ----------------------
-varQual_RDR  mod str = mkOrig mod (mkOccFS varName str)
-tcQual_RDR   mod str = mkOrig mod (mkOccFS tcName str)
-clsQual_RDR  mod str = mkOrig mod (mkOccFS clsName str)
-dataQual_RDR mod str = mkOrig mod (mkOccFS dataName str)
+varQual_RDR  mod str = mkOrig mod (mkOccNameFS varName str)
+tcQual_RDR   mod str = mkOrig mod (mkOccNameFS tcName str)
+clsQual_RDR  mod str = mkOrig mod (mkOccNameFS clsName str)
+dataQual_RDR mod str = mkOrig mod (mkOccNameFS dataName str)
 \end{code}
 
 %************************************************************************
@@ -656,17 +655,17 @@ tcQual   = mk_known_key_name tcName
 clsQual  = mk_known_key_name clsName
 
 mk_known_key_name space mod str uniq 
-  = mkExternalName uniq mod (mkOccFS space str) 
+  = mkExternalName uniq mod (mkOccNameFS space str) 
 		   Nothing noSrcLoc
 
 conName :: Name -> FastString -> Unique -> Name
 conName tycon occ uniq
-  = mkExternalName uniq (nameModule tycon) (mkOccFS dataName occ) 
+  = mkExternalName uniq (nameModule tycon) (mkOccNameFS dataName occ) 
 		   (Just tycon) noSrcLoc
 
 methName :: Name -> FastString -> Unique -> Name
 methName cls occ uniq
-  = mkExternalName uniq (nameModule cls) (mkOccFS varName occ) 
+  = mkExternalName uniq (nameModule cls) (mkVarOccFS occ) 
 		   (Just cls) noSrcLoc
 \end{code}
 
diff --git a/ghc/compiler/prelude/PrelRules.lhs b/ghc/compiler/prelude/PrelRules.lhs
index e0b234782e..9cdddc9065 100644
--- a/ghc/compiler/prelude/PrelRules.lhs
+++ b/ghc/compiler/prelude/PrelRules.lhs
@@ -36,7 +36,7 @@ import TyCon		( tyConDataCons_maybe, isEnumerationTyCon, isNewTyCon )
 import DataCon		( dataConTag, dataConTyCon, dataConWorkId, fIRST_TAG )
 import CoreUtils	( cheapEqExpr, exprIsConApp_maybe )
 import Type		( tyConAppTyCon, coreEqType )
-import OccName		( occNameUserString)
+import OccName		( occNameFS )
 import PrelNames	( unpackCStringFoldrName, unpackCStringFoldrIdKey, hasKey,
 			  eqStringName, unpackCStringIdKey )
 import Maybes		( orElse )
@@ -58,7 +58,7 @@ import DATA_WORD	( Word64 )
 primOpRules :: PrimOp -> Name -> [CoreRule]
 primOpRules op op_name = primop_rule op
   where
-    rule_name = mkFastString (occNameUserString (primOpOcc op))
+    rule_name = occNameFS (primOpOcc op)
     rule_name_case = rule_name `appendFS` FSLIT("->case")
 
 	-- A useful shorthand
diff --git a/ghc/compiler/prelude/PrimOp.lhs b/ghc/compiler/prelude/PrimOp.lhs
index e99eb9d17c..a650352280 100644
--- a/ghc/compiler/prelude/PrimOp.lhs
+++ b/ghc/compiler/prelude/PrimOp.lhs
@@ -22,7 +22,7 @@ import TysWiredIn
 
 import NewDemand
 import Var		( TyVar )
-import OccName		( OccName, pprOccName, mkVarOcc )
+import OccName		( OccName, pprOccName, mkVarOccFS )
 import TyCon		( TyCon, isPrimTyCon, tyConPrimRep, PrimRep(..) )
 import Type		( Type, mkForAllTys, mkFunTy, mkFunTys, tyConAppTyCon,
 			  typePrimRep )
@@ -113,10 +113,10 @@ data PrimOpInfo
 		[Type] 
 		Type 
 
-mkDyadic str  ty = Dyadic  (mkVarOcc str) ty
-mkMonadic str ty = Monadic (mkVarOcc str) ty
-mkCompare str ty = Compare (mkVarOcc str) ty
-mkGenPrimOp str tvs tys ty = GenPrimOp (mkVarOcc str) tvs tys ty
+mkDyadic str  ty = Dyadic  (mkVarOccFS str) ty
+mkMonadic str ty = Monadic (mkVarOccFS str) ty
+mkCompare str ty = Compare (mkVarOccFS str) ty
+mkGenPrimOp str tvs tys ty = GenPrimOp (mkVarOccFS str) tvs tys ty
 \end{code}
 
 %************************************************************************
diff --git a/ghc/compiler/prelude/TysPrim.lhs b/ghc/compiler/prelude/TysPrim.lhs
index 7d397d6ed6..2f6168bafb 100644
--- a/ghc/compiler/prelude/TysPrim.lhs
+++ b/ghc/compiler/prelude/TysPrim.lhs
@@ -46,7 +46,7 @@ module TysPrim(
 
 import Var		( TyVar, mkTyVar )
 import Name		( Name, BuiltInSyntax(..), mkInternalName, mkWiredInName )
-import OccName		( mkOccFS, tcName, mkTyVarOcc )
+import OccName		( mkOccNameFS, tcName, mkTyVarOcc )
 import TyCon		( TyCon, ArgVrcs, mkPrimTyCon, mkLiftedPrimTyCon,
 			  PrimRep(..) )
 import Type		( mkTyConApp, mkTyConTy, mkTyVarTys, mkTyVarTy,
@@ -100,7 +100,7 @@ primTyCons
 
 mkPrimTc :: FastString -> Unique -> TyCon -> Name
 mkPrimTc fs uniq tycon
-  = mkWiredInName gHC_PRIM (mkOccFS tcName fs) 
+  = mkWiredInName gHC_PRIM (mkOccNameFS tcName fs) 
 		  uniq
 		  Nothing 		-- No parent object
 		  (ATyCon tycon)	-- Relevant TyCon
diff --git a/ghc/compiler/prelude/TysWiredIn.lhs b/ghc/compiler/prelude/TysWiredIn.lhs
index e7dea60711..ceb4df550a 100644
--- a/ghc/compiler/prelude/TysWiredIn.lhs
+++ b/ghc/compiler/prelude/TysWiredIn.lhs
@@ -62,16 +62,18 @@ import Module		( Module )
 import RdrName		( nameRdrName )
 import Name		( Name, BuiltInSyntax(..), nameUnique, nameOccName, 
 			  nameModule, mkWiredInName )
-import OccName		( mkOccFS, tcName, dataName, mkTupleOcc, mkDataConWorkerOcc )
+import OccName		( mkOccNameFS, tcName, dataName, mkTupleOcc,
+			  mkDataConWorkerOcc )
 import DataCon		( DataCon, mkDataCon, dataConWorkId, dataConSourceArity )
 import Var		( TyVar, tyVarKind )
 import TyCon		( TyCon, AlgTyConRhs(DataTyCon), tyConDataCons,
-			  mkTupleTyCon, mkAlgTyCon, tyConName
-			)
+			  mkTupleTyCon, mkAlgTyCon, tyConName )
 
-import BasicTypes	( Arity, RecFlag(..), Boxity(..), isBoxed, StrictnessMark(..) )
+import BasicTypes	( Arity, RecFlag(..), Boxity(..), isBoxed,
+			  StrictnessMark(..) )
 
-import Type		( Type, mkTyConTy, mkTyConApp, mkTyVarTy, mkTyVarTys, TyThing(..) )
+import Type		( Type, mkTyConTy, mkTyConApp, mkTyVarTy, mkTyVarTys,
+			  TyThing(..) )
 import Kind		( mkArrowKinds, liftedTypeKind, ubxTupleKind )
 import Unique		( incrUnique, mkTupleTyConUnique,
 			  mkTupleDataConUnique, mkPArrDataConUnique )
@@ -114,14 +116,14 @@ wiredInTyCons = [ unitTyCon	-- Not treated like other tuples, because
 \begin{code}
 mkWiredInTyConName :: BuiltInSyntax -> Module -> FastString -> Unique -> TyCon -> Name
 mkWiredInTyConName built_in mod fs uniq tycon
-  = mkWiredInName mod (mkOccFS tcName fs) uniq
+  = mkWiredInName mod (mkOccNameFS tcName fs) uniq
 		  Nothing 		-- No parent object
 		  (ATyCon tycon)	-- Relevant TyCon
 		  built_in
 
 mkWiredInDataConName :: BuiltInSyntax -> Module -> FastString -> Unique -> DataCon -> Name -> Name
 mkWiredInDataConName built_in mod fs uniq datacon parent
-  = mkWiredInName mod (mkOccFS dataName fs) uniq
+  = mkWiredInName mod (mkOccNameFS dataName fs) uniq
 		  (Just parent) 	-- Name of parent TyCon
 		  (ADataCon datacon)	-- Relevant DataCon
 		  built_in
@@ -535,7 +537,7 @@ mkPArrFakeCon arity  = data_con
 	tyvar     = head alphaTyVars
 	tyvarTys  = replicate arity $ mkTyVarTy tyvar
         nameStr   = mkFastString ("MkPArr" ++ show arity)
-	name      = mkWiredInName pREL_PARR (mkOccFS dataName nameStr) uniq
+	name      = mkWiredInName pREL_PARR (mkOccNameFS dataName nameStr) uniq
 				  Nothing (ADataCon data_con) UserSyntax
 	uniq      = mkPArrDataConUnique arity
 
diff --git a/ghc/compiler/profiling/CostCentre.lhs b/ghc/compiler/profiling/CostCentre.lhs
index 3616ccbe30..f4a6ba951d 100644
--- a/ghc/compiler/profiling/CostCentre.lhs
+++ b/ghc/compiler/profiling/CostCentre.lhs
@@ -32,9 +32,7 @@ module CostCentre (
 #include "HsVersions.h"
 
 import Var		( Id )
-import Name		( UserFS, EncodedFS, encodeFS, decode,
-			  getOccName, occNameFS
-			)
+import Name		( getOccName, occNameFS )
 import Module		( Module )
 import Outputable	
 import FastTypes
@@ -120,7 +118,7 @@ data CostCentre
 		cc_mod  :: Module	-- Name of module defining this CC.
     }
 
-type CcName = EncodedFS
+type CcName = FastString
 
 data IsDupdCC
   = OriginalCC	-- This says how the CC is *used*.  Saying that
@@ -200,9 +198,9 @@ maybeSingletonCCS _			= Nothing
 Building cost centres
 
 \begin{code}
-mkUserCC :: UserFS -> Module -> CostCentre
+mkUserCC :: FastString -> Module -> CostCentre
 mkUserCC cc_name mod
-  = NormalCC { cc_name = encodeFS cc_name, cc_mod =  mod,
+  = NormalCC { cc_name = cc_name, cc_mod =  mod,
 	       cc_is_dupd = OriginalCC, cc_is_caf = NotCafCC {-might be changed-}
     }
 
@@ -370,5 +368,5 @@ ppCostCentreLbl (NormalCC {cc_name = n, cc_mod = m, cc_is_caf = is_caf})
 costCentreUserName (NoCostCentre)  = "NO_CC"
 costCentreUserName (AllCafsCC {})  = "CAF"
 costCentreUserName cc@(NormalCC {cc_name = name, cc_is_caf = is_caf})
-  =  case is_caf of { CafCC -> "CAF:";   _ -> "" } ++ decode (unpackFS name)
+  =  case is_caf of { CafCC -> "CAF:";   _ -> "" } ++ unpackFS name
 \end{code}
diff --git a/ghc/compiler/rename/RnEnv.lhs b/ghc/compiler/rename/RnEnv.lhs
index 99d6a3414c..2be3bfd5c0 100644
--- a/ghc/compiler/rename/RnEnv.lhs
+++ b/ghc/compiler/rename/RnEnv.lhs
@@ -50,7 +50,8 @@ import TcRnMonad
 import Name		( Name, nameIsLocalOrFrom, mkInternalName, isWiredInName,
 			  nameSrcLoc, nameOccName, nameModule, nameParent, isExternalName )
 import NameSet
-import OccName		( tcName, isDataOcc, occNameFlavour, reportIfUnused )
+import OccName		( tcName, isDataOcc, pprNonVarNameSpace, occNameSpace,
+			  reportIfUnused )
 import Module		( Module )
 import PrelNames	( mkUnboundName, rOOT_MAIN, iNTERACTIVE, consDataConKey, hasKey )
 import UniqSupply
@@ -747,7 +748,8 @@ warnUnusedName :: (Name, Maybe Provenance) -> RnM ()
 warnUnusedName (name, prov)
   = addWarnAt loc $
     sep [msg <> colon, 
-	 nest 2 $ occNameFlavour (nameOccName name) <+> quotes (ppr name)]
+	 nest 2 $ pprNonVarNameSpace (occNameSpace (nameOccName name))
+			<+> quotes (ppr name)]
 	-- TODO should be a proper span
   where
     (loc,msg) = case prov of
@@ -778,7 +780,8 @@ shadowedNameWarn doc shadow
 
 unknownNameErr rdr_name
   = sep [ptext SLIT("Not in scope:"), 
-	 nest 2 $ occNameFlavour (rdrNameOcc rdr_name) <+> quotes (ppr rdr_name)]
+	 nest 2 $ pprNonVarNameSpace (occNameSpace (rdrNameOcc rdr_name))
+		  <+> quotes (ppr rdr_name)]
 
 unknownInstBndrErr cls op
   = quotes (ppr op) <+> ptext SLIT("is not a (visible) method of class") <+> quotes (ppr cls)
diff --git a/ghc/compiler/rename/RnExpr.lhs b/ghc/compiler/rename/RnExpr.lhs
index 53a412f765..95d7b8307e 100644
--- a/ghc/compiler/rename/RnExpr.lhs
+++ b/ghc/compiler/rename/RnExpr.lhs
@@ -37,7 +37,6 @@ import Name		( Name, nameOccName, nameIsLocalOrFrom )
 import NameSet
 import RdrName		( RdrName, emptyGlobalRdrEnv, extendLocalRdrEnv, lookupLocalRdrEnv )
 import LoadIface	( loadHomeInterface )
-import UnicodeUtil	( stringToUtf8 )
 import UniqFM		( isNullUFM )
 import UniqSet		( emptyUniqSet )
 import List		( nub )
@@ -932,7 +931,7 @@ mkAssertErrorExpr
   = getSrcSpanM    			`thenM` \ sloc ->
     let
 	expr = HsApp (L sloc (HsVar assertErrorName)) (L sloc (HsLit msg))
-	msg  = HsStringPrim (mkFastString (stringToUtf8 (showSDoc (ppr sloc))))
+	msg  = HsStringPrim (mkFastString (showSDoc (ppr sloc)))
     in
     returnM (expr, emptyFVs)
 \end{code}
diff --git a/ghc/compiler/rename/RnNames.lhs b/ghc/compiler/rename/RnNames.lhs
index bf6e54a4f5..4cdb241d8e 100644
--- a/ghc/compiler/rename/RnNames.lhs
+++ b/ghc/compiler/rename/RnNames.lhs
@@ -26,15 +26,17 @@ import TcRnMonad
 
 import FiniteMap
 import PrelNames	( pRELUDE, isUnboundName, main_RDR_Unqual )
-import Module		( Module, moduleUserString, unitModuleEnv, 
+import Module		( Module, moduleString, unitModuleEnv, 
 			  lookupModuleEnv, moduleEnvElts, foldModuleEnv )
 import Name		( Name, nameSrcLoc, nameOccName, nameModule, isWiredInName,
 			  nameParent, nameParent_maybe, isExternalName,
 			  isBuiltInSyntax )
 import NameSet
 import NameEnv
-import OccName		( srcDataName, isTcOcc, occNameFlavour, OccEnv, 
-			  mkOccEnv, lookupOccEnv, emptyOccEnv, extendOccEnv )
+import OccName		( srcDataName, isTcOcc, pprNonVarNameSpace,
+			  occNameSpace,
+			  OccEnv, mkOccEnv, lookupOccEnv, emptyOccEnv,
+			  extendOccEnv )
 import HscTypes		( GenAvailInfo(..), AvailInfo,
 			  HomePackageTable, PackageIfaceTable, 
 			  unQualInScope, 
@@ -683,7 +685,7 @@ reportDeprecations tcg_env
       ,	Just deprec_txt <- lookupDeprec hpt pit name
       = setSrcSpan (importSpecLoc imp_spec) $
 	addWarn (sep [ptext SLIT("Deprecated use of") <+> 
-			occNameFlavour (nameOccName name) <+> 
+			pprNonVarNameSpace (occNameSpace (nameOccName name)) <+> 
 		 	quotes (ppr name),
 		      (parens imp_msg) <> colon,
 		      (ppr deprec_txt) ])
@@ -958,7 +960,7 @@ printMinimalImports imps
 				 (vcat (map ppr_mod_ie mod_ies)) })
    }
   where
-    mkFilename this_mod = moduleUserString this_mod ++ ".imports"
+    mkFilename this_mod = moduleString this_mod ++ ".imports"
     ppr_mod_ie (mod_name, ies) 
 	| mod_name == pRELUDE 
 	= empty
diff --git a/ghc/compiler/simplCore/SetLevels.lhs b/ghc/compiler/simplCore/SetLevels.lhs
index 4b1c01dfec..f8ab29dcd5 100644
--- a/ghc/compiler/simplCore/SetLevels.lhs
+++ b/ghc/compiler/simplCore/SetLevels.lhs
@@ -60,8 +60,8 @@ import CoreUtils	( exprType, exprIsTrivial, exprIsCheap, mkPiTypes )
 import CoreFVs		-- all of it
 import CoreSubst	( Subst, emptySubst, extendInScope, extendIdSubst,
 			  cloneIdBndr, cloneRecIdBndrs )
-import Id		( Id, idType, mkSysLocalUnencoded, 
-			  isOneShotLambda, zapDemandIdInfo,
+import Id		( Id, idType, mkSysLocal, isOneShotLambda,
+			  zapDemandIdInfo,
 			  idSpecialisation, idWorkerInfo, setIdInfo
 			)
 import IdInfo		( workerExists, vanillaIdInfo, isEmptySpecInfo )
@@ -69,7 +69,7 @@ import Var		( Var )
 import VarSet
 import VarEnv
 import Name		( getOccName )
-import OccName		( occNameUserString )
+import OccName		( occNameString )
 import Type		( isUnLiftedType, Type )
 import BasicTypes	( TopLevelFlag(..) )
 import UniqSupply
@@ -796,9 +796,9 @@ newPolyBndrs dest_lvl env abs_vars bndrs
     in
     returnLvl (extendPolyLvlEnv dest_lvl env abs_vars (bndrs `zip` new_bndrs), new_bndrs)
   where
-    mk_poly_bndr bndr uniq = mkSysLocalUnencoded (mkFastString str) uniq poly_ty
+    mk_poly_bndr bndr uniq = mkSysLocal (mkFastString str) uniq poly_ty
 			   where
-			     str     = "poly_" ++ occNameUserString (getOccName bndr)
+			     str     = "poly_" ++ occNameString (getOccName bndr)
 			     poly_ty = mkPiTypes abs_vars (idType bndr)
 	
 
@@ -807,7 +807,7 @@ newLvlVar :: String
 	  -> LvlM Id
 newLvlVar str vars body_ty 	
   = getUniqueUs	`thenLvl` \ uniq ->
-    returnUs (mkSysLocalUnencoded (mkFastString str) uniq (mkPiTypes vars body_ty))
+    returnUs (mkSysLocal (mkFastString str) uniq (mkPiTypes vars body_ty))
     
 -- The deeply tiresome thing is that we have to apply the substitution
 -- to the rules inside each Id.  Grr.  But it matters.
diff --git a/ghc/compiler/simplCore/SimplMonad.lhs b/ghc/compiler/simplCore/SimplMonad.lhs
index b82562e668..bc09e1128c 100644
--- a/ghc/compiler/simplCore/SimplMonad.lhs
+++ b/ghc/compiler/simplCore/SimplMonad.lhs
@@ -34,7 +34,6 @@ import UniqSupply	( uniqsFromSupply, uniqFromSupply, splitUniqSupply,
 			)
 import DynFlags		( SimplifierSwitch(..), DynFlags, DynFlag(..), dopt )
 import StaticFlags	( opt_PprStyle_Debug, opt_HistorySize )
-import OccName		( EncodedFS )
 import Unique		( Unique )
 import Maybes		( expectJust )
 import FiniteMap	( FiniteMap, emptyFM, isEmptyFM, lookupFM, addToFM, plusFM_C, fmToList )
@@ -160,7 +159,7 @@ getDOptsSmpl :: SimplM DynFlags
 getDOptsSmpl 
    = SM (\dflags us sc -> (dflags, us, sc))
 
-newId :: EncodedFS -> Type -> SimplM Id
+newId :: FastString -> Type -> SimplM Id
 newId fs ty = getUniqueSmpl 	`thenSmpl` \ uniq ->
 	      returnSmpl (mkSysLocal fs uniq ty)
 \end{code}
diff --git a/ghc/compiler/simplCore/Simplify.lhs b/ghc/compiler/simplCore/Simplify.lhs
index 885914063c..17a7969e74 100644
--- a/ghc/compiler/simplCore/Simplify.lhs
+++ b/ghc/compiler/simplCore/Simplify.lhs
@@ -28,7 +28,6 @@ import Id		( Id, idType, idInfo, idArity, isDataConWorkId,
 			)
 import MkId		( eRROR_ID )
 import Literal		( mkStringLit )
-import OccName		( encodeFS )
 import IdInfo		( OccInfo(..), isLoopBreaker,
 			  setArityInfo, zapDemandInfo,
 			  setUnfoldingInfo, 
@@ -1875,7 +1874,7 @@ mkDupableAlt env case_bndr' cont alt
     )							`thenSmpl` \ (final_bndrs', final_args) ->
 
 	-- See comment about "$j" name above
-    newId (encodeFS FSLIT("$j")) (mkPiTypes final_bndrs' rhs_ty')	`thenSmpl` \ join_bndr ->
+    newId FSLIT("$j") (mkPiTypes final_bndrs' rhs_ty')	`thenSmpl` \ join_bndr ->
 	-- Notice the funky mkPiTypes.  If the contructor has existentials
 	-- it's possible that the join point will be abstracted over
 	-- type varaibles as well as term variables.
diff --git a/ghc/compiler/stgSyn/CoreToStg.lhs b/ghc/compiler/stgSyn/CoreToStg.lhs
index 9ae0d2741b..824cabaacb 100644
--- a/ghc/compiler/stgSyn/CoreToStg.lhs
+++ b/ghc/compiler/stgSyn/CoreToStg.lhs
@@ -30,7 +30,7 @@ import VarSet
 import VarEnv
 import Maybes		( maybeToBool )
 import Name		( getOccName, isExternalName, nameOccName )
-import OccName		( occNameUserString, occNameFS )
+import OccName		( occNameString, occNameFS )
 import BasicTypes       ( Arity )
 import Packages		( HomeModules )
 import StaticFlags	( opt_RuntimeTypes )
@@ -688,7 +688,7 @@ coreToStgLet let_no_escape bind body
 is_join_var :: Id -> Bool
 -- A hack (used only for compiler debuggging) to tell if
 -- a variable started life as a join point ($j)
-is_join_var j = occNameUserString (getOccName j) == "$j"
+is_join_var j = occNameString (getOccName j) == "$j"
 \end{code}
 
 \begin{code}
diff --git a/ghc/compiler/typecheck/Inst.lhs b/ghc/compiler/typecheck/Inst.lhs
index 71d3e8472b..d6cf344b90 100644
--- a/ghc/compiler/typecheck/Inst.lhs
+++ b/ghc/compiler/typecheck/Inst.lhs
@@ -79,7 +79,7 @@ import DataCon	( DataCon, dataConTyVars, dataConStupidTheta, dataConName, dataCo
 import Id	( Id, idName, idType, mkUserLocal, mkLocalId )
 import PrelInfo	( isNoDictClass )
 import Name	( Name, mkMethodOcc, getOccName, getSrcLoc, nameModule,
-		  isInternalName, setNameUnique, mkSystemVarNameEncoded )
+		  isInternalName, setNameUnique, mkSystemVarName )
 import NameSet	( addOneToNameSet )
 import Literal	( inIntRange )
 import Var	( TyVar, tyVarKind, setIdType )
@@ -398,9 +398,7 @@ newLitInst orig lit expected_ty	-- Make a LitInst
   = do 	{ loc <- getInstLoc orig
 	; new_uniq <- newUnique
 	; let
-		lit_nm   = mkSystemVarNameEncoded new_uniq FSLIT("lit")
-		-- The "encoded" bit means that we don't need to
-		-- z-encode the string every time we call this!
+		lit_nm   = mkSystemVarName new_uniq FSLIT("lit")
 		lit_inst = LitInst lit_nm lit expected_ty loc
 	; extendLIE lit_inst
 	; return (HsVar (instToId lit_inst)) }
diff --git a/ghc/compiler/typecheck/TcClassDcl.lhs b/ghc/compiler/typecheck/TcClassDcl.lhs
index b382af94df..fbb450a199 100644
--- a/ghc/compiler/typecheck/TcClassDcl.lhs
+++ b/ghc/compiler/typecheck/TcClassDcl.lhs
@@ -52,7 +52,6 @@ import RdrName		( RdrName, mkDerivedRdrName )
 import Outputable
 import PrelNames	( genericTyConNames )
 import DynFlags
-import UnicodeUtil	( stringToUtf8 )
 import ErrUtils		( dumpIfSet_dyn )
 import Util		( count, lengthIs, isSingleton, lengthExceeds )
 import Unique		( Uniquable(..) )
@@ -487,7 +486,7 @@ mkDefMethRhs origin clas inst_tys sel_id loc NoDefMeth
   where
     error_rhs  = noLoc $ HsLam (mkMatchGroup [mkSimpleMatch wild_pats simple_rhs])
     simple_rhs = nlHsApp (nlHsVar (getName nO_METHOD_BINDING_ERROR_ID)) 
-	    	       (nlHsLit (HsStringPrim (mkFastString (stringToUtf8 error_msg))))
+	    	       (nlHsLit (HsStringPrim (mkFastString error_msg)))
     error_msg = showSDoc (hcat [ppr loc, text "|", ppr sel_id ])
 
 	-- When the type is of form t1 -> t2 -> t3
diff --git a/ghc/compiler/typecheck/TcGenDeriv.lhs b/ghc/compiler/typecheck/TcGenDeriv.lhs
index faa32ec18b..94bb152850 100644
--- a/ghc/compiler/typecheck/TcGenDeriv.lhs
+++ b/ghc/compiler/typecheck/TcGenDeriv.lhs
@@ -809,7 +809,7 @@ gen_Read_binds get_fixity tycon
     ident_pat s  = nlConPat ident_RDR  [nlLitPat (mkHsString s)]  -- Ident "foo"
     symbol_pat s = nlConPat symbol_RDR [nlLitPat (mkHsString s)]  -- Symbol ">>"
     
-    data_con_str con = occNameUserString (getOccName con)
+    data_con_str con = occNameString (getOccName con)
     
     read_punc c = bindLex (punc_pat c)
     read_arg a ty 
@@ -832,7 +832,7 @@ gen_Read_binds get_fixity tycon
 		 | otherwise
 		 = [bindLex (ident_pat lbl_str)]
 		 where	
-		   lbl_str = occNameUserString (getOccName lbl) 
+		   lbl_str = occNameString (getOccName lbl) 
 \end{code}
 
 
@@ -899,7 +899,7 @@ gen_Show_binds get_fixity tycon
 
 	     dc_nm	    = getName data_con
 	     dc_occ_nm	    = getOccName data_con
-             con_str        = occNameUserString dc_occ_nm
+             con_str        = occNameString dc_occ_nm
 	     op_con_str     = wrapOpParens con_str
 	     backquote_str  = wrapOpBackquotes con_str
 
@@ -916,7 +916,7 @@ gen_Show_binds get_fixity tycon
 			-- it seems tidier to have them both sides.
 		 where
 		   occ_nm   = getOccName l
-		   nm       = wrapOpParens (occNameUserString occ_nm)
+		   nm       = wrapOpParens (occNameString occ_nm)
 
              show_args 		     = zipWith show_arg bs_needed arg_tys
 	     (show_arg1:show_arg2:_) = show_args
@@ -1128,7 +1128,7 @@ gen_Data_binds fix_env tycon
     constr_args dc =
 	 [ -- nlHsIntLit (toInteger (dataConTag dc)),		-- Tag
 	   nlHsVar data_type_name,				-- DataType
-	   nlHsLit (mkHsString (occNameUserString dc_occ)),	-- String name
+	   nlHsLit (mkHsString (occNameString dc_occ)),	-- String name
            nlList  labels,					-- Field labels
 	   nlHsVar fixity]					-- Fixity
 	where
@@ -1458,7 +1458,7 @@ mk_tc_deriv_name tycon str
   = mkDerivedRdrName tc_name mk_occ
   where
     tc_name = tyConName tycon
-    mk_occ tc_occ = mkOccFS varName (mkFastString new_str)
+    mk_occ tc_occ = mkVarOccFS (mkFastString new_str)
 		  where
 		    new_str = str ++ occNameString tc_occ ++ "#"
 \end{code}
diff --git a/ghc/compiler/typecheck/TcInstDcls.lhs b/ghc/compiler/typecheck/TcInstDcls.lhs
index 432d3c8cae..04fbafb5b1 100644
--- a/ghc/compiler/typecheck/TcInstDcls.lhs
+++ b/ghc/compiler/typecheck/TcInstDcls.lhs
@@ -35,7 +35,6 @@ import Var		( Id, idName, idType )
 import MkId		( mkDictFunId, rUNTIME_ERROR_ID )
 import FunDeps		( checkInstFDs )
 import Name		( Name, getSrcLoc )
-import UnicodeUtil	( stringToUtf8 )
 import Maybe		( catMaybes )
 import SrcLoc		( srcLocSpan, unLoc, noLoc, Located(..), srcSpanStart )
 import ListSetOps	( minusList )
@@ -405,7 +404,7 @@ tcInstDecl2 (InstInfo { iSpec = ispec, iBinds = binds })
 		-- Hardly beautiful, but only three extra lines.
 	    nlHsApp (noLoc $ TyApp (nlHsVar rUNTIME_ERROR_ID) 
 				   [idType this_dict_id])
-		  (nlHsLit (HsStringPrim (mkFastString (stringToUtf8 msg))))
+		  (nlHsLit (HsStringPrim (mkFastString msg)))
 
 	  | otherwise	-- The common case
 	  = mkHsConApp dict_constr inst_tys' (map HsVar scs_and_meths)
diff --git a/ghc/compiler/typecheck/TcRnDriver.lhs b/ghc/compiler/typecheck/TcRnDriver.lhs
index b2e665fdb6..7e3aae2506 100644
--- a/ghc/compiler/typecheck/TcRnDriver.lhs
+++ b/ghc/compiler/typecheck/TcRnDriver.lhs
@@ -63,7 +63,7 @@ import ErrUtils		( Messages, mkDumpDoc, showPass )
 import Id		( Id, mkExportedLocalId, isLocalId, idName, idType )
 import Var		( Var )
 import Module           ( Module, ModuleEnv, moduleEnvElts, elemModuleEnv )
-import OccName		( mkVarOcc, mkOccFS, varName )
+import OccName		( mkVarOccFS )
 import Name		( Name, NamedThing(..), isExternalName, getSrcLoc, isWiredInName,
 			  mkExternalName )
 import NameSet
@@ -734,7 +734,7 @@ checkMain
 	 dflags    <- getDOpts ;
 	 let { main_mod = mainModIs dflags ;
 	       main_fn  = case mainFunIs dflags of {
-				Just fn -> mkRdrUnqual (mkVarOcc (mkFastString fn)) ;
+				Just fn -> mkRdrUnqual (mkVarOccFS (mkFastString fn)) ;
 				Nothing -> main_RDR_Unqual } } ;
 	
 	 check_main ghci_mode tcg_env main_mod main_fn
@@ -776,7 +776,7 @@ check_main ghci_mode tcg_env main_mod main_fn
 	-- for 'main' in the interface file!
 
 	; let { root_main_name =  mkExternalName rootMainKey rOOT_MAIN 
-				   (mkOccFS varName FSLIT("main")) 
+				   (mkVarOccFS FSLIT("main")) 
 				   (Just main_name) (getSrcLoc main_name)
 	      ; root_main_id = mkExportedLocalId root_main_name ty
 	      ; main_bind    = noLoc (VarBind root_main_id main_expr) }
diff --git a/ghc/compiler/typecheck/TcSplice.lhs b/ghc/compiler/typecheck/TcSplice.lhs
index 2844ab42a7..578c96b105 100644
--- a/ghc/compiler/typecheck/TcSplice.lhs
+++ b/ghc/compiler/typecheck/TcSplice.lhs
@@ -41,7 +41,7 @@ import NameEnv		( lookupNameEnv )
 import HscTypes		( lookupType, ExternalPackageState(..), emptyModDetails )
 import OccName
 import Var		( Id, TyVar, idType )
-import Module		( moduleUserString )
+import Module		( moduleString )
 import TcRnMonad
 import IfaceEnv		( lookupOrig )
 import Class		( Class, classExtraBigSig )
@@ -416,7 +416,7 @@ instance TH.Quasi (IOEnv (Env TcGblEnv TcLclEnv)) where
   qReport True msg  = addErr (text msg)
   qReport False msg = addReport (text msg)
 
-  qCurrentModule = do { m <- getModule; return (moduleUserString m) }
+  qCurrentModule = do { m <- getModule; return (moduleString m) }
   qReify v = reify v
   qRecover = recoverM
 
@@ -659,8 +659,8 @@ reifyName thing
 	-- have free variables, we may need to generate NameL's for them.
   where
     name    = getName thing
-    mod     = moduleUserString (nameModule name)
-    occ_str = occNameUserString occ
+    mod     = moduleString (nameModule name)
+    occ_str = occNameString occ
     occ     = nameOccName name
     mk_varg | OccName.isDataOcc occ = TH.mkNameG_d
 	    | OccName.isVarOcc  occ = TH.mkNameG_v
diff --git a/ghc/compiler/types/TypeRep.lhs b/ghc/compiler/types/TypeRep.lhs
index dc5344569b..7bb863a210 100644
--- a/ghc/compiler/types/TypeRep.lhs
+++ b/ghc/compiler/types/TypeRep.lhs
@@ -33,7 +33,7 @@ import Kind
 import Var	  ( Var, Id, TyVar, tyVarKind )
 import VarSet     ( TyVarSet )
 import Name	  ( Name, NamedThing(..), BuiltInSyntax(..), mkWiredInName )
-import OccName	  ( mkOccFS, tcName, parenSymOcc )
+import OccName	  ( mkOccNameFS, tcName, parenSymOcc )
 import BasicTypes ( IPName, tupleParens )
 import TyCon	  ( TyCon, mkFunTyCon, tyConArity, tupleTyConBoxity, isTupleTyCon, isRecursiveTyCon, isNewTyCon )
 import Class	  ( Class )
@@ -273,7 +273,7 @@ funTyCon = mkFunTyCon funTyConName (mkArrowKinds [argTypeKind, openTypeKind] lif
 	-- a prefix way, thus:  (->) Int# Int#.  And this is unusual.
 
 funTyConName = mkWiredInName gHC_PRIM
-			(mkOccFS tcName FSLIT("(->)"))
+			(mkOccNameFS tcName FSLIT("(->)"))
 			funTyConKey
 			Nothing 		-- No parent object
 			(ATyCon funTyCon)	-- Relevant TyCon
diff --git a/ghc/compiler/utils/Binary.hs b/ghc/compiler/utils/Binary.hs
index 1902ff1f66..7b40bd279d 100644
--- a/ghc/compiler/utils/Binary.hs
+++ b/ghc/compiler/utils/Binary.hs
@@ -58,26 +58,7 @@ import UniqFM
 import FastMutInt
 import PackageConfig		( PackageId, packageIdFS, fsToPackageId )
 
-#if __GLASGOW_HASKELL__ < 503
-import DATA_IOREF
-import DATA_BITS
-import DATA_INT
-import DATA_WORD
-import Char
-import Monad
-import Exception
-import GlaExts hiding (ByteArray, newByteArray, freezeByteArray)
-import Array
-import IO
-import PrelIOBase		( IOError(..), IOErrorType(..)
-#if __GLASGOW_HASKELL__ > 411
-				, IOException(..)
-#endif
-				)
-import PrelReal			( Ratio(..) )
-import PrelIOBase	 	( IO(..) )
-import IOExts			( openFileEx, IOModeEx(..) )
-#else
+import Foreign
 import Data.Array.IO
 import Data.Array
 import Data.Bits
@@ -102,44 +83,12 @@ import GHC.Handle		( openFileEx, IOModeEx(..) )
 #else
 import System.IO		( openBinaryFile )
 #endif
-#endif
 
 #if __GLASGOW_HASKELL__ < 601
 openBinaryFile f mode = openFileEx f (BinaryMode mode)
 #endif
 
-#if __GLASGOW_HASKELL__ < 503
-type BinArray = MutableByteArray RealWorld Int
-newArray_ bounds     = stToIO (newCharArray bounds)
-unsafeWrite arr ix e = stToIO (writeWord8Array arr ix e)
-unsafeRead  arr ix   = stToIO (readWord8Array arr ix)
-#if __GLASGOW_HASKELL__ < 411
-newByteArray#        = newCharArray#
-#endif
-hPutArray h arr sz   = hPutBufBAFull h arr sz
-hGetArray h sz       = hGetBufBAFull h sz
-
-mkIOError :: IOErrorType -> String -> Maybe Handle -> Maybe FilePath -> Exception
-mkIOError t location maybe_hdl maybe_filename
-  = IOException (IOError maybe_hdl t location ""
-#if __GLASGOW_HASKELL__ > 411
-		         maybe_filename
-#endif
-  		)
-
-eofErrorType = EOF
-
-#ifndef SIZEOF_HSINT
-#define SIZEOF_HSINT  INT_SIZE_IN_BYTES
-#endif
-
-#ifndef SIZEOF_HSWORD
-#define SIZEOF_HSWORD WORD_SIZE_IN_BYTES
-#endif
-
-#else
 type BinArray = IOUArray Int Word8
-#endif
 
 ---------------------------------------------------------------
 --		BinHandle
@@ -741,13 +690,17 @@ constructDictionary j fm = array (0,j-1) (eltsUFM fm)
 --		Reading and writing FastStrings
 ---------------------------------------------------------
 
-putFS bh (FastString id l ba) = do
-  put_ bh (I# l)
-  putByteArray bh ba l
-putFS bh s = error ("Binary.put_(FastString): " ++ unpackFS s)
-	-- Note: the length of the FastString is *not* the same as
-	-- the size of the ByteArray: the latter is rounded up to a
-	-- multiple of the word size.
+putFS bh (FastString id l _ buf _) = do
+  put_ bh l
+  withForeignPtr buf $ \ptr -> 
+    let 
+	go n | n == l    = return ()
+	     | otherwise = do
+		b <- peekElemOff ptr n
+		putByte bh b
+		go (n+1)
+   in 
+   go 0
   
 {- -- possible faster version, not quite there yet:
 getFS bh@BinMem{} = do
@@ -757,16 +710,24 @@ getFS bh@BinMem{} = do
   return $! (mkFastSubStringBA# arr off l)
 -}
 getFS bh = do
-  (I# l) <- get bh
-  (BA ba) <- getByteArray bh (I# l)
-  return $! (mkFastSubStringBA# ba 0# l)
+  l <- get bh
+  fp <- mallocForeignPtrBytes l
+  withForeignPtr fp $ \ptr -> do
+  let 
+	go n | n == l = mkFastStringForeignPtr ptr fp l
+	     | otherwise = do
+		b <- getByte bh
+		pokeElemOff ptr n b
+		go (n+1)
+  --
+  go 0
 
 instance Binary PackageId where
   put_ bh pid = put_ bh (packageIdFS pid)
   get bh = do { fs <- get bh; return (fsToPackageId fs) }
 
 instance Binary FastString where
-  put_ bh f@(FastString id l ba) =
+  put_ bh f@(FastString id l _ fp _) =
     case getUserData bh of { 
 	UserData { ud_next = j_r, ud_map = out_r, ud_dict = dict} -> do
     out <- readIORef out_r
diff --git a/ghc/compiler/utils/BufWrite.hs b/ghc/compiler/utils/BufWrite.hs
index 6d00e46634..b15089ead3 100644
--- a/ghc/compiler/utils/BufWrite.hs
+++ b/ghc/compiler/utils/BufWrite.hs
@@ -31,17 +31,11 @@ import Char		( ord )
 import Foreign
 import IO
 
-#if __GLASGOW_HASKELL__ < 503
-import PrelIOBase	( IO(..) )
-import IOExts		( hPutBufFull )
-#else
 import GHC.IOBase	( IO(..) )
 import System.IO	( hPutBuf )
-#endif
-
-import GLAEXTS		( touch#, byteArrayContents#, Int(..), Int#, Addr# )
+import GHC.Ptr		( Ptr(..) )
 
-import PrimPacked	( Ptr(..) )
+import GLAEXTS		( Int(..), Int#, Addr# )
 
 -- -----------------------------------------------------------------------------
 
@@ -88,22 +82,17 @@ bPutStr b@(BufHandle buf r hdl) str = do
 		loop cs (i+1)
   
 bPutFS :: BufHandle -> FastString -> IO ()
-bPutFS b@(BufHandle buf r hdl) fs@(FastString _ len# arr#) = do
-  let len = I# len#
+bPutFS b@(BufHandle buf r hdl) fs@(FastString _ len _ fp _) =
+ withForeignPtr fp $ \ptr -> do
   i <- readFastMutInt r
   if (i + len) >= buf_size
 	then do hPutBuf hdl buf i
 		writeFastMutInt r 0
 		if (len >= buf_size) 
-		    then do
-			 let a# = byteArrayContents# arr#
-			 hPutBuf hdl (Ptr a#) len
-			 touch fs
+		    then hPutBuf hdl ptr len
 		    else bPutFS b fs
 	else do
-		let a# = byteArrayContents# arr#
-		copyBytes (buf `plusPtr` i) (Ptr a#) len
-		touch fs
+		copyBytes (buf `plusPtr` i) ptr len
 		writeFastMutInt r (i+len)
 bPutFS _ _ = panic "bPutFS"
 
@@ -128,8 +117,6 @@ bFlush b@(BufHandle buf r hdl) = do
   free buf
   return ()
 
-touch r = IO $ \s -> case touch# r s of s -> (# s, () #)
-
 #if 0
 myPutBuf s hdl buf i = 
   modifyIOError (\e -> ioeSetErrorString e (ioeGetErrorString e ++ ':':s ++ " (" ++ show buf ++ "," ++ show i ++ ")")) $
diff --git a/ghc/compiler/utils/Encoding.hs b/ghc/compiler/utils/Encoding.hs
new file mode 100644
index 0000000000..d15c0216ae
--- /dev/null
+++ b/ghc/compiler/utils/Encoding.hs
@@ -0,0 +1,386 @@
+{-# OPTIONS_GHC -O #-}
+-- -----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow, 1997-2003
+--
+-- Character encodings 
+--
+-- -----------------------------------------------------------------------------
+
+module Encoding ( 
+	-- * UTF-8
+	utf8DecodeChar#,
+	utf8PrevChar,
+	utf8CharStart,
+	utf8DecodeChar,
+	utf8DecodeString,
+	utf8EncodeChar,
+	utf8EncodeString,
+	utf8EncodedLength,
+	countUTF8Chars,
+
+	-- * Latin-1
+	latin1DecodeChar,
+	latin1EncodeChar,
+
+	-- * Z-encoding
+	zEncodeString,
+	zDecodeString
+  ) where
+
+#define COMPILING_FAST_STRING
+#include "HsVersions.h"
+import Foreign
+import Data.Char	( ord, chr, isDigit, digitToInt, isHexDigit )
+import Numeric		( showHex )
+
+import GHC.Ptr		( Ptr(..) )
+import GHC.Base
+
+-- -----------------------------------------------------------------------------
+-- Latin-1
+
+latin1DecodeChar ptr = do
+  w <- peek ptr
+  return (unsafeChr (fromIntegral w), ptr `plusPtr` 1)
+
+latin1EncodeChar c ptr = do
+  poke ptr (fromIntegral (ord c))
+  return (ptr `plusPtr` 1)
+
+-- -----------------------------------------------------------------------------
+-- UTF-8
+
+-- We can't write the decoder as efficiently as we'd like without
+-- resorting to unboxed extensions, unfortunately.  I tried to write
+-- an IO version of this function, but GHC can't eliminate boxed
+-- results from an IO-returning function.
+--
+-- We assume we can ignore overflow when parsing a multibyte character here.
+-- To make this safe, we add extra sentinel bytes to unparsed UTF-8 sequences
+-- before decoding them (see StringBuffer.hs).
+
+{-# INLINE utf8DecodeChar# #-}
+utf8DecodeChar# :: Addr# -> (# Char#, Addr# #)
+utf8DecodeChar# a# =
+  let ch0 = word2Int# (indexWord8OffAddr# a# 0#) in
+  case () of 
+    _ | ch0 <=# 0x7F# -> (# chr# ch0, a# `plusAddr#` 1# #)
+
+      | ch0 >=# 0xC0# && ch0 <=# 0xDF# ->
+	let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+	if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
+	(# chr# (((ch0 -# 0xC0#) `uncheckedIShiftL#` 6#) +#
+	   	  (ch1 -# 0x80#)),
+	   a# `plusAddr#` 2# #)
+
+      | ch0 >=# 0xE0# && ch0 <=# 0xEF# ->
+	let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+	if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
+	let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
+	if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else
+	(# chr# (((ch0 -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+	   	 ((ch1 -# 0x80#) `uncheckedIShiftL#` 6#)  +#
+	    	  (ch2 -# 0x80#)),
+	   a# `plusAddr#` 3# #)
+
+     | ch0 >=# 0xF0# && ch0 <=# 0xF8# ->
+	let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+	if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
+	let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
+	if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else
+	let ch3 = word2Int# (indexWord8OffAddr# a# 3#) in
+	if ch3 <# 0x80# || ch3 >=# 0xC0# then fail 3# else
+	(# chr# (((ch0 -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+		 ((ch1 -# 0x80#) `uncheckedIShiftL#` 12#) +#
+		 ((ch2 -# 0x80#) `uncheckedIShiftL#` 6#)  +#
+		  (ch3 -# 0x80#)),
+	   a# `plusAddr#` 4# #)
+
+      | otherwise -> fail 1#
+  where
+	-- all invalid sequences end up here:
+	fail n = (# '\0'#, a# `plusAddr#` n #)
+	-- '\xFFFD' would be the usual replacement character, but
+	-- that's a valid symbol in Haskell, so will result in a
+	-- confusing parse error later on.  Instead we use '\0' which
+	-- will signal a lexer error immediately.
+
+utf8DecodeChar :: Ptr Word8 -> (Char, Ptr Word8)
+utf8DecodeChar (Ptr a#) = ( C# c#, Ptr b# )
+  where (# c#, b# #) = utf8DecodeChar# a#
+
+-- UTF-8 is cleverly designed so that we can always figure out where
+-- the start of the current character is, given any position in a
+-- stream.  This function finds the start of the previous character,
+-- assuming there *is* a previous character.
+utf8PrevChar :: Ptr Word8 -> IO (Ptr Word8)
+utf8PrevChar p = utf8CharStart (p `plusPtr` (-1))
+
+utf8CharStart :: Ptr Word8 -> IO (Ptr Word8)
+utf8CharStart p = go p
+ where go p = do w <- peek p
+		 if (w .&. 0xC0) == 0x80
+			then go (p `plusPtr` (-1))
+			else return p
+
+utf8DecodeString :: Ptr Word8 -> Int -> IO [Char]
+STRICT2(utf8DecodeString)
+utf8DecodeString (Ptr a#) (I# len#)
+  = unpack a#
+  where
+    end# = addr2Int# (a# `plusAddr#` len#)
+
+    unpack p#
+	| addr2Int# p# >=# end# = return []
+	| otherwise  =
+	case utf8DecodeChar# p# of
+	   (# c#, q# #) -> do
+		chs <- unpack q#
+		return (C# c# : chs)
+
+countUTF8Chars :: Ptr Word8 -> Int -> IO Int
+countUTF8Chars ptr bytes = go ptr 0
+  where
+	end = ptr `plusPtr` bytes
+
+	STRICT2(go)
+	go ptr n 
+	   | ptr >= end = return n
+	   | otherwise  = do
+		case utf8DecodeChar# (unPtr ptr) of
+		  (# c, a #) -> go (Ptr a) (n+1)
+
+unPtr (Ptr a) = a
+
+utf8EncodeChar c ptr =
+  let x = ord c in
+  case () of
+    _ | x > 0 && x <= 0x007f -> do
+	  poke ptr (fromIntegral x)
+	  return (ptr `plusPtr` 1)
+	-- NB. '\0' is encoded as '\xC0\x80', not '\0'.  This is so that we
+	-- can have 0-terminated UTF-8 strings (see GHC.Base.unpackCStringUtf8).
+      | x <= 0x07ff -> do
+	  poke ptr (fromIntegral (0xC0 .|. ((x `shiftR` 6) .&. 0x1F)))
+	  pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x .&. 0x3F)))
+	  return (ptr `plusPtr` 2)
+      | x <= 0xffff -> do
+	  poke ptr (fromIntegral (0xE0 .|. (x `shiftR` 12) .&. 0x0F))
+	  pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x `shiftR` 6) .&. 0x3F))
+	  pokeElemOff ptr 2 (fromIntegral (0x80 .|. (x .&. 0x3F)))
+	  return (ptr `plusPtr` 3)
+      | otherwise -> do
+	  poke ptr (fromIntegral (0xF0 .|. (x `shiftR` 18)))
+	  pokeElemOff ptr 1 (fromIntegral (0x80 .|. ((x `shiftR` 12) .&. 0x3F)))
+	  pokeElemOff ptr 2 (fromIntegral (0x80 .|. ((x `shiftR` 6) .&. 0x3F)))
+	  pokeElemOff ptr 3 (fromIntegral (0x80 .|. (x .&. 0x3F)))
+	  return (ptr `plusPtr` 4)
+
+utf8EncodeString :: Ptr Word8 -> String -> IO ()
+utf8EncodeString ptr str = go ptr str
+  where STRICT2(go)
+	go ptr [] = return ()
+	go ptr (c:cs) = do
+	  ptr' <- utf8EncodeChar c ptr
+	  go ptr' cs
+
+utf8EncodedLength :: String -> Int
+utf8EncodedLength str = go 0 str
+  where STRICT2(go)
+	go n [] = n
+        go n (c:cs)
+	  | ord c > 0 && ord c <= 0x007f = go (n+1) cs
+	  | ord c <= 0x07ff = go (n+2) cs
+	  | ord c <= 0xffff = go (n+3) cs	
+	  | otherwise       = go (n+4) cs	
+
+-- -----------------------------------------------------------------------------
+-- The Z-encoding
+
+{-
+This is the main name-encoding and decoding function.  It encodes any
+string into a string that is acceptable as a C name.  This is the name
+by which things are known right through the compiler.
+
+The basic encoding scheme is this.  
+
+* Tuples (,,,) are coded as Z3T
+
+* Alphabetic characters (upper and lower) and digits
+	all translate to themselves; 
+	except 'Z', which translates to 'ZZ'
+	and    'z', which translates to 'zz'
+  We need both so that we can preserve the variable/tycon distinction
+
+* Most other printable characters translate to 'zx' or 'Zx' for some
+	alphabetic character x
+
+* The others translate as 'znnnU' where 'nnn' is the decimal number
+        of the character
+
+	Before		After
+	--------------------------
+	Trak		Trak
+	foo_wib		foozuwib
+	>		zg
+	>1		zg1
+	foo#		foozh
+	foo##		foozhzh
+	foo##1		foozhzh1
+	fooZ		fooZZ	
+	:+		ZCzp
+	()		Z0T	0-tuple
+	(,,,,)		Z5T	5-tuple  
+	(# #)           Z1H     unboxed 1-tuple	(note the space)
+	(#,,,,#)	Z5H	unboxed 5-tuple
+		(NB: There is no Z1T nor Z0H.)
+-}
+
+type UserString = String	-- As the user typed it
+type EncodedString = String	-- Encoded form
+
+
+zEncodeString :: UserString -> EncodedString
+zEncodeString cs = case maybe_tuple cs of
+		Just n  -> n		-- Tuples go to Z2T etc
+		Nothing -> go cs
+	  where
+		go []     = []
+		go (c:cs) = encode_ch c ++ go cs
+
+unencodedChar :: Char -> Bool	-- True for chars that don't need encoding
+unencodedChar 'Z' = False
+unencodedChar 'z' = False
+unencodedChar c   =  c >= 'a' && c <= 'z'
+	          || c >= 'A' && c <= 'Z'
+		  || c >= '0' && c <= '9'
+
+encode_ch :: Char -> EncodedString
+encode_ch c | unencodedChar c = [c]	-- Common case first
+
+-- Constructors
+encode_ch '('  = "ZL"	-- Needed for things like (,), and (->)
+encode_ch ')'  = "ZR"	-- For symmetry with (
+encode_ch '['  = "ZM"
+encode_ch ']'  = "ZN"
+encode_ch ':'  = "ZC"
+encode_ch 'Z'  = "ZZ"
+
+-- Variables
+encode_ch 'z'  = "zz"
+encode_ch '&'  = "za"
+encode_ch '|'  = "zb"
+encode_ch '^'  = "zc"
+encode_ch '$'  = "zd"
+encode_ch '='  = "ze"
+encode_ch '>'  = "zg"
+encode_ch '#'  = "zh"
+encode_ch '.'  = "zi"
+encode_ch '<'  = "zl"
+encode_ch '-'  = "zm"
+encode_ch '!'  = "zn"
+encode_ch '+'  = "zp"
+encode_ch '\'' = "zq"
+encode_ch '\\' = "zr"
+encode_ch '/'  = "zs"
+encode_ch '*'  = "zt"
+encode_ch '_'  = "zu"
+encode_ch '%'  = "zv"
+encode_ch c    = 'z' : if isDigit (head hex_str) then hex_str
+					         else '0':hex_str
+  where hex_str = showHex (ord c) "U"
+  -- ToDo: we could improve the encoding here in various ways.
+  -- eg. strings of unicode characters come out as 'z1234Uz5678U', we
+  -- could remove the 'U' in the middle (the 'z' works as a separator).
+
+zDecodeString :: EncodedString -> UserString
+zDecodeString [] = []
+zDecodeString ('Z' : d : rest) 
+  | isDigit d = decode_tuple   d rest
+  | otherwise = decode_upper   d : zDecodeString rest
+zDecodeString ('z' : d : rest)
+  | isDigit d = decode_num_esc d rest
+  | otherwise = decode_lower   d : zDecodeString rest
+zDecodeString (c   : rest) = c : zDecodeString rest
+
+decode_upper, decode_lower :: Char -> Char
+
+decode_upper 'L' = '('
+decode_upper 'R' = ')'
+decode_upper 'M' = '['
+decode_upper 'N' = ']'
+decode_upper 'C' = ':'
+decode_upper 'Z' = 'Z'
+decode_upper ch  = {-pprTrace "decode_upper" (char ch)-} ch
+	     	
+decode_lower 'z' = 'z'
+decode_lower 'a' = '&'
+decode_lower 'b' = '|'
+decode_lower 'c' = '^'
+decode_lower 'd' = '$'
+decode_lower 'e' = '='
+decode_lower 'g' = '>'
+decode_lower 'h' = '#'
+decode_lower 'i' = '.'
+decode_lower 'l' = '<'
+decode_lower 'm' = '-'
+decode_lower 'n' = '!'
+decode_lower 'p' = '+'
+decode_lower 'q' = '\''
+decode_lower 'r' = '\\'
+decode_lower 's' = '/'
+decode_lower 't' = '*'
+decode_lower 'u' = '_'
+decode_lower 'v' = '%'
+decode_lower ch  = {-pprTrace "decode_lower" (char ch)-} ch
+
+-- Characters not having a specific code are coded as z224U (in hex)
+decode_num_esc d rest
+  = go (digitToInt d) rest
+  where
+    go n (c : rest) | isHexDigit c = go (16*n + digitToInt c) rest
+    go n ('U' : rest)           = chr n : zDecodeString rest
+    go n other = error ("decode_num_esc: " ++ show n ++  ' ':other)
+
+decode_tuple :: Char -> EncodedString -> UserString
+decode_tuple d rest
+  = go (digitToInt d) rest
+  where
+	-- NB. recurse back to zDecodeString after decoding the tuple, because
+	-- the tuple might be embedded in a longer name.
+    go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
+    go 0 ('T':rest)	= "()" ++ zDecodeString rest
+    go n ('T':rest)	= '(' : replicate (n-1) ',' ++ ")" ++ zDecodeString rest
+    go 1 ('H':rest)	= "(# #)" ++ zDecodeString rest
+    go n ('H':rest)	= '(' : '#' : replicate (n-1) ',' ++ "#)" ++ zDecodeString rest
+    go n other = error ("decode_tuple: " ++ show n ++ ' ':other)
+
+{-
+Tuples are encoded as
+	Z3T or Z3H
+for 3-tuples or unboxed 3-tuples respectively.  No other encoding starts 
+	Z<digit>
+
+* "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
+  There are no unboxed 0-tuples.  
+
+* "()" is the tycon for a boxed 0-tuple.
+  There are no boxed 1-tuples.
+-}
+
+maybe_tuple :: UserString -> Maybe EncodedString
+
+maybe_tuple "(# #)" = Just("Z1H")
+maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
+				 (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
+				 other		     -> Nothing
+maybe_tuple "()" = Just("Z0T")
+maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
+				 (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
+				 other	       -> Nothing
+maybe_tuple other    	     = Nothing
+
+count_commas :: Int -> String -> (Int, String)
+count_commas n (',' : cs) = count_commas (n+1) cs
+count_commas n cs	  = (n,cs)
diff --git a/ghc/compiler/utils/FastString.lhs b/ghc/compiler/utils/FastString.lhs
index 52512d3e20..2558c5630a 100644
--- a/ghc/compiler/utils/FastString.lhs
+++ b/ghc/compiler/utils/FastString.lhs
@@ -1,8 +1,10 @@
 %
-% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
+% (c) The University of Glasgow, 1997-2006
 %
-\section{Fast strings}
+\begin{code}
+{-# OPTIONS -fglasgow-exts -O #-}
 
+{-
 FastString:	A compact, hash-consed, representation of character strings.
 		Comparison is O(1), and you can get a Unique from them.
 		Generated by the FSLIT macro
@@ -15,40 +17,46 @@ LitString:	Just a wrapper for the Addr# of a C string (Ptr CChar).
 		Turn into SDoc with Outputable.ptext
 
 Use LitString unless you want the facilities of FastString
-
-\begin{code}
+-}
 module FastString
        (
+	-- * FastStrings
 	FastString(..),     -- not abstract, for now.
 
-        mkFastString,       -- :: String -> FastString
-        mkFastStringNarrow, -- :: String -> FastString
-        mkFastSubString,    -- :: Addr -> Int -> Int -> FastString
-
-	mkFastString#,      -- :: Addr# -> FastString
-        mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
-
-        mkFastStringInt,    -- :: [Int] -> FastString
-
-        uniqueOfFS,	    -- :: FastString -> Int#
-	lengthFS,	    -- :: FastString -> Int
-	nullFastString,     -- :: FastString -> Bool
+	-- ** Construction
+        mkFastString,
+	mkFastStringBytes,
+	mkFastStringForeignPtr,
+	mkFastString#,
+	mkZFastString,
+	mkZFastStringBytes,
 
+	-- ** Deconstruction
 	unpackFS,	    -- :: FastString -> String
-	unpackIntFS,	    -- :: FastString -> [Int]
-	appendFS,	    -- :: FastString -> FastString -> FastString
-        headFS,		    -- :: FastString -> Char
-        headIntFS,	    -- :: FastString -> Int
-        tailFS,		    -- :: FastString -> FastString
-	concatFS,	    -- :: [FastString] -> FastString
-        consFS,             -- :: Char -> FastString -> FastString
-	indexFS,	    -- :: FastString -> Int -> Char
-	nilFS,		    -- :: FastString
-
-        hPutFS,		    -- :: Handle -> FastString -> IO ()
-
+	bytesFS,	    -- :: FastString -> [Word8]
+
+	-- ** Encoding
+	isZEncoded,
+	zEncodeFS,
+
+	-- ** Operations
+        uniqueOfFS,
+	lengthFS,
+	nullFS,
+	appendFS,
+        headFS,
+        tailFS,
+	concatFS,
+        consFS,
+	nilFS,
+
+	-- ** Outputing
+        hPutFS,
+
+	-- * LitStrings
 	LitString, 
-	mkLitString#	    -- :: Addr# -> LitString
+	mkLitString#,
+	strLength
        ) where
 
 -- This #define suppresses the "import FastString" that
@@ -56,64 +64,49 @@ module FastString
 #define COMPILING_FAST_STRING
 #include "HsVersions.h"
 
-#if __GLASGOW_HASKELL__ < 503
-import PrelIOBase	( IO(..) )
-#else
-import GHC.IOBase	( IO(..) )
-#endif
+import Encoding
 
-import PrimPacked
+import Foreign
+import Foreign.C
 import GLAEXTS
 import UNSAFE_IO	( unsafePerformIO )
 import MONAD_ST		( stToIO )
 import DATA_IOREF	( IORef, newIORef, readIORef, writeIORef )
+import System.IO	( hPutBuf )
 
-#if __GLASGOW_HASKELL__ < 503
-import PrelArr		( STArray(..), newSTArray )
-#else
 import GHC.Arr		( STArray(..), newSTArray )
-#endif
-
-#if __GLASGOW_HASKELL__ >= 504
-import GHC.IOBase
-import GHC.Handle
-import Foreign.C
-#else
-import IOExts		( hPutBufBAFull )
-#endif
+import GHC.IOBase	( IO(..) )
 
 import IO
-import Char             ( chr, ord )
 
 #define hASH_TBL_SIZE  4091
-\end{code} 
 
-@FastString@s are packed representations of strings
-with a unique id for fast comparisons. The unique id
-is assigned when creating the @FastString@, using
-a hash table to map from the character string representation
-to the unique ID.
 
-\begin{code}
-data FastString
-  = FastString   -- packed repr. on the heap.
-      Int#       -- unique id
-		 --  0 => string literal, comparison
-		 --  will
-      Int#       -- length
-      ByteArray# -- stuff
-
-  | UnicodeStr   -- if contains characters outside '\1'..'\xFF'
-      Int#       -- unique id
-      [Int]      -- character numbers
+{-|
+A 'FastString' is an array of bytes, hashed to support fast O(1)
+comparison.  It is also associated with a character encoding, so that
+we know how to convert a 'FastString' to the local encoding, or to the
+Z-encoding used by the compiler internally.
 
-instance Eq FastString where
-	-- shortcut for real FastStrings
-  (FastString u1 _ _) == (FastString u2 _ _) = u1 ==# u2
-  a == b = case cmpFS a b of { LT -> False; EQ -> True;  GT -> False }
+'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
+-}
+
+data FastString = FastString {
+      uniq    :: {-# UNPACK #-} !Int,       -- unique id
+      n_bytes :: {-# UNPACK #-} !Int,       -- number of bytes
+      n_chars :: {-# UNPACK #-} !Int,	  -- number of chars
+      buf     :: {-# UNPACK #-} !(ForeignPtr Word8),
+      enc     :: FSEncoding
+  }
+
+data FSEncoding
+  = ZEncoded
+ 	-- including strings that don't need any encoding
+  | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
+	-- A UTF-8 string with a memoized Z-encoding
 
-  (FastString u1 _ _) /= (FastString u2 _ _) = u1 /=# u2
-  a /= b = case cmpFS a b of { LT -> True;  EQ -> False; GT -> True  }
+instance Eq FastString where
+  f1 == f2  =  uniq f1 == uniq f2
 
 instance Ord FastString where
 	-- Compares lexicographically, not by unique
@@ -130,360 +123,311 @@ instance Ord FastString where
 instance Show FastString where
    show fs = show (unpackFS fs)
 
-lengthFS :: FastString -> Int
-lengthFS (FastString _ l# _) = I# l#
-lengthFS (UnicodeStr _ s) = length s
-
-nullFastString :: FastString -> Bool
-nullFastString (FastString _ l# _) = l# ==# 0#
-nullFastString (UnicodeStr _ []) = True
-nullFastString (UnicodeStr _ (_:_)) = False
-
-unpackFS :: FastString -> String
-unpackFS (FastString _ l# ba#) = unpackNBytesBA (BA ba#) (I# l#)
-unpackFS (UnicodeStr _ s) = map chr s
-
-unpackIntFS :: FastString -> [Int]
-unpackIntFS (UnicodeStr _ s) = s
-unpackIntFS fs = map ord (unpackFS fs)
-
-appendFS :: FastString -> FastString -> FastString
-appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
-
-concatFS :: [FastString] -> FastString
-concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
-
-headFS :: FastString -> Char
-headFS (FastString _ l# ba#) = 
- if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
-headFS (UnicodeStr _ (c:_)) = chr c
-headFS (UnicodeStr _ []) = error ("headFS: empty FS")
-
-headIntFS :: FastString -> Int
-headIntFS (UnicodeStr _ (c:_)) = c
-headIntFS fs = ord (headFS fs)
-
-indexFS :: FastString -> Int -> Char
-indexFS f i@(I# i#) =
- case f of
-   FastString _ l# ba#
-     | l# ># 0# && l# ># i#  -> C# (indexCharArray# ba# i#)
-     | otherwise	     -> error (msg (I# l#))
-   UnicodeStr _ s	     -> chr (s!!i)
- where
-  msg l =  "indexFS: out of range: " ++ show (l,i)
-
-tailFS :: FastString -> FastString
-tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
-tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
-
-consFS :: Char -> FastString -> FastString
-consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
+cmpFS :: FastString -> FastString -> Ordering
+cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
+  if u1 == u2 then EQ else
+  let l = if l1 <= l2 then l1 else l2 in
+  inlinePerformIO $
+    withForeignPtr buf1 $ \p1 ->
+    withForeignPtr buf2 $ \p2 -> do
+      res <- memcmp p1 p2 l
+      case () of
+       _ | res <  0  -> return LT
+	 | res == 0  -> if l1 == l2 then return EQ 
+				    else if l1 < l2 then return LT
+						    else return GT
+ 	 | otherwise -> return GT
 
-uniqueOfFS :: FastString -> Int#
-uniqueOfFS (FastString u# _ _) = u#
-uniqueOfFS (UnicodeStr u# _) = u#
+#ifndef __HADDOCK__
+foreign import ccall unsafe "ghc_memcmp" 
+  memcmp :: Ptr a -> Ptr b -> Int -> IO Int
+#endif
 
-nilFS = mkFastString ""
-\end{code}
+-- -----------------------------------------------------------------------------
+-- Construction
 
+{-
 Internally, the compiler will maintain a fast string symbol
 table, providing sharing and fast comparison. Creation of
 new @FastString@s then covertly does a lookup, re-using the
 @FastString@ if there was a hit.
+-}
 
-Caution: mkFastStringUnicode assumes that if the string is in the
-table, it sits under the UnicodeStr constructor. Other mkFastString
-variants analogously assume the FastString constructor.
-
-\begin{code}
 data FastStringTable = 
  FastStringTable
-    Int#
+    {-# UNPACK #-} !Int
     (MutableArray# RealWorld [FastString])
 
-type FastStringTableVar = IORef FastStringTable
-
-string_table :: FastStringTableVar
+string_table :: IORef FastStringTable
 string_table = 
- unsafePerformIO (
-   stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
-	>>= \ (STArray _ _ arr#) ->
-   newIORef (FastStringTable 0# arr#))
-
-lookupTbl :: FastStringTable -> Int# -> IO [FastString]
-lookupTbl (FastStringTable _ arr#) i# =
-  IO ( \ s# ->
-  readArray# arr# i# s#)
-
-updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
-updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
- IO (\ s# -> case writeArray# arr# i# ls s# of { s2# -> 
-	(# s2#, () #) }) >>
- writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
+ unsafePerformIO $ do
+   (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
+   newIORef (FastStringTable 0 arr#)
+
+lookupTbl :: FastStringTable -> Int -> IO [FastString]
+lookupTbl (FastStringTable _ arr#) (I# i#) =
+  IO $ \ s# -> readArray# arr# i# s#
+
+updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
+updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
+  (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
+  writeIORef fs_table_var (FastStringTable (uid+1) arr#)
 
 mkFastString# :: Addr# -> FastString
-mkFastString# a# =
- case strLength (Ptr a#) of { (I# len#) -> mkFastStringLen# a# len# }
+mkFastString# a# = mkFastStringBytes ptr (strLength ptr)
+  where ptr = Ptr a#
 
-mkFastStringLen# :: Addr# -> Int# -> FastString
-mkFastStringLen# a# len# =
- unsafePerformIO  (
-  readIORef string_table	>>= \ ft@(FastStringTable uid# tbl#) ->
+mkFastStringBytes :: Ptr Word8 -> Int -> FastString
+mkFastStringBytes ptr len = unsafePerformIO $ do
+  ft@(FastStringTable uid tbl#) <- readIORef string_table
   let
-   h = hashStr a# len#
-  in
---  _trace ("hashed: "++show (I# h)) $
-  lookupTbl ft h	>>= \ lookup_result ->
+   h = hashStr ptr len
+   add_it ls = do
+	fs <- copyNewFastString uid ptr len
+	updTbl string_table ft h (fs:ls)
+	{- _trace ("new: " ++ show f_str)   $ -}
+	return fs
+  --
+  lookup_result <- lookupTbl ft h
   case lookup_result of
-    [] -> 
-       -- no match, add it to table by copying out the
-       -- the string into a ByteArray
-       -- _trace "empty bucket" $
-       case copyPrefixStr a# (I# len#) of
-	 BA barr# ->  
-	   let f_str = FastString uid# len# barr# in
-           updTbl string_table ft h [f_str] >>
-           ({- _trace ("new: " ++ show f_str)   $ -} return f_str)
-    ls -> 
-       -- non-empty `bucket', scan the list looking
-       -- entry with same length and compare byte by byte.
-       -- _trace ("non-empty bucket"++show ls) $
-       case bucket_match ls len# a# of
-	 Nothing -> 
-           case copyPrefixStr a# (I# len#) of
-	     BA barr# ->  
-              let f_str = FastString uid# len# barr# in
-              updTbl string_table ft h (f_str:ls) >>
-	      ( {- _trace ("new: " ++ show f_str)  $ -} return f_str)
-	 Just v  -> {- _trace ("re-use: "++show v) $ -} return v)
-  where
-   bucket_match [] _ _ = Nothing
-   bucket_match (v@(FastString _ l# ba#):ls) len# a# =
-      if len# ==# l# && eqStrPrefix a# ba# l# then
-	 Just v
-      else
-	 bucket_match ls len# a#
-   bucket_match (UnicodeStr _ _ : ls) len# a# =
-      bucket_match ls len# a#
-
-mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
-mkFastSubStringBA# barr# start# len# =
- unsafePerformIO  (
-  readIORef string_table	>>= \ ft@(FastStringTable uid# tbl#) ->
+    [] -> add_it []
+    ls -> do
+       b <- bucket_match ls len ptr
+       case b of
+	 Nothing -> add_it ls
+	 Just v  -> {- _trace ("re-use: "++show v) $ -} return v
+
+mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
+mkZFastStringBytes ptr len = unsafePerformIO $ do
+  ft@(FastStringTable uid tbl#) <- readIORef string_table
   let
-   h = hashSubStrBA barr# start# len#
-  in
---  _trace ("hashed(b): "++show (I# h)) $
-  lookupTbl ft h		>>= \ lookup_result ->
+   h = hashStr ptr len
+   add_it ls = do
+	fs <- copyNewZFastString uid ptr len
+	updTbl string_table ft h (fs:ls)
+	{- _trace ("new: " ++ show f_str)   $ -}
+	return fs
+  --
+  lookup_result <- lookupTbl ft h
   case lookup_result of
-    [] -> 
-       -- no match, add it to table by copying out the
-       -- the string into a ByteArray
-       -- _trace "empty bucket(b)" $
-       case copySubStrBA (BA barr#) (I# start#) (I# len#) of
-         BA ba# ->  
-          let f_str = FastString uid# len# ba# in
-          updTbl string_table ft h [f_str]     >>
-          -- _trace ("new(b): " ++ show f_str)   $
-	  return f_str
-    ls ->
-       -- non-empty `bucket', scan the list looking
-       -- entry with same length and compare byte by byte. 
-       -- _trace ("non-empty bucket(b)"++show ls) $
-       case bucket_match ls start# len# barr# of
-	 Nothing ->
-          case copySubStrBA (BA barr#) (I# start#) (I# len#) of
-            BA ba# ->  
-              let f_str = FastString uid# len# ba# in
-              updTbl string_table ft h (f_str:ls) >>
-	      -- _trace ("new(b): " ++ show f_str)   $
-	      return f_str
-	 Just v  -> 
-              -- _trace ("re-use(b): "++show v) $
-	      return v
-  )
- where
-   bucket_match [] _ _ _ = Nothing
-   bucket_match (v:ls) start# len# ba# =
-    case v of
-     FastString _ l# barr# ->
-      if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
-	 Just v
-      else
-	 bucket_match ls start# len# ba#
-     UnicodeStr _ _ -> bucket_match ls start# len# ba#
-
-mkFastStringUnicode :: [Int] -> FastString
-mkFastStringUnicode s =
- unsafePerformIO  (
-  readIORef string_table	>>= \ ft@(FastStringTable uid# tbl#) ->
+    [] -> add_it []
+    ls -> do
+       b <- bucket_match ls len ptr
+       case b of
+	 Nothing -> add_it ls
+	 Just v  -> {- _trace ("re-use: "++show v) $ -} return v
+
+-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
+-- between this and 'mkFastStringBytes' is that we don't have to copy
+-- the bytes if the string is new to the table.
+mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
+mkFastStringForeignPtr ptr fp len = do
+  ft@(FastStringTable uid tbl#) <- readIORef string_table
+--  _trace ("hashed: "++show (I# h)) $
   let
-   h = hashUnicode s 0#
-  in
---  _trace ("hashed(b): "++show (I# h)) $
-  lookupTbl ft h		>>= \ lookup_result ->
+    h = hashStr ptr len
+    add_it ls = do
+	fs <- mkNewFastString uid ptr fp len
+	updTbl string_table ft h (fs:ls)
+	{- _trace ("new: " ++ show f_str)   $ -}
+	return fs
+  --
+  lookup_result <- lookupTbl ft h
+  case lookup_result of
+    [] -> add_it []
+    ls -> do
+       b <- bucket_match ls len ptr
+       case b of
+	 Nothing -> add_it ls
+	 Just v  -> {- _trace ("re-use: "++show v) $ -} return v
+
+mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
+mkZFastStringForeignPtr ptr fp len = do
+  ft@(FastStringTable uid tbl#) <- readIORef string_table
+--  _trace ("hashed: "++show (I# h)) $
+  let
+    h = hashStr ptr len
+    add_it ls = do
+	fs <- mkNewZFastString uid ptr fp len
+	updTbl string_table ft h (fs:ls)
+	{- _trace ("new: " ++ show f_str)   $ -}
+	return fs
+  --
+  lookup_result <- lookupTbl ft h
   case lookup_result of
-    [] -> 
-       -- no match, add it to table by copying out the
-       -- the string into a [Int]
-          let f_str = UnicodeStr uid# s in
-          updTbl string_table ft h [f_str]     >>
-          -- _trace ("new(b): " ++ show f_str)   $
-	  return f_str
-    ls -> 
-       -- non-empty `bucket', scan the list looking
-       -- entry with same length and compare byte by byte. 
-       -- _trace ("non-empty bucket(b)"++show ls) $
-       case bucket_match ls of
-	 Nothing -> 
-              let f_str = UnicodeStr uid# s in
-              updTbl string_table ft h (f_str:ls) >>
-	      -- _trace ("new(b): " ++ show f_str)   $
-	      return f_str
-	 Just v  -> 
-              -- _trace ("re-use(b): "++show v) $
-	      return v
-  )
- where
-   bucket_match [] = Nothing
-   bucket_match (v@(UnicodeStr _ s'):ls) =
-       if s' == s then Just v else bucket_match ls
-   bucket_match (FastString _ _ _ : ls) = bucket_match ls
-
-mkFastStringNarrow :: String -> FastString
-mkFastStringNarrow str =
- case packString str of { (I# len#, BA frozen#) -> 
-    mkFastSubStringBA# frozen# 0# len#
- }
- {- 0-indexed array, len# == index to one beyond end of string,
-    i.e., (0,1) => empty string.    -}
+    [] -> add_it []
+    ls -> do
+       b <- bucket_match ls len ptr
+       case b of
+	 Nothing -> add_it ls
+	 Just v  -> {- _trace ("re-use: "++show v) $ -} return v
 
-mkFastString :: String -> FastString
-mkFastString str = if all good str
-    then mkFastStringNarrow str
-    else mkFastStringUnicode (map ord str)
-    where
-    good c = c >= '\1' && c <= '\xFF'
-
-mkFastStringInt :: [Int] -> FastString
-mkFastStringInt str = if all good str
-    then mkFastStringNarrow (map chr str)
-    else mkFastStringUnicode str
-    where
-    good c = c >= 1 && c <= 0xFF
-
-mkFastSubString :: Addr# -> Int -> Int -> FastString
-mkFastSubString a# (I# start#) (I# len#) =
- mkFastStringLen# (a# `plusAddr#` start#) len#
-\end{code}
 
-\begin{code}
-hashStr  :: Addr# -> Int# -> Int#
+-- | Creates a UTF-8 encoded 'FastString' from a 'String'
+mkFastString :: String -> FastString
+mkFastString str = 
+  inlinePerformIO $ do
+    let l = utf8EncodedLength str
+    buf <- mallocForeignPtrBytes l
+    withForeignPtr buf $ \ptr -> do
+      utf8EncodeString ptr str
+      mkFastStringForeignPtr ptr buf l 
+
+
+-- | Creates a Z-encoded 'FastString' from a 'String'
+mkZFastString :: String -> FastString
+mkZFastString str = 
+  inlinePerformIO $ do
+    let l = Prelude.length str
+    buf <- mallocForeignPtrBytes l
+    withForeignPtr buf $ \ptr -> do
+      pokeCAString (castPtr ptr) str
+      mkZFastStringForeignPtr ptr buf l 
+
+bucket_match [] _ _ = return Nothing
+bucket_match (v@(FastString _ l _ buf _):ls) len ptr
+      | len == l  =  do
+	 b <- cmpStringPrefix ptr buf len
+	 if b then return (Just v)
+	      else bucket_match ls len ptr
+      | otherwise = 
+	 bucket_match ls len ptr
+
+mkNewFastString uid ptr fp len = do
+  ref <- newIORef Nothing
+  n_chars <- countUTF8Chars ptr len
+  return (FastString uid len n_chars fp (UTF8Encoded ref))
+
+mkNewZFastString uid ptr fp len = do
+  return (FastString uid len len fp ZEncoded)
+
+
+copyNewFastString uid ptr len = do
+  fp <- copyBytesToForeignPtr ptr len
+  ref <- newIORef Nothing
+  n_chars <- countUTF8Chars ptr len
+  return (FastString uid len n_chars fp (UTF8Encoded ref))
+
+copyNewZFastString uid ptr len = do
+  fp <- copyBytesToForeignPtr ptr len
+  return (FastString uid len len fp ZEncoded)
+
+
+copyBytesToForeignPtr ptr len = do
+  fp <- mallocForeignPtrBytes len
+  withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
+  return fp
+
+cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
+cmpStringPrefix ptr fp len =
+  withForeignPtr fp $ \ptr' -> do
+    r <- memcmp ptr ptr' len
+    return (r == 0)
+
+
+hashStr  :: Ptr Word8 -> Int -> Int
  -- use the Addr to produce a hash value between 0 & m (inclusive)
-hashStr a# len# = loop 0# 0#
+hashStr (Ptr a#) (I# len#) = loop 0# 0#
    where 
-    loop h n | n ==# len# = h
+    loop h n | n ==# len# = I# h
 	     | otherwise  = loop h2 (n +# 1#)
 	  where c = ord# (indexCharOffAddr# a# n)
 		h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
 
-hashSubStrBA  :: ByteArray# -> Int# -> Int# -> Int#
- -- use the byte array to produce a hash value between 0 & m (inclusive)
-hashSubStrBA ba# start# len# = loop 0# 0#
-   where 
-    loop h n | n ==# len# = h
-	     | otherwise  = loop h2 (n +# 1#)
-	  where c = ord# (indexCharArray# ba# (start# +# n))
-		h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
+-- -----------------------------------------------------------------------------
+-- Operations
 
-hashUnicode :: [Int] -> Int# -> Int#
-hashUnicode [] h = h
-hashUnicode (I# c : cs) h = hashUnicode cs ((c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#)
-\end{code}
+-- | Returns the length of the 'FastString' in characters
+lengthFS :: FastString -> Int
+lengthFS f = n_chars f
 
-\begin{code}
-cmpFS :: FastString -> FastString -> Ordering
-cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
-    else compare s1 s2
-cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
-cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
-cmpFS (FastString u1# l1# b1#) (FastString u2# l2# b2#) =
-  if u1# ==# u2# then EQ else
-  let l# = if l1# <=# l2# then l1# else l2# in
-  unsafePerformIO (
-    memcmp b1# b2# l# >>= \ (I# res) ->
-    return (
-    if      res <#  0# then LT
-    else if res ==# 0# then 
-	if l1# ==# l2# then EQ
-	else if l1# <# l2# then LT else GT
-    else		    GT
-    ))
+-- | Returns 'True' if the 'FastString' is Z-encoded
+isZEncoded :: FastString -> Bool
+isZEncoded fs | ZEncoded <- enc fs = True
+		| otherwise          = False
 
-#ifndef __HADDOCK__
-foreign import ccall unsafe "ghc_memcmp" 
-  memcmp :: ByteArray# -> ByteArray# -> Int# -> IO Int
-#endif
+-- | Returns 'True' if the 'FastString' is empty
+nullFS :: FastString -> Bool
+nullFS f  =  n_bytes f == 0
+
+-- | unpacks and decodes the FastString
+unpackFS :: FastString -> String
+unpackFS (FastString _ n_bytes _ buf enc) = 
+  inlinePerformIO $ withForeignPtr buf $ \ptr ->
+    case enc of
+	ZEncoded      -> peekCAStringLen (castPtr ptr,n_bytes)
+	UTF8Encoded _ -> utf8DecodeString ptr n_bytes
+
+bytesFS :: FastString -> [Word8]
+bytesFS (FastString _ n_bytes _ buf enc) = 
+  inlinePerformIO $ withForeignPtr buf $ \ptr ->
+    peekArray n_bytes ptr
+
+-- | returns a Z-encoded version of a 'FastString'.  This might be the
+-- original, if it was already Z-encoded.  The first time this
+-- function is applied to a particular 'FastString', the results are
+-- memoized.
+--
+zEncodeFS :: FastString -> FastString
+zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
+  case enc of
+    ZEncoded -> fs
+    UTF8Encoded ref ->
+      inlinePerformIO $ do
+        m <- readIORef ref
+        case m of
+	  Just fs -> return fs
+	  Nothing -> do
+            let efs = mkZFastString (zEncodeString (unpackFS fs))
+	    writeIORef ref (Just efs)
+	    return efs
+
+appendFS :: FastString -> FastString -> FastString
+appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
+
+concatFS :: [FastString] -> FastString
+concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
+
+headFS :: FastString -> Char
+headFS (FastString _ n_bytes _ buf enc) = 
+  inlinePerformIO $ withForeignPtr buf $ \ptr -> do
+    case enc of
+      ZEncoded -> do 
+	 w <- peek (castPtr ptr)
+	 return (castCCharToChar w)
+      UTF8Encoded _ -> 
+	 return (fst (utf8DecodeChar ptr))
+
+tailFS :: FastString -> FastString
+tailFS (FastString _ n_bytes _ buf enc) = 
+  inlinePerformIO $ withForeignPtr buf $ \ptr -> do
+    case enc of
+      ZEncoded -> do
+	return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
+      UTF8Encoded _ -> do
+	 let (_,ptr') = utf8DecodeChar ptr
+	 let off = ptr' `minusPtr` ptr
+	 return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
+
+consFS :: Char -> FastString -> FastString
+consFS c fs = mkFastString (c : unpackFS fs)
+
+uniqueOfFS :: FastString -> Int#
+uniqueOfFS (FastString (I# u#) _ _ _ _) = u#
+
+nilFS = mkFastString ""
 
 -- -----------------------------------------------------------------------------
 -- Outputting 'FastString's
 
-#if __GLASGOW_HASKELL__ >= 504
-
--- this is our own version of hPutBuf for FastStrings, because in
--- 5.04+ we don't have mutable byte arrays and therefore hPutBufBA.
--- The closest is hPutArray in Data.Array.IO, but that does some extra
--- range checks that we want to avoid here.
-
-foreign import ccall unsafe "__hscore_memcpy_dst_off"
-   memcpy_baoff_ba :: RawBuffer -> Int -> RawBuffer -> CSize -> IO (Ptr ())
-
-hPutFS handle (FastString _ l# ba#)
-  | l# ==# 0#  = return ()
-  | otherwise
-   = do wantWritableHandle "hPutFS" handle $ 
-          \ handle_@Handle__{ haFD=fd, haBuffer=ref, haIsStream=stream } -> do
-
-          old_buf@Buffer{ bufBuf=old_raw, bufRPtr=r, bufWPtr=w, bufSize=size }
-	    <- readIORef ref
-
-	  let count = I# l#
-	      raw = unsafeCoerce# ba# :: MutableByteArray# RealWorld
-
-          -- enough room in handle buffer?
-          if (size - w > count)
-		-- There's enough room in the buffer:
-		-- just copy the data in and update bufWPtr.
-	    then do memcpy_baoff_ba old_raw w raw (fromIntegral count)
-		    writeIORef ref old_buf{ bufWPtr = w + count }
-		    return ()
-
-		-- else, we have to flush
-	    else do flushed_buf <- flushWriteBuffer fd stream old_buf
-		    writeIORef ref flushed_buf
-		    let this_buf = 
-			    Buffer{ bufBuf=raw, bufState=WriteBuffer, 
-				    bufRPtr=0, bufWPtr=count, bufSize=count }
-		    flushWriteBuffer fd stream this_buf
-		    return ()
-
-#else
-
-hPutFS :: Handle -> FastString -> IO ()
-hPutFS handle (FastString _ l# ba#)
-  | l# ==# 0#  = return ()
-  | otherwise  = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
-                    hPutBufBAFull  handle mba (I# l#)
- where
-  bot = error "hPutFS.ba"
-
-#endif
+-- |Outputs a 'FastString' with /no decoding at all/, that is, you
+-- get the actual bytes in the 'FastString' written to the 'Handle'.
+hPutFS handle (FastString _ len _ fp _)
+  | len == 0  = return ()
+  | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
 
--- ONLY here for debugging the NCG (so -ddump-stix works for string
--- literals); no idea if this is really necessary.  JRS, 010131
-hPutFS handle (UnicodeStr _ is) 
-  = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")
+-- ToDo: we'll probably want an hPutFSLocal, or something, to output
+-- in the current locale's encoding (for error messages and suchlike).
 
 -- -----------------------------------------------------------------------------
 -- LitStrings, here for convenience only.
@@ -492,4 +436,24 @@ type LitString = Ptr ()
 
 mkLitString# :: Addr# -> LitString
 mkLitString# a# = Ptr a#
+
+foreign import ccall unsafe "ghc_strlen" 
+  strLength :: Ptr () -> Int
+
+-- -----------------------------------------------------------------------------
+-- under the carpet
+
+-- Just like unsafePerformIO, but we inline it.
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #)   -> r
+
+pokeCAString :: Ptr CChar -> String -> IO ()
+pokeCAString ptr str =
+  let
+	go [] n     = pokeElemOff ptr n 0
+    	go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
+  in
+  go str 0
+
 \end{code}
diff --git a/ghc/compiler/utils/FastTypes.lhs b/ghc/compiler/utils/FastTypes.lhs
index 9f9d9038f1..bb92c8c02f 100644
--- a/ghc/compiler/utils/FastTypes.lhs
+++ b/ghc/compiler/utils/FastTypes.lhs
@@ -9,7 +9,7 @@ module FastTypes (
     (+#), (-#), (*#), quotFastInt, negateFastInt,
     (==#), (<#), (<=#), (>=#), (>#),
 
-    FastBool, fastBool, isFastTrue, fastOr
+    FastBool, fastBool, isFastTrue, fastOr, fastAnd
   ) where
 
 #include "HsVersions.h"
diff --git a/ghc/compiler/utils/Pretty.lhs b/ghc/compiler/utils/Pretty.lhs
index 916755e902..ec8f1e75ad 100644
--- a/ghc/compiler/utils/Pretty.lhs
+++ b/ghc/compiler/utils/Pretty.lhs
@@ -179,26 +179,16 @@ module Pretty (
 
 import BufWrite
 import FastString
-import PrimPacked 	( strLength )
 
 import GLAEXTS
 
 import Numeric (fromRat)
 import IO
 
-#if __GLASGOW_HASKELL__ < 503
-import IOExts		( hPutBufFull )
-#else
 import System.IO	( hPutBuf )
-#endif
 
-#if __GLASGOW_HASKELL__ < 503
-import PrelBase		( unpackCString# )
-#else
 import GHC.Base		( unpackCString# )
-#endif
-
-import PrimPacked	( Ptr(..) )
+import GHC.Ptr		( Ptr(..) )
 
 -- Don't import Util( assertPanic ) because it makes a loop in the module structure
 
diff --git a/ghc/compiler/utils/PrimPacked.lhs b/ghc/compiler/utils/PrimPacked.lhs
deleted file mode 100644
index f2d034dcee..0000000000
--- a/ghc/compiler/utils/PrimPacked.lhs
+++ /dev/null
@@ -1,265 +0,0 @@
-%
-% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
-%
-\section{Basic ops on packed representations}
-
-Some basic operations for working on packed representations of series
-of bytes (character strings). Used by the interface lexer input
-subsystem, mostly.
-
-\begin{code}
-{-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
-
-module PrimPacked (
-	Ptr(..), nullPtr, plusAddr#,
-	BA(..),
-	packString, 	   -- :: String -> (Int, BA)
-	unpackNBytesBA,    -- :: BA -> Int -> [Char]
-        strLength,	   -- :: Ptr CChar -> Int
-        copyPrefixStr,	   -- :: Addr# -> Int -> BA
-        copySubStrBA,	   -- :: BA -> Int -> Int -> BA
-        eqStrPrefix,	   -- :: Addr# -> ByteArray# -> Int# -> Bool
-        eqStrPrefixBA,	   -- :: ByteArray# -> ByteArray# -> Int# -> Int# -> Bool
- ) where
-
--- This #define suppresses the "import FastString" that
--- HsVersions otherwise produces
-#define COMPILING_FAST_STRING
-#include "HsVersions.h"
-
-import GLAEXTS
-import UNSAFE_IO	( unsafePerformIO )
-
-import MONAD_ST
-import Foreign
-
-#if __GLASGOW_HASKELL__ < 503
-import PrelST
-#else
-import GHC.ST
-#endif
-
-#if __GLASGOW_HASKELL__ >= 504
-import GHC.Ptr	( Ptr(..) )
-#elif __GLASGOW_HASKELL__ >= 500
-import Ptr	( Ptr(..) )
-#endif
-
-#if __GLASGOW_HASKELL__ < 504
-import PrelIOBase	( IO(..) )
-#else
-import GHC.IOBase	( IO(..) )
-#endif
-\end{code}
-
-Compatibility: 4.08 didn't have the Ptr type.
-
-\begin{code}
-#if __GLASGOW_HASKELL__ <= 408
-data Ptr a = Ptr Addr# deriving (Eq, Ord)
-
-nullPtr :: Ptr a
-nullPtr = Ptr (int2Addr# 0#)
-#endif
-
-#if __GLASGOW_HASKELL__ <= 500
--- plusAddr# is a primop in GHC > 5.00
-plusAddr# :: Addr# -> Int# -> Addr#
-plusAddr# a# i# = int2Addr# (addr2Int# a# +# i#)
-#endif
-\end{code}
-
-Wrapper types for bytearrays
-
-\begin{code}
-data BA    = BA  ByteArray#
-data MBA s = MBA (MutableByteArray# s)
-\end{code}
-
-\begin{code}
-packString :: String -> (Int, BA)
-packString str = (l, arr)
- where
-  l@(I# length#) = length str
-
-  arr = runST (do
-    ch_array <- new_ps_array length#
-      -- fill in packed string from "str"
-    fill_in ch_array 0# str
-      -- freeze the puppy:
-    freeze_ps_array ch_array length#
-   )
-
-  fill_in :: MBA s -> Int# -> [Char] -> ST s ()
-  fill_in arr_in# idx [] =
-   return ()
-  fill_in arr_in# idx (C# c : cs) =
-   write_ps_array arr_in# idx c	 >>
-   fill_in arr_in# (idx +# 1#) cs
-\end{code}
-
-Unpacking a string
-
-\begin{code}
-unpackNBytesBA :: BA -> Int -> [Char]
-unpackNBytesBA (BA bytes) (I# len)
- = unpack 0#
- where
-    unpack nh
-      | nh >=# len  = []
-      | otherwise   = C# ch : unpack (nh +# 1#)
-      where
-	ch = indexCharArray# bytes nh
-\end{code}
-
-Copying a char string prefix into a byte array.
-
-\begin{code}
-copyPrefixStr :: Addr# -> Int -> BA
-copyPrefixStr a# len@(I# length#) = copy' length#
- where
-   copy' length# = runST (do
-     {- allocate an array that will hold the string
-     -}
-     ch_array <- new_ps_array length#
-     {- Revert back to Haskell-only solution for the moment.
-     	_ccall_ memcpy ch_array (A# a) len        >>=  \ () ->
-     	write_ps_array ch_array length# (chr# 0#) >>
-     -}
-     -- fill in packed string from "addr"
-     fill_in ch_array 0#
-     -- freeze the puppy:
-     freeze_ps_array ch_array length#
-    )
-
-   fill_in :: MBA s -> Int# -> ST s ()
-   fill_in arr_in# idx
-      | idx ==# length#
-      = return ()
-      | otherwise
-      = case (indexCharOffAddr# a# idx) of { ch ->
-	write_ps_array arr_in# idx ch >>
-	fill_in arr_in# (idx +# 1#) }
-\end{code}
-
-Copying out a substring, assume a 0-indexed string:
-(and positive lengths, thank you).
-
-\begin{code}
-#ifdef UNUSED
-copySubStr :: Addr# -> Int -> Int -> BA
-copySubStr a# (I# start#) length =
-  copyPrefixStr (a# `plusAddr#` start#)  length
-#endif
-
-copySubStrBA :: BA -> Int -> Int -> BA
-copySubStrBA (BA barr#) (I# start#) len@(I# length#) = ba
- where
-  ba = runST (do
-     -- allocate an array that will hold the string
-    ch_array <- new_ps_array length#
-     -- fill in packed string from "addr"
-    fill_in ch_array 0#
-     -- freeze the puppy:
-    freeze_ps_array ch_array length#
-   )
-
-  fill_in :: MBA s -> Int# -> ST s ()
-  fill_in arr_in# idx
-      | idx ==# length#
-      = return ()
-      | otherwise
-      = case (indexCharArray# barr# (start# +# idx)) of { ch ->
-	write_ps_array arr_in# idx ch >>
-	fill_in arr_in# (idx +# 1#) }
-\end{code}
-
-(Very :-) ``Specialised'' versions of some CharArray things...
-[Copied from PackBase; no real reason -- UGH]
-
-\begin{code}
-new_ps_array	:: Int# -> ST s (MBA s)
-write_ps_array	:: MBA s -> Int# -> Char# -> ST s () 
-freeze_ps_array :: MBA s -> Int# -> ST s BA
-
-#if __GLASGOW_HASKELL__ < 411
-#define NEW_BYTE_ARRAY newCharArray#
-#else 
-#define NEW_BYTE_ARRAY newPinnedByteArray#
-#endif
-
-new_ps_array size = ST $ \ s ->
-    case (NEW_BYTE_ARRAY size s)  of { (# s2#, barr# #) ->
-    (# s2#, MBA barr# #) }
-
-write_ps_array (MBA barr#) n ch = ST $ \ s# ->
-    case writeCharArray# barr# n ch s#	of { s2#   ->
-    (# s2#, () #) }
-
--- same as unsafeFreezeByteArray
-freeze_ps_array (MBA arr#) len# = ST $ \ s# ->
-    case unsafeFreezeByteArray# arr# s# of { (# s2#, frozen# #) ->
-    (# s2#, BA frozen# #) }
-\end{code}
-
-
-Compare two equal-length strings for equality:
-
-\begin{code}
-eqStrPrefix :: Addr# -> ByteArray# -> Int# -> Bool
-eqStrPrefix a# barr# len# = 
-  inlinePerformIO $ do
-   x <- memcmp_ba a# barr# (I# len#)
-   return (x == 0)
-
-#ifdef UNUSED
-eqCharStrPrefix :: Addr# -> Addr# -> Int# -> Bool
-eqCharStrPrefix a1# a2# len# = 
-  inlinePerformIO $ do
-   x <- memcmp a1# a2# (I# len#)
-   return (x == 0)
-#endif
-
-eqStrPrefixBA :: ByteArray# -> ByteArray# -> Int# -> Int# -> Bool
-eqStrPrefixBA b1# b2# start# len# = 
-  inlinePerformIO $ do
-    x <- memcmp_baoff_ba b2# (I# start#) b1# (I# len#)
-    return (x == 0)
-
-#ifdef UNUSED
-eqCharStrPrefixBA :: Addr# -> ByteArray# -> Int# -> Int# -> Bool
-eqCharStrPrefixBA a# b2# start# len# = 
-  inlinePerformIO $ do
-    x <- memcmp_baoff b2# (I# start#) a# (I# len#) 
-    return (x == 0)
-#endif
-\end{code}
-
-\begin{code}
--- Just like unsafePerformIO, but we inline it.  This is safe when
--- there are no side effects, and improves performance.
-{-# INLINE inlinePerformIO #-}
-inlinePerformIO :: IO a -> a
-inlinePerformIO (IO m) = case m realWorld# of (# _, r #)   -> r
-
-#if __GLASGOW_HASKELL__ <= 408
-strLength (Ptr a#) = ghc_strlen a#
-foreign import ccall unsafe "ghc_strlen" 
-  ghc_strlen :: Addr# -> Int
-#else
-foreign import ccall unsafe "ghc_strlen" 
-  strLength :: Ptr () -> Int
-#endif
-
-foreign import ccall unsafe "ghc_memcmp"
-  memcmp :: Addr# -> Addr# -> Int -> IO Int
-
-foreign import ccall unsafe "ghc_memcmp" 
-  memcmp_ba :: Addr# -> ByteArray# -> Int -> IO Int
-
-foreign import ccall unsafe "ghc_memcmp_off"
-  memcmp_baoff :: ByteArray# -> Int -> Addr# -> Int -> IO Int
-
-foreign import ccall unsafe "ghc_memcmp_off"
-  memcmp_baoff_ba :: ByteArray# -> Int -> ByteArray# -> Int -> IO Int
-\end{code}
diff --git a/ghc/compiler/utils/StringBuffer.lhs b/ghc/compiler/utils/StringBuffer.lhs
index e53dbc89ce..e2eed889f2 100644
--- a/ghc/compiler/utils/StringBuffer.lhs
+++ b/ghc/compiler/utils/StringBuffer.lhs
@@ -6,27 +6,32 @@
 Buffers for scanning string input stored in external arrays.
 
 \begin{code}
+{-# OPTIONS_GHC -O #-}
+-- always optimise this module, it's critical
+
 module StringBuffer
        (
         StringBuffer(..),
 	-- non-abstract for vs\/HaskellService
 
 	 -- * Creation\/destruction
-        hGetStringBuffer,     -- :: FilePath     -> IO StringBuffer
-	stringToStringBuffer, -- :: String       -> IO StringBuffer
+        hGetStringBuffer,
+	stringToStringBuffer,
 
-         -- * Lookup
-	currentChar,       -- :: StringBuffer -> Char
-	prevChar,          -- :: StringBuffer -> Char -> Char
-	lookAhead,         -- :: StringBuffer -> Int  -> Char
-	atEnd,		   -- :: StringBuffer -> Bool
+	-- * Inspection
+	nextChar,
+	currentChar,
+	prevChar,
+	atEnd,
 
-	-- * Moving
-	stepOn, stepOnBy,
+	-- * Moving and comparison
+	stepOn,
+	offsetBytes,
+	byteDiff,
 
         -- * Conversion
-        lexemeToString,     -- :: StringBuffer -> Int -> String
-        lexemeToFastString, -- :: StringBuffer -> Int -> FastString
+        lexemeToString,
+        lexemeToFastString,
 
 	 -- * Parsing integers
 	parseInteger,
@@ -34,22 +39,19 @@ module StringBuffer
 
 #include "HsVersions.h"
 
-import FastString
-import Panic
+import Encoding
+import FastString	(FastString,mkFastString,mkFastStringBytes)
 
 import GLAEXTS
 
 import Foreign
 
-#if __GLASGOW_HASKELL__ < 503
-import PrelIOBase
-import PrelHandle
-#else
-import GHC.IOBase
-import GHC.IO		( slurpFile )
-#endif
+import GHC.IOBase		( IO(..) )
+import GHC.Base			( unsafeChr )
+
+import System.IO		( hGetBuf )
 
-import IO			( openFile, hFileSize, IOMode(ReadMode),
+import IO			( hFileSize, IOMode(ReadMode),
 				  hClose )
 #if __GLASGOW_HASKELL__ >= 601
 import System.IO		( openBinaryFile )
@@ -57,37 +59,35 @@ import System.IO		( openBinaryFile )
 import IOExts                   ( openFileEx, IOModeEx(..) )
 #endif
 
-#if __GLASGOW_HASKELL__ < 503
-import IArray			( listArray )
-import ArrayBase		( UArray(..) )
-import MutableArray
-import IOExts			( hGetBufBA )
-#else
-import Data.Array.IArray	( listArray )
-import Data.Array.MArray 	( unsafeFreeze, newArray_ )
-import Data.Array.Base		( UArray(..)  )
-import Data.Array.IO		( IOArray, hGetArray )
-#endif
-
-import Char			( ord )
-
 #if __GLASGOW_HASKELL__ < 601
 openBinaryFile fp mode = openFileEx fp (BinaryMode mode)
 #endif
+
 -- -----------------------------------------------------------------------------
 -- The StringBuffer type
 
--- A StringBuffer is a ByteArray# with a pointer into it.  We also cache
--- the length of the ByteArray# for speed.
-
+-- |A StringBuffer is an internal pointer to a sized chunk of bytes.
+-- The bytes are intended to be *immutable*.  There are pure
+-- operations to read the contents of a StringBuffer.
+--
+-- A StringBuffer may have a finalizer, depending on how it was
+-- obtained.
+--
 data StringBuffer
- = StringBuffer
-     ByteArray#
-     Int#         -- length
-     Int#         -- current pos
+ = StringBuffer {
+     buf :: {-# UNPACK #-} !(ForeignPtr Word8),
+     len :: {-# UNPACK #-} !Int, 	-- length
+     cur :: {-# UNPACK #-} !Int		-- current pos
+  }
+	-- The buffer is assumed to be UTF-8 encoded, and furthermore
+	-- we add three '\0' bytes to the end as sentinels so that the
+	-- decoder doesn't have to check for overflow at every single byte
+	-- of a multibyte sequence.
 
 instance Show StringBuffer where
-	showsPrec _ s = showString "<stringbuffer>"
+	showsPrec _ s = showString "<stringbuffer(" 
+		      . shows (len s) . showString "," . shows (cur s)
+		      . showString ">"
 
 -- -----------------------------------------------------------------------------
 -- Creation / Destruction
@@ -95,97 +95,108 @@ instance Show StringBuffer where
 hGetStringBuffer :: FilePath -> IO StringBuffer
 hGetStringBuffer fname = do
    h <- openBinaryFile fname ReadMode
-   size <- hFileSize h
-   let size_i@(I# sz#) = fromIntegral size
-#if __GLASGOW_HASKELL__ < 503
-   arr <- stToIO (newCharArray (0,size_i-1))
-   r <- hGetBufBA h arr size_i
-#else
-   arr <- newArray_ (0,size_i-1)
-   r <- if size_i == 0 then return 0 else hGetArray h arr size_i
-#endif
-   hClose h
-   if (r /= size_i)
+   size_i <- hFileSize h
+   let size = fromIntegral size_i
+   buf <- mallocForeignPtrArray (size+3)
+   withForeignPtr buf $ \ptr -> do
+     r <- if size == 0 then return 0 else hGetBuf h ptr size
+     hClose h
+     if (r /= size)
 	then ioError (userError "short read of file")
 	else do
-#if __GLASGOW_HASKELL__ < 503
-   frozen <- stToIO (unsafeFreezeByteArray arr)
-   case frozen of
-      ByteArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#)
-#else
-   frozen <- unsafeFreeze arr
-   case frozen of
-      UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#)
-#endif
+	  pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+		 -- sentinels for UTF-8 decoding
+	  return (StringBuffer buf size 0)
 
-#if __GLASGOW_HASKELL__ >= 502
+stringToStringBuffer :: String -> IO StringBuffer
 stringToStringBuffer str = do
-  let size@(I# sz#) = length str
-      arr = listArray (0,size-1) (map (fromIntegral.ord) str)
-		 :: UArray Int Word8
-  case arr of
-	UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#)
-#else
-stringToStringBuffer = panic "stringToStringBuffer: not implemented"
-#endif
+  let size = utf8EncodedLength str
+  buf <- mallocForeignPtrArray (size+3)
+  withForeignPtr buf $ \ptr -> do
+    utf8EncodeString ptr str
+    pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+	 -- sentinels for UTF-8 decoding
+  return (StringBuffer buf size 0)
 
 -- -----------------------------------------------------------------------------
--- Lookup
-
-currentChar  :: StringBuffer -> Char
-currentChar (StringBuffer arr# l# current#) =
-  ASSERT(current# <# l#)
-  C# (indexCharArray# arr# current#)
+-- Grab a character
+
+-- Getting our fingers dirty a little here, but this is performance-critical
+{-# INLINE nextChar #-}
+nextChar :: StringBuffer -> (Char,StringBuffer)
+nextChar (StringBuffer buf len (I# cur#)) =
+  inlinePerformIO $ do
+    withForeignPtr buf $ \(Ptr a#) -> do
+	case utf8DecodeChar# (a# `plusAddr#` cur#) of
+	  (# c#, b# #) ->
+	     let cur' = I# (b# `minusAddr#` a#) in
+	     return (C# c#, StringBuffer buf len cur')
+
+currentChar :: StringBuffer -> Char
+currentChar = fst . nextChar
 
 prevChar :: StringBuffer -> Char -> Char
-prevChar (StringBuffer _ _ 0#) deflt = deflt
-prevChar s deflt = lookAhead s (-1)
-
-lookAhead :: StringBuffer -> Int  -> Char
-lookAhead (StringBuffer arr# l# c#) (I# i#) =
-  ASSERT(off <# l#  && off >=# 0#)
-  C# (indexCharArray# arr# off)
- where 
-   off = c# +# i#
+prevChar (StringBuffer buf len 0)   deflt = deflt
+prevChar (StringBuffer buf len cur) deflt = 
+  inlinePerformIO $ do
+    withForeignPtr buf $ \p -> do
+      p' <- utf8PrevChar (p `plusPtr` cur)
+      return (fst (utf8DecodeChar p'))
 
 -- -----------------------------------------------------------------------------
 -- Moving
 
 stepOn :: StringBuffer -> StringBuffer
-stepOn s = stepOnBy 1 s
+stepOn s = snd (nextChar s)
+
+offsetBytes :: Int -> StringBuffer -> StringBuffer
+offsetBytes i s = s { cur = cur s + i }
 
-stepOnBy :: Int -> StringBuffer -> StringBuffer
-stepOnBy (I# i#) (StringBuffer fo# l# c#) = StringBuffer fo# l# (c# +# i#)
+byteDiff :: StringBuffer -> StringBuffer -> Int
+byteDiff s1 s2 = cur s2 - cur s1
 
 atEnd :: StringBuffer -> Bool
-atEnd (StringBuffer _ l# c#) = l# ==# c#
+atEnd (StringBuffer _ l c) = l == c
 
 -- -----------------------------------------------------------------------------
 -- Conversion
 
-lexemeToString :: StringBuffer -> Int -> String
+lexemeToString :: StringBuffer -> Int {-bytes-} -> String
 lexemeToString _ 0 = ""
-lexemeToString (StringBuffer arr# _ current#) (I# len#) = unpack current#
- where
-    end = current# +# len#
+lexemeToString (StringBuffer buf _ cur) bytes =
+  inlinePerformIO $ 
+    withForeignPtr buf $ \ptr -> 
+      utf8DecodeString (ptr `plusPtr` cur) bytes
 
-    unpack nh
-      | nh >=# end  = []
-      | otherwise   = C# ch : unpack (nh +# 1#)
-      where
-	ch = indexCharArray# arr# nh
-
-lexemeToFastString :: StringBuffer -> Int -> FastString
+lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
 lexemeToFastString _ 0 = mkFastString ""
-lexemeToFastString (StringBuffer fo _ current#) (I# len) =
-    mkFastSubStringBA# fo current# len
+lexemeToFastString (StringBuffer buf _ cur) len =
+   inlinePerformIO $
+     withForeignPtr buf $ \ptr ->
+       return $! mkFastStringBytes (ptr `plusPtr` cur) len
 
 -- -----------------------------------------------------------------------------
 -- Parsing integer strings in various bases
 
+byteOff :: StringBuffer -> Int -> Char
+byteOff (StringBuffer buf _ cur) i = 
+  inlinePerformIO $ withForeignPtr buf $ \ptr -> do
+    w <- peek (ptr `plusPtr` (cur+i))
+    return (unsafeChr (fromIntegral (w::Word8)))
+
+-- | XXX assumes ASCII digits only
 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
 parseInteger buf len radix to_int 
   = go 0 0
   where go i x | i == len  = x
-	       | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
+	       | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i)))
+
+-- -----------------------------------------------------------------------------
+-- under the carpet
+
+-- Just like unsafePerformIO, but we inline it.
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #)   -> r
+
 \end{code}
diff --git a/ghc/compiler/utils/UnicodeUtil.lhs b/ghc/compiler/utils/UnicodeUtil.lhs
deleted file mode 100644
index 56e95a5434..0000000000
--- a/ghc/compiler/utils/UnicodeUtil.lhs
+++ /dev/null
@@ -1,36 +0,0 @@
-Various Unicode-related utilities.
-
-\begin{code}
-module UnicodeUtil(
-    stringToUtf8, intsToUtf8
-  ) where
-
-#include "HsVersions.h"
-
-import Panic ( panic )
-import Char  ( chr, ord )
-\end{code}
-
-\begin{code}
-stringToUtf8 :: String -> String
-stringToUtf8 s = intsToUtf8 (map ord s)
-
-intsToUtf8 :: [Int] -> String
-intsToUtf8 []       = ""
-intsToUtf8 (c:s)
-    | c >= 1 && c <= 0x7F = chr c : intsToUtf8 s
-    | c < 0           = panic ("charToUtf8 ("++show c++")")
-    | c <= 0x7FF      = chr (0xC0 + c `div`       0x40           ) :
-                        chr (0x80 + c                  `mod` 0x40) :
-                        intsToUtf8 s
-    | c <= 0xFFFF     = chr (0xE0 + c `div`     0x1000           ) :
-                        chr (0x80 + c `div`       0x40 `mod` 0x40) :
-                        chr (0x80 + c                  `mod` 0x40) :
-                        intsToUtf8 s
-    | c <= 0x10FFFF   = chr (0xF0 + c `div`    0x40000           ) :
-                        chr (0x80 + c `div`     0x1000 `mod` 0x40) :
-                        chr (0x80 + c `div`       0x40 `mod` 0x40) :
-                        chr (0x80 + c                  `mod` 0x40) :
-                        intsToUtf8 s
-    | otherwise       = panic ("charToUtf8 "++show c)
-\end{code}