Use unboxed codebuffers in base

Metric Decrease: encodingAllocations
author: Josh Meredith <joshmeredith2008@gmail.com> 2023-04-20 08:44:58 +0000
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2023-05-15 18:01:43 -0400
commit: 21f3aae7371469beb3950a6170db6c5668379ff3 (patch)
tree: a2c92b090383fe005004d6f50f38b966314faf27
parent: fbe3fe003ac8d4a065c80041c0a9f9c74b6366ac (diff)
download: haskell-21f3aae7371469beb3950a6170db6c5668379ff3.tar.gz
9 files changed, 597 insertions, 493 deletions
diff --git a/libraries/base/GHC/IO/Encoding.hs b/libraries/base/GHC/IO/Encoding.hs
index 036a00bebc..ab2599559d 100644
--- a/libraries/base/GHC/IO/Encoding.hs
+++ b/libraries/base/GHC/IO/Encoding.hs
@@ -1,5 +1,6 @@
 {-# LANGUAGE Trustworthy #-}
 {-# LANGUAGE CPP, NoImplicitPrelude #-}
+{-# LANGUAGE UnboxedTuples #-}
 {-# OPTIONS_GHC -funbox-strict-fields #-}
 
 -----------------------------------------------------------------------------
@@ -336,11 +337,13 @@ mkTextEncoding' cfm enc =
 
 
 latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8)
-latin1_encode input output = fmap (\(_why,input',output') -> (input',output')) $ Latin1.latin1_encode input output -- unchecked, used for char8
+latin1_encode input output = IO $ \st -> case Latin1.latin1_encode input output st of
+  (# st', _why, input', output' #) -> (# st', (input', output') #) -- unchecked, used for char8
 --latin1_encode = unsafePerformIO $ do mkTextEncoder Iconv.latin1 >>= return.encode
 
 latin1_decode :: Buffer Word8 -> CharBuffer -> IO (Buffer Word8, CharBuffer)
-latin1_decode input output = fmap (\(_why,input',output') -> (input',output')) $ Latin1.latin1_decode input output
+latin1_decode input output = IO $ \st -> case Latin1.latin1_decode input output st of
+  (# st', _why, input', output' #) -> (# st', (input',output') #)
 --latin1_decode = unsafePerformIO $ do mkTextDecoder Iconv.latin1 >>= return.encode
 
 unknownEncodingErr :: String -> IO a
diff --git a/libraries/base/GHC/IO/Encoding/CodePage/API.hs b/libraries/base/GHC/IO/Encoding/CodePage/API.hs
index 6136156cdb..e8412e8356 100644
--- a/libraries/base/GHC/IO/Encoding/CodePage/API.hs
+++ b/libraries/base/GHC/IO/Encoding/CodePage/API.hs
@@ -1,6 +1,7 @@
 {-# LANGUAGE Trustworthy #-}
 {-# LANGUAGE CPP, NoImplicitPrelude, NondecreasingIndentation,
-             RecordWildCards, ScopedTypeVariables #-}
+             RecordWildCards, ScopedTypeVariables,
+             UnboxedTuples #-}
 {-# OPTIONS_GHC -Wno-name-shadowing #-}
 
 module GHC.IO.Encoding.CodePage.API (
@@ -157,11 +158,15 @@ newCP rec fn cp = do
 utf16_native_encode' :: EncodeBuffer
 utf16_native_decode' :: DecodeBuffer
 #if defined(WORDS_BIGENDIAN)
-utf16_native_encode' = utf16be_encode
-utf16_native_decode' = utf16be_decode
+utf16_native_encode' i o = IO $ \st -> case utf16be_encode i o st of
+  (# st', c, i', o' #) -> (# st', (c, i', o') #)
+utf16_native_decode' i o = IO $ \st -> case utf16be_decode i o st of
+  (# st', c, i', o' #) -> (# st', (c, i', o') #)
 #else
-utf16_native_encode' = utf16le_encode
-utf16_native_decode' = utf16le_decode
+utf16_native_encode' i o = IO $ \st -> case utf16le_encode i o st of
+  (# st', c, i', o' #) -> (# st', (c, i', o') #)
+utf16_native_decode' i o = IO $ \st -> case utf16le_decode i o st of
+  (# st', c, i', o' #) -> (# st', (c, i', o') #)
 #endif
 
 saner :: CodeBuffer from to
diff --git a/libraries/base/GHC/IO/Encoding/Failure.hs b/libraries/base/GHC/IO/Encoding/Failure.hs
index fb885bd45b..d9817aa950 100644
--- a/libraries/base/GHC/IO/Encoding/Failure.hs
+++ b/libraries/base/GHC/IO/Encoding/Failure.hs
@@ -1,5 +1,8 @@
 {-# LANGUAGE Trustworthy #-}
 {-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE BangPatterns #-}
 
 -----------------------------------------------------------------------------
 -- |
@@ -18,7 +21,8 @@
 module GHC.IO.Encoding.Failure (
     CodingFailureMode(..), codingFailureModeSuffix,
     isSurrogate,
-    recoverDecode, recoverEncode
+    recoverDecode, recoverEncode,
+    recoverDecode#, recoverEncode#,
   ) where
 
 import GHC.IO
@@ -142,6 +146,12 @@ unescapeRoundtripCharacterSurrogate c
     | otherwise                 = Nothing
   where x = ord c
 
+recoverDecode# :: CodingFailureMode -> Buffer Word8 -> Buffer Char
+               -> State# RealWorld -> (# State# RealWorld, Buffer Word8, Buffer Char #)
+recoverDecode# cfm input output st =
+  let !(# st', (bIn, bOut) #) = unIO (recoverDecode cfm input output) st
+  in (# st', bIn, bOut #)
+
 recoverDecode :: CodingFailureMode -> Buffer Word8 -> Buffer Char
               -> IO (Buffer Word8, Buffer Char)
 recoverDecode cfm input@Buffer{  bufRaw=iraw, bufL=ir, bufR=_  }
@@ -160,6 +170,12 @@ recoverDecode cfm input@Buffer{  bufRaw=iraw, bufL=ir, bufR=_  }
       ow' <- writeCharBuf oraw ow (escapeToRoundtripCharacterSurrogate b)
       return (input { bufL=ir+1 }, output { bufR=ow' })
 
+recoverEncode# :: CodingFailureMode -> Buffer Char -> Buffer Word8
+               -> State# RealWorld -> (# State# RealWorld, Buffer Char, Buffer Word8 #)
+recoverEncode# cfm input output st =
+  let !(# st', (bIn, bOut) #) = unIO (recoverEncode cfm input output) st
+  in (# st', bIn, bOut #)
+
 recoverEncode :: CodingFailureMode -> Buffer Char -> Buffer Word8
               -> IO (Buffer Char, Buffer Word8)
 recoverEncode cfm input@Buffer{  bufRaw=iraw, bufL=ir, bufR=_  }
diff --git a/libraries/base/GHC/IO/Encoding/Iconv.hs b/libraries/base/GHC/IO/Encoding/Iconv.hs
index 6c120cae58..672e505e98 100644
--- a/libraries/base/GHC/IO/Encoding/Iconv.hs
+++ b/libraries/base/GHC/IO/Encoding/Iconv.hs
@@ -2,6 +2,8 @@
 {-# LANGUAGE CPP
            , NoImplicitPrelude
            , NondecreasingIndentation
+           , UnboxedTuples
+           , MagicHash
   #-}
 {-# OPTIONS_HADDOCK not-home #-}
 
@@ -133,19 +135,24 @@ newIConv from to rec fn =
   withCAString to   $ \ to_str -> do
     iconvt <- throwErrnoIfMinus1 "mkTextEncoding" $ hs_iconv_open to_str from_str
     let iclose = throwErrnoIfMinus1_ "Iconv.close" $ hs_iconv_close iconvt
-    return BufferCodec{
-                encode = fn iconvt,
-                recover = rec,
-                close  = iclose,
+        fn_iconvt ibuf obuf st = case unIO (fn iconvt ibuf obuf) st of
+          (# st', (prog, ibuf', obuf') #) -> (# st', prog, ibuf', obuf' #)
+    return BufferCodec# {
+                encode#   = fn_iconvt,
+                recover#  = rec#,
+                close#    = iclose,
                 -- iconv doesn't supply a way to save/restore the state
-                getState = return (),
-                setState = const $ return ()
+                getState# = return (),
+                setState# = const $ return ()
                 }
+  where
+    rec# ibuf obuf st = case unIO (rec ibuf obuf) st of
+      (# st', (ibuf', obuf') #) -> (# st', ibuf', obuf' #)
 
-iconvDecode :: IConv -> DecodeBuffer
+iconvDecode :: IConv -> Buffer Word8 -> Buffer Char -> IO (CodingProgress, Buffer Word8, Buffer Char)
 iconvDecode iconv_t ibuf obuf = iconvRecode iconv_t ibuf 0 obuf char_shift
 
-iconvEncode :: IConv -> EncodeBuffer
+iconvEncode :: IConv -> Buffer Char -> Buffer Word8 -> IO (CodingProgress, Buffer Char, Buffer Word8)
 iconvEncode iconv_t ibuf obuf = iconvRecode iconv_t ibuf char_shift obuf 0
 
 iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int
diff --git a/libraries/base/GHC/IO/Encoding/Latin1.hs b/libraries/base/GHC/IO/Encoding/Latin1.hs
index 730379e4a0..4185a0de42 100644
--- a/libraries/base/GHC/IO/Encoding/Latin1.hs
+++ b/libraries/base/GHC/IO/Encoding/Latin1.hs
@@ -2,6 +2,8 @@
 {-# LANGUAGE NoImplicitPrelude
            , BangPatterns
            , NondecreasingIndentation
+           , UnboxedTuples
+           , MagicHash
   #-}
 {-# OPTIONS_GHC  -funbox-strict-fields #-}
 
@@ -56,22 +58,22 @@ mkLatin1 cfm = TextEncoding { textEncodingName = "ISO-8859-1",
 
 latin1_DF :: CodingFailureMode -> IO (TextDecoder ())
 latin1_DF cfm =
-  return (BufferCodec {
-             encode   = latin1_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = latin1_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 latin1_EF :: CodingFailureMode -> IO (TextEncoder ())
 latin1_EF cfm =
-  return (BufferCodec {
-             encode   = latin1_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = latin1_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 latin1_checked :: TextEncoding
@@ -85,12 +87,12 @@ mkLatin1_checked cfm = TextEncoding { textEncodingName = "ISO-8859-1",
 
 latin1_checked_EF :: CodingFailureMode -> IO (TextEncoder ())
 latin1_checked_EF cfm =
-  return (BufferCodec {
-             encode   = latin1_checked_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = latin1_checked_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 -- -----------------------------------------------------------------------------
@@ -108,22 +110,22 @@ mkAscii cfm = TextEncoding { textEncodingName = "ASCII",
 
 ascii_DF :: CodingFailureMode -> IO (TextDecoder ())
 ascii_DF cfm =
-  return (BufferCodec {
-             encode   = ascii_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = ascii_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 ascii_EF :: CodingFailureMode -> IO (TextEncoder ())
 ascii_EF cfm =
-  return (BufferCodec {
-             encode   = ascii_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = ascii_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 
@@ -134,97 +136,115 @@ ascii_EF cfm =
 -- TODO: Eliminate code duplication between the checked and unchecked
 -- versions of the decoder or encoder (but don't change the Core!)
 
-latin1_decode :: DecodeBuffer
+latin1_decode :: DecodeBuffer#
 latin1_decode 
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let 
-       loop !ir !ow
-         | ow >= os = done OutputUnderflow ir ow
-         | ir >= iw = done InputUnderflow ir ow
+       loop :: Int -> Int -> DecodingBuffer#
+       loop !ir !ow st0
+         | ow >= os = done OutputUnderflow ir ow st0
+         | ir >= iw = done InputUnderflow ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
-              ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0))
-              loop (ir+1) ow'
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw ir) st0
+                  !(# st2, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral c0))) st1
+              loop (ir+1) ow' st2
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' =
+         let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+             !ro = output{ bufR=ow }
+         in  (# st', why, ri, ro #)
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-ascii_decode :: DecodeBuffer
+ascii_decode :: DecodeBuffer#
 ascii_decode
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-       loop !ir !ow
-         | ow >= os = done OutputUnderflow ir ow
-         | ir >= iw = done InputUnderflow ir ow
+       loop :: Int -> Int -> DecodingBuffer#
+       loop !ir !ow st0
+         | ow >= os = done OutputUnderflow ir ow st0
+         | ir >= iw = done InputUnderflow ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
-              if c0 > 0x7f then invalid else do
-              ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0))
-              loop (ir+1) ow'
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw ir) st0
+              if c0 > 0x7f then invalid st1 else do
+                let !(# st2, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral c0))) st1
+                loop (ir+1) ow' st2
          where
-           invalid = done InvalidSequence ir ow
+           invalid :: DecodingBuffer#
+           invalid st' = done InvalidSequence ir ow st'
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' = 
+         let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+             !ro = output{ bufR=ow }
+         in  (# st', why, ri, ro #)
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-latin1_encode :: EncodeBuffer
+latin1_encode :: EncodeBuffer#
 latin1_encode
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ow >= os = done OutputUnderflow ir ow
-        | ir >= iw = done InputUnderflow ir ow
+      {-# NOINLINE done #-}
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' = 
+        let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+            !ro = output{ bufR=ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ow >= os = done OutputUnderflow ir ow st0
+        | ir >= iw = done InputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
-           writeWord8Buf oraw ow (fromIntegral (ord c))
-           loop ir' (ow+1)
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+               !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral (ord c))) st1
+           loop ir' (ow+1) st2
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-latin1_checked_encode :: EncodeBuffer
+latin1_checked_encode :: EncodeBuffer#
 latin1_checked_encode input output
  = single_byte_checked_encode 0xff input output
 
-ascii_encode :: EncodeBuffer
+ascii_encode :: EncodeBuffer#
 ascii_encode input output
  = single_byte_checked_encode 0x7f input output
 
-single_byte_checked_encode :: Int -> EncodeBuffer
+single_byte_checked_encode :: Int -> EncodeBuffer#
 single_byte_checked_encode max_legal_char
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ow >= os = done OutputUnderflow ir ow
-        | ir >= iw = done InputUnderflow ir ow
+      {-# NOINLINE done #-}
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' = 
+        let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+            !ro = output{ bufR=ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ow >= os = done OutputUnderflow ir ow st0
+        | ir >= iw = done InputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
-           if ord c > max_legal_char then invalid else do
-           writeWord8Buf oraw ow (fromIntegral (ord c))
-           loop ir' (ow+1)
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+           if ord c > max_legal_char then invalid st1 else do
+             let !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral (ord c))) st1
+             loop ir' (ow+1) st2
         where
-           invalid = done InvalidSequence ir ow
+          invalid :: EncodingBuffer#
+          invalid st' = done InvalidSequence ir ow st'
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 {-# INLINE single_byte_checked_encode #-}
diff --git a/libraries/base/GHC/IO/Encoding/UTF16.hs b/libraries/base/GHC/IO/Encoding/UTF16.hs
index a0878d4fce..a592d4c47c 100644
--- a/libraries/base/GHC/IO/Encoding/UTF16.hs
+++ b/libraries/base/GHC/IO/Encoding/UTF16.hs
@@ -3,6 +3,7 @@
            , BangPatterns
            , NondecreasingIndentation
            , MagicHash
+           , UnboxedTuples
   #-}
 {-# OPTIONS_GHC  -funbox-strict-fields #-}
 
@@ -61,64 +62,66 @@ mkUTF16 cfm =  TextEncoding { textEncodingName = "UTF-16",
                               mkTextDecoder = utf16_DF cfm,
                               mkTextEncoder = utf16_EF cfm }
 
-utf16_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer))
+utf16_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer#))
 utf16_DF cfm = do
   seen_bom <- newIORef Nothing
-  return (BufferCodec {
-             encode   = utf16_decode seen_bom,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = readIORef seen_bom,
-             setState = writeIORef seen_bom
+  return (BufferCodec# {
+             encode#   = utf16_decode seen_bom,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = readIORef seen_bom,
+             setState# = writeIORef seen_bom
           })
 
 utf16_EF :: CodingFailureMode -> IO (TextEncoder Bool)
 utf16_EF cfm = do
   done_bom <- newIORef False
-  return (BufferCodec {
-             encode   = utf16_encode done_bom,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = readIORef done_bom,
-             setState = writeIORef done_bom
+  return (BufferCodec# {
+             encode#   = utf16_encode done_bom,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = readIORef done_bom,
+             setState# = writeIORef done_bom
           })
 
-utf16_encode :: IORef Bool -> EncodeBuffer
+utf16_encode :: IORef Bool -> EncodeBuffer#
 utf16_encode done_bom input
   output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os }
+  st0
  = do
-  b <- readIORef done_bom
-  if b then utf16_native_encode input output
+  let !(# st1, b #) = unIO (readIORef done_bom) st0
+  if b then utf16_native_encode input output st1
        else if os - ow < 2
-               then return (OutputUnderflow,input,output)
+               then (# st1,OutputUnderflow,input,output #)
                else do
-                    writeIORef done_bom True
-                    writeWord8Buf oraw ow     bom1
-                    writeWord8Buf oraw (ow+1) bom2
-                    utf16_native_encode input output{ bufR = ow+2 }
+               let !(# st2, () #) = unIO (writeIORef done_bom True) st1
+                   !(# st3, () #) = unIO (writeWord8Buf oraw ow     bom1) st2
+                   !(# st4, () #) = unIO (writeWord8Buf oraw (ow+1) bom2) st3
+               utf16_native_encode input output{ bufR = ow+2 } st4
 
-utf16_decode :: IORef (Maybe DecodeBuffer) -> DecodeBuffer
+utf16_decode :: IORef (Maybe DecodeBuffer#) -> DecodeBuffer#
 utf16_decode seen_bom
   input@Buffer{  bufRaw=iraw, bufL=ir, bufR=iw,  bufSize=_  }
   output
+  st0
  = do
-   mb <- readIORef seen_bom
+   let !(# st1, mb #) = unIO (readIORef seen_bom) st0
    case mb of
-     Just decode -> decode input output
+     Just decode -> decode input output st1
      Nothing ->
-       if iw - ir < 2 then return (InputUnderflow,input,output) else do
-       c0 <- readWord8Buf iraw ir
-       c1 <- readWord8Buf iraw (ir+1)
+       if iw - ir < 2 then (# st1,InputUnderflow,input,output #) else do
+       let !(# st2, c0 #) = unIO (readWord8Buf iraw  ir   ) st1
+           !(# st3, c1 #) = unIO (readWord8Buf iraw (ir+1)) st2
        case () of
-        _ | c0 == bomB && c1 == bomL -> do
-               writeIORef seen_bom (Just utf16be_decode)
-               utf16be_decode input{ bufL= ir+2 } output
-          | c0 == bomL && c1 == bomB -> do
-               writeIORef seen_bom (Just utf16le_decode)
-               utf16le_decode input{ bufL= ir+2 } output
-          | otherwise -> do
-               writeIORef seen_bom (Just utf16_native_decode)
-               utf16_native_decode input output
+        _ | c0 == bomB && c1 == bomL ->
+               let !(# st4, () #) = unIO (writeIORef seen_bom (Just utf16be_decode)) st3
+               in utf16be_decode input{ bufL= ir+2 } output st4
+          | c0 == bomL && c1 == bomB ->
+               let !(# st4, () #) = unIO (writeIORef seen_bom (Just utf16le_decode)) st3
+               in utf16le_decode input{ bufL= ir+2 } output st4
+          | otherwise ->
+               let !(# st4, () #) = unIO (writeIORef seen_bom (Just utf16_native_decode)) st3
+               in utf16_native_decode input output st4
 
 
 bomB, bomL, bom1, bom2 :: Word8
@@ -126,10 +129,10 @@ bomB = 0xfe
 bomL = 0xff
 
 -- choose UTF-16BE by default for UTF-16 output
-utf16_native_decode :: DecodeBuffer
+utf16_native_decode :: DecodeBuffer#
 utf16_native_decode = utf16be_decode
 
-utf16_native_encode :: EncodeBuffer
+utf16_native_encode :: EncodeBuffer#
 utf16_native_encode = utf16be_encode
 
 bom1 = bomB
@@ -149,22 +152,22 @@ mkUTF16be cfm = TextEncoding { textEncodingName = "UTF-16BE",
 
 utf16be_DF :: CodingFailureMode -> IO (TextDecoder ())
 utf16be_DF cfm =
-  return (BufferCodec {
-             encode   = utf16be_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf16be_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf16be_EF :: CodingFailureMode -> IO (TextEncoder ())
 utf16be_EF cfm =
-  return (BufferCodec {
-             encode   = utf16be_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf16be_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf16le :: TextEncoding
@@ -178,114 +181,127 @@ mkUTF16le cfm = TextEncoding { textEncodingName = "UTF16-LE",
 
 utf16le_DF :: CodingFailureMode -> IO (TextDecoder ())
 utf16le_DF cfm =
-  return (BufferCodec {
-             encode   = utf16le_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf16le_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf16le_EF :: CodingFailureMode -> IO (TextEncoder ())
 utf16le_EF cfm =
-  return (BufferCodec {
-             encode   = utf16le_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf16le_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 
-utf16be_decode :: DecodeBuffer
+utf16be_decode :: DecodeBuffer#
 utf16be_decode
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-       loop !ir !ow
-         | ow >= os     = done OutputUnderflow ir ow
-         | ir >= iw     = done InputUnderflow ir ow
-         | ir + 1 == iw = done InputUnderflow ir ow
+       loop !ir !ow st0
+         | ow >= os     = done OutputUnderflow ir ow st0
+         | ir >= iw     = done InputUnderflow ir ow st0
+         | ir + 1 == iw = done InputUnderflow ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
-              c1 <- readWord8Buf iraw (ir+1)
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw  ir   ) st0
+                  !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
               let x1 = fromIntegral c0 `shiftL` 8 + fromIntegral c1
               if validate1 x1
-                 then do ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral x1))
-                         loop (ir+2) ow'
-                 else if iw - ir < 4 then done InputUnderflow ir ow else do
-                      c2 <- readWord8Buf iraw (ir+2)
-                      c3 <- readWord8Buf iraw (ir+3)
-                      let x2 = fromIntegral c2 `shiftL` 8 + fromIntegral c3
-                      if not (validate2 x1 x2) then invalid else do
-                      ow' <- writeCharBuf oraw ow (chr2 x1 x2)
-                      loop (ir+4) ow'
+                 then let !(# st3, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral x1))) st2
+                      in loop (ir+2) ow' st3
+                 else if iw - ir < 4 then done InputUnderflow ir ow st2 else do
+                      let !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+                          !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
+                          x2 = fromIntegral c2 `shiftL` 8 + fromIntegral c3
+                      if not (validate2 x1 x2) then invalid st4 else do
+                      let !(# st5, ow' #) = unIO (writeCharBuf oraw ow (chr2 x1 x2)) st4
+                      loop (ir+4) ow' st5
          where
-           invalid = done InvalidSequence ir ow
+           invalid :: DecodingBuffer#
+           invalid st' = done InvalidSequence ir ow st'
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' =
+         let !ri = if ir == iw then input { bufL = 0, bufR = 0 } else input { bufL = ir }
+             !ro = output{ bufR = ow }
+         in  (# st', why, ri, ro #)
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-utf16le_decode :: DecodeBuffer
+utf16le_decode :: DecodeBuffer#
 utf16le_decode
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-       loop !ir !ow
-         | ow >= os     = done OutputUnderflow ir ow
-         | ir >= iw     = done InputUnderflow ir ow
-         | ir + 1 == iw = done InputUnderflow ir ow
+       loop :: Int -> Int -> DecodingBuffer#
+       loop !ir !ow st0
+         | ow >= os     = done OutputUnderflow ir ow st0
+         | ir >= iw     = done InputUnderflow ir ow st0
+         | ir + 1 == iw = done InputUnderflow ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
-              c1 <- readWord8Buf iraw (ir+1)
-              let x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw  ir   ) st0
+                  !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                  x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0
               if validate1 x1
-                 then do ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral x1))
-                         loop (ir+2) ow'
-                 else if iw - ir < 4 then done InputUnderflow ir ow else do
-                      c2 <- readWord8Buf iraw (ir+2)
-                      c3 <- readWord8Buf iraw (ir+3)
-                      let x2 = fromIntegral c3 `shiftL` 8 + fromIntegral c2
-                      if not (validate2 x1 x2) then invalid else do
-                      ow' <- writeCharBuf oraw ow (chr2 x1 x2)
-                      loop (ir+4) ow'
+                 then let !(# st3, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral x1))) st2
+                      in loop (ir+2) ow' st3
+                 else if iw - ir < 4 then done InputUnderflow ir ow st2 else do
+                      let !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+                          !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
+                          x2 = fromIntegral c3 `shiftL` 8 + fromIntegral c2
+                      if not (validate2 x1 x2) then invalid st4 else do
+                      let !(# st5, ow' #) = unIO (writeCharBuf oraw ow (chr2 x1 x2)) st4
+                      loop (ir+4) ow' st5
          where
-           invalid = done InvalidSequence ir ow
+           invalid :: DecodingBuffer#
+           invalid st' = done InvalidSequence ir ow st'
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' =
+         let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL = ir }
+             !ro = output{ bufR = ow }
+         in  (# st', why, ri, ro #)
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-utf16be_encode :: EncodeBuffer
+utf16be_encode :: EncodeBuffer#
 utf16be_encode
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ir >= iw     =  done InputUnderflow ir ow
-        | os - ow < 2  =  done OutputUnderflow ir ow
+      {-# NOINLINE done #-}
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' =
+        let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL=ir }
+            !ro = output{ bufR=ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ir >= iw     =  done InputUnderflow ir ow st0
+        | os - ow < 2  =  done OutputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
            case ord c of
-             x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow else do
-                    writeWord8Buf oraw ow     (fromIntegral (x `shiftR` 8))
-                    writeWord8Buf oraw (ow+1) (fromIntegral x)
-                    loop ir' (ow+2)
+             x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow st1 else do
+                    let !(# st2, () #) = unIO (writeWord8Buf oraw ow     (fromIntegral (x `shiftR` 8))) st1
+                        !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) (fromIntegral x))              st2
+                    loop ir' (ow+2) st3
                | otherwise -> do
-                    if os - ow < 4 then done OutputUnderflow ir ow else do
+                    if os - ow < 4 then done OutputUnderflow ir ow st1 else do
                     let
                          n1 = x - 0x10000
                          c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
@@ -294,35 +310,39 @@ utf16be_encode
                          c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
                          c4 = fromIntegral n2
                     --
-                    writeWord8Buf oraw ow     c1
-                    writeWord8Buf oraw (ow+1) c2
-                    writeWord8Buf oraw (ow+2) c3
-                    writeWord8Buf oraw (ow+3) c4
-                    loop ir' (ow+4)
+                         !(# st2, () #) = unIO (writeWord8Buf oraw ow     c1) st1
+                         !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+                         !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c3) st3
+                         !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c4) st4
+                    loop ir' (ow+4) st5
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-utf16le_encode :: EncodeBuffer
+utf16le_encode :: EncodeBuffer#
 utf16le_encode
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ir >= iw     =  done InputUnderflow ir ow
-        | os - ow < 2  =  done OutputUnderflow ir ow
+      {-# NOINLINE done #-}
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' =
+        let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL = ir }
+            !ro = output{ bufR = ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ir >= iw     =  done InputUnderflow ir ow st0
+        | os - ow < 2  =  done OutputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
            case ord c of
-             x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow else do
-                    writeWord8Buf oraw ow     (fromIntegral x)
-                    writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8))
-                    loop ir' (ow+2)
+             x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow st1 else do
+                    let !(# st2, () #) = unIO (writeWord8Buf oraw ow     (fromIntegral x)) st1
+                        !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8))) st2
+                    loop ir' (ow+2) st3
                | otherwise ->
-                    if os - ow < 4 then done OutputUnderflow ir ow else do
+                    if os - ow < 4 then done OutputUnderflow ir ow st1 else do
                     let
                          n1 = x - 0x10000
                          c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
@@ -331,13 +351,13 @@ utf16le_encode
                          c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
                          c4 = fromIntegral n2
                     --
-                    writeWord8Buf oraw ow     c2
-                    writeWord8Buf oraw (ow+1) c1
-                    writeWord8Buf oraw (ow+2) c4
-                    writeWord8Buf oraw (ow+3) c3
-                    loop ir' (ow+4)
+                         !(# st2, () #) = unIO (writeWord8Buf oraw ow     c2) st1
+                         !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c1) st2
+                         !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c4) st3
+                         !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c3) st4
+                    loop ir' (ow+4) st5
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
 chr2 :: Word16 -> Word16 -> Char
 chr2 (W16# a#) (W16# b#) = C# (chr# (upper# +# lower# +# 0x10000#))
diff --git a/libraries/base/GHC/IO/Encoding/UTF32.hs b/libraries/base/GHC/IO/Encoding/UTF32.hs
index 379f76066b..9319234ca7 100644
--- a/libraries/base/GHC/IO/Encoding/UTF32.hs
+++ b/libraries/base/GHC/IO/Encoding/UTF32.hs
@@ -3,6 +3,7 @@
            , BangPatterns
            , NondecreasingIndentation
            , MagicHash
+           , UnboxedTuples
   #-}
 {-# OPTIONS_GHC  -funbox-strict-fields #-}
 
@@ -61,68 +62,70 @@ mkUTF32 cfm = TextEncoding { textEncodingName = "UTF-32",
                              mkTextDecoder = utf32_DF cfm,
                              mkTextEncoder = utf32_EF cfm }
 
-utf32_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer))
+utf32_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer#))
 utf32_DF cfm = do
   seen_bom <- newIORef Nothing
-  return (BufferCodec {
-             encode   = utf32_decode seen_bom,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = readIORef seen_bom,
-             setState = writeIORef seen_bom
+  return (BufferCodec# {
+             encode#   = utf32_decode seen_bom,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = readIORef seen_bom,
+             setState# = writeIORef seen_bom
           })
 
 utf32_EF :: CodingFailureMode -> IO (TextEncoder Bool)
 utf32_EF cfm = do
   done_bom <- newIORef False
-  return (BufferCodec {
-             encode   = utf32_encode done_bom,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = readIORef done_bom,
-             setState = writeIORef done_bom
+  return (BufferCodec# {
+             encode#   = utf32_encode done_bom,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = readIORef done_bom,
+             setState# = writeIORef done_bom
           })
 
-utf32_encode :: IORef Bool -> EncodeBuffer
+utf32_encode :: IORef Bool -> EncodeBuffer#
 utf32_encode done_bom input
   output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os }
+  st0
  = do
-  b <- readIORef done_bom
-  if b then utf32_native_encode input output
+  let !(# st1, b #) = unIO (readIORef done_bom) st0
+  if b then utf32_native_encode input output st1
        else if os - ow < 4
-               then return (OutputUnderflow, input,output)
+               then (# st1,OutputUnderflow,input,output #)
                else do
-                    writeIORef done_bom True
-                    writeWord8Buf oraw ow     bom0
-                    writeWord8Buf oraw (ow+1) bom1
-                    writeWord8Buf oraw (ow+2) bom2
-                    writeWord8Buf oraw (ow+3) bom3
-                    utf32_native_encode input output{ bufR = ow+4 }
-
-utf32_decode :: IORef (Maybe DecodeBuffer) -> DecodeBuffer
+               let !(# st2, () #) = unIO (writeIORef done_bom True) st1
+                   !(# st3, () #) = unIO (writeWord8Buf oraw ow     bom0) st2
+                   !(# st4, () #) = unIO (writeWord8Buf oraw (ow+1) bom1) st3
+                   !(# st5, () #) = unIO (writeWord8Buf oraw (ow+2) bom2) st4
+                   !(# st6, () #) = unIO (writeWord8Buf oraw (ow+3) bom3) st5
+               utf32_native_encode input output{ bufR = ow+4 } st6
+
+utf32_decode :: IORef (Maybe DecodeBuffer#) -> DecodeBuffer#
 utf32_decode seen_bom
   input@Buffer{  bufRaw=iraw, bufL=ir, bufR=iw,  bufSize=_  }
   output
+  st0
  = do
-   mb <- readIORef seen_bom
+   let !(# st1, mb #) = unIO (readIORef seen_bom) st0
    case mb of
-     Just decode -> decode input output
+     Just decode -> decode input output st1
      Nothing ->
-       if iw - ir < 4 then return (InputUnderflow, input,output) else do
-       c0 <- readWord8Buf iraw ir
-       c1 <- readWord8Buf iraw (ir+1)
-       c2 <- readWord8Buf iraw (ir+2)
-       c3 <- readWord8Buf iraw (ir+3)
+       if iw - ir < 4 then (# st1,InputUnderflow,input,output #) else do
+       let !(# st2, c0 #) = unIO (readWord8Buf iraw  ir   ) st1
+           !(# st3, c1 #) = unIO (readWord8Buf iraw (ir+1)) st2
+           !(# st4, c2 #) = unIO (readWord8Buf iraw (ir+2)) st3
+           !(# st5, c3 #) = unIO (readWord8Buf iraw (ir+3)) st4
        case () of
-        _ | c0 == bom0 && c1 == bom1 && c2 == bom2 && c3 == bom3 -> do
-               writeIORef seen_bom (Just utf32be_decode)
-               utf32be_decode input{ bufL= ir+4 } output
-        _ | c0 == bom3 && c1 == bom2 && c2 == bom1 && c3 == bom0 -> do
-               writeIORef seen_bom (Just utf32le_decode)
-               utf32le_decode input{ bufL= ir+4 } output
-          | otherwise -> do
-               writeIORef seen_bom (Just utf32_native_decode)
-               utf32_native_decode input output
+        _ | c0 == bom0 && c1 == bom1 && c2 == bom2 && c3 == bom3 ->
+               let !(# st6, () #) = unIO (writeIORef seen_bom (Just utf32be_decode)) st5
+               in utf32be_decode input{ bufL= ir+4 } output st6
+        _ | c0 == bom3 && c1 == bom2 && c2 == bom1 && c3 == bom0 ->
+               let !(# st6, () #) = unIO (writeIORef seen_bom (Just utf32le_decode)) st5
+               in utf32le_decode input{ bufL= ir+4 } output st6
+          | otherwise ->
+               let !(# st6, () #) = unIO (writeIORef seen_bom (Just utf32_native_decode)) st5
+               in utf32_native_decode input output st6
 
 
 bom0, bom1, bom2, bom3 :: Word8
@@ -132,10 +135,10 @@ bom2 = 0xfe
 bom3 = 0xff
 
 -- choose UTF-32BE by default for UTF-32 output
-utf32_native_decode :: DecodeBuffer
+utf32_native_decode :: DecodeBuffer#
 utf32_native_decode = utf32be_decode
 
-utf32_native_encode :: EncodeBuffer
+utf32_native_encode :: EncodeBuffer#
 utf32_native_encode = utf32be_encode
 
 -- -----------------------------------------------------------------------------
@@ -152,22 +155,22 @@ mkUTF32be cfm = TextEncoding { textEncodingName = "UTF-32BE",
 
 utf32be_DF :: CodingFailureMode -> IO (TextDecoder ())
 utf32be_DF cfm =
-  return (BufferCodec {
-             encode   = utf32be_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf32be_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf32be_EF :: CodingFailureMode -> IO (TextEncoder ())
 utf32be_EF cfm =
-  return (BufferCodec {
-             encode   = utf32be_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf32be_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 
@@ -182,128 +185,145 @@ mkUTF32le cfm = TextEncoding { textEncodingName = "UTF-32LE",
 
 utf32le_DF :: CodingFailureMode -> IO (TextDecoder ())
 utf32le_DF cfm =
-  return (BufferCodec {
-             encode   = utf32le_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf32le_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf32le_EF :: CodingFailureMode -> IO (TextEncoder ())
 utf32le_EF cfm =
-  return (BufferCodec {
-             encode   = utf32le_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf32le_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 
-utf32be_decode :: DecodeBuffer
+utf32be_decode :: DecodeBuffer#
 utf32be_decode
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-       loop !ir !ow
-         | ow >= os    = done OutputUnderflow ir ow
-         | iw - ir < 4 = done InputUnderflow  ir ow
+       loop :: Int -> Int -> DecodingBuffer#
+       loop !ir !ow st0
+         | ow >= os    = done OutputUnderflow ir ow st0
+         | iw - ir < 4 = done InputUnderflow  ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
-              c1 <- readWord8Buf iraw (ir+1)
-              c2 <- readWord8Buf iraw (ir+2)
-              c3 <- readWord8Buf iraw (ir+3)
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw ir    ) st0
+                  !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                  !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+                  !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
               let x1 = chr4 c0 c1 c2 c3
-              if not (validate x1) then invalid else do
-              ow' <- writeCharBuf oraw ow x1
-              loop (ir+4) ow'
+              if not (validate x1) then invalid st4 else do
+              let !(# st5, ow' #) = unIO (writeCharBuf oraw ow x1) st4
+              loop (ir+4) ow' st5
          where
-           invalid = done InvalidSequence ir ow
+           invalid :: DecodingBuffer#
+           invalid st' = done InvalidSequence ir ow st'
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' =
+         let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+             !ro = output{ bufR=ow }
+         in  (# st', why, ri, ro #)
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-utf32le_decode :: DecodeBuffer
+utf32le_decode :: DecodeBuffer#
 utf32le_decode
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-       loop !ir !ow
-         | ow >= os    = done OutputUnderflow ir ow
-         | iw - ir < 4 = done InputUnderflow  ir ow
+       loop :: Int -> Int -> DecodingBuffer#
+       loop !ir !ow st0
+         | ow >= os    = done OutputUnderflow ir ow st0
+         | iw - ir < 4 = done InputUnderflow  ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
-              c1 <- readWord8Buf iraw (ir+1)
-              c2 <- readWord8Buf iraw (ir+2)
-              c3 <- readWord8Buf iraw (ir+3)
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw ir    ) st0
+                  !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                  !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+                  !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
               let x1 = chr4 c3 c2 c1 c0
-              if not (validate x1) then invalid else do
-              ow' <- writeCharBuf oraw ow x1
-              loop (ir+4) ow'
+              if not (validate x1) then invalid st4 else do
+              let !(# st5, ow' #) = unIO (writeCharBuf oraw ow x1) st4
+              loop (ir+4) ow' st5
          where
-           invalid = done InvalidSequence ir ow
+           invalid :: DecodingBuffer#
+           invalid st' = done InvalidSequence ir ow st'
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' =
+         let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+             !ro = output{ bufR=ow }
+         in  (# st', why, ri, ro #)
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-utf32be_encode :: EncodeBuffer
+utf32be_encode :: EncodeBuffer#
 utf32be_encode
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ir >= iw    = done InputUnderflow  ir ow
-        | os - ow < 4 = done OutputUnderflow ir ow
+      {-# NOINLINE done #-}
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' =
+        let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+            !ro = output{ bufR=ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ir >= iw    = done InputUnderflow  ir ow st0
+        | os - ow < 4 = done OutputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
-           if isSurrogate c then done InvalidSequence ir ow else do
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+           if isSurrogate c then done InvalidSequence ir ow st1 else do
              let (c0,c1,c2,c3) = ord4 c
-             writeWord8Buf oraw ow     c0
-             writeWord8Buf oraw (ow+1) c1
-             writeWord8Buf oraw (ow+2) c2
-             writeWord8Buf oraw (ow+3) c3
-             loop ir' (ow+4)
+                 !(# st2, () #) = unIO (writeWord8Buf oraw ow     c0) st1
+                 !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c1) st2
+                 !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c2) st3
+                 !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c3) st4
+             loop ir' (ow+4) st5
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-utf32le_encode :: EncodeBuffer
+utf32le_encode :: EncodeBuffer#
 utf32le_encode
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ir >= iw    = done InputUnderflow  ir ow
-        | os - ow < 4 = done OutputUnderflow ir ow
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' =
+        let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+            !ro = output{ bufR=ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ir >= iw    = done InputUnderflow  ir ow st0
+        | os - ow < 4 = done OutputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
-           if isSurrogate c then done InvalidSequence ir ow else do
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+           if isSurrogate c then done InvalidSequence ir ow st1 else do
              let (c0,c1,c2,c3) = ord4 c
-             writeWord8Buf oraw ow     c3
-             writeWord8Buf oraw (ow+1) c2
-             writeWord8Buf oraw (ow+2) c1
-             writeWord8Buf oraw (ow+3) c0
-             loop ir' (ow+4)
+                 !(# st2, () #) = unIO (writeWord8Buf oraw ow     c3) st1
+                 !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+                 !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c1) st3
+                 !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c0) st4
+             loop ir' (ow+4) st5
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
 chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
 chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) =
diff --git a/libraries/base/GHC/IO/Encoding/UTF8.hs b/libraries/base/GHC/IO/Encoding/UTF8.hs
index a8d30d9749..f2f4b69176 100644
--- a/libraries/base/GHC/IO/Encoding/UTF8.hs
+++ b/libraries/base/GHC/IO/Encoding/UTF8.hs
@@ -3,6 +3,7 @@
            , BangPatterns
            , NondecreasingIndentation
            , MagicHash
+           , UnboxedTuples
   #-}
 {-# OPTIONS_GHC -funbox-strict-fields #-}
 
@@ -56,22 +57,22 @@ mkUTF8 cfm = TextEncoding { textEncodingName = "UTF-8",
 
 utf8_DF :: CodingFailureMode -> IO (TextDecoder ())
 utf8_DF cfm =
-  return (BufferCodec {
-             encode   = utf8_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf8_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf8_EF :: CodingFailureMode -> IO (TextEncoder ())
 utf8_EF cfm =
-  return (BufferCodec {
-             encode   = utf8_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf8_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf8_bom :: TextEncoding
@@ -85,177 +86,188 @@ mkUTF8_bom cfm = TextEncoding { textEncodingName = "UTF-8BOM",
 utf8_bom_DF :: CodingFailureMode -> IO (TextDecoder Bool)
 utf8_bom_DF cfm = do
    ref <- newIORef True
-   return (BufferCodec {
-             encode   = utf8_bom_decode ref,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = readIORef ref,
-             setState = writeIORef ref
+   return (BufferCodec# {
+             encode#   = utf8_bom_decode ref,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = readIORef ref,
+             setState# = writeIORef ref
           })
 
 utf8_bom_EF :: CodingFailureMode -> IO (TextEncoder Bool)
 utf8_bom_EF cfm = do
    ref <- newIORef True
-   return (BufferCodec {
-             encode   = utf8_bom_encode ref,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = readIORef ref,
-             setState = writeIORef ref
+   return (BufferCodec# {
+             encode#   = utf8_bom_encode ref,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = readIORef ref,
+             setState# = writeIORef ref
           })
 
-utf8_bom_decode :: IORef Bool -> DecodeBuffer
+utf8_bom_decode :: IORef Bool -> DecodeBuffer#
 utf8_bom_decode ref
   input@Buffer{  bufRaw=iraw, bufL=ir, bufR=iw,  bufSize=_  }
   output
+  st0
  = do
-   first <- readIORef ref
+   let !(# st1, first #) = unIO (readIORef ref) st0
    if not first
-      then utf8_decode input output
+      then utf8_decode input output st1
       else do
-       let no_bom = do writeIORef ref False; utf8_decode input output
-       if iw - ir < 1 then return (InputUnderflow,input,output) else do
-       c0 <- readWord8Buf iraw ir
+       let no_bom = let !(# st', () #) = unIO (writeIORef ref False) st1 in utf8_decode input output st'
+       if iw - ir < 1 then (# st1,InputUnderflow,input,output #) else do
+       let !(# st2, c0 #) = unIO (readWord8Buf iraw ir) st1
        if (c0 /= bom0) then no_bom else do
-       if iw - ir < 2 then return (InputUnderflow,input,output) else do
-       c1 <- readWord8Buf iraw (ir+1)
+       if iw - ir < 2 then (# st2,InputUnderflow,input,output #) else do
+       let !(# st3, c1 #) = unIO (readWord8Buf iraw (ir+1)) st2
        if (c1 /= bom1) then no_bom else do
-       if iw - ir < 3 then return (InputUnderflow,input,output) else do
-       c2 <- readWord8Buf iraw (ir+2)
+       if iw - ir < 3 then (# st3,InputUnderflow,input,output #) else do
+       let !(# st4, c2 #) = unIO (readWord8Buf iraw (ir+2)) st3
        if (c2 /= bom2) then no_bom else do
        -- found a BOM, ignore it and carry on
-       writeIORef ref False
-       utf8_decode input{ bufL = ir + 3 } output
+       let !(# st5, () #) = unIO (writeIORef ref False) st4
+       utf8_decode input{ bufL = ir + 3 } output st5
 
-utf8_bom_encode :: IORef Bool -> EncodeBuffer
+utf8_bom_encode :: IORef Bool -> EncodeBuffer#
 utf8_bom_encode ref input
   output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os }
+  st0
  = do
-  b <- readIORef ref
-  if not b then utf8_encode input output
+  let !(# st1, b #) = unIO (readIORef ref) st0
+  if not b then utf8_encode input output st1
            else if os - ow < 3
-                  then return (OutputUnderflow,input,output)
+                  then (# st1,OutputUnderflow,input,output #)
                   else do
-                    writeIORef ref False
-                    writeWord8Buf oraw ow     bom0
-                    writeWord8Buf oraw (ow+1) bom1
-                    writeWord8Buf oraw (ow+2) bom2
-                    utf8_encode input output{ bufR = ow+3 }
+                    let !(# st2, () #) = unIO (writeIORef ref False)           st1
+                        !(# st3, () #) = unIO (writeWord8Buf oraw ow     bom0) st2
+                        !(# st4, () #) = unIO (writeWord8Buf oraw (ow+1) bom1) st3
+                        !(# st5, () #) = unIO (writeWord8Buf oraw (ow+2) bom2) st4
+                    utf8_encode input output{ bufR = ow+3 } st5
 
 bom0, bom1, bom2 :: Word8
 bom0 = 0xef
 bom1 = 0xbb
 bom2 = 0xbf
 
-utf8_decode :: DecodeBuffer
+utf8_decode :: DecodeBuffer#
 utf8_decode
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-       loop !ir !ow
-         | ow >= os = done OutputUnderflow ir ow
-         | ir >= iw = done InputUnderflow ir ow
+       loop :: Int -> Int -> DecodingBuffer#
+       loop !ir !ow st0
+         | ow >= os = done OutputUnderflow ir ow st0
+         | ir >= iw = done InputUnderflow ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw ir) st0
               case c0 of
                 _ | c0 <= 0x7f -> do
-                           ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0))
-                           loop (ir+1) ow'
-                  | c0 >= 0xc0 && c0 <= 0xc1 -> invalid -- Overlong forms
+                           let !(# st2, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral c0))) st1
+                           loop (ir+1) ow' st2
+                  | c0 >= 0xc0 && c0 <= 0xc1 -> invalid st1 -- Overlong forms
                   | c0 >= 0xc2 && c0 <= 0xdf ->
-                           if iw - ir < 2 then done InputUnderflow ir ow else do
-                           c1 <- readWord8Buf iraw (ir+1)
-                           if (c1 < 0x80 || c1 >= 0xc0) then invalid else do
-                           ow' <- writeCharBuf oraw ow (chr2 c0 c1)
-                           loop (ir+2) ow'
+                           if iw - ir < 2 then done InputUnderflow ir ow st1 else do
+                           let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                           if (c1 < 0x80 || c1 >= 0xc0) then invalid st2 else do
+                           let !(# st3, ow' #) = unIO (writeCharBuf oraw ow (chr2 c0 c1)) st2
+                           loop (ir+2) ow' st3
                   | c0 >= 0xe0 && c0 <= 0xef ->
                       case iw - ir of
-                        1 -> done InputUnderflow ir ow
+                        1 -> done InputUnderflow ir ow st1
                         2 -> do -- check for an error even when we don't have
                                 -- the full sequence yet (#3341)
-                           c1 <- readWord8Buf iraw (ir+1)
+                           let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
                            if not (validate3 c0 c1 0x80)
-                              then invalid else done InputUnderflow ir ow
+                              then invalid st2 else done InputUnderflow ir ow st2
                         _ -> do
-                           c1 <- readWord8Buf iraw (ir+1)
-                           c2 <- readWord8Buf iraw (ir+2)
-                           if not (validate3 c0 c1 c2) then invalid else do
-                           ow' <- writeCharBuf oraw ow (chr3 c0 c1 c2)
-                           loop (ir+3) ow'
+                           let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                           let !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+                           if not (validate3 c0 c1 c2) then invalid st3 else do
+                           let !(# st4, ow' #) = unIO (writeCharBuf oraw ow (chr3 c0 c1 c2)) st3
+                           loop (ir+3) ow' st4
                   | c0 >= 0xf0 ->
                       case iw - ir of
-                        1 -> done InputUnderflow ir ow
+                        1 -> done InputUnderflow ir ow st1
                         2 -> do -- check for an error even when we don't have
                                 -- the full sequence yet (#3341)
-                           c1 <- readWord8Buf iraw (ir+1)
+                           let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
                            if not (validate4 c0 c1 0x80 0x80)
-                              then invalid else done InputUnderflow ir ow
+                              then invalid st2 else done InputUnderflow ir ow st2
                         3 -> do
-                           c1 <- readWord8Buf iraw (ir+1)
-                           c2 <- readWord8Buf iraw (ir+2)
+                           let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                               !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
                            if not (validate4 c0 c1 c2 0x80)
-                              then invalid else done InputUnderflow ir ow
+                              then invalid st3 else done InputUnderflow ir ow st3
                         _ -> do
-                           c1 <- readWord8Buf iraw (ir+1)
-                           c2 <- readWord8Buf iraw (ir+2)
-                           c3 <- readWord8Buf iraw (ir+3)
-                           if not (validate4 c0 c1 c2 c3) then invalid else do
-                           ow' <- writeCharBuf oraw ow (chr4 c0 c1 c2 c3)
-                           loop (ir+4) ow'
+                           let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                               !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+                               !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
+                           if not (validate4 c0 c1 c2 c3) then invalid st4 else do
+                           let !(# st5, ow' #) = unIO (writeCharBuf oraw ow (chr4 c0 c1 c2 c3)) st4
+                           loop (ir+4) ow' st5
                   | otherwise ->
-                           invalid
+                           invalid st1
          where
-           invalid = done InvalidSequence ir ow
+           invalid :: DecodingBuffer#
+           invalid st' = done InvalidSequence ir ow st'
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' =
+         let !ri = if ir == iw then input{ bufL = 0, bufR = 0} else input{ bufL = ir }
+             !ro = output { bufR = ow }
+         in (# st', why, ri, ro #)
    in
-   loop ir0 ow0
+   loop ir0 ow0 st
 
-utf8_encode :: EncodeBuffer
+utf8_encode :: EncodeBuffer#
 utf8_encode
   input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ow >= os = done OutputUnderflow ir ow
-        | ir >= iw = done InputUnderflow ir ow
+      {-# NOINLINE done #-}
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' =
+        let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL = ir }
+            !ro = output{ bufR = ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ow >= os = done OutputUnderflow ir ow st0
+        | ir >= iw = done InputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
            case ord c of
              x | x <= 0x7F   -> do
-                    writeWord8Buf oraw ow (fromIntegral x)
-                    loop ir' (ow+1)
+                     let !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral x)) st1
+                     loop ir' (ow+1) st2
                | x <= 0x07FF ->
-                    if os - ow < 2 then done OutputUnderflow ir ow else do
+                    if os - ow < 2 then done OutputUnderflow ir ow st1 else do
                     let (c1,c2) = ord2 c
-                    writeWord8Buf oraw ow     c1
-                    writeWord8Buf oraw (ow+1) c2
-                    loop ir' (ow+2)
-               | x <= 0xFFFF -> if isSurrogate c then done InvalidSequence ir ow else do
-                    if os - ow < 3 then done OutputUnderflow ir ow else do
+                        !(# st2, () #) = unIO (writeWord8Buf oraw ow     c1) st1
+                        !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+                    loop ir' (ow+2) st3
+               | x <= 0xFFFF -> if isSurrogate c then done InvalidSequence ir ow st1 else do
+                    if os - ow < 3 then done OutputUnderflow ir ow st1 else do
                     let (c1,c2,c3) = ord3 c
-                    writeWord8Buf oraw ow     c1
-                    writeWord8Buf oraw (ow+1) c2
-                    writeWord8Buf oraw (ow+2) c3
-                    loop ir' (ow+3)
+                        !(# st2, () #) = unIO (writeWord8Buf oraw ow     c1) st1
+                        !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+                        !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c3) st3
+                    loop ir' (ow+3) st4
                | otherwise -> do
-                    if os - ow < 4 then done OutputUnderflow ir ow else do
+                    if os - ow < 4 then done OutputUnderflow ir ow st1 else do
                     let (c1,c2,c3,c4) = ord4 c
-                    writeWord8Buf oraw ow     c1
-                    writeWord8Buf oraw (ow+1) c2
-                    writeWord8Buf oraw (ow+2) c3
-                    writeWord8Buf oraw (ow+3) c4
-                    loop ir' (ow+4)
+                        !(# st2, () #) = unIO (writeWord8Buf oraw ow     c1) st1
+                        !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+                        !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c3) st3
+                        !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c4) st4
+                    loop ir' (ow+4) st5
    in
-   loop ir0 ow0
+   loop ir0 ow0 st
 
 -- -----------------------------------------------------------------------------
 -- UTF-8 primitives, lifted from Data.Text.Fusion.Utf8
diff --git a/libraries/base/changelog.md b/libraries/base/changelog.md
index a194bfad7f..26e7f36d5f 100644
--- a/libraries/base/changelog.md
+++ b/libraries/base/changelog.md
@@ -28,6 +28,7 @@
   * Add more instances for `Compose`: `Enum`, `Bounded`, `Num`, `Real`, `Integral` ([CLC proposal #160](https://github.com/haskell/core-libraries-committee/issues/160))
   * Make `(&)` representation polymorphic in the return type ([CLC proposal #158](https://github.com/haskell/core-libraries-committee/issues/158))
   * Implement `GHC.IORef.atomicSwapIORef` via a new dedicated primop `atomicSwapMutVar#` ([CLC proposal #139](https://github.com/haskell/core-libraries-committee/issues/139))
+  * Change codebuffers to use an unboxed implementation, while providing a compatibility layer using pattern synonyms. ([CLC proposal #134](https://github.com/haskell/core-libraries-committee/issues/134))
 
 ## 4.18.0.0 *March 2023*
   * Shipped with GHC 9.6.1
author	Josh Meredith <joshmeredith2008@gmail.com>	2023-04-20 08:44:58 +0000
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2023-05-15 18:01:43 -0400
commit	21f3aae7371469beb3950a6170db6c5668379ff3 (patch)
tree	a2c92b090383fe005004d6f50f38b966314faf27
parent	fbe3fe003ac8d4a065c80041c0a9f9c74b6366ac (diff)
download	haskell-21f3aae7371469beb3950a6170db6c5668379ff3.tar.gz