8 files changed, 470 insertions, 0 deletions
diff --git a/testsuite/tests/codeGen/should_run/T15038/Makefile b/testsuite/tests/codeGen/should_run/T15038/Makefile
new file mode 100644
index 0000000000..48493c08ef
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/Makefile
@@ -0,0 +1,15 @@
+TOP=../../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+.PHONY: T15038
+T15038:
+	'$(TEST_HC)' $(TEST_HC_OPTS)  -O2 -o Main \
+	  ./test/Main.hs \
+	  ./test/Parser.hs \
+	  ./src/Packed/Bytes/Stream/ST.hs \
+	  ./src/Packed/Bytes/Parser.hs \
+	  ./src/Packed/Bytes.hs \
+	  ./common/Data/Trie/Naive.hs \
+		-package containers -package ghc-prim -package primitive
+	./Main
diff --git a/testsuite/tests/codeGen/should_run/T15038/all.T b/testsuite/tests/codeGen/should_run/T15038/all.T
new file mode 100644
index 0000000000..6b284784ae
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/all.T
@@ -0,0 +1,4 @@
+test('T15038',
+     [reqlib('containers'), reqlib('ghc-prim'), reqlib('primitive')],
+     run_command,
+     ['$MAKE -s --no-print-directory T15038'])
diff --git a/testsuite/tests/codeGen/should_run/T15038/common/Data/Trie/Naive.hs b/testsuite/tests/codeGen/should_run/T15038/common/Data/Trie/Naive.hs
new file mode 100644
index 0000000000..a138615b2d
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/common/Data/Trie/Naive.hs
@@ -0,0 +1,80 @@
+{-# LANGUAGE DeriveFunctor #-}
+
+module Data.Trie.Naive
+  ( Trie
+  , singleton
+  , singletonString
+  , lookup
+  , parser
+  , fromList
+  , fromListAppend
+  , fromStringList
+  ) where
+
+import Prelude hiding (lookup)
+
+import Data.Semigroup (Semigroup)
+import Data.Word (Word8)
+import Data.Map (Map)
+import Data.Bifunctor (second)
+import Packed.Bytes (Bytes)
+import qualified Data.Char
+import qualified GHC.OldList as L
+import qualified Packed.Bytes.Parser as P
+import qualified Packed.Bytes as B
+import qualified Data.Semigroup as SG
+import qualified Data.Map.Strict as M
+
+data Trie a = Trie (Maybe a) (Map Word8 (Trie a))
+  deriving (Functor)
+
+instance Semigroup a => Semigroup (Trie a) where
+  (<>) = append
+
+instance Semigroup a => Monoid (Trie a) where
+  mempty = Trie Nothing M.empty
+  mappend = (SG.<>)
+
+append :: Semigroup a => Trie a -> Trie a -> Trie a
+append (Trie v1 m1) (Trie v2 m2) = Trie
+  (SG.getOption (SG.Option v1 SG.<> SG.Option v2))
+  (M.unionWith append m1 m2)
+
+singleton :: Bytes -> a -> Trie a
+singleton k v = B.foldr (\b r -> Trie Nothing (M.singleton b r)) (Trie (Just v) M.empty) k
+
+singletonString :: String -> a -> Trie a
+singletonString k v = L.foldr (\c r -> Trie Nothing (M.singleton (c2w c) r)) (Trie (Just v) M.empty) k
+
+lookup :: Bytes -> Trie a -> Maybe a
+lookup k t0 = case B.foldr lookupStep (Just t0) k of
+  Nothing -> Nothing
+  Just (Trie v _) -> v
+
+lookupStep :: Word8 -> Maybe (Trie a) -> Maybe (Trie a)
+lookupStep w Nothing = Nothing
+lookupStep w (Just (Trie _ m)) = M.lookup w m
+
+parser :: Trie (P.Parser a) -> P.Parser a
+parser (Trie mp m) = case mp of
+  Just p -> p
+  Nothing -> do
+    w <- P.any
+    case M.lookup w m of
+      Nothing -> P.failure
+      Just t -> parser t
+
+fromList :: [(Bytes,a)] -> Trie a
+fromList = fmap SG.getFirst . fromListAppend . map (second SG.First)
+
+fromListAppend :: Semigroup a => [(Bytes,a)] -> Trie a
+fromListAppend = foldMap (uncurry singleton)
+
+fromStringList :: [(String,a)] -> Trie a
+fromStringList = fmap SG.getFirst . fromStringListAppend . map (second SG.First)
+
+fromStringListAppend :: Semigroup a => [(String,a)] -> Trie a
+fromStringListAppend = foldMap (uncurry singletonString)
+
+c2w :: Char -> Word8
+c2w = fromIntegral . Data.Char.ord
diff --git a/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes.hs b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes.hs
new file mode 100644
index 0000000000..224e03f75d
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes.hs
@@ -0,0 +1,80 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+{-# OPTIONS_GHC
+ -fno-warn-unsafe
+ -fno-warn-implicit-prelude
+ -fno-warn-missing-import-lists
+ -O2
+#-}
+
+module Packed.Bytes
+  ( Bytes(..)
+  , pack
+  , unpack
+  , length
+    -- * Folds
+  , foldr
+    -- * Unsliced Byte Arrays
+  , fromByteArray
+  ) where
+
+import Prelude hiding (take,length,replicate,drop,null,concat,foldr)
+
+import Data.Primitive (ByteArray(..))
+import Data.Word (Word8)
+import Control.Monad.ST (runST, ST)
+import qualified Data.Primitive as PM
+import qualified GHC.OldList as L
+
+data Bytes = Bytes
+  {-# UNPACK #-} !ByteArray -- payload
+  {-# UNPACK #-} !Int -- offset
+  {-# UNPACK #-} !Int -- length
+
+instance Show Bytes where
+  show x = "pack " ++ show (unpack x)
+
+pack :: [Word8] -> Bytes
+pack bs = let arr = packByteArray bs in Bytes arr 0 (lengthByteArray arr)
+
+unpack :: Bytes -> [Word8]
+unpack (Bytes arr off len) = go off
+  where
+  go :: Int -> [Word8]
+  go !ix = if ix < len + off
+    then PM.indexByteArray arr ix : go (ix + 1)
+    else []
+
+fromByteArray :: ByteArray -> Bytes
+fromByteArray ba = Bytes ba 0 (lengthByteArray ba)
+
+length :: Bytes -> Int
+length (Bytes _ _ len) = len
+
+foldr :: (Word8 -> a -> a) -> a -> Bytes -> a
+foldr f a0 (Bytes arr off0 len) = go off0 where
+  !end = off0 + len
+  go !ix = if ix < end
+    then f (PM.indexByteArray arr ix) (go (ix + 1))
+    else a0
+
+packByteArray :: [Word8] -> ByteArray
+packByteArray ws0 = runST $ do
+  marr <- PM.newByteArray (L.length ws0)
+  let go [] !_ = return ()
+      go (w : ws) !ix = PM.writeByteArray marr ix w >> go ws (ix + 1)
+  go ws0 0
+  PM.unsafeFreezeByteArray marr
+
+unpackByteArray :: ByteArray -> [Word8]
+unpackByteArray arr = go 0 where
+  go :: Int -> [Word8]
+  go !ix = if ix < lengthByteArray arr
+    then PM.indexByteArray arr ix : go (ix + 1)
+    else []
+
+lengthByteArray :: ByteArray -> Int
+lengthByteArray = PM.sizeofByteArray
diff --git a/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Parser.hs b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Parser.hs
new file mode 100644
index 0000000000..3f9c42ad52
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Parser.hs
@@ -0,0 +1,165 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeInType #-}
+{-# LANGUAGE UnboxedSums #-}
+{-# LANGUAGE UnboxedTuples #-}
+
+{-# OPTIONS_GHC
+ -Weverything
+ -fno-warn-unsafe
+ -fno-warn-implicit-prelude
+ -fno-warn-missing-import-lists
+ -fno-warn-noncanonical-monoid-instances
+ -O2
+#-}
+
+module Packed.Bytes.Parser
+  ( Parser(..)
+  , Result(..)
+  , Leftovers(..)
+  , parseStreamST
+  , any
+  , failure
+  ) where
+
+import Control.Applicative
+import Data.Primitive (ByteArray(..))
+import GHC.Int (Int(I#))
+import GHC.ST (ST(..),runST)
+import GHC.Types (TYPE)
+import GHC.Word (Word8(W8#))
+import Packed.Bytes (Bytes(..))
+import Packed.Bytes.Stream.ST (ByteStream(..))
+import Prelude hiding (any,replicate)
+
+import qualified Data.Primitive as PM
+import qualified Control.Monad
+
+import GHC.Exts (Int#,ByteArray#,Word#,State#,(+#),(-#),(>#),indexWord8Array#)
+
+type Bytes# = (# ByteArray#, Int#, Int# #)
+type Maybe# (a :: TYPE r) = (# (# #) | a #)
+type Leftovers# s = (# Bytes# , ByteStream s #)
+type Result# s a = (# Maybe# (Leftovers# s), Maybe# a #)
+
+data Result s a = Result
+  { resultLeftovers :: !(Maybe (Leftovers s))
+  , resultValue :: !(Maybe a)
+  }
+
+data Leftovers s = Leftovers
+  { leftoversChunk :: {-# UNPACK #-} !Bytes
+    -- ^ The last chunk pulled from the stream
+  , leftoversStream :: ByteStream s
+    -- ^ The remaining stream
+  }
+
+data PureResult a = PureResult
+  { pureResultLeftovers :: {-# UNPACK #-} !Bytes
+  , pureResultValue :: !(Maybe a)
+  } deriving (Show)
+
+emptyByteArray :: ByteArray
+emptyByteArray = runST (PM.newByteArray 0 >>= PM.unsafeFreezeByteArray)
+
+parseStreamST :: ByteStream s -> Parser a -> ST s (Result s a)
+parseStreamST stream (Parser f) = ST $ \s0 ->
+  case f (# | (# (# unboxByteArray emptyByteArray, 0#, 0# #), stream #) #) s0 of
+    (# s1, r #) -> (# s1, boxResult r #)
+
+boxResult :: Result# s a -> Result s a
+boxResult (# leftovers, val #) = case val of
+  (# (# #) | #) -> Result (boxLeftovers leftovers) Nothing
+  (# | a #) -> Result (boxLeftovers leftovers) (Just a)
+
+boxLeftovers :: Maybe# (Leftovers# s) -> Maybe (Leftovers s)
+boxLeftovers (# (# #) | #) = Nothing
+boxLeftovers (# | (# theBytes, stream #) #) = Just (Leftovers (boxBytes theBytes) stream)
+
+instance Functor Parser where
+  fmap = mapParser
+
+-- Remember to write liftA2 by hand at some point.
+instance Applicative Parser where
+  pure = pureParser
+  (<*>) = Control.Monad.ap
+
+instance Monad Parser where
+  return = pure
+  (>>=) = bindLifted
+
+newtype Parser a = Parser
+  { getParser :: forall s.
+       Maybe# (Leftovers# s)
+    -> State# s
+    -> (# State# s, Result# s a #)
+  }
+
+nextNonEmpty :: ByteStream s -> State# s -> (# State# s, Maybe# (Leftovers# s) #)
+nextNonEmpty (ByteStream f) s0 = case f s0 of
+  (# s1, r #) -> case r of
+    (# (# #) | #) -> (# s1, (# (# #) | #) #)
+    (# | (# theBytes@(# _,_,len #), stream #) #) -> case len of
+      0# -> nextNonEmpty stream s1
+      _ -> (# s1, (# | (# theBytes, stream #) #) #)
+
+withNonEmpty :: forall s b.
+     Maybe# (Leftovers# s)
+  -> State# s
+  -> (State# s -> (# State# s, Result# s b #))
+  -> (Word# -> Bytes# -> ByteStream s -> State# s -> (# State# s, Result# s b #))
+     -- The first argument is a Word8, not a full machine word.
+     -- The second argument is the complete,non-empty chunk
+     -- with the head byte still intact.
+  -> (# State# s, Result# s b #)
+withNonEmpty (# (# #) | #) s0 g _ = g s0
+withNonEmpty (# | (# bytes0@(# arr0,off0,len0 #), stream0 #) #) s0 g f = case len0 ># 0# of
+  1# -> f (indexWord8Array# arr0 off0) bytes0 stream0 s0
+  _ -> case nextNonEmpty stream0 s0 of
+    (# s1, r #) -> case r of
+      (# (# #) | #) -> g s1
+      (# | (# bytes1@(# arr1, off1, _ #), stream1 #) #) ->
+        f (indexWord8Array# arr1 off1) bytes1 stream1 s1
+
+-- | Consume the next byte from the input.
+any :: Parser Word8
+any = Parser go where
+  go :: Maybe# (Leftovers# s) -> State# s -> (# State# s, Result# s Word8 #)
+  go m s0 = withNonEmpty m s0
+    (\s -> (# s, (# (# (# #) | #), (# (# #) | #) #) #))
+    (\theByte theBytes stream s ->
+      (# s, (# (# | (# unsafeDrop# 1# theBytes, stream #) #), (# | W8# theByte #) #) #)
+    )
+
+-- TODO: improve this
+mapParser :: (a -> b) -> Parser a -> Parser b
+mapParser f p = bindLifted p (pureParser . f)
+
+pureParser :: a -> Parser a
+pureParser a = Parser $ \leftovers0 s0 ->
+  (# s0, (# leftovers0, (# | a #) #) #)
+
+bindLifted :: Parser a -> (a -> Parser b) -> Parser b
+bindLifted (Parser f) g = Parser $ \leftovers0 s0 -> case f leftovers0 s0 of
+  (# s1, (# leftovers1, val #) #) -> case val of
+    (# (# #) | #) -> (# s1, (# leftovers1, (# (# #) | #) #) #)
+    (# | x #) -> case g x of
+      Parser k -> k leftovers1 s1
+
+-- This assumes that the Bytes is longer than the index. It also does
+-- not eliminate zero-length references to byte arrays.
+unsafeDrop# :: Int# -> Bytes# -> Bytes#
+unsafeDrop# i (# arr, off, len #) = (# arr, off +# i, len -# i #)
+
+unboxByteArray :: ByteArray -> ByteArray#
+unboxByteArray (ByteArray arr) = arr
+
+boxBytes :: Bytes# -> Bytes
+boxBytes (# a, b, c #) = Bytes (ByteArray a) (I# b) (I# c)
+
+failure :: Parser a
+failure = Parser (\m s -> (# s, (# m, (# (# #) | #) #) #))
diff --git a/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Stream/ST.hs b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Stream/ST.hs
new file mode 100644
index 0000000000..ffba9c2596
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Stream/ST.hs
@@ -0,0 +1,61 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnboxedSums #-}
+
+{-# OPTIONS_GHC -O2 #-}
+
+module Packed.Bytes.Stream.ST
+  ( ByteStream(..)
+  , empty
+  , unpack
+  , fromBytes
+  ) where
+
+import Data.Primitive (Array,ByteArray(..))
+import Data.Semigroup (Semigroup)
+import Data.Word (Word8)
+import GHC.Exts (RealWorld,State#,Int#,ByteArray#)
+import GHC.Int (Int(I#))
+import GHC.ST (ST(..))
+import Packed.Bytes (Bytes(..))
+import System.IO (Handle)
+import qualified Data.Primitive as PM
+import qualified Data.Semigroup as SG
+import qualified Packed.Bytes as B
+
+type Bytes# = (# ByteArray#, Int#, Int# #)
+
+newtype ByteStream s = ByteStream
+  (State# s -> (# State# s, (# (# #) | (# Bytes# , ByteStream s #) #) #) )
+
+fromBytes :: Bytes -> ByteStream s
+fromBytes b = ByteStream
+  (\s0 -> (# s0, (# | (# unboxBytes b, empty #) #) #))
+
+nextChunk :: ByteStream s -> ST s (Maybe (Bytes,ByteStream s))
+nextChunk (ByteStream f) = ST $ \s0 -> case f s0 of
+  (# s1, r #) -> case r of
+    (# (# #) | #) -> (# s1, Nothing #)
+    (# | (# theBytes, theStream #) #) -> (# s1, Just (boxBytes theBytes, theStream) #)
+
+empty :: ByteStream s
+empty = ByteStream (\s -> (# s, (# (# #) | #) #) )
+
+boxBytes :: Bytes# -> Bytes
+boxBytes (# a, b, c #) = Bytes (ByteArray a) (I# b) (I# c)
+
+unboxBytes :: Bytes -> Bytes#
+unboxBytes (Bytes (ByteArray a) (I# b) (I# c)) = (# a,b,c #)
+
+unpack :: ByteStream s -> ST s [Word8]
+unpack stream = ST (unpackInternal stream)
+
+unpackInternal :: ByteStream s -> State# s -> (# State# s, [Word8] #)
+unpackInternal (ByteStream f) s0 = case f s0 of
+  (# s1, r #) -> case r of
+    (# (# #) | #) -> (# s1, [] #)
+    (# | (# bytes, stream #) #) -> case unpackInternal stream s1 of
+      (# s2, ws #) -> (# s2, B.unpack (boxBytes bytes) ++ ws #)
diff --git a/testsuite/tests/codeGen/should_run/T15038/test/Main.hs b/testsuite/tests/codeGen/should_run/T15038/test/Main.hs
new file mode 100644
index 0000000000..56acd042db
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/test/Main.hs
@@ -0,0 +1,4 @@
+import qualified Parser as Parser
+
+main :: IO ()
+main = print (iterate Parser.byteParserBadOnce 5 !! 100000)
diff --git a/testsuite/tests/codeGen/should_run/T15038/test/Parser.hs b/testsuite/tests/codeGen/should_run/T15038/test/Parser.hs
new file mode 100644
index 0000000000..70f9f3336b
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/test/Parser.hs
@@ -0,0 +1,61 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE BinaryLiterals #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeInType #-}
+{-# LANGUAGE UnboxedSums #-}
+{-# LANGUAGE UnboxedTuples #-}
+
+{-# OPTIONS_GHC -Wall #-}
+
+module Parser
+  ( byteParserBadOnce
+  ) where
+
+import Control.Monad.ST (runST)
+import Data.Word (Word8)
+import Packed.Bytes (Bytes)
+import Packed.Bytes.Parser (Parser)
+import Packed.Bytes.Stream.ST (ByteStream)
+import qualified Data.Char
+import qualified Packed.Bytes as B
+import qualified Packed.Bytes.Parser as P
+import qualified Packed.Bytes.Stream.ST as Stream
+
+-- from common directory
+import qualified Data.Trie.Naive as Naive
+
+snmptrapdNaive :: Naive.Trie (Parser Word)
+snmptrapdNaive = Naive.fromStringList
+  [ ("STRING: ", P.any >>= \_ -> return 5)
+  ]
+
+runExampleParser :: Parser a -> (forall s. ByteStream s) -> (Maybe a, Maybe String)
+runExampleParser parser stream = runST $ do
+  P.Result mleftovers r <- P.parseStreamST stream parser
+  mextra <- case mleftovers of
+    Nothing -> return Nothing
+    Just (P.Leftovers chunk remainingStream) -> do
+      bs <- Stream.unpack remainingStream
+      return (Just (map word8ToChar (B.unpack chunk ++ bs)))
+  return (r,mextra)
+
+byteParserBadOnce :: Int -> Int
+byteParserBadOnce x = do
+  let sample = ("STRING: _6_ " ++ show x)
+      stream = Stream.fromBytes (s2b sample)
+      expected = 6
+      (r,mextra) = runExampleParser (Naive.parser snmptrapdNaive) stream
+      a1 = if Nothing == mextra then 1 else 0
+      a2 = if Just expected == r then 1 else 0
+   in a1 + (a2 + x)
+
+s2b :: String -> Bytes
+s2b = B.pack . map charToWord8
+
+charToWord8 :: Char -> Word8
+charToWord8 = fromIntegral . Data.Char.ord
+
+word8ToChar :: Word8 -> Char
+word8ToChar = Data.Char.chr . fromIntegral