diff options
author | Ian Lynagh <ian@well-typed.com> | 2012-12-13 20:20:58 +0000 |
---|---|---|
committer | Ian Lynagh <ian@well-typed.com> | 2012-12-13 21:31:02 +0000 |
commit | 7651b6799523e55e132ed8f5ccd5bb3f21b9a0ac (patch) | |
tree | abc75a7a48eb43c38a342575a7aaf837630a6f6f | |
parent | d5b5d48881b3adbf3bd5e177ee6ef506e589b882 (diff) | |
download | haskell-7651b6799523e55e132ed8f5ccd5bb3f21b9a0ac.tar.gz |
Make FastBytes a synonym for ByteString
A step on the way to getting rid of FastBytes
slow nofib Compile times look like:
-1 s.d. -2.4%
+1 s.d. +3.4%
Average +0.4%
but looking at the times for the longer-running compilations I think the
change is just noise.
-rw-r--r-- | compiler/ghci/ByteCodeGen.lhs | 10 | ||||
-rw-r--r-- | compiler/utils/Binary.hs | 12 | ||||
-rw-r--r-- | compiler/utils/BufWrite.hs | 3 | ||||
-rw-r--r-- | compiler/utils/FastString.lhs | 158 |
4 files changed, 83 insertions, 100 deletions
diff --git a/compiler/ghci/ByteCodeGen.lhs b/compiler/ghci/ByteCodeGen.lhs index 2b332a4581..9c9526de27 100644 --- a/compiler/ghci/ByteCodeGen.lhs +++ b/compiler/ghci/ByteCodeGen.lhs @@ -63,6 +63,8 @@ import BreakArray import Data.Maybe import Module +import qualified Data.ByteString as BS +import qualified Data.ByteString.Unsafe as BS import Data.Map (Map) import qualified Data.Map as Map import qualified FiniteMap as Map @@ -1266,18 +1268,18 @@ pushAtom _ _ (AnnLit lit) = do where pushStr s = let getMallocvilleAddr - = case s of - FastBytes n fp -> + = -- we could grab the Ptr from the ForeignPtr, -- but then we have no way to control its lifetime. -- In reality it'll probably stay alive long enoungh -- by virtue of the global FastString table, but -- to be on the safe side we copy the string into -- a malloc'd area of memory. - do ptr <- ioToBc (mallocBytes (n+1)) + do let n = BS.length s + ptr <- ioToBc (mallocBytes (n+1)) recordMallocBc ptr ioToBc ( - withForeignPtr fp $ \p -> do + BS.unsafeUseAsCString s $ \p -> do memcpy ptr p (fromIntegral n) pokeByteOff ptr n (fromIntegral (ord '\0') :: Word8) return ptr diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs index 68ea2730b7..2576562ef5 100644 --- a/compiler/utils/Binary.hs +++ b/compiler/utils/Binary.hs @@ -74,6 +74,7 @@ import BasicTypes import Foreign import Data.Array +import qualified Data.ByteString.Unsafe as BS import Data.IORef import Data.Char ( ord, chr ) import Data.Time @@ -720,17 +721,16 @@ getFS bh = do fb <- getFB bh mkFastStringFastBytes fb putFB :: BinHandle -> FastBytes -> IO () -putFB bh (FastBytes l buf) = do +putFB bh bs = + BS.unsafeUseAsCStringLen bs $ \(ptr, l) -> do put_ bh l - withForeignPtr buf $ \ptr -> - let + let go n | n == l = return () | otherwise = do - b <- peekElemOff ptr n + b <- peekElemOff (castPtr ptr) n putByte bh b go (n+1) - in - go 0 + go 0 {- -- possible faster version, not quite there yet: getFB bh@BinMem{} = do diff --git a/compiler/utils/BufWrite.hs b/compiler/utils/BufWrite.hs index 5ad165dcd8..8ad045bf66 100644 --- a/compiler/utils/BufWrite.hs +++ b/compiler/utils/BufWrite.hs @@ -94,8 +94,7 @@ bPutFZS :: BufHandle -> FastZString -> IO () bPutFZS b fs = bPutBS b $ fastZStringToByteString fs bPutFB :: BufHandle -> FastBytes -> IO () -bPutFB b (FastBytes len fp) = - withForeignPtr fp $ \ptr -> bPutCStringLen b (castPtr ptr, len) +bPutFB b bs = BS.unsafeUseAsCStringLen bs $ bPutCStringLen b bPutBS :: BufHandle -> ByteString -> IO () bPutBS b bs = BS.unsafeUseAsCStringLen bs $ bPutCStringLen b diff --git a/compiler/utils/FastString.lhs b/compiler/utils/FastString.lhs index 03a36f21e2..42bcb0ba41 100644 --- a/compiler/utils/FastString.lhs +++ b/compiler/utils/FastString.lhs @@ -27,7 +27,7 @@ module FastString ( -- * FastBytes - FastBytes(..), + FastBytes, mkFastStringFastBytes, foreignPtrToFastBytes, fastStringToFastBytes, @@ -109,8 +109,10 @@ import Panic import Util import Data.ByteString (ByteString) -import qualified Data.ByteString.Char8 as BS -import qualified Data.ByteString.Unsafe as BS +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as BSC +import qualified Data.ByteString.Internal as BS +import qualified Data.ByteString.Unsafe as BS import Foreign.C import GHC.Exts import System.IO @@ -132,37 +134,13 @@ import GHC.Base ( unpackCString# ) #define hASH_TBL_SIZE_UNBOXED 4091# -data FastBytes = FastBytes { - fb_n_bytes :: {-# UNPACK #-} !Int, -- number of bytes - fb_buf :: {-# UNPACK #-} !(ForeignPtr Word8) - } deriving Typeable - -instance Data FastBytes where - -- don't traverse? - toConstr _ = abstractConstr "FastBytes" - gunfold _ _ = error "gunfold" - dataTypeOf _ = mkNoRepType "FastBytes" - -instance Eq FastBytes where - x == y = (x `compare` y) == EQ - -instance Ord FastBytes where - compare = cmpFB - -instance Show FastBytes where - show fb = show (concatMap escape $ bytesFB fb) ++ "#" - where escape :: Word8 -> String - escape w = let c = chr (fromIntegral w) - in if isAscii c - then [c] - else '\\' : show w +type FastBytes = ByteString foreignPtrToFastBytes :: ForeignPtr Word8 -> Int -> FastBytes -foreignPtrToFastBytes fp len = FastBytes len fp +foreignPtrToFastBytes fp len = BS.fromForeignPtr fp 0 len mkFastStringFastBytes :: FastBytes -> IO FastString -mkFastStringFastBytes (FastBytes len fp) - = withForeignPtr fp $ \ptr -> mkFastStringForeignPtr ptr fp len +mkFastStringFastBytes bs = mkFastStringByteString bs fastStringToFastBytes :: FastString -> FastBytes fastStringToFastBytes f = fs_fb f @@ -199,35 +177,21 @@ pokeCAString ptr str = -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString' bytesFB :: FastBytes -> [Word8] -bytesFB (FastBytes n_bytes buf) = - inlinePerformIO $ withForeignPtr buf $ \ptr -> - peekArray n_bytes ptr +bytesFB = BS.unpack hashFB :: FastBytes -> Int -hashFB (FastBytes len buf) - = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $ hashStr ptr len +hashFB bs + = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> + return $ hashStr (castPtr ptr) len lengthFB :: FastBytes -> Int -lengthFB f = fb_n_bytes f +lengthFB f = BS.length f appendFB :: FastBytes -> FastBytes -> FastBytes -appendFB fb1 fb2 = - inlinePerformIO $ do - r <- mallocForeignPtrBytes len - withForeignPtr r $ \ r' -> do - withForeignPtr (fb_buf fb1) $ \ fb1Ptr -> do - withForeignPtr (fb_buf fb2) $ \ fb2Ptr -> do - copyBytes r' fb1Ptr len1 - copyBytes (advancePtr r' len1) fb2Ptr len2 - return $ foreignPtrToFastBytes r len - where len = len1 + len2 - len1 = fb_n_bytes fb1 - len2 = fb_n_bytes fb2 +appendFB = BS.append hPutFB :: Handle -> FastBytes -> IO () -hPutFB handle (FastBytes len fp) - | len == 0 = return () - | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len +hPutFB = BS.hPut -- ----------------------------------------------------------------------------- @@ -244,7 +208,7 @@ lengthFZS :: FastZString -> Int lengthFZS (FastZString bs) = BS.length bs mkFastZStringString :: String -> FastZString -mkFastZStringString str = FastZString (BS.pack str) +mkFastZStringString str = FastZString (BSC.pack str) -- ----------------------------------------------------------------------------- @@ -291,21 +255,7 @@ instance Data FastString where cmpFS :: FastString -> FastString -> Ordering cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) = if u1 == u2 then EQ else - cmpFB (fastStringToFastBytes f1) (fastStringToFastBytes f2) - -cmpFB :: FastBytes -> FastBytes -> Ordering -cmpFB (FastBytes l1 buf1) (FastBytes l2 buf2) = - case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of - LT -> LT - EQ -> compare l1 l2 - GT -> GT - -unsafeMemcmp :: ForeignPtr a -> ForeignPtr b -> Int -> Int -unsafeMemcmp buf1 buf2 l = - inlinePerformIO $ - withForeignPtr buf1 $ \p1 -> - withForeignPtr buf2 $ \p2 -> - memcmp p1 p2 l + compare (fastStringToFastBytes f1) (fastStringToFastBytes f2) #ifndef __HADDOCK__ foreign import ccall unsafe "ghc_memcmp" @@ -393,6 +343,31 @@ mkFastStringForeignPtr ptr fp len = do Nothing -> add_it ls Just v -> {- _trace ("re-use: "++show v) $ -} return v +-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference +-- between this and 'mkFastStringBytes' is that we don't have to copy +-- the bytes if the string is new to the table. +mkFastStringByteString :: ByteString -> IO FastString +mkFastStringByteString bs = BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do + ft@(FastStringTable uid _) <- readIORef string_table +-- _trace ("hashed: "++show (I# h)) $ + let + ptr' = castPtr ptr + h = hashStr ptr' len + add_it ls = do + fs <- mkNewFastStringByteString uid ptr' len bs + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h + case lookup_result of + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr' + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v + -- | Creates a UTF-8 encoded 'FastString' from a 'String' mkFastString :: String -> FastString mkFastString str = @@ -419,9 +394,10 @@ mkZFastString = mkFastZStringString bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString) bucket_match [] _ _ = return Nothing -bucket_match (v@(FastString _ _ (FastBytes l buf) _):ls) len ptr - | len == l = do - b <- cmpStringPrefix ptr buf len +bucket_match (v@(FastString _ _ bs _):ls) len ptr + | len == BS.length bs = do + b <- BS.unsafeUseAsCString bs $ \buf -> + cmpStringPrefix ptr (castPtr buf) len if b then return (Just v) else bucket_match ls len ptr | otherwise = @@ -432,14 +408,21 @@ mkNewFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int mkNewFastString uid ptr fp len = do ref <- newIORef Nothing n_chars <- countUTF8Chars ptr len - return (FastString uid n_chars (FastBytes len fp) ref) + return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref) + +mkNewFastStringByteString :: Int -> Ptr Word8 -> Int -> ByteString + -> IO FastString +mkNewFastStringByteString uid ptr len bs = do + ref <- newIORef Nothing + n_chars <- countUTF8Chars ptr len + return (FastString uid n_chars bs ref) copyNewFastString :: Int -> Ptr Word8 -> Int -> IO FastString copyNewFastString uid ptr len = do fp <- copyBytesToForeignPtr ptr len ref <- newIORef Nothing n_chars <- countUTF8Chars ptr len - return (FastString uid n_chars (FastBytes len fp) ref) + return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref) copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8) copyBytesToForeignPtr ptr len = do @@ -447,10 +430,9 @@ copyBytesToForeignPtr ptr len = do withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len return fp -cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool -cmpStringPrefix ptr fp len = - withForeignPtr fp $ \ptr' -> do - r <- memcmp ptr ptr' len +cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool +cmpStringPrefix ptr1 ptr2 len = + do r <- memcmp ptr1 ptr2 len return (r == 0) @@ -481,13 +463,13 @@ hasZEncoding (FastString _ _ _ ref) = -- | Returns @True@ if the 'FastString' is empty nullFS :: FastString -> Bool -nullFS f = fb_n_bytes (fs_fb f) == 0 +nullFS f = BS.null (fs_fb f) -- | Unpacks and decodes the FastString unpackFS :: FastString -> String -unpackFS (FastString _ _ (FastBytes n_bytes buf) _) = - inlinePerformIO $ withForeignPtr buf $ \ptr -> - utf8DecodeString ptr n_bytes +unpackFS (FastString _ _ bs _) = + inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> + utf8DecodeString (castPtr ptr) len -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString' bytesFS :: FastString -> [Word8] @@ -520,17 +502,17 @@ concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better headFS :: FastString -> Char headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString" -headFS (FastString _ _ (FastBytes _ buf) _) = - inlinePerformIO $ withForeignPtr buf $ \ptr -> do - return (fst (utf8DecodeChar ptr)) +headFS (FastString _ _ bs _) = + inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> + return (fst (utf8DecodeChar (castPtr ptr))) tailFS :: FastString -> FastString tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString" -tailFS (FastString _ _ (FastBytes n_bytes buf) _) = - inlinePerformIO $ withForeignPtr buf $ \ptr -> do - let (_,ptr') = utf8DecodeChar ptr - let off = ptr' `minusPtr` ptr - return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off) +tailFS (FastString _ _ bs _) = + inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> + do let (_, ptr') = utf8DecodeChar (castPtr ptr) + n = ptr' `minusPtr` ptr + mkFastStringByteString $ BS.drop n bs consFS :: Char -> FastString -> FastString consFS c fs = mkFastString (c : unpackFS fs) |