summaryrefslogtreecommitdiff
path: root/compiler/utils/StringBuffer.lhs
diff options
context:
space:
mode:
authorSimon Marlow <simonmar@microsoft.com>2007-11-30 10:11:00 +0000
committerSimon Marlow <simonmar@microsoft.com>2007-11-30 10:11:00 +0000
commit5386eff068d2b5fa3b55fe9f0573aa42416471b7 (patch)
treef4921f6d8fc89e0ee6f02646fbd151a98b5970b9 /compiler/utils/StringBuffer.lhs
parent162ae90572443ca726992ea54f4cbc75658453d3 (diff)
downloadhaskell-5386eff068d2b5fa3b55fe9f0573aa42416471b7.tar.gz
FIX #1744: ignore the byte-order mark at the beginning of a file
Diffstat (limited to 'compiler/utils/StringBuffer.lhs')
-rw-r--r--compiler/utils/StringBuffer.lhs22
1 files changed, 16 insertions, 6 deletions
diff --git a/compiler/utils/StringBuffer.lhs b/compiler/utils/StringBuffer.lhs
index d7135353a1..92a937b74f 100644
--- a/compiler/utils/StringBuffer.lhs
+++ b/compiler/utils/StringBuffer.lhs
@@ -106,10 +106,7 @@ hGetStringBuffer fname = do
hClose h
if (r /= size)
then ioError (userError "short read of file")
- else do
- pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
- -- sentinels for UTF-8 decoding
- return (StringBuffer buf size 0)
+ else newUTF8StringBuffer buf ptr size
hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
hGetStringBufferBlock handle wanted
@@ -121,8 +118,21 @@ hGetStringBufferBlock handle wanted
do r <- if size == 0 then return 0 else hGetBuf handle ptr size
if r /= size
then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
- else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
- return (StringBuffer buf size 0)
+ else newUTF8StringBuffer buf ptr size
+
+newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
+newUTF8StringBuffer buf ptr size = do
+ pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+ -- sentinels for UTF-8 decoding
+ let
+ sb0 = StringBuffer buf size 0
+ (first_char, sb1) = nextChar sb0
+ -- skip the byte-order mark if there is one (see #1744)
+ -- This is better than treating #FEFF as whitespace,
+ -- because that would mess up layout. We don't have a concept
+ -- of zero-width whitespace in Haskell: all whitespace codepoints
+ -- have a width of one column.
+ return (if first_char == '\xfeff' then sb1 else sb0)
appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
appendStringBuffers sb1 sb2