diff options
author | Joachim Breitner <mail@joachim-breitner.de> | 2015-09-23 10:10:03 +0200 |
---|---|---|
committer | Joachim Breitner <mail@joachim-breitner.de> | 2015-09-25 13:09:53 +0200 |
commit | f7fd864ce6d41cf22d25f18a0cdc5e2e9db71304 (patch) | |
tree | 3c3fbb5e34f88daa53997bd2454b70f5452fe666 /compiler/utils/StringBuffer.hs | |
parent | a0b1f414a459e102f5c3d93bfbf53ebe0d81c467 (diff) | |
download | haskell-f7fd864ce6d41cf22d25f18a0cdc5e2e9db71304.tar.gz |
Skip a possible BOM in utf8 encoding
and not the system locale, which might be something else. This fixes
bug #10907. A test is added, but less useful than it could be until
task #10909 is done.
Differential Revision: D1274
Diffstat (limited to 'compiler/utils/StringBuffer.hs')
-rw-r--r-- | compiler/utils/StringBuffer.hs | 10 |
1 files changed, 7 insertions, 3 deletions
```diff
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index 2e339d8d75..6b39fc8608 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -53,6 +53,8 @@ import Data.Maybe
 import Control.Exception
 import System.IO
 import System.IO.Unsafe ( unsafePerformIO )
+import GHC.IO.Encoding.UTF8 ( mkUTF8 )
+import GHC.IO.Encoding.Failure ( CodingFailureMode(IgnoreCodingFailure) )
 
 import GHC.Exts
 
@@ -131,14 +133,16 @@ skipBOM h size offset =
     then do
       -- Validate assumption that handle is in binary mode.
       ASSERTM( hGetEncoding h >>= return . isNothing )
-      -- Temporarily select text mode to make `hLookAhead` and
-      -- `hGetChar` return full Unicode characters.
-      bracket_ (hSetBinaryMode h False) (hSetBinaryMode h True) $ do
+      -- Temporarily select utf8 encoding with error ignoring,
+      -- to make `hLookAhead` and `hGetChar` return full Unicode characters.
+      bracket_ (hSetEncoding h safeEncoding) (hSetBinaryMode h True) $ do
         c <- hLookAhead h
         if c == '\xfeff'
           then hGetChar h >> hTell h
           else return offset
     else return offset
+  where
+    safeEncoding = mkUTF8 IgnoreCodingFailure
 
 newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
 newUTF8StringBuffer buf ptr size = do
```