diff options
| author | Joachim Breitner <mail@joachim-breitner.de> | 2015-09-23 10:10:03 +0200 | 
|---|---|---|
| committer | Joachim Breitner <mail@joachim-breitner.de> | 2015-09-25 13:09:53 +0200 | 
| commit | f7fd864ce6d41cf22d25f18a0cdc5e2e9db71304 (patch) | |
| tree | 3c3fbb5e34f88daa53997bd2454b70f5452fe666 | |
| parent | a0b1f414a459e102f5c3d93bfbf53ebe0d81c467 (diff) | |
| download | haskell-f7fd864ce6d41cf22d25f18a0cdc5e2e9db71304.tar.gz | |
Skip a possible BOM in utf8 encoding
and not in the system locale's encoding, which might be something else. This fixes
bug #10907. A test is added, but it is less useful than it could be until
task #10909 is done.
Differential Revision: D1274
| -rw-r--r-- | compiler/utils/StringBuffer.hs | 10 | ||||
| -rw-r--r-- | testsuite/tests/parser/unicode/T10907.hs | 1 | ||||
| -rw-r--r-- | testsuite/tests/parser/unicode/all.T | 3 | 
3 files changed, 11 insertions, 3 deletions
| diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs index 2e339d8d75..6b39fc8608 100644 --- a/compiler/utils/StringBuffer.hs +++ b/compiler/utils/StringBuffer.hs @@ -53,6 +53,8 @@ import Data.Maybe  import Control.Exception  import System.IO  import System.IO.Unsafe         ( unsafePerformIO ) +import GHC.IO.Encoding.UTF8     ( mkUTF8 ) +import GHC.IO.Encoding.Failure  ( CodingFailureMode(IgnoreCodingFailure) )  import GHC.Exts @@ -131,14 +133,16 @@ skipBOM h size offset =      then do        -- Validate assumption that handle is in binary mode.        ASSERTM( hGetEncoding h >>= return . isNothing ) -      -- Temporarily select text mode to make `hLookAhead` and -      -- `hGetChar` return full Unicode characters. -      bracket_ (hSetBinaryMode h False) (hSetBinaryMode h True) $ do +      -- Temporarily select utf8 encoding with error ignoring, +      -- to make `hLookAhead` and `hGetChar` return full Unicode characters. +      bracket_ (hSetEncoding h safeEncoding) (hSetBinaryMode h True) $ do          c <- hLookAhead h          if c == '\xfeff'            then hGetChar h >> hTell h            else return offset      else return offset +  where +    safeEncoding = mkUTF8 IgnoreCodingFailure  newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer  newUTF8StringBuffer buf ptr size = do diff --git a/testsuite/tests/parser/unicode/T10907.hs b/testsuite/tests/parser/unicode/T10907.hs new file mode 100644 index 0000000000..60aa3e7394 --- /dev/null +++ b/testsuite/tests/parser/unicode/T10907.hs @@ -0,0 +1 @@ +module ByteOrderMark () where diff --git a/testsuite/tests/parser/unicode/all.T b/testsuite/tests/parser/unicode/all.T index ec08ae552c..6972a0d602 100644 --- a/testsuite/tests/parser/unicode/all.T +++ b/testsuite/tests/parser/unicode/all.T @@ -22,3 +22,6 @@ test('T2302', only_ways(['normal']), compile_fail, [''])  test('T4373', normal, compile, [''])  test('T6016', extra_clean(['T6016-twoBOMs']), compile_and_run, 
['-package ghc'])  test('T7671', normal, compile, ['']) +# TODO: This test ought to be run in a non-UTF8 locale, but this is not yet +# supported by the test suite (see 10907) +test('T10907', normal, compile, ['']) | 
