diff options
| author | Thomas Miedema <thomasmiedema@gmail.com> | 2015-07-03 22:37:18 +0200 |
|---|---|---|
| committer | Ben Gamari <ben@smart-cactus.org> | 2015-07-03 22:44:50 +0200 |
| commit | 6b01d3ce6c681428e7a9865af85685c2a76ba657 (patch) | |
| tree | 1c3a846b5bfb22e45077235e14ee807d643ce59a | |
| parent | 39d83f239d33b1d214bdb7f7b3ce94d76d3e1467 (diff) | |
| download | haskell-6b01d3ce6c681428e7a9865af85685c2a76ba657.tar.gz | |
parser: Allow Lm (MODIFIER LETTER) category in identifiers
Easy fix in the parser to stop regressions, due to Unicode 7.0 changing
the classification of some prior code points.
Signed-off-by: Austin Seipp <austin@well-typed.com>
Test Plan: `tests/parser/should_compile/T10196.hs`
Reviewers: hvr, austin, bgamari
Reviewed By: austin, bgamari
Subscribers: thomie, bgamari
Differential Revision: https://phabricator.haskell.org/D969
GHC Trac Issues: #10196
| -rw-r--r-- | compiler/basicTypes/Lexeme.hs | 11 | ||||
| -rw-r--r-- | compiler/parser/Lexer.x | 8 | ||||
| -rw-r--r-- | compiler/utils/Util.hs | 5 | ||||
| -rw-r--r-- | testsuite/tests/parser/should_compile/T10196.hs | 13 | ||||
| -rw-r--r-- | testsuite/tests/parser/should_compile/all.T | 1 | ||||
| -rw-r--r-- | testsuite/tests/parser/should_fail/T10196Fail1.hs | 4 | ||||
| -rw-r--r-- | testsuite/tests/parser/should_fail/T10196Fail1.stderr | 2 | ||||
| -rw-r--r-- | testsuite/tests/parser/should_fail/T10196Fail2.hs | 4 | ||||
| -rw-r--r-- | testsuite/tests/parser/should_fail/T10196Fail2.stderr | 2 | ||||
| -rw-r--r-- | testsuite/tests/parser/should_fail/T10196Fail3.hs | 6 | ||||
| -rw-r--r-- | testsuite/tests/parser/should_fail/T10196Fail3.stderr | 2 | ||||
| -rw-r--r-- | testsuite/tests/parser/should_fail/all.T | 3 |
12 files changed, 58 insertions, 3 deletions
diff --git a/compiler/basicTypes/Lexeme.hs b/compiler/basicTypes/Lexeme.hs index c5bda4dbdd..a2409614d1 100644 --- a/compiler/basicTypes/Lexeme.hs +++ b/compiler/basicTypes/Lexeme.hs @@ -28,6 +28,7 @@ module Lexeme ( ) where import FastString +import Util ((<||>)) import Data.Char import qualified Data.Set as Set @@ -194,7 +195,8 @@ okConSymOcc str = all okSymChar str && -- but not worrying about case or clashing with reserved words? okIdOcc :: String -> Bool okIdOcc str - = let hashes = dropWhile okIdChar str in + -- TODO. #10196. Only allow modifier letters in the suffix of an identifier. + = let hashes = dropWhile (okIdChar <||> okIdSuffixChar) str in all (== '#') hashes -- -XMagicHash allows a suffix of hashes -- of course, `all` says "True" to an empty list @@ -210,6 +212,13 @@ okIdChar c = case generalCategory c of OtherNumber -> True _ -> c == '\'' || c == '_' +-- | Is this character acceptable in the suffix of an identifier. +-- See alexGetByte in Lexer.x +okIdSuffixChar :: Char -> Bool +okIdSuffixChar c = case generalCategory c of + ModifierLetter -> True -- See #10196 + _ -> False + -- | Is this character acceptable in a symbol (after the first char)? -- See alexGetByte in Lexer.x okSymChar :: Char -> Bool diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index 5839a410fb..2e883fd102 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -156,7 +156,10 @@ $graphic = [$small $large $symbol $digit $special $unigraphic \"\'] $binit = 0-1 $octit = 0-7 $hexit = [$decdigit A-F a-f] -$idchar = [$small $large $digit \'] + +$suffix = \x07 -- Trick Alex into handling Unicode. See alexGetByte. +-- TODO #10196. Only allow modifier letters in the suffix of an identifier. +$idchar = [$small $large $digit $suffix \'] $pragmachar = [$small $large $digit] @@ -1842,6 +1845,7 @@ alexGetByte (AI loc s) symbol = '\x04' space = '\x05' other_graphic = '\x06' + suffix = '\x07' adj_c | c <= '\x06' = non_graphic @@ -1858,7 +1862,7 @@ alexGetByte (AI loc s) UppercaseLetter -> upper LowercaseLetter -> lower TitlecaseLetter -> upper - ModifierLetter -> other_graphic + ModifierLetter -> suffix -- see #10196 OtherLetter -> lower -- see #1103 NonSpacingMark -> other_graphic SpacingCombiningMark -> other_graphic diff --git a/compiler/utils/Util.hs b/compiler/utils/Util.hs index 732f2b8f6b..96cd7521e7 100644 --- a/compiler/utils/Util.hs +++ b/compiler/utils/Util.hs @@ -55,6 +55,7 @@ module Util ( isEqual, eqListBy, eqMaybeBy, thenCmp, cmpList, removeSpaces, + (<&&>), (<||>), -- * Edit distance fuzzyMatch, fuzzyLookup, @@ -665,6 +666,10 @@ removeSpaces = dropWhileEndLE isSpace . dropWhile isSpace (<&&>) = liftA2 (&&) infixr 3 <&&> -- same as (&&) +(<||>) :: Applicative f => f Bool -> f Bool -> f Bool +(<||>) = liftA2 (||) +infixr 2 <||> -- same as (||) + {- ************************************************************************ * * diff --git a/testsuite/tests/parser/should_compile/T10196.hs b/testsuite/tests/parser/should_compile/T10196.hs new file mode 100644 index 0000000000..f80911829b --- /dev/null +++ b/testsuite/tests/parser/should_compile/T10196.hs @@ -0,0 +1,13 @@ +module T10196 where + +data X = Xᵦ | Xᵤ | Xᵩ | Xᵢ | Xᵪ | Xᵣ + +f :: Int +f = + let xᵦ = 1 + xᵤ = xᵦ + xᵩ = xᵤ + xᵢ = xᵩ + xᵪ = xᵢ + xᵣ = xᵪ + in xᵣ diff --git a/testsuite/tests/parser/should_compile/all.T b/testsuite/tests/parser/should_compile/all.T index 68845c1cc6..521b5a42a0 100644 --- a/testsuite/tests/parser/should_compile/all.T +++ b/testsuite/tests/parser/should_compile/all.T @@ -101,4 +101,5 @@ test('T5682', normal, compile, ['']) test('T9723a', normal, compile, ['']) test('T9723b', normal, compile, ['']) test('T10188', normal, compile, ['']) +test('T10196', normal, compile, ['']) test('T10582', expect_broken(10582), compile, ['']) diff --git a/testsuite/tests/parser/should_fail/T10196Fail1.hs b/testsuite/tests/parser/should_fail/T10196Fail1.hs new file mode 100644 index 0000000000..2f1c8f39ea --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail1.hs @@ -0,0 +1,4 @@ +module T10196Fail1 where + +-- Constructors are not allowed to start with a modifier letter. +data Foo = ᵦfoo diff --git a/testsuite/tests/parser/should_fail/T10196Fail1.stderr b/testsuite/tests/parser/should_fail/T10196Fail1.stderr new file mode 100644 index 0000000000..3c4a173eef --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail1.stderr @@ -0,0 +1,2 @@ + +T10196Fail1.hs:4:12: error: lexical error at character '\7526' diff --git a/testsuite/tests/parser/should_fail/T10196Fail2.hs b/testsuite/tests/parser/should_fail/T10196Fail2.hs new file mode 100644 index 0000000000..64b3cacd62 --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail2.hs @@ -0,0 +1,4 @@ +module T10196Fail2 where + +-- Variables are not allowed to start with a modifier letter. +ᵦ = 1 diff --git a/testsuite/tests/parser/should_fail/T10196Fail2.stderr b/testsuite/tests/parser/should_fail/T10196Fail2.stderr new file mode 100644 index 0000000000..abba8aa04c --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail2.stderr @@ -0,0 +1,2 @@ + +T10196Fail2.hs:4:1: error: lexical error at character '\7526' diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.hs b/testsuite/tests/parser/should_fail/T10196Fail3.hs new file mode 100644 index 0000000000..09b80ddeff --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail3.hs @@ -0,0 +1,6 @@ +module T10196Fail3 where + +-- Modifier letters are not allowed in the middle of an identifier. +-- And this should not be lexed as 2 separate identifiers either. +xᵦx :: Int +xᵦx = 1 diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.stderr b/testsuite/tests/parser/should_fail/T10196Fail3.stderr new file mode 100644 index 0000000000..64037440e2 --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail3.stderr @@ -0,0 +1,2 @@ + +T10196Fail3.hs:5:2: error: lexical error at character '/7526' diff --git a/testsuite/tests/parser/should_fail/all.T b/testsuite/tests/parser/should_fail/all.T index affea929d3..33da7217a7 100644 --- a/testsuite/tests/parser/should_fail/all.T +++ b/testsuite/tests/parser/should_fail/all.T @@ -88,3 +88,6 @@ test('T8431', compile_timeout_multiplier(0.05), compile_fail, ['-XAlternativeLayoutRule']) test('T8506', normal, compile_fail, ['']) test('T9225', normal, compile_fail, ['']) +test('T10196Fail1', normal, compile_fail, ['']) +test('T10196Fail2', normal, compile_fail, ['']) +test('T10196Fail3', expect_broken(10196), compile_fail, ['']) |
