diff options
| author | Sylvain Henry <sylvain@haskus.fr> | 2020-03-18 10:44:56 +0100 | 
|---|---|---|
| committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-03-29 17:28:51 -0400 | 
| commit | 1941ef4f050c0dfcb68229641fcbbde3a10f1072 (patch) | |
| tree | 8e25a61af77696d3022d35cc277b5db5af540f03 /compiler/GHC/Utils | |
| parent | 1c446220250dcada51d4bb33a0cc7d8ce572e8b6 (diff) | |
| download | haskell-1941ef4f050c0dfcb68229641fcbbde3a10f1072.tar.gz | |
Modules: Types (#13009)
Update Haddock submodule
Metric Increase:
   haddock.compiler
Diffstat (limited to 'compiler/GHC/Utils')
| -rw-r--r-- | compiler/GHC/Utils/Lexeme.hs | 240 | 
1 files changed, 240 insertions, 0 deletions
| diff --git a/compiler/GHC/Utils/Lexeme.hs b/compiler/GHC/Utils/Lexeme.hs new file mode 100644 index 0000000000..2ea773a2f0 --- /dev/null +++ b/compiler/GHC/Utils/Lexeme.hs @@ -0,0 +1,240 @@ +-- (c) The GHC Team +-- +-- Functions to evaluate whether or not a string is a valid identifier. +-- There is considerable overlap between the logic here and the logic +-- in Lexer.x, but sadly there seems to be no way to merge them. + +module GHC.Utils.Lexeme ( +          -- * Lexical characteristics of Haskell names + +          -- | Use these functions to figure what kind of name a 'FastString' +          -- represents; these functions do /not/ check that the identifier +          -- is valid. + +        isLexCon, isLexVar, isLexId, isLexSym, +        isLexConId, isLexConSym, isLexVarId, isLexVarSym, +        startsVarSym, startsVarId, startsConSym, startsConId, + +          -- * Validating identifiers + +          -- | These functions (working over plain old 'String's) check +          -- to make sure that the identifier is valid. +        okVarOcc, okConOcc, okTcOcc, +        okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc + +        -- Some of the exports above are not used within GHC, but may +        -- be of value to GHC API users. + +  ) where + +import GhcPrelude + +import FastString + +import Data.Char +import qualified Data.Set as Set + +import GHC.Lexeme + +{- + +************************************************************************ +*                                                                      * +    Lexical categories +*                                                                      * +************************************************************************ + +These functions test strings to see if they fit the lexical categories +defined in the Haskell report. + +Note [Classification of generated names] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some names generated for internal use can show up in debugging output, +e.g.  when using -ddump-simpl. These generated names start with a $ +but should still be pretty-printed using prefix notation. We make sure +this is the case in isLexVarSym by only classifying a name as a symbol +if all its characters are symbols, not just its first one. +-} + +isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool +isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool + +isLexCon cs = isLexConId  cs || isLexConSym cs +isLexVar cs = isLexVarId  cs || isLexVarSym cs + +isLexId  cs = isLexConId  cs || isLexVarId  cs +isLexSym cs = isLexConSym cs || isLexVarSym cs + +------------- +isLexConId cs                           -- Prefix type or data constructors +  | nullFS cs          = False          --      e.g. "Foo", "[]", "(,)" +  | cs == (fsLit "[]") = True +  | otherwise          = startsConId (headFS cs) + +isLexVarId cs                           -- Ordinary prefix identifiers +  | nullFS cs         = False           --      e.g. "x", "_x" +  | otherwise         = startsVarId (headFS cs) + +isLexConSym cs                          -- Infix type or data constructors +  | nullFS cs          = False          --      e.g. ":-:", ":", "->" +  | cs == (fsLit "->") = True +  | otherwise          = startsConSym (headFS cs) + +isLexVarSym fs                          -- Infix identifiers e.g. "+" +  | fs == (fsLit "~R#") = True +  | otherwise +  = case (if nullFS fs then [] else unpackFS fs) of +      [] -> False +      (c:cs) -> startsVarSym c && all isVarSymChar cs +        -- See Note [Classification of generated names] + +{- + +************************************************************************ +*                                                                      * +    Detecting valid names for Template Haskell +*                                                                      * +************************************************************************ + +-} + +---------------------- +-- External interface +---------------------- + +-- | Is this an acceptable variable name? +okVarOcc :: String -> Bool +okVarOcc str@(c:_) +  | startsVarId c +  = okVarIdOcc str +  | startsVarSym c +  = okVarSymOcc str +okVarOcc _ = False + +-- | Is this an acceptable constructor name? +okConOcc :: String -> Bool +okConOcc str@(c:_) +  | startsConId c +  = okConIdOcc str +  | startsConSym c +  = okConSymOcc str +  | str == "[]" +  = True +okConOcc _ = False + +-- | Is this an acceptable type name? +okTcOcc :: String -> Bool +okTcOcc "[]" = True +okTcOcc "->" = True +okTcOcc "~"  = True +okTcOcc str@(c:_) +  | startsConId c +  = okConIdOcc str +  | startsConSym c +  = okConSymOcc str +  | startsVarSym c +  = okVarSymOcc str +okTcOcc _ = False + +-- | Is this an acceptable alphanumeric variable name, assuming it starts +-- with an acceptable letter? +okVarIdOcc :: String -> Bool +okVarIdOcc str = okIdOcc str && +                 -- admit "_" as a valid identifier.  Required to support typed +                 -- holes in Template Haskell.  See #10267 +                 (str == "_" || not (str `Set.member` reservedIds)) + +-- | Is this an acceptable symbolic variable name, assuming it starts +-- with an acceptable character? +okVarSymOcc :: String -> Bool +okVarSymOcc str = all okSymChar str && +                  not (str `Set.member` reservedOps) && +                  not (isDashes str) + +-- | Is this an acceptable alphanumeric constructor name, assuming it +-- starts with an acceptable letter? +okConIdOcc :: String -> Bool +okConIdOcc str = okIdOcc str || +                 is_tuple_name1 True  str || +                   -- Is it a boxed tuple... +                 is_tuple_name1 False str || +                   -- ...or an unboxed tuple (#12407)... +                 is_sum_name1 str +                   -- ...or an unboxed sum (#12514)? +  where +    -- check for tuple name, starting at the beginning +    is_tuple_name1 True  ('(' : rest)       = is_tuple_name2 True  rest +    is_tuple_name1 False ('(' : '#' : rest) = is_tuple_name2 False rest +    is_tuple_name1 _     _                  = False + +    -- check for tuple tail +    is_tuple_name2 True  ")"          = True +    is_tuple_name2 False "#)"         = True +    is_tuple_name2 boxed (',' : rest) = is_tuple_name2 boxed rest +    is_tuple_name2 boxed (ws  : rest) +      | isSpace ws                    = is_tuple_name2 boxed rest +    is_tuple_name2 _     _            = False + +    -- check for sum name, starting at the beginning +    is_sum_name1 ('(' : '#' : rest) = is_sum_name2 False rest +    is_sum_name1 _                  = False + +    -- check for sum tail, only allowing at most one underscore +    is_sum_name2 _          "#)"         = True +    is_sum_name2 underscore ('|' : rest) = is_sum_name2 underscore rest +    is_sum_name2 False      ('_' : rest) = is_sum_name2 True rest +    is_sum_name2 underscore (ws  : rest) +      | isSpace ws                       = is_sum_name2 underscore rest +    is_sum_name2 _          _            = False + +-- | Is this an acceptable symbolic constructor name, assuming it +-- starts with an acceptable character? +okConSymOcc :: String -> Bool +okConSymOcc ":" = True +okConSymOcc str = all okSymChar str && +                  not (str `Set.member` reservedOps) + +---------------------- +-- Internal functions +---------------------- + +-- | Is this string an acceptable id, possibly with a suffix of hashes, +-- but not worrying about case or clashing with reserved words? +okIdOcc :: String -> Bool +okIdOcc str +  = let hashes = dropWhile okIdChar str in +    all (== '#') hashes   -- -XMagicHash allows a suffix of hashes +                          -- of course, `all` says "True" to an empty list + +-- | Is this character acceptable in an identifier (after the first letter)? +-- See alexGetByte in Lexer.x +okIdChar :: Char -> Bool +okIdChar c = case generalCategory c of +  UppercaseLetter -> True +  LowercaseLetter -> True +  TitlecaseLetter -> True +  ModifierLetter  -> True -- See #10196 +  OtherLetter     -> True -- See #1103 +  NonSpacingMark  -> True -- See #7650 +  DecimalNumber   -> True +  OtherNumber     -> True -- See #4373 +  _               -> c == '\'' || c == '_' + +-- | All reserved identifiers. Taken from section 2.4 of the 2010 Report. +reservedIds :: Set.Set String +reservedIds = Set.fromList [ "case", "class", "data", "default", "deriving" +                           , "do", "else", "foreign", "if", "import", "in" +                           , "infix", "infixl", "infixr", "instance", "let" +                           , "module", "newtype", "of", "then", "type", "where" +                           , "_" ] + +-- | All reserved operators. Taken from section 2.4 of the 2010 Report. +reservedOps :: Set.Set String +reservedOps = Set.fromList [ "..", ":", "::", "=", "\\", "|", "<-", "->" +                           , "@", "~", "=>" ] + +-- | Does this string contain only dashes and has at least 2 of them? +isDashes :: String -> Bool +isDashes ('-' : '-' : rest) = all (== '-') rest +isDashes _                  = False | 
