summaryrefslogtreecommitdiff
path: root/gcc/ada/scans.ads
blob: 6f9bcb8c727f3e6da451503db45ec35f9b651e1b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
------------------------------------------------------------------------------
--                                                                          --
--                         GNAT COMPILER COMPONENTS                         --
--                                                                          --
--                                S C A N S                                 --
--                                                                          --
--                                 S p e c                                  --
--                                                                          --
--          Copyright (C) 1992-2009, Free Software Foundation, Inc.         --
--                                                                          --
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
-- terms of the  GNU General Public License as published  by the Free Soft- --
-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
--                                                                          --
-- As a special exception under Section 7 of GPL version 3, you are granted --
-- additional permissions described in the GCC Runtime Library Exception,   --
-- version 3.1, as published by the Free Software Foundation.               --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
-- GNAT was originally developed  by the GNAT team at  New York University. --
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
--                                                                          --
------------------------------------------------------------------------------

with Namet;  use Namet;
with Types;  use Types;
with Uintp;  use Uintp;
with Urealp; use Urealp;

package Scans is

--  The scanner maintains a current state in the global variables defined
--  in this package. The call to the Scan routine advances this state to
--  the next token. The state is initialized by the call to one of the
--  initialization routines in Sinput.

   --  The following type is used to identify token types returned by Scan.
   --  The class column in this table indicates the token classes which
   --  apply to the token, as defined by subsequent subtype declarations.

   --  Note: Namet.Is_Keyword_Name depends on the fact that the first entry in
   --  this type declaration is *not* for a reserved word. For details on why
   --  there is this requirement, see Scans.Initialize_Ada_Keywords.

   type Token_Type is (

      --  The order of these literals is significant: each Token_Class_xxx
      --  subtype declared after this type is a contiguous range of this
      --  enumeration, and Is_Reserved_Keyword is built from such ranges.

      --  Token name          Token type   Class(es)

      Tok_Integer_Literal, -- numeric lit  Literal, Lit_Or_Name

      Tok_Real_Literal,    -- numeric lit  Literal, Lit_Or_Name

      Tok_String_Literal,  -- string lit   Literal, Lit_Or_Name

      Tok_Char_Literal,    -- char lit     Name, Literal, Lit_Or_Name

      Tok_Operator_Symbol, -- op symbol    Name, Literal, Lit_Or_Name, Desig

      Tok_Identifier,      -- identifier   Name, Lit_Or_Name, Desig

      Tok_Double_Asterisk, -- **

      Tok_Ampersand,       -- &            Binary_Addop
      Tok_Minus,           -- -            Binary_Addop, Unary_Addop
      Tok_Plus,            -- +            Binary_Addop, Unary_Addop

      Tok_Asterisk,        -- *            Mulop
      Tok_Mod,             -- MOD          Mulop
      Tok_Rem,             -- REM          Mulop
      Tok_Slash,           -- /            Mulop

      Tok_New,             -- NEW

      Tok_Abs,             -- ABS
      Tok_Others,          -- OTHERS
      Tok_Null,            -- NULL

      Tok_Dot,             -- .            Namext
      Tok_Apostrophe,      -- '            Namext

      Tok_Left_Paren,      -- (            Namext, Consk

      Tok_Delta,           -- DELTA        Atkwd, Sterm, Consk
      Tok_Digits,          -- DIGITS       Atkwd, Sterm, Consk
      Tok_Range,           -- RANGE        Atkwd, Sterm, Consk

      Tok_Right_Paren,     -- )            Sterm
      Tok_Comma,           -- ,            Sterm

      Tok_And,             -- AND          Logop, Sterm
      Tok_Or,              -- OR           Logop, Sterm
      Tok_Xor,             -- XOR          Logop, Sterm

      Tok_Less,            -- <            Relop, Sterm
      Tok_Equal,           -- =            Relop, Sterm
      Tok_Greater,         -- >            Relop, Sterm
      Tok_Not_Equal,       -- /=           Relop, Sterm
      Tok_Greater_Equal,   -- >=           Relop, Sterm
      Tok_Less_Equal,      -- <=           Relop, Sterm

      Tok_In,              -- IN           Relop, Sterm
      Tok_Not,             -- NOT          Relop, Sterm

      Tok_Box,             -- <>           Relop, Sterm
      Tok_Colon_Equal,     -- :=           Eterm, Sterm
      Tok_Colon,           -- :            Eterm, Sterm
      Tok_Greater_Greater, -- >>           Eterm, Sterm

      Tok_Abstract,        -- ABSTRACT     Eterm, Sterm
      Tok_Access,          -- ACCESS       Eterm, Sterm
      Tok_Aliased,         -- ALIASED      Eterm, Sterm
      Tok_All,             -- ALL          Eterm, Sterm
      Tok_Array,           -- ARRAY        Eterm, Sterm
      Tok_At,              -- AT           Eterm, Sterm
      Tok_Body,            -- BODY         Eterm, Sterm
      Tok_Constant,        -- CONSTANT     Eterm, Sterm
      Tok_Do,              -- DO           Eterm, Sterm
      Tok_Is,              -- IS           Eterm, Sterm
      Tok_Interface,       -- INTERFACE    Eterm, Sterm
      Tok_Limited,         -- LIMITED      Eterm, Sterm
      Tok_Of,              -- OF           Eterm, Sterm
      Tok_Out,             -- OUT          Eterm, Sterm
      Tok_Record,          -- RECORD       Eterm, Sterm
      Tok_Renames,         -- RENAMES      Eterm, Sterm
      Tok_Reverse,         -- REVERSE      Eterm, Sterm
      Tok_Tagged,          -- TAGGED       Eterm, Sterm
      Tok_Then,            -- THEN         Eterm, Sterm

      Tok_Less_Less,       -- <<           Eterm, Sterm, After_SM

      Tok_Abort,           -- ABORT        Eterm, Sterm, After_SM
      Tok_Accept,          -- ACCEPT       Eterm, Sterm, After_SM
      Tok_Case,            -- CASE         Eterm, Sterm, After_SM
      Tok_Delay,           -- DELAY        Eterm, Sterm, After_SM
      Tok_Else,            -- ELSE         Eterm, Sterm, After_SM
      Tok_Elsif,           -- ELSIF        Eterm, Sterm, After_SM
      Tok_End,             -- END          Eterm, Sterm, After_SM
      Tok_Exception,       -- EXCEPTION    Eterm, Sterm, After_SM
      Tok_Exit,            -- EXIT         Eterm, Sterm, After_SM
      Tok_Goto,            -- GOTO         Eterm, Sterm, After_SM
      Tok_If,              -- IF           Eterm, Sterm, After_SM
      Tok_Pragma,          -- PRAGMA       Eterm, Sterm, After_SM
      Tok_Raise,           -- RAISE        Eterm, Sterm, After_SM
      Tok_Requeue,         -- REQUEUE      Eterm, Sterm, After_SM
      Tok_Return,          -- RETURN       Eterm, Sterm, After_SM
      Tok_Select,          -- SELECT       Eterm, Sterm, After_SM
      Tok_Terminate,       -- TERMINATE    Eterm, Sterm, After_SM
      Tok_Until,           -- UNTIL        Eterm, Sterm, After_SM
      Tok_When,            -- WHEN         Eterm, Sterm, After_SM

      Tok_Begin,           -- BEGIN        Eterm, Sterm, After_SM, Labeled_Stmt
      Tok_Declare,         -- DECLARE      Eterm, Sterm, After_SM, Labeled_Stmt
      Tok_For,             -- FOR          Eterm, Sterm, After_SM, Labeled_Stmt
      Tok_Loop,            -- LOOP         Eterm, Sterm, After_SM, Labeled_Stmt
      Tok_While,           -- WHILE        Eterm, Sterm, After_SM, Labeled_Stmt

      Tok_Entry,           -- ENTRY        Eterm, Sterm, Declk, Deckn, After_SM
      Tok_Protected,       -- PROTECTED    Eterm, Sterm, Declk, Deckn, After_SM
      Tok_Task,            -- TASK         Eterm, Sterm, Declk, Deckn, After_SM
      Tok_Type,            -- TYPE         Eterm, Sterm, Declk, Deckn, After_SM
      Tok_Subtype,         -- SUBTYPE      Eterm, Sterm, Declk, Deckn, After_SM
      Tok_Overriding,      -- OVERRIDING   Eterm, Sterm, Declk, Deckn, After_SM
      Tok_Synchronized,    -- SYNCHRONIZED Eterm, Sterm, Declk, Deckn, After_SM
      Tok_Use,             -- USE          Eterm, Sterm, Declk, Deckn, After_SM

      Tok_Function,        -- FUNCTION     Eterm, Sterm, Cunit, Declk, After_SM
      Tok_Generic,         -- GENERIC      Eterm, Sterm, Cunit, Declk, After_SM
      Tok_Package,         -- PACKAGE      Eterm, Sterm, Cunit, Declk, After_SM
      Tok_Procedure,       -- PROCEDURE    Eterm, Sterm, Cunit, Declk, After_SM

      Tok_Private,         -- PRIVATE      Eterm, Sterm, Cunit, After_SM
      Tok_With,            -- WITH         Eterm, Sterm, Cunit, After_SM
      Tok_Separate,        -- SEPARATE     Eterm, Sterm, Cunit, After_SM

      Tok_EOF,             -- End of file  Eterm, Sterm, Cterm, After_SM

      Tok_Semicolon,       -- ;            Eterm, Sterm, Cterm

      Tok_Arrow,           -- =>           Sterm, Cterm, Chtok

      Tok_Vertical_Bar,    -- |            Cterm, Sterm, Chtok

      Tok_Dot_Dot,         -- ..           Sterm, Chtok

      Tok_Project,
      Tok_Extends,
      Tok_External,
      --  These three entries represent keywords for the project file language
      --  and can be returned only in the case of scanning project files.

      Tok_Comment,
      --  This entry is used when scanning project files (where it represents
      --  an entire comment), and in preprocessing with the -C switch set
      --  (where it represents just the "--" of a comment). For the project
      --  file case, the text of the comment is stored in Scans.Comment_Id.

      Tok_End_Of_Line,
      --  Represents an end of line. Not used during normal compilation scans
      --  where end of line is ignored. Active for preprocessor scanning and
      --  also when scanning project files (where it is needed because of ???)

      Tok_Special,
      --  Used only in preprocessor scanning (to represent one of the
      --  characters '#', '$', '?', '@', '`', '\', '^', '~', or '_'). The
      --  character value itself is stored in Scans.Special_Character.

      No_Token);
      --  No_Token is used for initializing Token values to indicate that
      --  no value has been set yet.

   --  Note: in the RM, operator symbol is a special case of string literal.
   --  We distinguish at the lexical level in this compiler, since there are
   --  many syntactic situations in which only an operator symbol is allowed.

   --  The following subtype declarations group the token types into classes.
   --  These are used for class tests in the parser.

   subtype Token_Class_Numeric_Literal is
     Token_Type range Tok_Integer_Literal .. Tok_Real_Literal;
   --  Numeric literal

   subtype Token_Class_Literal is
     Token_Type range Tok_Integer_Literal .. Tok_Operator_Symbol;
   --  Literal

   subtype Token_Class_Lit_Or_Name is
     Token_Type range Tok_Integer_Literal .. Tok_Identifier;
   --  Literal or first token of a name. This range is the union of
   --  Token_Class_Literal and Token_Class_Name (numeric, string and
   --  character literals, operator symbol, identifier).

   subtype Token_Class_Binary_Addop is
     Token_Type range Tok_Ampersand .. Tok_Plus;
   --  Binary adding operator (& + -)

   subtype Token_Class_Unary_Addop is
     Token_Type range Tok_Minus .. Tok_Plus;
   --  Unary adding operator (+ -)

   subtype Token_Class_Mulop is
     Token_Type range Tok_Asterisk .. Tok_Slash;
   --  Multiplying operator

   subtype Token_Class_Logop is
     Token_Type range Tok_And .. Tok_Xor;
   --  Logical operator (and, or, xor)

   subtype Token_Class_Relop is
     Token_Type range Tok_Less .. Tok_Box;
   --  Relational operator (= /= < <= > >=, NOT, IN, plus <> to catch misuse
   --  of the Pascal style not equal operator).

   subtype Token_Class_Name is
     Token_Type range Tok_Char_Literal .. Tok_Identifier;
   --  First token of name (4.1),
   --    (identifier, char literal, operator symbol)

   subtype Token_Class_Desig is
     Token_Type range Tok_Operator_Symbol .. Tok_Identifier;
   --  Token which can be a Designator (identifier, operator symbol)

   subtype Token_Class_Namext is
     Token_Type range Tok_Dot .. Tok_Left_Paren;
   --  Name extension tokens. These are tokens which can appear immediately
   --  after a name to extend it recursively (period, quote, left paren)

   subtype Token_Class_Consk is
     Token_Type range Tok_Left_Paren .. Tok_Range;
   --  Keywords which can start constraint
   --    (left paren, delta, digits, range)

   subtype Token_Class_Eterm is
     Token_Type range Tok_Colon_Equal .. Tok_Semicolon;
   --  Expression terminators. These tokens can never appear within a simple
   --  expression. This is used for error recovery purposes (if we encounter
   --  an error in an expression, we simply scan to the next Eterm token).

   subtype Token_Class_Sterm is
     Token_Type range Tok_Delta .. Tok_Dot_Dot;
   --  Simple_Expression terminators. A Simple_Expression must be followed
   --  by a token in this class, or an error message is issued complaining
   --  about a missing binary operator.

   subtype Token_Class_Atkwd is
     Token_Type range Tok_Delta .. Tok_Range;
   --  Attribute keywords. This class includes keywords which can be used
   --  as an Attribute_Designator, namely DELTA, DIGITS and RANGE

   subtype Token_Class_Cterm is
     Token_Type range Tok_EOF .. Tok_Vertical_Bar;
   --  Choice terminators. These tokens terminate a choice. This is used for
   --  error recovery purposes (if we encounter an error in a Choice, we
   --  simply scan to the next Cterm token).

   subtype Token_Class_Chtok is
     Token_Type range Tok_Arrow .. Tok_Dot_Dot;
   --  Choice tokens. These tokens signal a choice when used in an Aggregate

   subtype Token_Class_Cunit is
     Token_Type range Tok_Function .. Tok_Separate;
   --  Tokens which can begin a compilation unit

   subtype Token_Class_Declk is
     Token_Type range Tok_Entry .. Tok_Procedure;
   --  Keywords which start a declaration

   subtype Token_Class_Deckn is
     Token_Type range Tok_Entry .. Tok_Use;
   --  Keywords which start a declaration but can't start a compilation unit

   subtype Token_Class_After_SM is
     Token_Type range Tok_Less_Less .. Tok_EOF;
   --  Tokens which always, or almost always, appear after a semicolon. Used
   --  in the Resync_Past_Semicolon routine to avoid gobbling up stuff when
   --  a semicolon is missing. Of significance only for error recovery.

   subtype Token_Class_Labeled_Stmt is
     Token_Type range Tok_Begin .. Tok_While;
   --  Tokens which start labeled statements

   type Token_Flag_Array is array (Token_Type) of Boolean;
   --  One Boolean flag per Token_Type value

   Is_Reserved_Keyword : constant Token_Flag_Array :=
                           (Tok_Mod      .. Tok_Rem
                            | Tok_New      .. Tok_Null
                            | Tok_Delta    .. Tok_Range
                            | Tok_And      .. Tok_Xor
                            | Tok_In       .. Tok_Not
                            | Tok_Abstract .. Tok_Then
                            | Tok_Abort    .. Tok_Separate => True,
                            others                         => False);
   --  Flag array used to test for reserved word. The True entries are given
   --  as ranges of Token_Type, so they rely on the declaration order of that
   --  type. Tok_Less_Less, which is declared between Tok_Then and Tok_Abort,
   --  falls in none of the ranges and is therefore False.

   procedure Initialize_Ada_Keywords;
   --  Set up Token_Type values in Names table entries for Ada reserved words

   --------------------------
   -- Scan State Variables --
   --------------------------

   --  Note: these variables can only be referenced during the parsing of a
   --  file. Reference to any of them from Sem or the expander is wrong.

   --  These variables are initialized as required by Scn.Initialize_Scanner,
   --  and should not be referenced before such a call. However, there are
   --  situations in which these variables are saved and restored, and this
   --  may happen before the first Initialize_Scanner call, resulting in the
   --  assignment of invalid values. To avoid this, and allow building with
   --  the -gnatVa switch, we initialize some variables to known valid values.

   Scan_Ptr : Source_Ptr := No_Location; -- init for -gnatVa
   --  Current scan pointer location. After a call to Scan, this points
   --  just past the end of the token just scanned.

   Token : Token_Type := No_Token; -- init for -gnatVa
   --  Type of current token

   Token_Ptr : Source_Ptr := No_Location; -- init for -gnatVa
   --  Pointer to first character of current token

   Current_Line_Start : Source_Ptr := No_Location; -- init for -gnatVa
   --  Pointer to first character of line containing current token.

   Start_Column : Column_Number := No_Column_Number; -- init for -gnatVa
   --  Starting column number (zero origin) of the first non-blank character
   --  on the line containing the current token. This is used for error
   --  recovery circuits which depend on looking at the column line up.

   Type_Token_Location : Source_Ptr := No_Location; -- init for -gnatVa
   --  Within a type declaration, gives the location of the TYPE keyword that
   --  opened the type declaration. Used in checking the end column of a record
   --  declaration, which can line up either with the TYPE keyword, or with the
   --  start of the line containing the RECORD keyword.

   Checksum : Word := 0; -- init for -gnatVa
   --  Used to accumulate a CRC representing the tokens in the source
   --  file being compiled. This CRC includes only program tokens, and
   --  excludes comments.

   First_Non_Blank_Location : Source_Ptr := No_Location; -- init for -gnatVa
   --  Location of first non-blank character on the line containing the
   --  current token (i.e. the location of the character whose column number
   --  is stored in Start_Column).

   Token_Node : Node_Id := Empty;
   --  Node table Id for the current token. This is set only if the current
   --  token is one for which the scanner constructs a node (i.e. it is an
   --  identifier, operator symbol, or literal). For other token types,
   --  Token_Node is undefined.

   Token_Name : Name_Id := No_Name;
   --  For identifiers, this is set to the Name_Id of the identifier scanned.
   --  For all other tokens, Token_Name is set to Error_Name. Note that it
   --  would be possible for the caller to extract this information from
   --  Token_Node. We set Token_Name separately for two reasons. First it
   --  allows a quicker test for a specific identifier. Second, it allows
   --  a version of the parser to be built that does not build tree nodes,
   --  usable as a syntax checker.

   Prev_Token : Token_Type := No_Token;
   --  Type of previous token

   Prev_Token_Ptr : Source_Ptr := No_Location; -- init for -gnatVa
   --  Pointer to first character of previous token. This variable has a
   --  corresponding Save_Prev_Token_Ptr component in Saved_Scan_State, so
   --  like the other saved Source_Ptr variables it is given a known valid
   --  initial value in case the scan state is saved before the scanner has
   --  been initialized.

   Version_To_Be_Found : Boolean;
   --  This flag is True if the scanner is still looking for an RCS version
   --  number in a comment. Normally it is initialized to False so that this
   --  circuit is not activated. If the -dv switch is set, then this flag is
   --  initialized to True, and then reset when the version number is found.
   --  We do things this way to minimize the impact on comment scanning.

   Character_Code : Char_Code;
   --  Valid only when Token is Tok_Char_Literal

   Real_Literal_Value : Ureal;
   --  Valid only when Token is Tok_Real_Literal

   Int_Literal_Value : Uint;
   --  Valid only when Token is Tok_Integer_Literal

   String_Literal_Id : String_Id;
   --  Id for currently scanned string value.
   --  Valid only when Token = Tok_String_Literal or Tok_Operator_Symbol.

   Wide_Character_Found : Boolean := False;
   --  Set True if wide character found.
   --  Valid only when Token = Tok_String_Literal.

   Special_Character : Character;
   --  Valid only when Token = Tok_Special. Returns one of the characters
   --  '#', '$', '?', '@', '`', '\', '^', '~', or '_'.
   --
   --  Why only this set? What about wide characters???

   Comment_Id : Name_Id := No_Name;
   --  Valid only when Token = Tok_Comment. Store the string that follows
   --  the "--" of a comment when scanning project files.
   --
   --  Is it really right for this to be a Name rather than a String, what
   --  about the case of Wide_Wide_Characters???

   --------------------------------------------------------
   -- Procedures for Saving and Restoring the Scan State --
   --------------------------------------------------------

   --  The following procedures can be used to save and restore the entire
   --  scan state. They are used in cases where it is necessary to back up
   --  the scan during the parse.

   type Saved_Scan_State is private;
   --  Used for saving and restoring the scan state

   procedure Save_Scan_State (Saved_State : out Saved_Scan_State);
   pragma Inline (Save_Scan_State);
   --  Saves the current scan state for possible later restoration. Note that
   --  there is no harm in saving the state and then never restoring it.

   procedure Restore_Scan_State (Saved_State : Saved_Scan_State);
   pragma Inline (Restore_Scan_State);
   --  Restores a scan state saved by a call to Save_Scan_State.
   --  The saved scan state must refer to the current source file.

private
   type Saved_Scan_State is record
      Save_Scan_Ptr                 : Source_Ptr;
      Save_Token                    : Token_Type;
      Save_Token_Ptr                : Source_Ptr;
      Save_Current_Line_Start       : Source_Ptr;
      Save_Start_Column             : Column_Number;
      Save_Checksum                 : Word;
      Save_First_Non_Blank_Location : Source_Ptr;
      Save_Token_Node               : Node_Id;
      Save_Token_Name               : Name_Id;
      Save_Prev_Token               : Token_Type;
      Save_Prev_Token_Ptr           : Source_Ptr;
   end record;
   --  Full view of the saved scan state. Each component Save_xxx has the
   --  same type as the scan state variable xxx declared in the visible part
   --  of this package.
   --
   --  Note: there is no component for Type_Token_Location, for
   --  Version_To_Be_Found, or for the token value variables (Character_Code,
   --  Real_Literal_Value, Int_Literal_Value, String_Literal_Id,
   --  Wide_Character_Found, Special_Character, Comment_Id), so this record
   --  cannot carry those values across a save/restore.

end Scans;