summaryrefslogtreecommitdiff
path: root/gcc/ada/g-awk.ads
blob: 27a58951c38a182dc9ceb2fbcbfc36881ee044c0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
------------------------------------------------------------------------------
--                                                                          --
--                         GNAT COMPILER COMPONENTS                         --
--                                                                          --
--                              G N A T . A W K                             --
--                                                                          --
--                                 S p e c                                  --
--                                                                          --
--                                                                          --
--           Copyright (C) 2000-2001 Ada Core Technologies, Inc.            --
--                                                                          --
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
-- terms of the  GNU General Public License as published  by the Free Soft- --
-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
-- for  more details.  You should have  received  a copy of the GNU General --
-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
-- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
-- MA 02111-1307, USA.                                                      --
--                                                                          --
-- As a special exception,  if other files  instantiate  generics from this --
-- unit, or you link  this unit with other files  to produce an executable, --
-- this  unit  does not  by itself cause  the resulting  executable  to  be --
-- covered  by the  GNU  General  Public  License.  This exception does not --
-- however invalidate  any other reasons why  the executable file  might be --
-- covered by the  GNU Public License.                                      --
--                                                                          --
-- GNAT is maintained by Ada Core Technologies Inc (http://www.gnat.com).   --
--                                                                          --
------------------------------------------------------------------------------
--
--  This is an AWK-like unit. It provides an easy interface for parsing one
--  or more files containing formatted data. The file can be viewed seen as
--  a database where each record is a line and a field is a data element in
--  this line. In this implementation an AWK record is a line. This means
--  that a record cannot span multiple lines. The operating procedure is to
--  read files line by line, with each line being presented to the user of
--  the package. The interface provides services to access specific fields
--  in the line. Thus it is possible to control actions takn on a line based
--  on values of some fields. This can be achieved directly or by registering
--  callbacks triggered on programmed conditions.
--
--  The state of an AWK run is recorded in an object of type session.
--  The following is the procedure for using a session to control an
--  AWK run:
--
--     1) Specify which session is to be used. It is possible to use the
--        default session or to create a new one by declaring an object of
--        type Session_Type. For example:
--
--           Computers : Session_Type;
--
--     2) Specify how to cut a line into fields. There are two modes: using
--        character fields separators or column width. This is done by using
--        Set_Fields_Separators or Set_Fields_Width. For example by:
--
--           AWK.Set_Field_Separators (";,", Computers);
--
--        or by using iterators' Separators parameter.
--
--     3) Specify which files to parse. This is done with Add_File/Add_Files
--        services, or by using the iterators' Filename parameter. For
--        example:
--
--           AWK.Add_File ("myfile.db", Computers);
--
--     4) Run the AWK session using one of the provided iterators.
--
--           Parse
--              This is the most automated iterator. You can gain control on
--              the session only by registering one or more callbacks (see
--              Register).
--
--           Get_Line/End_Of_Data
--              This is a manual iterator to be used with a loop. You have
--              complete control on the session. You can use callbacks but
--              this is not required.
--
--           For_Every_Line
--              This provides a mixture of manual/automated iterator action.
--
--        Examples of these three approaches appear below
--
--  There is many ways to use this package. The following discussion shows
--  three approaches, using the three iterator forms, to using this package.
--  All examples will use the following file (computer.db):
--
--     Pluton;Windows-NT;Pentium III
--     Mars;Linux;Pentium Pro
--     Venus;Solaris;Sparc
--     Saturn;OS/2;i486
--     Jupiter;MacOS;PPC
--
--  1) Using Parse iterator
--
--     Here the first step is to register some action associated to a pattern
--     and then to call the Parse iterator (this is the simplest way to use
--     this unit). The default session is used here. For example to output the
--     second field (the OS) of computer "Saturn".
--
--           procedure Action is
--           begin
--              Put_Line (AWK.Field (2));
--           end Action;
--
--        begin
--           AWK.Register (1, "Saturn", Action'Access);
--           AWK.Parse (";", "computer.db");
--
--
--  2) Using the Get_Line/End_Of_Data iterator
--
--     Here you have full control. For example to do the same as
--     above but using a specific session, you could write:
--
--           Computer_File : Session_Type;
--
--        begin
--           AWK.Set_Current (Computer_File);
--           AWK.Open (Separators => ";",
--                     Filename   => "computer.db");
--
--           --  Display Saturn OS
--
--           while not AWK.End_Of_File loop
--              AWK.Get_Line;
--
--              if AWK.Field (1) = "Saturn" then
--                 Put_Line (AWK.Field (2));
--              end if;
--           end loop;
--
--           AWK.Close (Computer_File);
--
--
--  3) Using For_Every_Line iterator
--
--     In this case you use a provided iterator and you pass the procedure
--     that must be called for each record. You could code the previous
--     example could be coded as follows (using the iterator quick interface
--     but without using the current session):
--
--           Computer_File : Session_Type;
--
--           procedure Action (Quit : in out Boolean) is
--           begin
--              if AWK.Field (1, Computer_File) = "Saturn" then
--                 Put_Line (AWK.Field (2, Computer_File));
--              end if;
--           end Action;
--
--           procedure Look_For_Saturn is
--              new AWK.For_Every_Line (Action);
--
--        begin
--           Look_For_Saturn (Separators => ";",
--                            Filename   => "computer.db",
--                            Session    => Computer_File);
--
--           Integer_Text_IO.Put
--             (Integer (AWK.NR (Session => Computer_File)));
--           Put_Line (" line(s) have been processed.");
--
--  You can also use a regular expression for the pattern. Let us output
--  the computer name for all computer for which the OS has a character
--  O in its name.
--
--           Regexp   : String := ".*O.*";
--
--           Matcher  : Regpat.Pattern_Matcher := Regpat.Compile (Regexp);
--
--           procedure Action is
--           begin
--              Text_IO.Put_Line (AWK.Field (2));
--           end Action;
--
--        begin
--           AWK.Register (2, Matcher, Action'Unrestricted_Access);
--           AWK.Parse (";", "computer.db");
--

with Ada.Finalization;
with GNAT.Regpat;

package GNAT.AWK is

   Session_Error : exception;
   --  Raised when a Session is reused but is not closed.

   File_Error : exception;
   --  Raised when there is a file problem (see below).

   End_Error : exception;
   --  Raised when an attempt is made to read beyond the end of the last
   --  file of a session.

   Field_Error : exception;
   --  Raised when accessing a field value which does not exist.

   Data_Error : exception;
   --  Raised when it is not possible to convert a field value to a specific
   --  type.

   type Count is new Natural;

   type Widths_Set is array (Positive range <>) of Positive;
   --  Used to store a set of columns widths.

   Default_Separators : constant String := " " & ASCII.HT;

   Use_Current : constant String := "";
   --  Value used when no separator or filename is specified in iterators.

   type Session_Type is limited private;
   --  This is the main exported type. A session is used to keep the state of
   --  a full AWK run. The state comprises a list of files, the current file,
   --  the number of line processed, the current line, the number of fields in
   --  the current line... A default session is provided (see Set_Current,
   --  Current_Session and Default_Session above).

   ----------------------------
   -- Package initialization --
   ----------------------------

   --  To be thread safe it is not possible to use the default provided
   --  session. Each task must used a specific session and specify it
   --  explicitly for every services.

   procedure Set_Current (Session : Session_Type);
   --  Set the session to be used by default. This file will be used when the
   --  Session parameter in following services is not specified.

   function Current_Session return Session_Type;
   --  Returns the session used by default by all services. This is the
   --  latest session specified by Set_Current service or the session
   --  provided by default with this implementation.

   function Default_Session return Session_Type;
   --  Returns the default session provided by this package. Note that this is
   --  the session return by Current_Session if Set_Current has not been used.

   procedure Set_Field_Separators
     (Separators : String       := Default_Separators;
      Session    : Session_Type := Current_Session);
   --  Set the field separators. Each character in the string is a field
   --  separator. When a line is read it will be split by field using the
   --  separators set here. Separators can be changed at any point and in this
   --  case the current line is split according to the new separators. In the
   --  special case that Separators is a space and a tabulation
   --  (Default_Separators), fields are separated by runs of spaces and/or
   --  tabs.

   procedure Set_FS
     (Separators : String       := Default_Separators;
      Session    : Session_Type := Current_Session)
     renames Set_Field_Separators;
   --  FS is the AWK abbreviation for above service.

   procedure Set_Field_Widths
     (Field_Widths : Widths_Set;
      Session      : Session_Type := Current_Session);
   --  This is another way to split a line by giving the length (in number of
   --  characters) of each field in a line. Field widths can be changed at any
   --  point and in this case the current line is split according to the new
   --  field lengths. A line split with this method must have a length equal or
   --  greater to the total of the field widths. All characters remaining on
   --  the line after the latest field are added to a new automatically
   --  created field.

   procedure Add_File
     (Filename : String;
      Session  : Session_Type := Current_Session);
   --  Add Filename to the list of file to be processed. There is no limit on
   --  the number of files that can be added. Files are processed in the order
   --  they have been added (i.e. the filename list is FIFO). If Filename does
   --  not exist or if it is not readable, File_Error is raised.

   procedure Add_Files
     (Directory             : String;
      Filenames             : String;
      Number_Of_Files_Added : out Natural;
      Session               : Session_Type := Current_Session);
   --  Add all files matching the regular expression Filenames in the specified
   --  directory to the list of file to be processed. There is no limit on
   --  the number of files that can be added. Each file is processed in
   --  the same order they have been added (i.e. the filename list is FIFO).
   --  The number of files (possibly 0) added is returned in
   --  Number_Of_Files_Added.

   -------------------------------------
   -- Information about current state --
   -------------------------------------

   function Number_Of_Fields
     (Session : Session_Type := Current_Session)
      return    Count;
   pragma Inline (Number_Of_Fields);
   --  Returns the number of fields in the current record. It returns 0 when
   --  no file is being processed.

   function NF
     (Session : Session_Type := Current_Session)
      return    Count
     renames Number_Of_Fields;
   --  AWK abbreviation for above service.

   function Number_Of_File_Lines
     (Session : Session_Type := Current_Session)
      return    Count;
   pragma Inline (Number_Of_File_Lines);
   --  Returns the current line number in the processed file. It returns 0 when
   --  no file is being processed.

   function FNR
     (Session : Session_Type := Current_Session)
      return    Count renames Number_Of_File_Lines;
   --  AWK abbreviation for above service.

   function Number_Of_Lines
     (Session : Session_Type := Current_Session)
      return    Count;
   pragma Inline (Number_Of_Lines);
   --  Returns the number of line processed until now. This is equal to number
   --  of line in each already processed file plus FNR. It returns 0 when
   --  no file is being processed.

   function NR
     (Session : Session_Type := Current_Session)
      return    Count
     renames Number_Of_Lines;
   --  AWK abbreviation for above service.

   function Number_Of_Files
     (Session : Session_Type := Current_Session)
      return    Natural;
   pragma Inline (Number_Of_Files);
   --  Returns the number of files associated with Session. This is the total
   --  number of files added with Add_File and Add_Files services.

   function File
     (Session : Session_Type := Current_Session)
      return    String;
   --  Returns the name of the file being processed. It returns the empty
   --  string when no file is being processed.

   ---------------------
   -- Field accessors --
   ---------------------

   function Field
     (Rank    : Count;
      Session : Session_Type := Current_Session)
      return    String;
   --  Returns field number Rank value of the current record. If Rank = 0 it
   --  returns the current record (i.e. the line as read in the file). It
   --  raises Field_Error if Rank > NF or if Session is not open.

   function Field
     (Rank    : Count;
      Session : Session_Type := Current_Session)
      return    Integer;
   --  Returns field number Rank value of the current record as an integer. It
   --  raises Field_Error if Rank > NF or if Session is not open. It
   --  raises Data_Error if the field value cannot be converted to an integer.

   function Field
     (Rank    : Count;
      Session : Session_Type := Current_Session)
      return    Float;
   --  Returns field number Rank value of the current record as a float. It
   --  raises Field_Error if Rank > NF or if Session is not open. It
   --  raises Data_Error if the field value cannot be converted to a float.

   generic
      type Discrete is (<>);
   function Discrete_Field
     (Rank    : Count;
      Session : Session_Type := Current_Session)
      return    Discrete;
   --  Returns field number Rank value of the current record as a type
   --  Discrete. It raises Field_Error if Rank > NF. It raises Data_Error if
   --  the field value cannot be converted to type Discrete.

   --------------------
   -- Pattern/Action --
   --------------------

   --  AWK defines rules like "PATTERN { ACTION }". Which means that ACTION
   --  will be executed if PATTERN match. A pattern in this implementation can
   --  be a simple string (match function is equality), a regular expression,
   --  a function returning a boolean. An action is associated to a pattern
   --  using the Register services.
   --
   --  Each procedure Register will add a rule to the set of rules for the
   --  session. Rules are examined in the order they have been added.

   type Pattern_Callback is access function return Boolean;
   --  This is a pattern function pointer. When it returns True the associated
   --  action will be called.

   type Action_Callback is access procedure;
   --  A simple action pointer

   type Match_Action_Callback is
     access procedure (Matches : GNAT.Regpat.Match_Array);
   --  An advanced action pointer used with a regular expression pattern. It
   --  returns an array of all the matches. See GNAT.Regpat for further
   --  information.

   procedure Register
     (Field   : Count;
      Pattern : String;
      Action  : Action_Callback;
      Session : Session_Type := Current_Session);
   --  Register an Action associated with a Pattern. The pattern here is a
   --  simple string that must match exactly the field number specified.

   procedure Register
     (Field   : Count;
      Pattern : GNAT.Regpat.Pattern_Matcher;
      Action  : Action_Callback;
      Session : Session_Type := Current_Session);
   --  Register an Action associated with a Pattern. The pattern here is a
   --  simple regular expression which must match the field number specified.

   procedure Register
     (Field   : Count;
      Pattern : GNAT.Regpat.Pattern_Matcher;
      Action  : Match_Action_Callback;
      Session : Session_Type := Current_Session);
   --  Same as above but it pass the set of matches to the action
   --  procedure. This is useful to analyse further why and where a regular
   --  expression did match.

   procedure Register
     (Pattern : Pattern_Callback;
      Action  : Action_Callback;
      Session : Session_Type := Current_Session);
   --  Register an Action associated with a Pattern. The pattern here is a
   --  function that must return a boolean. Action callback will be called if
   --  the pattern callback returns True and nothing will happen if it is
   --  False. This version is more general, the two other register services
   --  trigger an action based on the value of a single field only.

   procedure Register
     (Action  : Action_Callback;
      Session : Session_Type := Current_Session);
   --  Register an Action that will be called for every line. This is
   --  equivalent to a Pattern_Callback function always returning True.

   --------------------
   -- Parse iterator --
   --------------------

   procedure Parse
     (Separators : String := Use_Current;
      Filename   : String := Use_Current;
      Session    : Session_Type := Current_Session);
   --  Launch the iterator, it will read every line in all specified
   --  session's files. Registered callbacks are then called if the associated
   --  pattern match. It is possible to specify a filename and a set of
   --  separators directly. This offer a quick way to parse a single
   --  file. These parameters will override those specified by Set_FS and
   --  Add_File. The Session will be opened and closed automatically.
   --  File_Error is raised if there is no file associated with Session, or if
   --  a file associated with Session is not longer readable. It raises
   --  Session_Error is Session is already open.

   -----------------------------------
   -- Get_Line/End_Of_Data Iterator --
   -----------------------------------

   type Callback_Mode is (None, Only, Pass_Through);
   --  These mode are used for Get_Line/End_Of_Data and For_Every_Line
   --  iterators. The associated semantic is:
   --
   --    None
   --       callbacks are not active. This is the default mode for
   --       Get_Line/End_Of_Data and For_Every_Line iterators.
   --
   --    Only
   --       callbacks are active, if at least one pattern match, the associated
   --       action is called and this line will not be passed to the user. In
   --       the Get_Line case the next line will be read (if there is some
   --       line remaining), in the For_Every_Line case Action will
   --       not be called for this line.
   --
   --    Pass_Through
   --       callbacks are active, for patterns which match the associated
   --       action is called. Then the line is passed to the user. It means
   --       that Action procedure is called in the For_Every_Line case and
   --       that Get_Line returns with the current line active.
   --

   procedure Open
     (Separators : String := Use_Current;
      Filename   : String := Use_Current;
      Session    : Session_Type := Current_Session);
   --  Open the first file and initialize the unit. This must be called once
   --  before using Get_Line. It is possible to specify a filename and a set of
   --  separators directly. This offer a quick way to parse a single file.
   --  These parameters will override those specified by Set_FS and Add_File.
   --  File_Error is raised if there is no file associated with Session, or if
   --  the first file associated with Session is no longer readable. It raises
   --  Session_Error is Session is already open.

   procedure Get_Line
     (Callbacks : Callback_Mode := None;
      Session   : Session_Type  := Current_Session);
   --  Read a line from the current input file. If the file index is at the
   --  end of the current input file (i.e. End_Of_File is True) then the
   --  following file is opened. If there is no more file to be processed,
   --  exception End_Error will be raised. File_Error will be raised if Open
   --  has not been called. Next call to Get_Line will return the following
   --  line in the file. By default the registered callbacks are not called by
   --  Get_Line, this can activated by setting Callbacks (see Callback_Mode
   --  description above). File_Error may be raised if a file associated with
   --  Session is not readable.
   --
   --  When Callbacks is not None, it is possible to exhaust all the lines
   --  of all the files associated with Session. In this case, File_Error
   --  is not raised.
   --
   --  This procedure can be used from a subprogram called by procedure Parse
   --  or by an instantiation of For_Every_Line (see below).

   function End_Of_Data
     (Session : Session_Type := Current_Session)
      return    Boolean;
   pragma Inline (End_Of_Data);
   --  Returns True if there is no more data to be processed in Session. It
   --  means that the latest session's file is being processed and that
   --  there is no more data to be read in this file (End_Of_File is True).

   function End_Of_File
     (Session : Session_Type := Current_Session)
      return    Boolean;
   pragma Inline (End_Of_File);
   --  Returns True when there is no more data to be processed on the current
   --  session's file.

   procedure Close (Session : Session_Type);
   --  Release all associated data with Session. All memory allocated will
   --  be freed, the current file will be closed if needed, the callbacks
   --  will be unregistered. Close is convenient in reestablishing a session
   --  for new use. Get_Line is no longer usable (will raise File_Error)
   --  except after a successful call to Open, Parse or an instantiation
   --  of For_Every_Line.

   -----------------------------
   -- For_Every_Line iterator --
   -----------------------------

   generic
      with procedure Action (Quit : in out Boolean);
   procedure For_Every_Line
     (Separators : String := Use_Current;
      Filename   : String := Use_Current;
      Callbacks  : Callback_Mode := None;
      Session    : Session_Type := Current_Session);
   --  This is another iterator. Action will be called for each new
   --  record. The iterator's termination can be controlled by setting Quit
   --  to True. It is by default set to False. It is possible to specify a
   --  filename and a set of separators directly. This offer a quick way to
   --  parse a single file. These parameters will override those specified by
   --  Set_FS and Add_File. By default the registered callbacks are not called
   --  by For_Every_Line, this can activated by setting Callbacks (see
   --  Callback_Mode description above). The Session will be opened and
   --  closed automatically. File_Error is raised if there is no file
   --  associated with Session. It raises Session_Error is Session is already
   --  open.

private
   type Session_Data;
   type Session_Data_Access is access Session_Data;

   type Session_Type is new Ada.Finalization.Limited_Controlled with record
      Data : Session_Data_Access;
   end record;

   procedure Initialize (Session : in out Session_Type);
   procedure Finalize   (Session : in out Session_Type);

end GNAT.AWK;