summaryrefslogtreecommitdiff
path: root/gcc/ada/g-regpat.adb
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/ada/g-regpat.adb')
-rw-r--r--gcc/ada/g-regpat.adb563
1 files changed, 331 insertions, 232 deletions
diff --git a/gcc/ada/g-regpat.adb b/gcc/ada/g-regpat.adb
index 1949a8a5540..4ad6efbf944 100644
--- a/gcc/ada/g-regpat.adb
+++ b/gcc/ada/g-regpat.adb
@@ -7,7 +7,7 @@
-- B o d y --
-- --
-- Copyright (C) 1986 by University of Toronto. --
--- Copyright (C) 1996-2002 Ada Core Technologies, Inc. --
+-- Copyright (C) 1996-2003 Ada Core Technologies, Inc. --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
@@ -27,7 +27,8 @@
-- however invalidate any other reasons why the executable file might be --
-- covered by the GNU Public License. --
-- --
--- GNAT is maintained by Ada Core Technologies Inc (http://www.gnat.com). --
+-- GNAT was originally developed by the GNAT team at New York University. --
+-- Extensive contributions were provided by Ada Core Technologies Inc. --
-- --
------------------------------------------------------------------------------
@@ -240,7 +241,7 @@ package body GNAT.Regpat is
return Boolean;
-- Return True if the entry is set for C in the class Bitmap.
- procedure Reset_Class (Bitmap : in out Character_Class);
+ procedure Reset_Class (Bitmap : out Character_Class);
-- Clear all the entries in the class Bitmap.
pragma Inline (Set_In_Class);
@@ -256,7 +257,7 @@ package body GNAT.Regpat is
function Is_Alnum (C : Character) return Boolean;
-- Return True if C is an alphanum character or an underscore ('_')
- function Is_Space (C : Character) return Boolean;
+ function Is_White_Space (C : Character) return Boolean;
-- Return True if C is a whitespace character
function Is_Printable (C : Character) return Boolean;
@@ -305,7 +306,7 @@ package body GNAT.Regpat is
pragma Inline ("=");
pragma Inline (Is_Alnum);
- pragma Inline (Is_Space);
+ pragma Inline (Is_White_Space);
pragma Inline (Get_Next);
pragma Inline (Get_Next_Offset);
pragma Inline (Operand);
@@ -377,20 +378,19 @@ package body GNAT.Regpat is
Emit_Ptr : Pointer := Program_First;
Parse_Pos : Natural := Expression'First; -- Input-scan pointer
- Parse_End : Natural := Expression'Last;
+ Parse_End : constant Natural := Expression'Last;
----------------------------
-- Subprograms for Create --
----------------------------
procedure Emit (B : Character);
- -- Output the Character to the Program.
- -- If code-generation is disables, simply increments the program
- -- counter.
+ -- Output the Character B to the Program. If code-generation is
+ -- disabled, simply increments the program counter.
function Emit_Node (Op : Opcode) return Pointer;
-- If code-generation is enabled, Emit_Node outputs the
- -- opcode and reserves space for a pointer to the next node.
+ -- opcode Op and reserves space for a pointer to the next node.
-- Return value is the location of new opcode, i.e. the old Emit_Ptr.
procedure Emit_Natural (IP : Pointer; N : Natural);
@@ -405,24 +405,26 @@ package body GNAT.Regpat is
procedure Parse
(Parenthesized : Boolean;
- Flags : in out Expression_Flags;
+ Flags : out Expression_Flags;
IP : out Pointer);
-- Parse regular expression, i.e. main body or parenthesized thing
-- Caller must absorb opening parenthesis.
procedure Parse_Branch
- (Flags : in out Expression_Flags;
+ (Flags : out Expression_Flags;
First : Boolean;
IP : out Pointer);
-- Implements the concatenation operator and handles '|'
-- First should be true if this is the first item of the alternative.
procedure Parse_Piece
- (Expr_Flags : in out Expression_Flags; IP : out Pointer);
+ (Expr_Flags : out Expression_Flags;
+ IP : out Pointer);
-- Parse something followed by possible [*+?]
procedure Parse_Atom
- (Expr_Flags : in out Expression_Flags; IP : out Pointer);
+ (Expr_Flags : out Expression_Flags;
+ IP : out Pointer);
-- Parse_Atom is the lowest level parse procedure.
-- Optimization: gobbles an entire sequence of ordinary characters
-- so that it can turn them into a single node, which is smaller to
@@ -475,15 +477,16 @@ package body GNAT.Regpat is
Greedy : out Boolean);
-- Parse the argument list for a curly operator.
-- It is assumed that IP is indeed pointing at a valid operator.
+ -- So what is IP and how come IP is not referenced in the body ???
procedure Parse_Character_Class (IP : out Pointer);
-- Parse a character class.
-- The calling subprogram should consume the opening '[' before.
- procedure Parse_Literal (Expr_Flags : in out Expression_Flags;
- IP : out Pointer);
- -- Parse_Literal encodes a string of characters
- -- to be matched exactly.
+ procedure Parse_Literal
+ (Expr_Flags : out Expression_Flags;
+ IP : out Pointer);
+ -- Parse_Literal encodes a string of characters to be matched exactly
function Parse_Posix_Character_Class return Std_Class;
-- Parse a POSIX character class, like [:alpha:] or [:^alpha:].
@@ -589,7 +592,7 @@ package body GNAT.Regpat is
Max : out Natural;
Greedy : out Boolean)
is
- pragma Warnings (Off, IP);
+ pragma Unreferenced (IP);
Save_Pos : Natural := Parse_Pos + 1;
@@ -701,8 +704,8 @@ package body GNAT.Regpat is
-- the operator before it.
if Emit_Code then
- Program (Operand + Size .. Emit_Ptr + Size)
- := Program (Operand .. Emit_Ptr);
+ Program (Operand + Size .. Emit_Ptr + Size) :=
+ Program (Operand .. Emit_Ptr);
end if;
-- Insert the operator at the position previously occupied by the
@@ -848,7 +851,7 @@ package body GNAT.Regpat is
procedure Parse
(Parenthesized : in Boolean;
- Flags : in out Expression_Flags;
+ Flags : out Expression_Flags;
IP : out Pointer)
is
E : String renames Expression;
@@ -972,7 +975,7 @@ package body GNAT.Regpat is
----------------
procedure Parse_Atom
- (Expr_Flags : in out Expression_Flags;
+ (Expr_Flags : out Expression_Flags;
IP : out Pointer)
is
C : Character;
@@ -1039,8 +1042,15 @@ package body GNAT.Regpat is
when '|' | ASCII.LF | ')' =>
Fail ("internal urp"); -- Supposed to be caught earlier
- when '?' | '+' | '*' | '{' =>
- Fail ("?+*{ follows nothing");
+ when '?' | '+' | '*' =>
+ Fail (C & " follows nothing");
+
+ when '{' =>
+ if Is_Curly_Operator (Parse_Pos - 1) then
+ Fail (C & " follows nothing");
+ else
+ Parse_Literal (Expr_Flags, IP);
+ end if;
when '\' =>
if Parse_Pos > Parse_End then
@@ -1096,7 +1106,7 @@ package body GNAT.Regpat is
IP := Emit_Node (REFF);
declare
- Save : Natural := Parse_Pos - 1;
+ Save : constant Natural := Parse_Pos - 1;
begin
while Parse_Pos <= Expression'Last
@@ -1124,7 +1134,7 @@ package body GNAT.Regpat is
------------------
procedure Parse_Branch
- (Flags : in out Expression_Flags;
+ (Flags : out Expression_Flags;
First : Boolean;
IP : out Pointer)
is
@@ -1132,7 +1142,9 @@ package body GNAT.Regpat is
Chain : Pointer;
Last : Pointer;
New_Flags : Expression_Flags;
- Dummy : Pointer;
+
+ Discard : Pointer;
+ pragma Warnings (Off, Discard);
begin
Flags := Worst_Expression; -- Tentatively
@@ -1168,10 +1180,11 @@ package body GNAT.Regpat is
Chain := Last;
end loop;
- if Chain = 0 then -- Loop ran zero CURLY
- Dummy := Emit_Node (NOTHING);
- end if;
+ -- Case where loop ran zero times
+ if Chain = 0 then
+ Discard := Emit_Node (NOTHING);
+ end if;
end Parse_Branch;
---------------------------
@@ -1283,14 +1296,14 @@ package body GNAT.Regpat is
when ANYOF_SPACE =>
for Value in Class_Byte'Range loop
- if Is_Space (Character'Val (Value)) then
+ if Is_White_Space (Character'Val (Value)) then
Set_In_Class (Bitmap, Character'Val (Value));
end if;
end loop;
when ANYOF_NSPACE =>
for Value in Class_Byte'Range loop
- if not Is_Space (Character'Val (Value)) then
+ if not Is_White_Space (Character'Val (Value)) then
Set_In_Class (Bitmap, Character'Val (Value));
end if;
end loop;
@@ -1392,7 +1405,7 @@ package body GNAT.Regpat is
when ANYOF_PUNCT =>
for Value in Class_Byte'Range loop
if Is_Printable (Character'Val (Value))
- and then not Is_Space (Character'Val (Value))
+ and then not Is_White_Space (Character'Val (Value))
and then not Is_Alnum (Character'Val (Value))
then
Set_In_Class (Bitmap, Character'Val (Value));
@@ -1402,7 +1415,7 @@ package body GNAT.Regpat is
when ANYOF_NPUNCT =>
for Value in Class_Byte'Range loop
if not Is_Printable (Character'Val (Value))
- or else Is_Space (Character'Val (Value))
+ or else Is_White_Space (Character'Val (Value))
or else Is_Alnum (Character'Val (Value))
then
Set_In_Class (Bitmap, Character'Val (Value));
@@ -1520,17 +1533,17 @@ package body GNAT.Regpat is
-- This is a bit tricky due to quoted chars and due to
-- the multiplier characters '*', '+', and '?' that
-- take the SINGLE char previous as their operand.
- --
+
-- On entry, the character at Parse_Pos - 1 is going to go
-- into the string, no matter what it is. It could be
-- following a \ if Parse_Atom was entered from the '\' case.
- --
+
-- Basic idea is to pick up a good char in C and examine
-- the next char. If Is_Mult (C) then twiddle, if it's a \
-- then frozzle and if it's another magic char then push C and
-- terminate the string. If none of the above, push C on the
-- string and go around again.
- --
+
-- Start_Pos is used to remember where "the current character"
-- starts in the string, if due to an Is_Mult we need to back
-- up and put the current char in a separate 1-character string.
@@ -1539,12 +1552,13 @@ package body GNAT.Regpat is
-- flag at the end.
procedure Parse_Literal
- (Expr_Flags : in out Expression_Flags;
+ (Expr_Flags : out Expression_Flags;
IP : out Pointer)
is
Start_Pos : Natural := 0;
C : Character;
Length_Ptr : Pointer;
+
Has_Special_Operator : Boolean := False;
begin
@@ -1561,7 +1575,6 @@ package body GNAT.Regpat is
Parse_Loop :
loop
-
C := Expression (Parse_Pos); -- Get current character
case C is
@@ -1582,9 +1595,11 @@ package body GNAT.Regpat is
-- Are we looking at an operator, or is this
-- simply a normal character ?
+
elsif not Is_Mult (Parse_Pos) then
Start_Pos := Parse_Pos;
Case_Emit (C);
+
else
-- We've got something like "abc?d". Mark this as a
-- special case. What we want to emit is a first
@@ -1592,14 +1607,17 @@ package body GNAT.Regpat is
-- ultimately be transformed with a CURLY operator, A
-- special case has to be handled for "a?", since there
-- is no initial string to emit.
+
Has_Special_Operator := True;
exit Parse_Loop;
end if;
when '\' =>
Start_Pos := Parse_Pos;
+
if Parse_Pos = Parse_End then
Fail ("Trailing \");
+
else
case Expression (Parse_Pos + 1) is
when 'b' | 'B' | 's' | 'S' | 'd' | 'D'
@@ -1613,6 +1631,7 @@ package body GNAT.Regpat is
when 'a' => Emit (ASCII.BEL);
when others => Emit (Expression (Parse_Pos + 1));
end case;
+
Parse_Pos := Parse_Pos + 1;
end if;
@@ -1663,8 +1682,8 @@ package body GNAT.Regpat is
-- role is not redundant.
procedure Parse_Piece
- (Expr_Flags : in out Expression_Flags;
- IP : out Pointer)
+ (Expr_Flags : out Expression_Flags;
+ IP : out Pointer)
is
Op : Character;
New_Flags : Expression_Flags;
@@ -1774,7 +1793,26 @@ package body GNAT.Regpat is
Class : Std_Class := ANYOF_NONE;
E : String renames Expression;
+ -- Class names. Note that the code assumes that all class names
+ -- starting with the same letter have the same length.
+
+ Alnum : constant String := "alnum:]";
+ Alpha : constant String := "alpha:]";
+ Ascii_C : constant String := "ascii:]";
+ Cntrl : constant String := "cntrl:]";
+ Digit : constant String := "digit:]";
+ Graph : constant String := "graph:]";
+ Lower : constant String := "lower:]";
+ Print : constant String := "print:]";
+ Punct : constant String := "punct:]";
+ Space : constant String := "space:]";
+ Upper : constant String := "upper:]";
+ Word : constant String := "word:]";
+ Xdigit : constant String := "xdigit:]";
+
begin
+ -- Case of character class specified
+
if Parse_Pos <= Parse_End
and then Expression (Parse_Pos) = ':'
then
@@ -1789,150 +1827,196 @@ package body GNAT.Regpat is
Parse_Pos := Parse_Pos + 1;
end if;
- -- All classes have 6 characters at least
- -- ??? magid constant 6 should have a name!
+ -- Check for class names based on first letter
- if Parse_Pos + 6 <= Parse_End then
+ case Expression (Parse_Pos) is
- case Expression (Parse_Pos) is
- when 'a' =>
- if E (Parse_Pos .. Parse_Pos + 4) = "alnum:]" then
+ when 'a' =>
+
+ -- All 'a' classes have the same length (Alnum'Length)
+
+ if Parse_Pos + Alnum'Length - 1 <= Parse_End then
+
+ if E (Parse_Pos .. Parse_Pos + Alnum'Length - 1) =
+ Alnum
+ then
if Invert then
Class := ANYOF_NALNUMC;
else
Class := ANYOF_ALNUMC;
end if;
- elsif E (Parse_Pos .. Parse_Pos + 6) = "alpha:]" then
+ Parse_Pos := Parse_Pos + Alnum'Length;
+
+ elsif E (Parse_Pos .. Parse_Pos + Alpha'Length - 1) =
+ Alpha
+ then
if Invert then
Class := ANYOF_NALPHA;
else
Class := ANYOF_ALPHA;
end if;
- elsif E (Parse_Pos .. Parse_Pos + 6) = "ascii:]" then
+ Parse_Pos := Parse_Pos + Alpha'Length;
+
+ elsif E (Parse_Pos .. Parse_Pos + Ascii_C'Length - 1) =
+ Ascii_C
+ then
if Invert then
Class := ANYOF_NASCII;
else
Class := ANYOF_ASCII;
end if;
+ Parse_Pos := Parse_Pos + Ascii_C'Length;
end if;
+ end if;
- when 'c' =>
- if E (Parse_Pos .. Parse_Pos + 6) = "cntrl:]" then
- if Invert then
- Class := ANYOF_NCNTRL;
- else
- Class := ANYOF_CNTRL;
- end if;
+ when 'c' =>
+ if Parse_Pos + Cntrl'Length - 1 <= Parse_End
+ and then E (Parse_Pos .. Parse_Pos + Cntrl'Length - 1) =
+ Cntrl
+ then
+ if Invert then
+ Class := ANYOF_NCNTRL;
+ else
+ Class := ANYOF_CNTRL;
end if;
- when 'd' =>
+ Parse_Pos := Parse_Pos + Cntrl'Length;
+ end if;
- if E (Parse_Pos .. Parse_Pos + 6) = "digit:]" then
- if Invert then
- Class := ANYOF_NDIGIT;
- else
- Class := ANYOF_DIGIT;
- end if;
+ when 'd' =>
+ if Parse_Pos + Digit'Length - 1 <= Parse_End
+ and then E (Parse_Pos .. Parse_Pos + Digit'Length - 1) =
+ Digit
+ then
+ if Invert then
+ Class := ANYOF_NDIGIT;
+ else
+ Class := ANYOF_DIGIT;
end if;
- when 'g' =>
+ Parse_Pos := Parse_Pos + Digit'Length;
+ end if;
- if E (Parse_Pos .. Parse_Pos + 6) = "graph:]" then
- if Invert then
- Class := ANYOF_NGRAPH;
- else
- Class := ANYOF_GRAPH;
- end if;
+ when 'g' =>
+ if Parse_Pos + Graph'Length - 1 <= Parse_End
+ and then E (Parse_Pos .. Parse_Pos + Graph'Length - 1) =
+ Graph
+ then
+ if Invert then
+ Class := ANYOF_NGRAPH;
+ else
+ Class := ANYOF_GRAPH;
end if;
+ Parse_Pos := Parse_Pos + Graph'Length;
+ end if;
- when 'l' =>
-
- if E (Parse_Pos .. Parse_Pos + 6) = "lower:]" then
- if Invert then
- Class := ANYOF_NLOWER;
- else
- Class := ANYOF_LOWER;
- end if;
+ when 'l' =>
+ if Parse_Pos + Lower'Length - 1 <= Parse_End
+ and then E (Parse_Pos .. Parse_Pos + Lower'Length - 1) =
+ Lower
+ then
+ if Invert then
+ Class := ANYOF_NLOWER;
+ else
+ Class := ANYOF_LOWER;
end if;
+ Parse_Pos := Parse_Pos + Lower'Length;
+ end if;
- when 'p' =>
+ when 'p' =>
- if E (Parse_Pos .. Parse_Pos + 6) = "print:]" then
+ -- All 'p' classes have the same length
+
+ if Parse_Pos + Print'Length - 1 <= Parse_End then
+ if E (Parse_Pos .. Parse_Pos + Print'Length - 1) =
+ Print
+ then
if Invert then
Class := ANYOF_NPRINT;
else
Class := ANYOF_PRINT;
end if;
- elsif E (Parse_Pos .. Parse_Pos + 6) = "punct:]" then
+ Parse_Pos := Parse_Pos + Print'Length;
+
+ elsif E (Parse_Pos .. Parse_Pos + Punct'Length - 1) =
+ Punct
+ then
if Invert then
Class := ANYOF_NPUNCT;
else
Class := ANYOF_PUNCT;
end if;
- end if;
- when 's' =>
-
- if E (Parse_Pos .. Parse_Pos + 6) = "space:]" then
- if Invert then
- Class := ANYOF_NSPACE;
- else
- Class := ANYOF_SPACE;
- end if;
+ Parse_Pos := Parse_Pos + Punct'Length;
end if;
+ end if;
- when 'u' =>
-
- if E (Parse_Pos .. Parse_Pos + 6) = "upper:]" then
- if Invert then
- Class := ANYOF_NUPPER;
- else
- Class := ANYOF_UPPER;
- end if;
+ when 's' =>
+ if Parse_Pos + Space'Length - 1 <= Parse_End
+ and then E (Parse_Pos .. Parse_Pos + Space'Length - 1) =
+ Space
+ then
+ if Invert then
+ Class := ANYOF_NSPACE;
+ else
+ Class := ANYOF_SPACE;
end if;
- when 'w' =>
+ Parse_Pos := Parse_Pos + Space'Length;
+ end if;
- if E (Parse_Pos .. Parse_Pos + 5) = "word:]" then
- if Invert then
- Class := ANYOF_NALNUM;
- else
- Class := ANYOF_ALNUM;
- end if;
+ when 'u' =>
- Parse_Pos := Parse_Pos - 1;
+ if Parse_Pos + Upper'Length - 1 <= Parse_End
+ and then E (Parse_Pos .. Parse_Pos + Upper'Length - 1) =
+ Upper
+ then
+ if Invert then
+ Class := ANYOF_NUPPER;
+ else
+ Class := ANYOF_UPPER;
end if;
+ Parse_Pos := Parse_Pos + Upper'Length;
+ end if;
- when 'x' =>
-
- if Parse_Pos + 7 <= Parse_End
- and then E (Parse_Pos .. Parse_Pos + 7) = "xdigit:]"
- then
- if Invert then
- Class := ANYOF_NXDIGIT;
- else
- Class := ANYOF_XDIGIT;
- end if;
+ when 'w' =>
- Parse_Pos := Parse_Pos + 1;
+ if Parse_Pos + Word'Length - 1 <= Parse_End
+ and then E (Parse_Pos .. Parse_Pos + Word'Length - 1) =
+ Word
+ then
+ if Invert then
+ Class := ANYOF_NALNUM;
+ else
+ Class := ANYOF_ALNUM;
end if;
+ Parse_Pos := Parse_Pos + Word'Length;
+ end if;
- when others =>
- Class := ANYOF_NONE;
+ when 'x' =>
- end case;
+ if Parse_Pos + Xdigit'Length - 1 <= Parse_End
+ and then E (Parse_Pos .. Parse_Pos + Xdigit'Length - 1)
+ = Xdigit -- not Digit: an 8-char slice never equals "digit:]"
+ then
+ if Invert then
+ Class := ANYOF_NXDIGIT;
+ else
+ Class := ANYOF_XDIGIT;
+ end if;
- if Class /= ANYOF_NONE then
- Parse_Pos := Parse_Pos + 7;
- end if;
+ Parse_Pos := Parse_Pos + Xdigit'Length;
+ end if;
- else
- Fail ("Invalid character class");
- end if;
+ when others =>
+ Fail ("Invalid character class");
+ end case;
+
+ -- Character class not specified
else
return ANYOF_NONE;
@@ -2040,7 +2124,7 @@ package body GNAT.Regpat is
end if;
declare
- Point : String := Pointer'Image (Index);
+ Point : constant String := Pointer'Image (Index);
begin
for J in 1 .. 6 - Point'Length loop
@@ -2218,8 +2302,8 @@ package body GNAT.Regpat is
Value : constant Class_Byte := Character'Pos (C);
begin
- return (Bitmap (Value / 8)
- and Bit_Conversion (Value mod 8)) /= 0;
+ return
+ (Bitmap (Value / 8) and Bit_Conversion (Value mod 8)) /= 0;
end Get_From_Class;
--------------
@@ -2264,26 +2348,24 @@ package body GNAT.Regpat is
------------------
function Is_Printable (C : Character) return Boolean is
- Value : constant Natural := Character'Pos (C);
-
begin
- return (Value > 32 and then Value < 127)
- or else Is_Space (C);
+ -- Printable if space or graphic character or other whitespace
+ -- Other white space includes (HT/LF/VT/FF/CR = codes 9-13)
+
+ return C in Character'Val (32) .. Character'Val (126)
+ or else C in ASCII.HT .. ASCII.CR;
end Is_Printable;
- --------------
- -- Is_Space --
- --------------
+ --------------------
+ -- Is_White_Space --
+ --------------------
- function Is_Space (C : Character) return Boolean is
+ function Is_White_Space (C : Character) return Boolean is
begin
- return C = ' '
- or else C = ASCII.HT
- or else C = ASCII.CR
- or else C = ASCII.LF
- or else C = ASCII.VT
- or else C = ASCII.FF;
- end Is_Space;
+ -- Note: HT = 9, LF = 10, VT = 11, FF = 12, CR = 13
+
+ return C = ' ' or else C in ASCII.HT .. ASCII.CR;
+ end Is_White_Space;
-----------
-- Match --
@@ -2292,10 +2374,15 @@ package body GNAT.Regpat is
procedure Match
(Self : Pattern_Matcher;
Data : String;
- Matches : out Match_Array)
+ Matches : out Match_Array;
+ Data_First : Integer := -1;
+ Data_Last : Positive := Positive'Last)
is
Program : Program_Data renames Self.Program; -- Shorter notation
+ First_In_Data : constant Integer := Integer'Max (Data_First, Data'First);
+ Last_In_Data : constant Integer := Integer'Min (Data_Last, Data'Last);
+
-- Global work variables
Input_Pos : Natural; -- String-input pointer
@@ -2365,9 +2452,11 @@ package body GNAT.Regpat is
-- particular by going through "ordinary" nodes (that don't
-- need to know whether the rest of the match failed) by
-- using a loop instead of recursion.
+ -- Why is the above comment part of the spec rather than body ???
- function Match_Whilem (IP : Pointer) return Boolean;
+ function Match_Whilem (IP : Pointer) return Boolean;
-- Return True if a WHILEM matches
+ -- How come IP is unreferenced in the body ???
function Recurse_Match (IP : Pointer; From : Natural) return Boolean;
pragma Inline (Recurse_Match);
@@ -2401,7 +2490,7 @@ package body GNAT.Regpat is
return Natural
is
begin
- for J in Start .. Data'Last loop
+ for J in Start .. Last_In_Data loop
if Data (J) = C then
return J;
end if;
@@ -2416,15 +2505,19 @@ package body GNAT.Regpat is
function Recurse_Match (IP : Pointer; From : Natural) return Boolean is
L : constant Natural := Last_Paren;
+
Tmp_F : constant Match_Array :=
- Matches_Full (From + 1 .. Matches_Full'Last);
+ Matches_Full (From + 1 .. Matches_Full'Last);
+
Start : constant Natural_Array :=
- Matches_Tmp (From + 1 .. Matches_Tmp'Last);
+ Matches_Tmp (From + 1 .. Matches_Tmp'Last);
Input : constant Natural := Input_Pos;
+
begin
if Match (IP) then
return True;
end if;
+
Last_Paren := L;
Matches_Full (Tmp_F'Range) := Tmp_F;
Matches_Tmp (Start'Range) := Start;
@@ -2480,28 +2573,24 @@ package body GNAT.Regpat is
null;
when BOL =>
- exit State_Machine when
- Input_Pos /= BOL_Pos
+ exit State_Machine when Input_Pos /= BOL_Pos
and then ((Self.Flags and Multiple_Lines) = 0
or else Data (Input_Pos - 1) /= ASCII.LF);
when MBOL =>
- exit State_Machine when
- Input_Pos /= BOL_Pos
+ exit State_Machine when Input_Pos /= BOL_Pos
and then Data (Input_Pos - 1) /= ASCII.LF;
when SBOL =>
exit State_Machine when Input_Pos /= BOL_Pos;
when EOL =>
- exit State_Machine when
- Input_Pos <= Data'Last
+ exit State_Machine when Input_Pos <= Data'Last
and then ((Self.Flags and Multiple_Lines) = 0
or else Data (Input_Pos) /= ASCII.LF);
when MEOL =>
- exit State_Machine when
- Input_Pos <= Data'Last
+ exit State_Machine when Input_Pos <= Data'Last
and then Data (Input_Pos) /= ASCII.LF;
when SEOL =>
@@ -2516,11 +2605,11 @@ package body GNAT.Regpat is
Ln : Boolean := False;
begin
- if Input_Pos /= Data'First then
+ if Input_Pos /= First_In_Data then
N := Is_Alnum (Data (Input_Pos - 1));
end if;
- if Input_Pos > Data'Last then
+ if Input_Pos > Last_In_Data then
Ln := False;
else
Ln := Is_Alnum (Data (Input_Pos));
@@ -2538,60 +2627,55 @@ package body GNAT.Regpat is
end;
when SPACE =>
- exit State_Machine when
- Input_Pos > Data'Last
- or else not Is_Space (Data (Input_Pos));
+ exit State_Machine when Input_Pos > Last_In_Data
+ or else not Is_White_Space (Data (Input_Pos));
Input_Pos := Input_Pos + 1;
when NSPACE =>
- exit State_Machine when
- Input_Pos > Data'Last
- or else Is_Space (Data (Input_Pos));
+ exit State_Machine when Input_Pos > Last_In_Data
+ or else Is_White_Space (Data (Input_Pos));
Input_Pos := Input_Pos + 1;
when DIGIT =>
- exit State_Machine when
- Input_Pos > Data'Last
+ exit State_Machine when Input_Pos > Last_In_Data
or else not Is_Digit (Data (Input_Pos));
Input_Pos := Input_Pos + 1;
when NDIGIT =>
- exit State_Machine when
- Input_Pos > Data'Last
+ exit State_Machine when Input_Pos > Last_In_Data
or else Is_Digit (Data (Input_Pos));
Input_Pos := Input_Pos + 1;
when ALNUM =>
- exit State_Machine when
- Input_Pos > Data'Last
+ exit State_Machine when Input_Pos > Last_In_Data
or else not Is_Alnum (Data (Input_Pos));
Input_Pos := Input_Pos + 1;
when NALNUM =>
- exit State_Machine when
- Input_Pos > Data'Last
+ exit State_Machine when Input_Pos > Last_In_Data
or else Is_Alnum (Data (Input_Pos));
Input_Pos := Input_Pos + 1;
when ANY =>
- exit State_Machine when Input_Pos > Data'Last
+ exit State_Machine when Input_Pos > Last_In_Data
or else Data (Input_Pos) = ASCII.LF;
Input_Pos := Input_Pos + 1;
when SANY =>
- exit State_Machine when Input_Pos > Data'Last;
+ exit State_Machine when Input_Pos > Last_In_Data;
Input_Pos := Input_Pos + 1;
when EXACT =>
declare
- Opnd : Pointer := String_Operand (Scan);
- Current : Positive := Input_Pos;
+ Opnd : Pointer := String_Operand (Scan);
+ Current : Positive := Input_Pos;
+
Last : constant Pointer :=
Opnd + String_Length (Program, Scan);
begin
while Opnd <= Last loop
- exit State_Machine when Current > Data'Last
+ exit State_Machine when Current > Last_In_Data
or else Program (Opnd) /= Data (Current);
Current := Current + 1;
Opnd := Opnd + 1;
@@ -2602,14 +2686,15 @@ package body GNAT.Regpat is
when EXACTF =>
declare
- Opnd : Pointer := String_Operand (Scan);
- Current : Positive := Input_Pos;
+ Opnd : Pointer := String_Operand (Scan);
+ Current : Positive := Input_Pos;
+
Last : constant Pointer :=
Opnd + String_Length (Program, Scan);
begin
while Opnd <= Last loop
- exit State_Machine when Current > Data'Last
+ exit State_Machine when Current > Last_In_Data
or else Program (Opnd) /= To_Lower (Data (Current));
Current := Current + 1;
Opnd := Opnd + 1;
@@ -2624,8 +2709,7 @@ package body GNAT.Regpat is
begin
Bitmap_Operand (Program, Scan, Bitmap);
- exit State_Machine when
- Input_Pos > Data'Last
+ exit State_Machine when Input_Pos > Last_In_Data
or else not Get_From_Class (Bitmap, Data (Input_Pos));
Input_Pos := Input_Pos + 1;
end;
@@ -2633,7 +2717,8 @@ package body GNAT.Regpat is
when OPEN =>
declare
No : constant Natural :=
- Character'Pos (Program (Operand (Scan)));
+ Character'Pos (Program (Operand (Scan)));
+
begin
Matches_Tmp (No) := Input_Pos;
end;
@@ -2641,9 +2726,11 @@ package body GNAT.Regpat is
when CLOSE =>
declare
No : constant Natural :=
- Character'Pos (Program (Operand (Scan)));
+ Character'Pos (Program (Operand (Scan)));
+
begin
Matches_Full (No) := (Matches_Tmp (No), Input_Pos - 1);
+
if Last_Paren < No then
Last_Paren := No;
end if;
@@ -2653,6 +2740,7 @@ package body GNAT.Regpat is
declare
No : constant Natural :=
Character'Pos (Program (Operand (Scan)));
+
Data_Pos : Natural;
begin
@@ -2663,8 +2751,9 @@ package body GNAT.Regpat is
end if;
Data_Pos := Matches_Full (No).First;
+
while Data_Pos <= Matches_Full (No).Last loop
- if Input_Pos > Data'Last
+ if Input_Pos > Last_In_Data
or else Data (Input_Pos) /= Data (Data_Pos)
then
return False;
@@ -2681,6 +2770,7 @@ package body GNAT.Regpat is
when STAR | PLUS | CURLY =>
declare
Greed : constant Boolean := Greedy;
+
begin
Greedy := True;
return Match_Simple_Operator (Op, Scan, Next, Greed);
@@ -2689,15 +2779,18 @@ package body GNAT.Regpat is
when CURLYX =>
-- Looking at something like:
+
-- 1: CURLYX {n,m} (->4)
-- 2: code for complex thing (->3)
-- 3: WHILEM (->0)
-- 4: NOTHING
declare
+ Min : constant Natural :=
+ Read_Natural (Program, Scan + 3);
+ Max : constant Natural :=
+ Read_Natural (Program, Scan + 5);
Cc : aliased Current_Curly_Record;
- Min : Natural := Read_Natural (Program, Scan + 3);
- Max : Natural := Read_Natural (Program, Scan + 5);
Has_Match : Boolean;
@@ -2723,9 +2816,6 @@ package body GNAT.Regpat is
when WHILEM =>
return Match_Whilem (IP);
-
- when others =>
- raise Expression_Error; -- Invalid instruction
end case;
Scan := Next;
@@ -2756,7 +2846,7 @@ package body GNAT.Regpat is
Operand_Code : Pointer;
Old : Natural;
Last_Pos : Natural;
- Save : Natural := Input_Pos;
+ Save : constant Natural := Input_Pos;
begin
-- Lookahead to avoid useless match attempts
@@ -2787,6 +2877,7 @@ package body GNAT.Regpat is
-- Non greedy operators
if not Greedy then
+
-- Test the minimal repetitions
if Min /= 0
@@ -2804,10 +2895,10 @@ package body GNAT.Regpat is
Last_Pos := Input_Pos + Max;
- if Last_Pos > Data'Last
+ if Last_Pos > Last_In_Data
or else Max = Natural'Last
then
- Last_Pos := Data'Last;
+ Last_Pos := Last_In_Data;
end if;
-- Look for the first possible opportunity
@@ -2890,7 +2981,7 @@ package body GNAT.Regpat is
while No >= Min loop
if not Next_Char_Known
- or else (Input_Pos <= Data'Last
+ or else (Input_Pos <= Last_In_Data
and then Data (Input_Pos) = Next_Char)
then
if Match (Next) then
@@ -2903,6 +2994,7 @@ package body GNAT.Regpat is
No := No - 1;
Input_Pos := Save + No;
end loop;
+
return False;
end if;
end Match_Simple_Operator;
@@ -2911,20 +3003,20 @@ package body GNAT.Regpat is
-- Match_Whilem --
------------------
- -- This is really hard to understand, because after we match what we're
- -- trying to match, we must make sure the rest of the REx is going to
- -- match for sure, and to do that we have to go back UP the parse tree
- -- by recursing ever deeper. And if it fails, we have to reset our
- -- parent's current state that we can try again after backing off.
+ -- This is really hard to understand, because after we match what we
+ -- are trying to match, we must make sure the rest of the REx is going
+ -- to match for sure, and to do that we have to go back UP the parse
+ -- tree by recursing ever deeper. And if it fails, we have to reset
+ -- our parent's current state so that we can try again after backing off.
function Match_Whilem (IP : Pointer) return Boolean is
- pragma Warnings (Off, IP);
+ pragma Unreferenced (IP);
Cc : Current_Curly_Access := Current_Curly;
- N : Natural := Cc.Cur + 1;
+ N : constant Natural := Cc.Cur + 1;
Ln : Natural := 0;
- Lastloc : Natural := Cc.Lastloc;
+ Lastloc : constant Natural := Cc.Lastloc;
-- Detection of 0-len.
begin
@@ -3058,8 +3150,8 @@ package body GNAT.Regpat is
Bitmap : Character_Class;
begin
- if Max = Natural'Last or else Scan + Max - 1 > Data'Last then
- Last := Data'Last;
+ if Max = Natural'Last or else Scan + Max - 1 > Last_In_Data then
+ Last := Last_In_Data;
else
Last := Scan + Max - 1;
end if;
@@ -3125,14 +3217,14 @@ package body GNAT.Regpat is
when SPACE =>
while Scan <= Last
- and then Is_Space (Data (Scan))
+ and then Is_White_Space (Data (Scan))
loop
Scan := Scan + 1;
end loop;
when NSPACE =>
while Scan <= Last
- and then not Is_Space (Data (Scan))
+ and then not Is_White_Space (Data (Scan))
loop
Scan := Scan + 1;
end loop;
@@ -3202,7 +3294,7 @@ package body GNAT.Regpat is
Must_First : constant Pointer := Self.Must_Have;
Must_Last : constant Pointer :=
Must_First + Pointer (Self.Must_Have_Length - 1);
- Next_Try : Natural := Index (Data'First, First);
+ Next_Try : Natural := Index (First_In_Data, First);
begin
while Next_Try /= 0
@@ -3226,11 +3318,11 @@ package body GNAT.Regpat is
-- Simplest case first: an anchored match need be tried only once
if Self.Anchored and then (Self.Flags and Multiple_Lines) = 0 then
- Matched := Try (Data'First);
+ Matched := Try (First_In_Data);
elsif Self.Anchored then
declare
- Next_Try : Natural := Data'First;
+ Next_Try : Natural := First_In_Data;
begin
-- Test the first position in the buffer
Matched := Try (Next_Try);
@@ -3238,8 +3330,8 @@ package body GNAT.Regpat is
-- Else only test after newlines
if not Matched then
- while Next_Try <= Data'Last loop
- while Next_Try <= Data'Last
+ while Next_Try <= Last_In_Data loop
+ while Next_Try <= Last_In_Data
and then Data (Next_Try) /= ASCII.LF
loop
Next_Try := Next_Try + 1;
@@ -3247,7 +3339,7 @@ package body GNAT.Regpat is
Next_Try := Next_Try + 1;
- if Next_Try <= Data'Last then
+ if Next_Try <= Last_In_Data then
Matched := Try (Next_Try);
exit when Matched;
end if;
@@ -3256,11 +3348,10 @@ package body GNAT.Regpat is
end;
elsif Self.First /= ASCII.NUL then
-
-- We know what char it must start with
declare
- Next_Try : Natural := Index (Data'First, Self.First);
+ Next_Try : Natural := Index (First_In_Data, Self.First);
begin
while Next_Try /= 0 loop
@@ -3273,10 +3364,10 @@ package body GNAT.Regpat is
else
-- Messy cases: try all locations (including for the empty string)
- Matched := Try (Data'First);
+ Matched := Try (First_In_Data);
if not Matched then
- for S in Data'First + 1 .. Data'Last loop
+ for S in First_In_Data + 1 .. Last_In_Data loop
Matched := Try (S);
exit when Matched;
end loop;
@@ -3295,13 +3386,15 @@ package body GNAT.Regpat is
function Match
(Self : Pattern_Matcher;
- Data : String)
+ Data : String;
+ Data_First : Integer := -1;
+ Data_Last : Positive := Positive'Last)
return Natural
is
Matches : Match_Array (0 .. 0);
begin
- Match (Self, Data, Matches);
+ Match (Self, Data, Matches, Data_First, Data_Last);
if Matches (0) = No_Match then
return Data'First - 1;
else
@@ -3313,24 +3406,28 @@ package body GNAT.Regpat is
(Expression : String;
Data : String;
Matches : out Match_Array;
- Size : Program_Size := 0)
+ Size : Program_Size := 0;
+ Data_First : Integer := -1;
+ Data_Last : Positive := Positive'Last)
is
PM : Pattern_Matcher (Size);
Finalize_Size : Program_Size;
begin
if Size = 0 then
- Match (Compile (Expression), Data, Matches);
+ Match (Compile (Expression), Data, Matches, Data_First, Data_Last);
else
Compile (PM, Expression, Finalize_Size);
- Match (PM, Data, Matches);
+ Match (PM, Data, Matches, Data_First, Data_Last);
end if;
end Match;
function Match
(Expression : String;
Data : String;
- Size : Program_Size := 0)
+ Size : Program_Size := 0;
+ Data_First : Integer := -1;
+ Data_Last : Positive := Positive'Last)
return Natural
is
PM : Pattern_Matcher (Size);
@@ -3338,17 +3435,19 @@ package body GNAT.Regpat is
begin
if Size = 0 then
- return Match (Compile (Expression), Data);
+ return Match (Compile (Expression), Data, Data_First, Data_Last);
else
Compile (PM, Expression, Final_Size);
- return Match (PM, Data);
+ return Match (PM, Data, Data_First, Data_Last);
end if;
end Match;
function Match
(Expression : String;
Data : String;
- Size : Program_Size := 0)
+ Size : Program_Size := 0;
+ Data_First : Integer := -1;
+ Data_Last : Positive := Positive'Last)
return Boolean
is
Matches : Match_Array (0 .. 0);
@@ -3357,10 +3456,10 @@ package body GNAT.Regpat is
begin
if Size = 0 then
- Match (Compile (Expression), Data, Matches);
+ Match (Compile (Expression), Data, Matches, Data_First, Data_Last);
else
Compile (PM, Expression, Final_Size);
- Match (PM, Data, Matches);
+ Match (PM, Data, Matches, Data_First, Data_Last);
end if;
return Matches (0).First >= Data'First;
@@ -3457,8 +3556,8 @@ package body GNAT.Regpat is
begin
for J in Str'Range loop
case Str (J) is
- when '^' | '$' | '|' | '*' | '+' | '?' | '{'
- | '}' | '[' | ']' | '(' | ')' | '\' =>
+ when '^' | '$' | '|' | '*' | '+' | '?' | '{' |
+ '}' | '[' | ']' | '(' | ')' | '\' =>
S (Last + 1) := '\';
S (Last + 2) := Str (J);
@@ -3491,7 +3590,7 @@ package body GNAT.Regpat is
-- Reset_Class --
-----------------
- procedure Reset_Class (Bitmap : in out Character_Class) is
+ procedure Reset_Class (Bitmap : out Character_Class) is
begin
Bitmap := (others => 0);
end Reset_Class;