diff options
Diffstat (limited to 'gcc/ada/g-regexp.adb')
-rw-r--r-- | gcc/ada/g-regexp.adb | 1359 |
1 files changed, 5 insertions, 1354 deletions
diff --git a/gcc/ada/g-regexp.adb b/gcc/ada/g-regexp.adb index b89129014c1..65c1166bff1 100644 --- a/gcc/ada/g-regexp.adb +++ b/gcc/ada/g-regexp.adb @@ -6,7 +6,7 @@ -- -- -- B o d y -- -- -- --- Copyright (C) 1999-2005, AdaCore -- +-- Copyright (C) 1999-2007, AdaCore -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -31,1357 +31,8 @@ -- -- ------------------------------------------------------------------------------ -with Unchecked_Deallocation; -with Ada.Exceptions; -with GNAT.Case_Util; +-- This package does not require a body, since it is a package renaming. We +-- provide a dummy file containing a No_Body pragma so that previous versions +-- of the body (which did exist) will not intefere. -package body GNAT.Regexp is - - Open_Paren : constant Character := '('; - Close_Paren : constant Character := ')'; - Open_Bracket : constant Character := '['; - Close_Bracket : constant Character := ']'; - - type State_Index is new Natural; - type Column_Index is new Natural; - - type Regexp_Array is array - (State_Index range <>, Column_Index range <>) of State_Index; - -- First index is for the state number - -- Second index is for the character type - -- Contents is the new State - - type Regexp_Array_Access is access Regexp_Array; - -- Use this type through the functions Set below, so that it - -- can grow dynamically depending on the needs. - - type Mapping is array (Character'Range) of Column_Index; - -- Mapping between characters and column in the Regexp_Array - - type Boolean_Array is array (State_Index range <>) of Boolean; - - type Regexp_Value - (Alphabet_Size : Column_Index; - Num_States : State_Index) is - record - Map : Mapping; - States : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size); - Is_Final : Boolean_Array (1 .. Num_States); - Case_Sensitive : Boolean; - end record; - -- Deterministic finite-state machine - - ----------------------- - -- Local Subprograms -- - ----------------------- - - procedure Set - (Table : in out Regexp_Array_Access; - State : State_Index; - Column : Column_Index; - Value : State_Index); - -- Sets a value in the table. If the table is too small, reallocate it - -- dynamically so that (State, Column) is a valid index in it. - - function Get - (Table : Regexp_Array_Access; - State : State_Index; - Column : Column_Index) - return State_Index; - -- Returns the value in the table at (State, Column). - -- If this index does not exist in the table, returns 0 - - procedure Free is new Unchecked_Deallocation - (Regexp_Array, Regexp_Array_Access); - - ------------ - -- Adjust -- - ------------ - - procedure Adjust (R : in out Regexp) is - Tmp : Regexp_Access; - - begin - Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size, - Num_States => R.R.Num_States); - Tmp.all := R.R.all; - R.R := Tmp; - end Adjust; - - ------------- - -- Compile -- - ------------- - - function Compile - (Pattern : String; - Glob : Boolean := False; - Case_Sensitive : Boolean := True) - return Regexp - is - S : String := Pattern; - -- The pattern which is really compiled (when the pattern is case - -- insensitive, we convert this string to lower-cases - - Map : Mapping := (others => 0); - -- Mapping between characters and columns in the tables - - Alphabet_Size : Column_Index := 0; - -- Number of significant characters in the regular expression. - -- This total does not include special operators, such as *, (, ... - - procedure Create_Mapping; - -- Creates a mapping between characters in the regexp and columns - -- in the tables representing the regexp. Test that the regexp is - -- well-formed Modifies Alphabet_Size and Map - - procedure Create_Primary_Table - (Table : out Regexp_Array_Access; - Num_States : out State_Index; - Start_State : out State_Index; - End_State : out State_Index); - -- Creates the first version of the regexp (this is a non determinist - -- finite state machine, which is unadapted for a fast pattern - -- matching algorithm). We use a recursive algorithm to process the - -- parenthesis sub-expressions. - -- - -- Table : at the end of the procedure : Column 0 is for any character - -- ('.') and the last columns are for no character (closure) - -- Num_States is set to the number of states in the table - -- Start_State is the number of the starting state in the regexp - -- End_State is the number of the final state when the regexp matches - - procedure Create_Primary_Table_Glob - (Table : out Regexp_Array_Access; - Num_States : out State_Index; - Start_State : out State_Index; - End_State : out State_Index); - -- Same function as above, but it deals with the second possible - -- grammar for 'globbing pattern', which is a kind of subset of the - -- whole regular expression grammar. - - function Create_Secondary_Table - (First_Table : Regexp_Array_Access; - Num_States : State_Index; - Start_State : State_Index; - End_State : State_Index) - return Regexp; - -- Creates the definitive table representing the regular expression - -- This is actually a transformation of the primary table First_Table, - -- where every state is grouped with the states in its 'no-character' - -- columns. The transitions between the new states are then recalculated - -- and if necessary some new states are created. - -- - -- Note that the resulting finite-state machine is not optimized in - -- terms of the number of states : it would be more time-consuming to - -- add a third pass to reduce the number of states in the machine, with - -- no speed improvement... - - procedure Raise_Exception - (M : String; - Index : Integer); - pragma No_Return (Raise_Exception); - -- Raise an exception, indicating an error at character Index in S - - -------------------- - -- Create_Mapping -- - -------------------- - - procedure Create_Mapping is - - procedure Add_In_Map (C : Character); - -- Add a character in the mapping, if it is not already defined - - ---------------- - -- Add_In_Map -- - ---------------- - - procedure Add_In_Map (C : Character) is - begin - if Map (C) = 0 then - Alphabet_Size := Alphabet_Size + 1; - Map (C) := Alphabet_Size; - end if; - end Add_In_Map; - - J : Integer := S'First; - Parenthesis_Level : Integer := 0; - Curly_Level : Integer := 0; - - -- Start of processing for Create_Mapping - - begin - while J <= S'Last loop - case S (J) is - when Open_Bracket => - J := J + 1; - - if S (J) = '^' then - J := J + 1; - end if; - - if S (J) = ']' or S (J) = '-' then - J := J + 1; - end if; - - -- The first character never has a special meaning - - loop - if J > S'Last then - Raise_Exception - ("Ran out of characters while parsing ", J); - end if; - - exit when S (J) = Close_Bracket; - - if S (J) = '-' - and then S (J + 1) /= Close_Bracket - then - declare - Start : constant Integer := J - 1; - - begin - J := J + 1; - - if S (J) = '\' then - J := J + 1; - end if; - - for Char in S (Start) .. S (J) loop - Add_In_Map (Char); - end loop; - end; - else - if S (J) = '\' then - J := J + 1; - end if; - - Add_In_Map (S (J)); - end if; - - J := J + 1; - end loop; - - -- A close bracket must follow a open_bracket, - -- and cannot be found alone on the line - - when Close_Bracket => - Raise_Exception - ("Incorrect character ']' in regular expression", J); - - when '\' => - if J < S'Last then - J := J + 1; - Add_In_Map (S (J)); - - else - -- \ not allowed at the end of the regexp - - Raise_Exception - ("Incorrect character '\' in regular expression", J); - end if; - - when Open_Paren => - if not Glob then - Parenthesis_Level := Parenthesis_Level + 1; - else - Add_In_Map (Open_Paren); - end if; - - when Close_Paren => - if not Glob then - Parenthesis_Level := Parenthesis_Level - 1; - - if Parenthesis_Level < 0 then - Raise_Exception - ("')' is not associated with '(' in regular " - & "expression", J); - end if; - - if S (J - 1) = Open_Paren then - Raise_Exception - ("Empty parenthesis not allowed in regular " - & "expression", J); - end if; - - else - Add_In_Map (Close_Paren); - end if; - - when '.' => - if Glob then - Add_In_Map ('.'); - end if; - - when '{' => - if not Glob then - Add_In_Map (S (J)); - else - Curly_Level := Curly_Level + 1; - end if; - - when '}' => - if not Glob then - Add_In_Map (S (J)); - else - Curly_Level := Curly_Level - 1; - end if; - - when '*' | '?' => - if not Glob then - if J = S'First then - Raise_Exception - ("'*', '+', '?' and '|' operators cannot be in " - & "first position in regular expression", J); - end if; - end if; - - when '|' | '+' => - if not Glob then - if J = S'First then - - -- These operators must apply to a sub-expression, - -- and cannot be found at the beginning of the line - - Raise_Exception - ("'*', '+', '?' and '|' operators cannot be in " - & "first position in regular expression", J); - end if; - - else - Add_In_Map (S (J)); - end if; - - when others => - Add_In_Map (S (J)); - end case; - - J := J + 1; - end loop; - - -- A closing parenthesis must follow an open parenthesis - - if Parenthesis_Level /= 0 then - Raise_Exception - ("'(' must always be associated with a ')'", J); - end if; - - if Curly_Level /= 0 then - Raise_Exception - ("'{' must always be associated with a '}'", J); - end if; - end Create_Mapping; - - -------------------------- - -- Create_Primary_Table -- - -------------------------- - - procedure Create_Primary_Table - (Table : out Regexp_Array_Access; - Num_States : out State_Index; - Start_State : out State_Index; - End_State : out State_Index) - is - Empty_Char : constant Column_Index := Alphabet_Size + 1; - - Current_State : State_Index := 0; - -- Index of the last created state - - procedure Add_Empty_Char - (State : State_Index; - To_State : State_Index); - -- Add a empty-character transition from State to To_State - - procedure Create_Repetition - (Repetition : Character; - Start_Prev : State_Index; - End_Prev : State_Index; - New_Start : out State_Index; - New_End : in out State_Index); - -- Create the table in case we have a '*', '+' or '?'. - -- Start_Prev .. End_Prev should indicate respectively the start and - -- end index of the previous expression, to which '*', '+' or '?' is - -- applied. - - procedure Create_Simple - (Start_Index : Integer; - End_Index : Integer; - Start_State : out State_Index; - End_State : out State_Index); - -- Fill the table for the regexp Simple. - -- This is the recursive procedure called to handle () expressions - -- If End_State = 0, then the call to Create_Simple creates an - -- independent regexp, not a concatenation - -- Start_Index .. End_Index is the starting index in the string S. - -- - -- Warning: it may look like we are creating too many empty-string - -- transitions, but they are needed to get the correct regexp. - -- The table is filled as follow ( s means start-state, e means - -- end-state) : - -- - -- regexp state_num | a b * empty_string - -- ------- ------------------------------ - -- a 1 (s) | 2 - - - - -- 2 (e) | - - - - - -- - -- ab 1 (s) | 2 - - - - -- 2 | - - - 3 - -- 3 | - 4 - - - -- 4 (e) | - - - - - -- - -- a|b 1 | 2 - - - - -- 2 | - - - 6 - -- 3 | - 4 - - - -- 4 | - - - 6 - -- 5 (s) | - - - 1,3 - -- 6 (e) | - - - - - -- - -- a* 1 | 2 - - - - -- 2 | - - - 4 - -- 3 (s) | - - - 1,4 - -- 4 (e) | - - - 3 - -- - -- (a) 1 (s) | 2 - - - - -- 2 (e) | - - - - - -- - -- a+ 1 | 2 - - - - -- 2 | - - - 4 - -- 3 (s) | - - - 1 - -- 4 (e) | - - - 3 - -- - -- a? 1 | 2 - - - - -- 2 | - - - 4 - -- 3 (s) | - - - 1,4 - -- 4 (e) | - - - - - -- - -- . 1 (s) | 2 2 2 - - -- 2 (e) | - - - - - - function Next_Sub_Expression - (Start_Index : Integer; - End_Index : Integer) - return Integer; - -- Returns the index of the last character of the next sub-expression - -- in Simple. Index cannot be greater than End_Index. - - -------------------- - -- Add_Empty_Char -- - -------------------- - - procedure Add_Empty_Char - (State : State_Index; - To_State : State_Index) - is - J : Column_Index := Empty_Char; - - begin - while Get (Table, State, J) /= 0 loop - J := J + 1; - end loop; - - Set (Table, State, J, To_State); - end Add_Empty_Char; - - ----------------------- - -- Create_Repetition -- - ----------------------- - - procedure Create_Repetition - (Repetition : Character; - Start_Prev : State_Index; - End_Prev : State_Index; - New_Start : out State_Index; - New_End : in out State_Index) - is - begin - New_Start := Current_State + 1; - - if New_End /= 0 then - Add_Empty_Char (New_End, New_Start); - end if; - - Current_State := Current_State + 2; - New_End := Current_State; - - Add_Empty_Char (End_Prev, New_End); - Add_Empty_Char (New_Start, Start_Prev); - - if Repetition /= '+' then - Add_Empty_Char (New_Start, New_End); - end if; - - if Repetition /= '?' then - Add_Empty_Char (New_End, New_Start); - end if; - end Create_Repetition; - - ------------------- - -- Create_Simple -- - ------------------- - - procedure Create_Simple - (Start_Index : Integer; - End_Index : Integer; - Start_State : out State_Index; - End_State : out State_Index) - is - J : Integer := Start_Index; - Last_Start : State_Index := 0; - - begin - Start_State := 0; - End_State := 0; - while J <= End_Index loop - case S (J) is - when Open_Paren => - declare - J_Start : constant Integer := J + 1; - Next_Start : State_Index; - Next_End : State_Index; - - begin - J := Next_Sub_Expression (J, End_Index); - Create_Simple (J_Start, J - 1, Next_Start, Next_End); - - if J < End_Index - and then (S (J + 1) = '*' or else - S (J + 1) = '+' or else - S (J + 1) = '?') - then - J := J + 1; - Create_Repetition - (S (J), - Next_Start, - Next_End, - Last_Start, - End_State); - - else - Last_Start := Next_Start; - - if End_State /= 0 then - Add_Empty_Char (End_State, Last_Start); - end if; - - End_State := Next_End; - end if; - end; - - when '|' => - declare - Start_Prev : constant State_Index := Start_State; - End_Prev : constant State_Index := End_State; - Start_J : constant Integer := J + 1; - Start_Next : State_Index := 0; - End_Next : State_Index := 0; - - begin - J := Next_Sub_Expression (J, End_Index); - - -- Create a new state for the start of the alternative - - Current_State := Current_State + 1; - Last_Start := Current_State; - Start_State := Last_Start; - - -- Create the tree for the second part of alternative - - Create_Simple (Start_J, J, Start_Next, End_Next); - - -- Create the end state - - Add_Empty_Char (Last_Start, Start_Next); - Add_Empty_Char (Last_Start, Start_Prev); - Current_State := Current_State + 1; - End_State := Current_State; - Add_Empty_Char (End_Prev, End_State); - Add_Empty_Char (End_Next, End_State); - end; - - when Open_Bracket => - Current_State := Current_State + 1; - - declare - Next_State : State_Index := Current_State + 1; - - begin - J := J + 1; - - if S (J) = '^' then - J := J + 1; - - Next_State := 0; - - for Column in 0 .. Alphabet_Size loop - Set (Table, Current_State, Column, - Value => Current_State + 1); - end loop; - end if; - - -- Automatically add the first character - - if S (J) = '-' or S (J) = ']' then - Set (Table, Current_State, Map (S (J)), - Value => Next_State); - J := J + 1; - end if; - - -- Loop till closing bracket found - - loop - exit when S (J) = Close_Bracket; - - if S (J) = '-' - and then S (J + 1) /= ']' - then - declare - Start : constant Integer := J - 1; - - begin - J := J + 1; - - if S (J) = '\' then - J := J + 1; - end if; - - for Char in S (Start) .. S (J) loop - Set (Table, Current_State, Map (Char), - Value => Next_State); - end loop; - end; - - else - if S (J) = '\' then - J := J + 1; - end if; - - Set (Table, Current_State, Map (S (J)), - Value => Next_State); - end if; - J := J + 1; - end loop; - end; - - Current_State := Current_State + 1; - - -- If the next symbol is a special symbol - - if J < End_Index - and then (S (J + 1) = '*' or else - S (J + 1) = '+' or else - S (J + 1) = '?') - then - J := J + 1; - Create_Repetition - (S (J), - Current_State - 1, - Current_State, - Last_Start, - End_State); - - else - Last_Start := Current_State - 1; - - if End_State /= 0 then - Add_Empty_Char (End_State, Last_Start); - end if; - - End_State := Current_State; - end if; - - when '*' | '+' | '?' | Close_Paren | Close_Bracket => - Raise_Exception - ("Incorrect character in regular expression :", J); - - when others => - Current_State := Current_State + 1; - - -- Create the state for the symbol S (J) - - if S (J) = '.' then - for K in 0 .. Alphabet_Size loop - Set (Table, Current_State, K, - Value => Current_State + 1); - end loop; - - else - if S (J) = '\' then - J := J + 1; - end if; - - Set (Table, Current_State, Map (S (J)), - Value => Current_State + 1); - end if; - - Current_State := Current_State + 1; - - -- If the next symbol is a special symbol - - if J < End_Index - and then (S (J + 1) = '*' or else - S (J + 1) = '+' or else - S (J + 1) = '?') - then - J := J + 1; - Create_Repetition - (S (J), - Current_State - 1, - Current_State, - Last_Start, - End_State); - - else - Last_Start := Current_State - 1; - - if End_State /= 0 then - Add_Empty_Char (End_State, Last_Start); - end if; - - End_State := Current_State; - end if; - - end case; - - if Start_State = 0 then - Start_State := Last_Start; - end if; - - J := J + 1; - end loop; - end Create_Simple; - - ------------------------- - -- Next_Sub_Expression -- - ------------------------- - - function Next_Sub_Expression - (Start_Index : Integer; - End_Index : Integer) - return Integer - is - J : Integer := Start_Index; - Start_On_Alter : Boolean := False; - - begin - if S (J) = '|' then - Start_On_Alter := True; - end if; - - loop - exit when J = End_Index; - J := J + 1; - - case S (J) is - when '\' => - J := J + 1; - - when Open_Bracket => - loop - J := J + 1; - exit when S (J) = Close_Bracket; - - if S (J) = '\' then - J := J + 1; - end if; - end loop; - - when Open_Paren => - J := Next_Sub_Expression (J, End_Index); - - when Close_Paren => - return J; - - when '|' => - if Start_On_Alter then - return J - 1; - end if; - - when others => - null; - end case; - end loop; - - return J; - end Next_Sub_Expression; - - -- Start of Create_Primary_Table - - begin - Table.all := (others => (others => 0)); - Create_Simple (S'First, S'Last, Start_State, End_State); - Num_States := Current_State; - end Create_Primary_Table; - - ------------------------------- - -- Create_Primary_Table_Glob -- - ------------------------------- - - procedure Create_Primary_Table_Glob - (Table : out Regexp_Array_Access; - Num_States : out State_Index; - Start_State : out State_Index; - End_State : out State_Index) - is - Empty_Char : constant Column_Index := Alphabet_Size + 1; - - Current_State : State_Index := 0; - -- Index of the last created state - - procedure Add_Empty_Char - (State : State_Index; - To_State : State_Index); - -- Add a empty-character transition from State to To_State - - procedure Create_Simple - (Start_Index : Integer; - End_Index : Integer; - Start_State : out State_Index; - End_State : out State_Index); - -- Fill the table for the S (Start_Index .. End_Index). - -- This is the recursive procedure called to handle () expressions - - -------------------- - -- Add_Empty_Char -- - -------------------- - - procedure Add_Empty_Char - (State : State_Index; - To_State : State_Index) - is - J : Column_Index := Empty_Char; - - begin - while Get (Table, State, J) /= 0 loop - J := J + 1; - end loop; - - Set (Table, State, J, - Value => To_State); - end Add_Empty_Char; - - ------------------- - -- Create_Simple -- - ------------------- - - procedure Create_Simple - (Start_Index : Integer; - End_Index : Integer; - Start_State : out State_Index; - End_State : out State_Index) - is - J : Integer := Start_Index; - Last_Start : State_Index := 0; - - begin - Start_State := 0; - End_State := 0; - - while J <= End_Index loop - case S (J) is - - when Open_Bracket => - Current_State := Current_State + 1; - - declare - Next_State : State_Index := Current_State + 1; - - begin - J := J + 1; - - if S (J) = '^' then - J := J + 1; - Next_State := 0; - - for Column in 0 .. Alphabet_Size loop - Set (Table, Current_State, Column, - Value => Current_State + 1); - end loop; - end if; - - -- Automatically add the first character - - if S (J) = '-' or S (J) = ']' then - Set (Table, Current_State, Map (S (J)), - Value => Current_State); - J := J + 1; - end if; - - -- Loop till closing bracket found - - loop - exit when S (J) = Close_Bracket; - - if S (J) = '-' - and then S (J + 1) /= ']' - then - declare - Start : constant Integer := J - 1; - begin - J := J + 1; - - if S (J) = '\' then - J := J + 1; - end if; - - for Char in S (Start) .. S (J) loop - Set (Table, Current_State, Map (Char), - Value => Next_State); - end loop; - end; - - else - if S (J) = '\' then - J := J + 1; - end if; - - Set (Table, Current_State, Map (S (J)), - Value => Next_State); - end if; - J := J + 1; - end loop; - end; - - Last_Start := Current_State; - Current_State := Current_State + 1; - - if End_State /= 0 then - Add_Empty_Char (End_State, Last_Start); - end if; - - End_State := Current_State; - - when '{' => - declare - End_Sub : Integer; - Start_Regexp_Sub : State_Index; - End_Regexp_Sub : State_Index; - Create_Start : State_Index := 0; - - Create_End : State_Index := 0; - -- Initialized to avoid junk warning - - begin - while S (J) /= '}' loop - - -- First step : find sub pattern - - End_Sub := J + 1; - while S (End_Sub) /= ',' - and then S (End_Sub) /= '}' - loop - End_Sub := End_Sub + 1; - end loop; - - -- Second step : create a sub pattern - - Create_Simple - (J + 1, - End_Sub - 1, - Start_Regexp_Sub, - End_Regexp_Sub); - - J := End_Sub; - - -- Third step : create an alternative - - if Create_Start = 0 then - Current_State := Current_State + 1; - Create_Start := Current_State; - Add_Empty_Char (Create_Start, Start_Regexp_Sub); - Current_State := Current_State + 1; - Create_End := Current_State; - Add_Empty_Char (End_Regexp_Sub, Create_End); - - else - Current_State := Current_State + 1; - Add_Empty_Char (Current_State, Create_Start); - Create_Start := Current_State; - Add_Empty_Char (Create_Start, Start_Regexp_Sub); - Add_Empty_Char (End_Regexp_Sub, Create_End); - end if; - end loop; - - if End_State /= 0 then - Add_Empty_Char (End_State, Create_Start); - end if; - - End_State := Create_End; - Last_Start := Create_Start; - end; - - when '*' => - Current_State := Current_State + 1; - - if End_State /= 0 then - Add_Empty_Char (End_State, Current_State); - end if; - - Add_Empty_Char (Current_State, Current_State + 1); - Add_Empty_Char (Current_State, Current_State + 3); - Last_Start := Current_State; - - Current_State := Current_State + 1; - - for K in 0 .. Alphabet_Size loop - Set (Table, Current_State, K, - Value => Current_State + 1); - end loop; - - Current_State := Current_State + 1; - Add_Empty_Char (Current_State, Current_State + 1); - - Current_State := Current_State + 1; - Add_Empty_Char (Current_State, Last_Start); - End_State := Current_State; - - when others => - Current_State := Current_State + 1; - - if S (J) = '?' then - for K in 0 .. Alphabet_Size loop - Set (Table, Current_State, K, - Value => Current_State + 1); - end loop; - - else - if S (J) = '\' then - J := J + 1; - end if; - - -- Create the state for the symbol S (J) - - Set (Table, Current_State, Map (S (J)), - Value => Current_State + 1); - end if; - - Last_Start := Current_State; - Current_State := Current_State + 1; - - if End_State /= 0 then - Add_Empty_Char (End_State, Last_Start); - end if; - - End_State := Current_State; - - end case; - - if Start_State = 0 then - Start_State := Last_Start; - end if; - - J := J + 1; - end loop; - end Create_Simple; - - -- Start of processing for Create_Primary_Table_Glob - - begin - Table.all := (others => (others => 0)); - Create_Simple (S'First, S'Last, Start_State, End_State); - Num_States := Current_State; - end Create_Primary_Table_Glob; - - ---------------------------- - -- Create_Secondary_Table -- - ---------------------------- - - function Create_Secondary_Table - (First_Table : Regexp_Array_Access; - Num_States : State_Index; - Start_State : State_Index; - End_State : State_Index) return Regexp - is - pragma Warnings (Off, Num_States); - - Last_Index : constant State_Index := First_Table'Last (1); - type Meta_State is array (1 .. Last_Index) of Boolean; - - Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) := - (others => (others => 0)); - - Meta_States : array (1 .. Last_Index + 1) of Meta_State := - (others => (others => False)); - - Temp_State_Not_Null : Boolean; - - Is_Final : Boolean_Array (1 .. Last_Index) := (others => False); - - Current_State : State_Index := 1; - Nb_State : State_Index := 1; - - procedure Closure - (State : in out Meta_State; - Item : State_Index); - -- Compute the closure of the state (that is every other state which - -- has a empty-character transition) and add it to the state - - ------------- - -- Closure -- - ------------- - - procedure Closure - (State : in out Meta_State; - Item : State_Index) - is - begin - if State (Item) then - return; - end if; - - State (Item) := True; - - for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop - if First_Table (Item, Column) = 0 then - return; - end if; - - Closure (State, First_Table (Item, Column)); - end loop; - end Closure; - - -- Start of procesing for Create_Secondary_Table - - begin - -- Create a new state - - Closure (Meta_States (Current_State), Start_State); - - while Current_State <= Nb_State loop - - -- If this new meta-state includes the primary table end state, - -- then this meta-state will be a final state in the regexp - - if Meta_States (Current_State)(End_State) then - Is_Final (Current_State) := True; - end if; - - -- For every character in the regexp, calculate the possible - -- transitions from Current_State - - for Column in 0 .. Alphabet_Size loop - Meta_States (Nb_State + 1) := (others => False); - Temp_State_Not_Null := False; - - for K in Meta_States (Current_State)'Range loop - if Meta_States (Current_State)(K) - and then First_Table (K, Column) /= 0 - then - Closure - (Meta_States (Nb_State + 1), First_Table (K, Column)); - Temp_State_Not_Null := True; - end if; - end loop; - - -- If at least one transition existed - - if Temp_State_Not_Null then - - -- Check if this new state corresponds to an old one - - for K in 1 .. Nb_State loop - if Meta_States (K) = Meta_States (Nb_State + 1) then - Table (Current_State, Column) := K; - exit; - end if; - end loop; - - -- If not, create a new state - - if Table (Current_State, Column) = 0 then - Nb_State := Nb_State + 1; - Table (Current_State, Column) := Nb_State; - end if; - end if; - end loop; - - Current_State := Current_State + 1; - end loop; - - -- Returns the regexp - - declare - R : Regexp_Access; - - begin - R := new Regexp_Value (Alphabet_Size => Alphabet_Size, - Num_States => Nb_State); - R.Map := Map; - R.Is_Final := Is_Final (1 .. Nb_State); - R.Case_Sensitive := Case_Sensitive; - - for State in 1 .. Nb_State loop - for K in 0 .. Alphabet_Size loop - R.States (State, K) := Table (State, K); - end loop; - end loop; - - return (Ada.Finalization.Controlled with R => R); - end; - end Create_Secondary_Table; - - --------------------- - -- Raise_Exception -- - --------------------- - - procedure Raise_Exception - (M : String; - Index : Integer) - is - begin - Ada.Exceptions.Raise_Exception - (Error_In_Regexp'Identity, M & " at offset " & Index'Img); - end Raise_Exception; - - -- Start of processing for Compile - - begin - -- Special case for the empty string: it always matches, and the - -- following processing would fail on it. - if S = "" then - return (Ada.Finalization.Controlled with - R => new Regexp_Value' - (Alphabet_Size => 0, - Num_States => 1, - Map => (others => 0), - States => (others => (others => 1)), - Is_Final => (others => True), - Case_Sensitive => True)); - end if; - - if not Case_Sensitive then - GNAT.Case_Util.To_Lower (S); - end if; - - Create_Mapping; - - -- Creates the primary table - - declare - Table : Regexp_Array_Access; - Num_States : State_Index; - Start_State : State_Index; - End_State : State_Index; - R : Regexp; - - begin - Table := new Regexp_Array (1 .. 100, - 0 .. Alphabet_Size + 10); - if not Glob then - Create_Primary_Table (Table, Num_States, Start_State, End_State); - else - Create_Primary_Table_Glob - (Table, Num_States, Start_State, End_State); - end if; - - -- Creates the secondary table - - R := Create_Secondary_Table - (Table, Num_States, Start_State, End_State); - Free (Table); - return R; - end; - end Compile; - - -------------- - -- Finalize -- - -------------- - - procedure Finalize (R : in out Regexp) is - procedure Free is new - Unchecked_Deallocation (Regexp_Value, Regexp_Access); - - begin - Free (R.R); - end Finalize; - - --------- - -- Get -- - --------- - - function Get - (Table : Regexp_Array_Access; - State : State_Index; - Column : Column_Index) return State_Index - is - begin - if State <= Table'Last (1) - and then Column <= Table'Last (2) - then - return Table (State, Column); - else - return 0; - end if; - end Get; - - ----------- - -- Match -- - ----------- - - function Match (S : String; R : Regexp) return Boolean is - Current_State : State_Index := 1; - - begin - if R.R = null then - raise Constraint_Error; - end if; - - for Char in S'Range loop - - if R.R.Case_Sensitive then - Current_State := R.R.States (Current_State, R.R.Map (S (Char))); - else - Current_State := - R.R.States (Current_State, - R.R.Map (GNAT.Case_Util.To_Lower (S (Char)))); - end if; - - if Current_State = 0 then - return False; - end if; - - end loop; - - return R.R.Is_Final (Current_State); - end Match; - - --------- - -- Set -- - --------- - - procedure Set - (Table : in out Regexp_Array_Access; - State : State_Index; - Column : Column_Index; - Value : State_Index) - is - New_Lines : State_Index; - New_Columns : Column_Index; - New_Table : Regexp_Array_Access; - - begin - if State <= Table'Last (1) - and then Column <= Table'Last (2) - then - Table (State, Column) := Value; - else - -- Doubles the size of the table until it is big enough that - -- (State, Column) is a valid index - - New_Lines := Table'Last (1) * (State / Table'Last (1) + 1); - New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1); - New_Table := new Regexp_Array (Table'First (1) .. New_Lines, - Table'First (2) .. New_Columns); - New_Table.all := (others => (others => 0)); - - for J in Table'Range (1) loop - for K in Table'Range (2) loop - New_Table (J, K) := Table (J, K); - end loop; - end loop; - - Free (Table); - Table := New_Table; - Table (State, Column) := Value; - end if; - end Set; - -end GNAT.Regexp; +pragma No_Body; |