diff options
Diffstat (limited to 'gcc/ada/s-wchstw.adb')
-rw-r--r-- | gcc/ada/s-wchstw.adb | 269 |
1 files changed, 111 insertions, 158 deletions
diff --git a/gcc/ada/s-wchstw.adb b/gcc/ada/s-wchstw.adb index 6e8d5cb7b72..0deb55631e2 100644 --- a/gcc/ada/s-wchstw.adb +++ b/gcc/ada/s-wchstw.adb @@ -6,7 +6,7 @@ -- -- -- B o d y -- -- -- --- Copyright (C) 1992-2000 Free Software Foundation, Inc. -- +-- Copyright (C) 1992-2005 Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -31,189 +31,142 @@ -- -- ------------------------------------------------------------------------------ -with Interfaces; use Interfaces; with System.WCh_Con; use System.WCh_Con; -with System.WCh_JIS; use System.WCh_JIS; +with System.WCh_Cnv; use System.WCh_Cnv; package body System.WCh_StW is - --------------------------- - -- String_To_Wide_String -- - --------------------------- - - function String_To_Wide_String - (S : String; - EM : WC_Encoding_Method) - return Wide_String + ----------------------- + -- Local Subprograms -- + ----------------------- + + procedure Get_Next_Code + (S : String; + P : in out Natural; + V : out UTF_32_Code; + EM : WC_Encoding_Method); + -- Scans next character starting at S(P) and returns its value in V. On + -- exit P is updated past the last character read. Raises Constraint_Error + -- if the string is not well formed. Raises Constraint_Error if the code + -- value is greater than 16#7FFF_FFFF#. On entry P <= S'Last. + + ------------------- + -- Get_Next_Code -- + ------------------- + + procedure Get_Next_Code + (S : String; + P : in out Natural; + V : out UTF_32_Code; + EM : WC_Encoding_Method) is - R : Wide_String (1 .. S'Length); - RP : Natural; - SP : Natural; - U1 : Unsigned_16; - U2 : Unsigned_16; - U3 : Unsigned_16; - U : Unsigned_16; - - Last : constant Natural := S'Last; + function In_Char return Character; + -- Function to return a character, bumping P, raises Constraint_Error + -- if P > S'Last on entry. - function Get_Hex (C : Character) return Unsigned_16; - -- Converts character from hex digit to value in range 0-15. The - -- input must be in 0-9, A-F, or a-f, and no check is needed. + function Get_UTF_32 is new Char_Sequence_To_UTF_32 (In_Char); + -- Function to get next UFT_32 value. - procedure Get_Hex_4; - -- Translates four hex characters starting at S (SP) to a single - -- wide character. Used in WCEM_Hex and WCEM_Brackets mode. SP - -- is not modified by the call. The resulting wide character value - -- is stored in R (RP). RP is not modified by the call. + ------------- + -- In_Char -- + ------------- - function Get_Hex (C : Character) return Unsigned_16 is + function In_Char return Character is begin - if C in '0' .. '9' then - return Character'Pos (C) - Character'Pos ('0'); - elsif C in 'A' .. 'F' then - return Character'Pos (C) - Character'Pos ('A') + 10; + if P > S'Last then + raise Constraint_Error; else - return Character'Pos (C) - Character'Pos ('a') + 10; + P := P + 1; + return S (P - 1); end if; - end Get_Hex; + end In_Char; - procedure Get_Hex_4 is - begin - R (RP) := Wide_Character'Val ( - Get_Hex (S (SP + 3)) + 16 * - (Get_Hex (S (SP + 2)) + 16 * - (Get_Hex (S (SP + 1)) + 16 * - (Get_Hex (S (SP + 0)))))); - end Get_Hex_4; + begin + -- Check for wide character encoding - -- Start of processing for String_To_Wide_String + case EM is + when WCEM_Hex => + if S (P) = ASCII.ESC then + V := Get_UTF_32 (In_Char, EM); + return; + end if; + + when WCEM_Upper | WCEM_Shift_JIS | WCEM_EUC | WCEM_UTF8 => + if S (P) >= Character'Val (16#80#) then + V := Get_UTF_32 (In_Char, EM); + return; + end if; + + when WCEM_Brackets => + if P + 2 <= S'Last + and then S (P) = '[' + and then S (P + 1) = '"' + and then S (P + 2) /= '"' + then + V := Get_UTF_32 (In_Char, EM); + return; + end if; + end case; + + -- If it is not a wide character code, just get it + + V := Character'Pos (S (P)); + P := P + 1; + end Get_Next_Code; + + --------------------------- + -- String_To_Wide_String -- + --------------------------- + + function String_To_Wide_String + (S : String; + EM : WC_Encoding_Method) return Wide_String + is + R : Wide_String (1 .. S'Length); + RP : Natural; + SP : Natural; + V : UTF_32_Code; begin SP := S'First; RP := 0; + while SP <= S'Last loop + Get_Next_Code (S, SP, V, EM); - case EM is + if V > 16#FFFF# then + raise Constraint_Error; + end if; - -- ESC-Hex representation + RP := RP + 1; + R (RP) := Wide_Character'Val (V); + end loop; - when WCEM_Hex => - while SP <= Last - 4 loop - RP := RP + 1; - - if S (SP) = ASCII.ESC then - SP := SP + 1; - Get_Hex_4; - SP := SP + 4; - else - R (RP) := Wide_Character'Val (Character'Pos (S (SP))); - SP := SP + 1; - end if; - end loop; - - -- Upper bit shift, internal code = external code - - when WCEM_Upper => - while SP < Last loop - RP := RP + 1; - - if S (SP) >= Character'Val (16#80#) then - U1 := Character'Pos (S (SP)); - U2 := Character'Pos (S (SP + 1)); - R (RP) := Wide_Character'Val (256 * U1 + U2); - SP := SP + 2; - else - R (RP) := Wide_Character'Val (Character'Pos (S (SP))); - SP := SP + 1; - end if; - end loop; - - -- Upper bit shift, shift-JIS - - when WCEM_Shift_JIS => - while SP < Last loop - RP := RP + 1; - - if S (SP) >= Character'Val (16#80#) then - R (RP) := Shift_JIS_To_JIS (S (SP), S (SP + 1)); - SP := SP + 2; - else - R (RP) := Wide_Character'Val (Character'Pos (S (SP))); - SP := SP + 1; - end if; - end loop; - - -- Upper bit shift, EUC - - when WCEM_EUC => - while SP < Last loop - RP := RP + 1; - - if S (SP) >= Character'Val (16#80#) then - R (RP) := EUC_To_JIS (S (SP), S (SP + 1)); - SP := SP + 2; - else - R (RP) := Wide_Character'Val (Character'Pos (S (SP))); - SP := SP + 1; - end if; - end loop; - - -- Upper bit shift, UTF-8 - - when WCEM_UTF8 => - while SP < Last loop - RP := RP + 1; - - if S (SP) >= Character'Val (16#80#) then - U1 := Character'Pos (S (SP)); - U2 := Character'Pos (S (SP + 1)); - - U := Shift_Left (U1 and 2#00011111#, 6) + - (U2 and 2#00111111#); - SP := SP + 2; - - if U1 >= 2#11100000# then - U3 := Character'Pos (S (SP)); - U := Shift_Left (U, 6) + (U3 and 2#00111111#); - SP := SP + 1; - end if; - - R (RP) := Wide_Character'Val (U); - - else - R (RP) := Wide_Character'Val (Character'Pos (S (SP))); - SP := SP + 1; - end if; - end loop; - - -- Brackets representation + return R (1 .. RP); + end String_To_Wide_String; - when WCEM_Brackets => - while SP <= Last - 7 loop - RP := RP + 1; - - if S (SP) = '[' - and then S (SP + 1) = '"' - and then S (SP + 2) /= '"' - then - SP := SP + 2; - Get_Hex_4; - SP := SP + 6; - - else - R (RP) := Wide_Character'Val (Character'Pos (S (SP))); - SP := SP + 1; - end if; - end loop; + -------------------------------- + -- String_To_Wide_Wide_String -- + -------------------------------- - end case; + function String_To_Wide_Wide_String + (S : String; + EM : WC_Encoding_Method) return Wide_Wide_String + is + R : Wide_Wide_String (1 .. S'Length); + RP : Natural; + SP : Natural; + V : UTF_32_Code; - while SP <= Last loop + begin + SP := S'First; + RP := 0; + while SP <= S'Last loop + Get_Next_Code (S, SP, V, EM); RP := RP + 1; - R (RP) := Wide_Character'Val (Character'Pos (S (SP))); - SP := SP + 1; + R (RP) := Wide_Wide_Character'Val (V); end loop; return R (1 .. RP); - end String_To_Wide_String; + end String_To_Wide_Wide_String; end System.WCh_StW; |