summaryrefslogtreecommitdiff
path: root/gcc/ada/s-wchstw.adb
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/ada/s-wchstw.adb')
-rw-r--r--gcc/ada/s-wchstw.adb269
1 files changed, 111 insertions, 158 deletions
diff --git a/gcc/ada/s-wchstw.adb b/gcc/ada/s-wchstw.adb
index 6e8d5cb7b72..0deb55631e2 100644
--- a/gcc/ada/s-wchstw.adb
+++ b/gcc/ada/s-wchstw.adb
@@ -6,7 +6,7 @@
-- --
-- B o d y --
-- --
--- Copyright (C) 1992-2000 Free Software Foundation, Inc. --
+-- Copyright (C) 1992-2005 Free Software Foundation, Inc. --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
@@ -31,189 +31,142 @@
-- --
------------------------------------------------------------------------------
-with Interfaces; use Interfaces;
with System.WCh_Con; use System.WCh_Con;
-with System.WCh_JIS; use System.WCh_JIS;
+with System.WCh_Cnv; use System.WCh_Cnv;
package body System.WCh_StW is
- ---------------------------
- -- String_To_Wide_String --
- ---------------------------
-
- function String_To_Wide_String
- (S : String;
- EM : WC_Encoding_Method)
- return Wide_String
+ -----------------------
+ -- Local Subprograms --
+ -----------------------
+
+ procedure Get_Next_Code
+ (S : String;
+ P : in out Natural;
+ V : out UTF_32_Code;
+ EM : WC_Encoding_Method);
+ -- Scans next character starting at S(P) and returns its value in V. On
+ -- exit P is updated past the last character read. Raises Constraint_Error
+ -- if the string is not well formed. Raises Constraint_Error if the code
+ -- value is greater than 16#7FFF_FFFF#. On entry P <= S'Last.
+
+ -------------------
+ -- Get_Next_Code --
+ -------------------
+
+ procedure Get_Next_Code
+ (S : String;
+ P : in out Natural;
+ V : out UTF_32_Code;
+ EM : WC_Encoding_Method)
is
- R : Wide_String (1 .. S'Length);
- RP : Natural;
- SP : Natural;
- U1 : Unsigned_16;
- U2 : Unsigned_16;
- U3 : Unsigned_16;
- U : Unsigned_16;
-
- Last : constant Natural := S'Last;
+ function In_Char return Character;
+ -- Function to return a character, bumping P, raises Constraint_Error
+ -- if P > S'Last on entry.
- function Get_Hex (C : Character) return Unsigned_16;
- -- Converts character from hex digit to value in range 0-15. The
- -- input must be in 0-9, A-F, or a-f, and no check is needed.
+ function Get_UTF_32 is new Char_Sequence_To_UTF_32 (In_Char);
+ -- Function to get next UFT_32 value.
- procedure Get_Hex_4;
- -- Translates four hex characters starting at S (SP) to a single
- -- wide character. Used in WCEM_Hex and WCEM_Brackets mode. SP
- -- is not modified by the call. The resulting wide character value
- -- is stored in R (RP). RP is not modified by the call.
+ -------------
+ -- In_Char --
+ -------------
- function Get_Hex (C : Character) return Unsigned_16 is
+ function In_Char return Character is
begin
- if C in '0' .. '9' then
- return Character'Pos (C) - Character'Pos ('0');
- elsif C in 'A' .. 'F' then
- return Character'Pos (C) - Character'Pos ('A') + 10;
+ if P > S'Last then
+ raise Constraint_Error;
else
- return Character'Pos (C) - Character'Pos ('a') + 10;
+ P := P + 1;
+ return S (P - 1);
end if;
- end Get_Hex;
+ end In_Char;
- procedure Get_Hex_4 is
- begin
- R (RP) := Wide_Character'Val (
- Get_Hex (S (SP + 3)) + 16 *
- (Get_Hex (S (SP + 2)) + 16 *
- (Get_Hex (S (SP + 1)) + 16 *
- (Get_Hex (S (SP + 0))))));
- end Get_Hex_4;
+ begin
+ -- Check for wide character encoding
- -- Start of processing for String_To_Wide_String
+ case EM is
+ when WCEM_Hex =>
+ if S (P) = ASCII.ESC then
+ V := Get_UTF_32 (In_Char, EM);
+ return;
+ end if;
+
+ when WCEM_Upper | WCEM_Shift_JIS | WCEM_EUC | WCEM_UTF8 =>
+ if S (P) >= Character'Val (16#80#) then
+ V := Get_UTF_32 (In_Char, EM);
+ return;
+ end if;
+
+ when WCEM_Brackets =>
+ if P + 2 <= S'Last
+ and then S (P) = '['
+ and then S (P + 1) = '"'
+ and then S (P + 2) /= '"'
+ then
+ V := Get_UTF_32 (In_Char, EM);
+ return;
+ end if;
+ end case;
+
+ -- If it is not a wide character code, just get it
+
+ V := Character'Pos (S (P));
+ P := P + 1;
+ end Get_Next_Code;
+
+ ---------------------------
+ -- String_To_Wide_String --
+ ---------------------------
+
+ function String_To_Wide_String
+ (S : String;
+ EM : WC_Encoding_Method) return Wide_String
+ is
+ R : Wide_String (1 .. S'Length);
+ RP : Natural;
+ SP : Natural;
+ V : UTF_32_Code;
begin
SP := S'First;
RP := 0;
+ while SP <= S'Last loop
+ Get_Next_Code (S, SP, V, EM);
- case EM is
+ if V > 16#FFFF# then
+ raise Constraint_Error;
+ end if;
- -- ESC-Hex representation
+ RP := RP + 1;
+ R (RP) := Wide_Character'Val (V);
+ end loop;
- when WCEM_Hex =>
- while SP <= Last - 4 loop
- RP := RP + 1;
-
- if S (SP) = ASCII.ESC then
- SP := SP + 1;
- Get_Hex_4;
- SP := SP + 4;
- else
- R (RP) := Wide_Character'Val (Character'Pos (S (SP)));
- SP := SP + 1;
- end if;
- end loop;
-
- -- Upper bit shift, internal code = external code
-
- when WCEM_Upper =>
- while SP < Last loop
- RP := RP + 1;
-
- if S (SP) >= Character'Val (16#80#) then
- U1 := Character'Pos (S (SP));
- U2 := Character'Pos (S (SP + 1));
- R (RP) := Wide_Character'Val (256 * U1 + U2);
- SP := SP + 2;
- else
- R (RP) := Wide_Character'Val (Character'Pos (S (SP)));
- SP := SP + 1;
- end if;
- end loop;
-
- -- Upper bit shift, shift-JIS
-
- when WCEM_Shift_JIS =>
- while SP < Last loop
- RP := RP + 1;
-
- if S (SP) >= Character'Val (16#80#) then
- R (RP) := Shift_JIS_To_JIS (S (SP), S (SP + 1));
- SP := SP + 2;
- else
- R (RP) := Wide_Character'Val (Character'Pos (S (SP)));
- SP := SP + 1;
- end if;
- end loop;
-
- -- Upper bit shift, EUC
-
- when WCEM_EUC =>
- while SP < Last loop
- RP := RP + 1;
-
- if S (SP) >= Character'Val (16#80#) then
- R (RP) := EUC_To_JIS (S (SP), S (SP + 1));
- SP := SP + 2;
- else
- R (RP) := Wide_Character'Val (Character'Pos (S (SP)));
- SP := SP + 1;
- end if;
- end loop;
-
- -- Upper bit shift, UTF-8
-
- when WCEM_UTF8 =>
- while SP < Last loop
- RP := RP + 1;
-
- if S (SP) >= Character'Val (16#80#) then
- U1 := Character'Pos (S (SP));
- U2 := Character'Pos (S (SP + 1));
-
- U := Shift_Left (U1 and 2#00011111#, 6) +
- (U2 and 2#00111111#);
- SP := SP + 2;
-
- if U1 >= 2#11100000# then
- U3 := Character'Pos (S (SP));
- U := Shift_Left (U, 6) + (U3 and 2#00111111#);
- SP := SP + 1;
- end if;
-
- R (RP) := Wide_Character'Val (U);
-
- else
- R (RP) := Wide_Character'Val (Character'Pos (S (SP)));
- SP := SP + 1;
- end if;
- end loop;
-
- -- Brackets representation
+ return R (1 .. RP);
+ end String_To_Wide_String;
- when WCEM_Brackets =>
- while SP <= Last - 7 loop
- RP := RP + 1;
-
- if S (SP) = '['
- and then S (SP + 1) = '"'
- and then S (SP + 2) /= '"'
- then
- SP := SP + 2;
- Get_Hex_4;
- SP := SP + 6;
-
- else
- R (RP) := Wide_Character'Val (Character'Pos (S (SP)));
- SP := SP + 1;
- end if;
- end loop;
+ --------------------------------
+ -- String_To_Wide_Wide_String --
+ --------------------------------
- end case;
+ function String_To_Wide_Wide_String
+ (S : String;
+ EM : WC_Encoding_Method) return Wide_Wide_String
+ is
+ R : Wide_Wide_String (1 .. S'Length);
+ RP : Natural;
+ SP : Natural;
+ V : UTF_32_Code;
- while SP <= Last loop
+ begin
+ SP := S'First;
+ RP := 0;
+ while SP <= S'Last loop
+ Get_Next_Code (S, SP, V, EM);
RP := RP + 1;
- R (RP) := Wide_Character'Val (Character'Pos (S (SP)));
- SP := SP + 1;
+ R (RP) := Wide_Wide_Character'Val (V);
end loop;
return R (1 .. RP);
- end String_To_Wide_String;
+ end String_To_Wide_Wide_String;
end System.WCh_StW;