summaryrefslogtreecommitdiff
path: root/gcc/ada/s-wchcnv.adb
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/ada/s-wchcnv.adb')
-rw-r--r--gcc/ada/s-wchcnv.adb330
1 files changed, 243 insertions, 87 deletions
diff --git a/gcc/ada/s-wchcnv.adb b/gcc/ada/s-wchcnv.adb
index 3da16f854ea..77ee233b70f 100644
--- a/gcc/ada/s-wchcnv.adb
+++ b/gcc/ada/s-wchcnv.adb
@@ -6,7 +6,7 @@
-- --
-- B o d y --
-- --
--- Copyright (C) 1992-2001 Free Software Foundation, Inc. --
+-- Copyright (C) 1992-2005 Free Software Foundation, Inc. --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
@@ -41,54 +41,70 @@ with System.WCh_JIS; use System.WCh_JIS;
package body System.WCh_Cnv is
- --------------------------------
- -- Char_Sequence_To_Wide_Char --
- --------------------------------
+ -----------------------------
+ -- Char_Sequence_To_UTF_32 --
+ -----------------------------
- function Char_Sequence_To_Wide_Char
- (C : Character;
- EM : WC_Encoding_Method)
- return Wide_Character
+ function Char_Sequence_To_UTF_32
+ (C : Character;
+ EM : WC_Encoding_Method) return UTF_32_Code
is
- B1 : Integer;
+ B1 : Unsigned_32;
C1 : Character;
- U : Unsigned_16;
- W : Unsigned_16;
+ U : Unsigned_32;
+ W : Unsigned_32;
procedure Get_Hex (N : Character);
-- If N is a hex character, then set B1 to 16 * B1 + character N.
-- Raise Constraint_Error if character N is not a hex character.
+ procedure Get_UTF_Byte;
+ pragma Inline (Get_UTF_Byte);
+ -- Used to interpret a 2#10xxxxxx# continuation byte in UTF-8 mode.
+ -- Reads a byte, and raises CE if the first two bits are not 10.
+ -- Otherwise shifts W 6 bits left and or's in the 6 xxxxxx bits.
+
-------------
-- Get_Hex --
-------------
procedure Get_Hex (N : Character) is
- B2 : constant Integer := Character'Pos (N);
-
+ B2 : constant Unsigned_32 := Character'Pos (N);
begin
if B2 in Character'Pos ('0') .. Character'Pos ('9') then
B1 := B1 * 16 + B2 - Character'Pos ('0');
-
elsif B2 in Character'Pos ('A') .. Character'Pos ('F') then
B1 := B1 * 16 + B2 - (Character'Pos ('A') - 10);
-
elsif B2 in Character'Pos ('a') .. Character'Pos ('f') then
B1 := B1 * 16 + B2 - (Character'Pos ('a') - 10);
-
else
raise Constraint_Error;
end if;
end Get_Hex;
- -- Start of processing for Char_Sequence_To_Wide_Char
+ ------------------
+ -- Get_UTF_Byte --
+ ------------------
+
+ procedure Get_UTF_Byte is
+ begin
+ U := Unsigned_32 (Character'Pos (In_Char));
+
+ if (U and 2#11000000#) /= 2#10_000000# then
+ raise Constraint_Error;
+ end if;
+
+ W := Shift_Left (W, 6) or (U and 2#00111111#);
+ end Get_UTF_Byte;
+
+ -- Start of processing for Char_Sequence_To_UTF_32
begin
case EM is
when WCEM_Hex =>
if C /= ASCII.ESC then
- return Wide_Character'Val (Character'Pos (C));
+ return Character'Pos (C);
else
B1 := 0;
@@ -97,82 +113,106 @@ package body System.WCh_Cnv is
Get_Hex (In_Char);
Get_Hex (In_Char);
- return Wide_Character'Val (B1);
+ return UTF_32_Code (B1);
end if;
when WCEM_Upper =>
if C > ASCII.DEL then
- return
- Wide_Character'Val
- (Integer (256 * Character'Pos (C)) +
- Character'Pos (In_Char));
+ return 256 * Character'Pos (C) + Character'Pos (In_Char);
else
- return Wide_Character'Val (Character'Pos (C));
+ return Character'Pos (C);
end if;
when WCEM_Shift_JIS =>
if C > ASCII.DEL then
- return Shift_JIS_To_JIS (C, In_Char);
+ return Wide_Character'Pos (Shift_JIS_To_JIS (C, In_Char));
else
- return Wide_Character'Val (Character'Pos (C));
+ return Character'Pos (C);
end if;
when WCEM_EUC =>
if C > ASCII.DEL then
- return EUC_To_JIS (C, In_Char);
+ return Wide_Character'Pos (EUC_To_JIS (C, In_Char));
else
- return Wide_Character'Val (Character'Pos (C));
+ return Character'Pos (C);
end if;
when WCEM_UTF8 =>
- if C > ASCII.DEL then
- -- 16#0080#-16#07ff#: 2#110xxxxx# 2#10xxxxxx#
- -- 16#0800#-16#ffff#: 2#1110xxxx# 2#10xxxxxx# 2#10xxxxxx#
+ -- Note: for details of UTF8 encoding see RFC 3629
- U := Unsigned_16 (Character'Pos (C));
+ U := Unsigned_32 (Character'Pos (C));
- if (U and 2#11100000#) = 2#11000000# then
- W := Shift_Left (U and 2#00011111#, 6);
- U := Unsigned_16 (Character'Pos (In_Char));
+ -- 16#00_0000#-16#00_007F#: 0xxxxxxx
- if (U and 2#11000000#) /= 2#10000000# then
- raise Constraint_Error;
- end if;
+ if (U and 2#10000000#) = 2#00000000# then
+ return Character'Pos (C);
- W := W or (U and 2#00111111#);
+ -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
- elsif (U and 2#11110000#) = 2#11100000# then
- W := Shift_Left (U and 2#00001111#, 12);
- U := Unsigned_16 (Character'Pos (In_Char));
+ elsif (U and 2#11100000#) = 2#110_00000# then
+ W := Shift_Left (U and 2#00011111#, 6);
+ U := Unsigned_32 (Character'Pos (In_Char));
- if (U and 2#11000000#) /= 2#10000000# then
- raise Constraint_Error;
- end if;
+ if (U and 2#11000000#) /= 2#10_000000# then
+ raise Constraint_Error;
+ end if;
- W := W or Shift_Left (U and 2#00111111#, 6);
- U := Unsigned_16 (Character'Pos (In_Char));
+ W := W or (U and 2#00111111#);
- if (U and 2#11000000#) /= 2#10000000# then
- raise Constraint_Error;
- end if;
+ return UTF_32_Code (W);
- W := W or (U and 2#00111111#);
+ -- 16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx
- else
- raise Constraint_Error;
- end if;
+ elsif (U and 2#11110000#) = 2#1110_0000# then
+ W := U and 2#00001111#;
+ Get_UTF_Byte;
+ Get_UTF_Byte;
+ return UTF_32_Code (W);
+
+ -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+ elsif (U and 2#11111000#) = 2#11110_000# then
+ W := U and 2#00000111#;
+
+ for K in 1 .. 3 loop
+ Get_UTF_Byte;
+ end loop;
- return Wide_Character'Val (W);
+ return UTF_32_Code (W);
+
+ -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
+ -- 10xxxxxx 10xxxxxx
+
+ elsif (U and 2#11111100#) = 2#111110_00# then
+ W := U and 2#00000011#;
+
+ for K in 1 .. 4 loop
+ Get_UTF_Byte;
+ end loop;
+
+ return UTF_32_Code (W);
+
+ -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
+ -- 10xxxxxx 10xxxxxx 10xxxxxx
+
+ elsif (U and 2#11111110#) = 2#1111110_0# then
+ W := U and 2#00000001#;
+
+ for K in 1 .. 5 loop
+ Get_UTF_Byte;
+ end loop;
+
+ return UTF_32_Code (W);
else
- return Wide_Character'Val (Character'Pos (C));
+ raise Constraint_Error;
end if;
when WCEM_Brackets =>
if C /= '[' then
- return Wide_Character'Val (Character'Pos (C));
+ return Character'Pos (C);
end if;
if In_Char /= '"' then
@@ -182,15 +222,33 @@ package body System.WCh_Cnv is
B1 := 0;
Get_Hex (In_Char);
Get_Hex (In_Char);
+
C1 := In_Char;
if C1 /= '"' then
Get_Hex (C1);
Get_Hex (In_Char);
+
C1 := In_Char;
if C1 /= '"' then
- raise Constraint_Error;
+ Get_Hex (C1);
+ Get_Hex (In_Char);
+
+ C1 := In_Char;
+
+ if C1 /= '"' then
+ Get_Hex (C1);
+ Get_Hex (In_Char);
+
+ if B1 > Unsigned_32 (UTF_32_Code'Last) then
+ raise Constraint_Error;
+ end if;
+
+ if In_Char /= '"' then
+ raise Constraint_Error;
+ end if;
+ end if;
end if;
end if;
@@ -198,23 +256,44 @@ package body System.WCh_Cnv is
raise Constraint_Error;
end if;
- return Wide_Character'Val (B1);
+ return UTF_32_Code (B1);
end case;
- end Char_Sequence_To_Wide_Char;
+ end Char_Sequence_To_UTF_32;
--------------------------------
- -- Wide_Char_To_Char_Sequence --
+ -- Char_Sequence_To_Wide_Char --
--------------------------------
- procedure Wide_Char_To_Char_Sequence
- (WC : Wide_Character;
- EM : WC_Encoding_Method)
+ function Char_Sequence_To_Wide_Char
+ (C : Character;
+ EM : System.WCh_Con.WC_Encoding_Method) return Wide_Character
+ is
+ function Char_Sequence_To_UTF is new Char_Sequence_To_UTF_32 (In_Char);
+
+ U : constant UTF_32_Code := Char_Sequence_To_UTF (C, EM);
+
+ begin
+ if U > 16#FFFF# then
+ raise Constraint_Error;
+ else
+ return Wide_Character'Val (U);
+ end if;
+ end Char_Sequence_To_Wide_Char;
+
+ -----------------------------
+ -- UTF_32_To_Char_Sequence --
+ -----------------------------
+
+ procedure UTF_32_To_Char_Sequence
+ (Val : UTF_32_Code;
+ EM : System.WCh_Con.WC_Encoding_Method)
is
- Val : constant Natural := Wide_Character'Pos (WC);
- Hexc : constant array (0 .. 15) of Character := "0123456789ABCDEF";
+ Hexc : constant array (UTF_32_Code range 0 .. 15) of Character :=
+ "0123456789ABCDEF";
+
C1, C2 : Character;
- U : Unsigned_16;
+ U : Unsigned_32;
begin
case EM is
@@ -222,22 +301,21 @@ package body System.WCh_Cnv is
when WCEM_Hex =>
if Val < 256 then
Out_Char (Character'Val (Val));
-
- else
+ elsif Val <= 16#FFFF# then
Out_Char (ASCII.ESC);
Out_Char (Hexc (Val / (16**3)));
Out_Char (Hexc ((Val / (16**2)) mod 16));
Out_Char (Hexc ((Val / 16) mod 16));
Out_Char (Hexc (Val mod 16));
+ else
+ raise Constraint_Error;
end if;
when WCEM_Upper =>
if Val < 128 then
Out_Char (Character'Val (Val));
-
- elsif Val < 16#8000# then
+ elsif Val < 16#8000# or else Val > 16#FFFF# then
raise Constraint_Error;
-
else
Out_Char (Character'Val (Val / 256));
Out_Char (Character'Val (Val mod 256));
@@ -246,58 +324,136 @@ package body System.WCh_Cnv is
when WCEM_Shift_JIS =>
if Val < 128 then
Out_Char (Character'Val (Val));
- else
- JIS_To_Shift_JIS (WC, C1, C2);
+ elsif Val <= 16#FFFF# then
+ JIS_To_Shift_JIS (Wide_Character'Val (Val), C1, C2);
Out_Char (C1);
Out_Char (C2);
+ else
+ raise Constraint_Error;
end if;
when WCEM_EUC =>
if Val < 128 then
Out_Char (Character'Val (Val));
- else
- JIS_To_EUC (WC, C1, C2);
+ elsif Val <= 16#FFFF# then
+ JIS_To_EUC (Wide_Character'Val (Val), C1, C2);
Out_Char (C1);
Out_Char (C2);
+ else
+ raise Constraint_Error;
end if;
when WCEM_UTF8 =>
- U := Unsigned_16 (Val);
- -- 16#0000#-16#007f#: 2#0xxxxxxx#
- -- 16#0080#-16#07ff#: 2#110xxxxx# 2#10xxxxxx#
- -- 16#0800#-16#ffff#: 2#1110xxxx# 2#10xxxxxx# 2#10xxxxxx#
+ -- Note: for details of UTF8 encoding see RFC 3629
+
+ U := Unsigned_32 (Val);
+
+ -- 16#00_0000#-16#00_007F#: 0xxxxxxx
- if U < 16#80# then
+ if U <= 16#00_007F# then
Out_Char (Character'Val (U));
- elsif U < 16#0800# then
+ -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
+
+ elsif U <= 16#00_07FF# then
Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6)));
Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
- else
+ -- 16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx
+
+ elsif U <= 16#00_FFFF# then
Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12)));
Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
- and 2#00111111#)));
+ and 2#00111111#)));
+ Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
+
+ -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+ elsif U <= 16#10_FFFF# then
+ Out_Char (Character'Val (2#11110000# or Shift_Right (U, 18)));
+ Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
+ and 2#00111111#)));
+ Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
+ and 2#00111111#)));
+ Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
+
+ -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
+ -- 10xxxxxx 10xxxxxx
+
+ elsif U <= 16#03FF_FFFF# then
+ Out_Char (Character'Val (2#11111000# or Shift_Right (U, 24)));
+ Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18)
+ and 2#00111111#)));
+ Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
+ and 2#00111111#)));
+ Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
+ and 2#00111111#)));
Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
+
+ -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
+ -- 10xxxxxx 10xxxxxx 10xxxxxx
+
+ elsif U <= 16#7FFF_FFFF# then
+ Out_Char (Character'Val (2#11111100# or Shift_Right (U, 30)));
+ Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 24)
+ and 2#00111111#)));
+ Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18)
+ and 2#00111111#)));
+ Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
+ and 2#00111111#)));
+ Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
+ and 2#00111111#)));
+ Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
+
+ else
+ raise Constraint_Error;
end if;
when WCEM_Brackets =>
-
if Val < 256 then
Out_Char (Character'Val (Val));
else
Out_Char ('[');
Out_Char ('"');
- Out_Char (Hexc (Val / (16**3)));
- Out_Char (Hexc ((Val / (16**2)) mod 16));
+
+ if Val > 16#FFFF# then
+ if Val > 16#00FF_FFFF# then
+ if Val > 16#7FFF_FFFF# then
+ raise Constraint_Error;
+ end if;
+
+ Out_Char (Hexc (Val / 16 ** 7));
+ Out_Char (Hexc ((Val / 16 ** 6) mod 16));
+ end if;
+
+ Out_Char (Hexc ((Val / 16 ** 5) mod 16));
+ Out_Char (Hexc ((Val / 16 ** 4) mod 16));
+ end if;
+
+ Out_Char (Hexc ((Val / 16 ** 3) mod 16));
+ Out_Char (Hexc ((Val / 16 ** 2) mod 16));
Out_Char (Hexc ((Val / 16) mod 16));
Out_Char (Hexc (Val mod 16));
+
Out_Char ('"');
Out_Char (']');
end if;
end case;
+ end UTF_32_To_Char_Sequence;
+
+ --------------------------------
+ -- Wide_Char_To_Char_Sequence --
+ --------------------------------
+
+ procedure Wide_Char_To_Char_Sequence
+ (WC : Wide_Character;
+ EM : System.WCh_Con.WC_Encoding_Method)
+ is
+ procedure UTF_To_Char_Sequence is new UTF_32_To_Char_Sequence (Out_Char);
+ begin
+ UTF_To_Char_Sequence (Wide_Character'Pos (WC), EM);
end Wide_Char_To_Char_Sequence;
end System.WCh_Cnv;