summaryrefslogtreecommitdiff
path: root/gcc/ada/scng.adb
diff options
context:
space:
mode:
author: charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4> 2005-03-15 16:10:45 +0000
committer: charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4> 2005-03-15 16:10:45 +0000
commit: aacdaf7476cd43df3f313785cf8056514a8a0e44 (patch)
tree: 4f5eb72a81692323ac08f26f3c009091d857d189 /gcc/ada/scng.adb
parent: 7e6c6ade025ad961075d83ae8d2e3c94ee308904 (diff)
download: gcc-aacdaf7476cd43df3f313785cf8056514a8a0e44.tar.gz
2005-03-08 Robert Dewar <dewar@adacore.com>
* Make-lang.in: Add g-utf_32 unit for gnat and gnatbind
* impunit.adb: Add GNAT.UTF_32
* scng.adb: Use gnat.utf_32 instead of widechar for utf_32 stuff
* widechar.ads, widechar.adb: Remove redundant UTF-32 tables (scng now uses GNAT.UTF_32).
* g-utf_32.ads, g-utf_32.adb: This is a new unit with full capabilities for categorizing characters using Unicode categories

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@96501 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/ada/scng.adb')
-rw-r--r-- gcc/ada/scng.adb 52
1 files changed, 31 insertions, 21 deletions
diff --git a/gcc/ada/scng.adb b/gcc/ada/scng.adb
index 158524df989..5621ff602ba 100644
--- a/gcc/ada/scng.adb
+++ b/gcc/ada/scng.adb
@@ -40,6 +40,8 @@ with Widechar; use Widechar;
with System.CRC32;
with System.WCh_Con; use System.WCh_Con;
+with GNAT.UTF_32; use GNAT.UTF_32;
+
package body Scng is
use ASCII;
@@ -1103,7 +1105,7 @@ package body Scng is
Accumulate_Checksum (Code);
if Ada_Version >= Ada_05
- and then Is_UTF_32_Non_Graphic (Code)
+ and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
then
Error_Msg
("(Ada 2005) non-graphic character not permitted " &
@@ -1515,7 +1517,7 @@ package body Scng is
-- If UTF_32 terminator, terminate comment scan
- elsif Is_UTF_32_Line_Terminator (Code) then
+ elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
Scan_Ptr := Wptr;
exit;
end if;
@@ -1639,7 +1641,7 @@ package body Scng is
Code := Character'Pos (' ');
elsif Ada_Version >= Ada_05
- and then Is_UTF_32_Non_Graphic (Code)
+ and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
then
Error_Msg
("(Ada 2005) non-graphic character not permitted " &
@@ -1899,7 +1901,7 @@ package body Scng is
-- Invalid control characters
- when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | SO |
+ when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | ASCII.SO |
SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
EM | FS | GS | RS | US | DEL
=>
@@ -1942,6 +1944,7 @@ package body Scng is
declare
Code : Char_Code;
+ Cat : Category;
Err : Boolean;
begin
@@ -1953,10 +1956,13 @@ package body Scng is
if Err then
Error_Illegal_Wide_Character;
goto Scan_Next_Character;
+ end if;
+
+ Cat := Get_Category (UTF_32 (Code));
-- If OK letter, reset scan ptr and go scan identifier
- elsif Is_UTF_32_Letter (Code) then
+ if Is_UTF_32_Letter (Cat) then
Scan_Ptr := Wptr;
Name_Len := 0;
Underline_Found := False;
@@ -1965,18 +1971,18 @@ package body Scng is
-- If OK wide space, ignore and keep scanning (we do not include
-- any ignored spaces in checksum)
- elsif Is_UTF_32_Space (Code) then
+ elsif Is_UTF_32_Space (Cat) then
goto Scan_Next_Character;
-- If OK wide line terminator, terminate current line
- elsif Is_UTF_32_Line_Terminator (Code) then
+ elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
Scan_Ptr := Wptr;
goto Scan_Line_Terminator;
-- Punctuation is an error (at start of identifier)
- elsif Is_UTF_32_Punctuation (Code) then
+ elsif Is_UTF_32_Punctuation (Cat) then
Error_Msg
("identifier cannot start with punctuation", Wptr);
Scan_Ptr := Wptr;
@@ -1986,7 +1992,7 @@ package body Scng is
-- Mark character is an error (at start of identifier)
- elsif Is_UTF_32_Mark (Code) then
+ elsif Is_UTF_32_Mark (Cat) then
Error_Msg
("identifier cannot start with mark character", Wptr);
Scan_Ptr := Wptr;
@@ -1996,7 +2002,7 @@ package body Scng is
-- Other format character is an error (at start of identifier)
- elsif Is_UTF_32_Other (Code) then
+ elsif Is_UTF_32_Other (Cat) then
Error_Msg
("identifier cannot start with other format character", Wptr);
Scan_Ptr := Wptr;
@@ -2008,7 +2014,7 @@ package body Scng is
-- identifier or bad literal. Not worth doing too much to try to
-- distinguish these cases, but we will do a little bit.
- elsif Is_UTF_32_Digit (Code) then
+ elsif Is_UTF_32_Digit (Cat) then
Error_Msg
("identifier cannot start with digit character", Wptr);
Scan_Ptr := Wptr;
@@ -2155,9 +2161,10 @@ package body Scng is
-- encoding into the name table entry for the identifier.
declare
- Code : Char_Code;
- Err : Boolean;
- Chr : Character;
+ Code : Char_Code;
+ Err : Boolean;
+ Chr : Character;
+ Cat : Category;
begin
Wptr := Scan_Ptr;
@@ -2198,19 +2205,22 @@ package body Scng is
("wide character not allowed in identifier", Wptr);
end if;
+ Cat := Get_Category (UTF_32 (Code));
+
-- If OK letter, store it folding to upper case. Note
-- that we include the folded letter in the checksum.
- if Is_UTF_32_Letter (Code) then
- Code := UTF_32_To_Upper_Case (Code);
+ if Is_UTF_32_Letter (Cat) then
+ Code :=
+ Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code)));
Accumulate_Checksum (Code);
Store_Encoded_Character (Code);
Underline_Found := False;
-- If OK extended digit or mark, then store it
- elsif Is_UTF_32_Digit (Code)
- or else Is_UTF_32_Mark (Code)
+ elsif Is_UTF_32_Digit (Cat)
+ or else Is_UTF_32_Mark (Cat)
then
Accumulate_Checksum (Code);
Store_Encoded_Character (Code);
@@ -2219,7 +2229,7 @@ package body Scng is
-- Wide punctuation is also stored, but counts as an
-- underline character for error checking purposes.
- elsif Is_UTF_32_Punctuation (Code) then
+ elsif Is_UTF_32_Punctuation (Cat) then
Accumulate_Checksum (Code);
if Underline_Found then
@@ -2241,12 +2251,12 @@ package body Scng is
-- stored. It seems reasonable to exclude it from the
-- checksum.
- elsif Is_UTF_32_Other (Code) then
+ elsif Is_UTF_32_Other (Cat) then
null;
-- Wide character in category Separator,Space terminates
- elsif Is_UTF_32_Space (Code) then
+ elsif Is_UTF_32_Space (Cat) then
goto Scan_Identifier_Complete;
-- Any other wide character is not acceptable