summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-03-16 10:54:24 -0600
committerKarl Williamson <public@khwilliamson.com>2012-03-19 16:51:25 -0600
commit21a1aff7e1eddd8acb5f096dd264e2f967ad5401 (patch)
treeab3a1b13368036259e7df9720cf984871dd5da63
parent6901521e3ffe762ff8e3f268762b0d2f667771a7 (diff)
downloadperl-21a1aff7e1eddd8acb5f096dd264e2f967ad5401.tar.gz
mktables: Backport name_alias changes to earlier Unicode versions
This allows mktables to be compiled for earlier Unicode versions and to work for them.
-rw-r--r--lib/unicore/mktables242
1 files changed, 230 insertions, 12 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index f0813cf97c..fa3e66e9f1 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -11655,15 +11655,228 @@ sub filter_script_extensions_line {
}
sub setup_early_name_alias {
+ my $file= shift;
+ Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
my $aliases = property_ref('Name_Alias');
$aliases = Property->new('Name_Alias') if ! defined $aliases;
+ $file->insert_lines(get_old_name_aliases());
- # Before 6.0, this wasn't a problem, and after it, this alias is part of
- # the Unicode-delivered file.
- $aliases->add_map(7, 7, "ALERT: control") if $v_version eq v6.0.0;
return;
}
+sub get_old_name_aliases () {
+ my @return = split /\n/, <<'END';
+0000;NULL;control
+0000;NUL;abbreviation
+0001;START OF HEADING;control
+0001;SOH;abbreviation
+0002;START OF TEXT;control
+0002;STX;abbreviation
+0003;END OF TEXT;control
+0003;ETX;abbreviation
+0004;END OF TRANSMISSION;control
+0004;EOT;abbreviation
+0005;ENQUIRY;control
+0005;ENQ;abbreviation
+0006;ACKNOWLEDGE;control
+0006;ACK;abbreviation
+0007;ALERT;control
+0007;BELL;control
+0007;BEL;abbreviation
+0008;BACKSPACE;control
+0008;BS;abbreviation
+0009;CHARACTER TABULATION;control
+0009;HORIZONTAL TABULATION;control
+0009;HT;abbreviation
+0009;TAB;abbreviation
+000A;LINE FEED;control
+000A;LINE FEED (LF);control
+000A;NEW LINE;control
+000A;END OF LINE;control
+000A;LF;abbreviation
+000A;NL;abbreviation
+000A;EOL;abbreviation
+000B;LINE TABULATION;control
+000B;VERTICAL TABULATION;control
+000B;VT;abbreviation
+000C;FORM FEED;control
+000C;FORM FEED (FF);control
+000C;FF;abbreviation
+000D;CARRIAGE RETURN;control
+000D;CARRIAGE RETURN (CR);control
+000D;CR;abbreviation
+000E;SHIFT OUT;control
+000E;LOCKING-SHIFT ONE;control
+000E;SO;abbreviation
+000F;SHIFT IN;control
+000F;LOCKING-SHIFT ZERO;control
+000F;SI;abbreviation
+0010;DATA LINK ESCAPE;control
+0010;DLE;abbreviation
+0011;DEVICE CONTROL ONE;control
+0011;DC1;abbreviation
+0012;DEVICE CONTROL TWO;control
+0012;DC2;abbreviation
+0013;DEVICE CONTROL THREE;control
+0013;DC3;abbreviation
+0014;DEVICE CONTROL FOUR;control
+0014;DC4;abbreviation
+0015;NEGATIVE ACKNOWLEDGE;control
+0015;NAK;abbreviation
+0016;SYNCHRONOUS IDLE;control
+0016;SYN;abbreviation
+0017;END OF TRANSMISSION BLOCK;control
+0017;ETB;abbreviation
+0018;CANCEL;control
+0018;CAN;abbreviation
+0019;END OF MEDIUM;control
+0019;EOM;abbreviation
+001A;SUBSTITUTE;control
+001A;SUB;abbreviation
+001B;ESCAPE;control
+001B;ESC;abbreviation
+001C;INFORMATION SEPARATOR FOUR;control
+001C;FILE SEPARATOR;control
+001C;FS;abbreviation
+001D;INFORMATION SEPARATOR THREE;control
+001D;GROUP SEPARATOR;control
+001D;GS;abbreviation
+001E;INFORMATION SEPARATOR TWO;control
+001E;RECORD SEPARATOR;control
+001E;RS;abbreviation
+001F;INFORMATION SEPARATOR ONE;control
+001F;UNIT SEPARATOR;control
+001F;US;abbreviation
+0020;SP;abbreviation
+007F;DELETE;control
+007F;DEL;abbreviation
+0080;PADDING CHARACTER;figment
+0080;PAD;abbreviation
+0081;HIGH OCTET PRESET;figment
+0081;HOP;abbreviation
+0082;BREAK PERMITTED HERE;control
+0082;BPH;abbreviation
+0083;NO BREAK HERE;control
+0083;NBH;abbreviation
+0084;INDEX;control
+0084;IND;abbreviation
+0085;NEXT LINE;control
+0085;NEXT LINE (NEL);control
+0085;NEL;abbreviation
+0086;START OF SELECTED AREA;control
+0086;SSA;abbreviation
+0087;END OF SELECTED AREA;control
+0087;ESA;abbreviation
+0088;CHARACTER TABULATION SET;control
+0088;HORIZONTAL TABULATION SET;control
+0088;HTS;abbreviation
+0089;CHARACTER TABULATION WITH JUSTIFICATION;control
+0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control
+0089;HTJ;abbreviation
+008A;LINE TABULATION SET;control
+008A;VERTICAL TABULATION SET;control
+008A;VTS;abbreviation
+008B;PARTIAL LINE FORWARD;control
+008B;PARTIAL LINE DOWN;control
+008B;PLD;abbreviation
+008C;PARTIAL LINE BACKWARD;control
+008C;PARTIAL LINE UP;control
+008C;PLU;abbreviation
+008D;REVERSE LINE FEED;control
+008D;REVERSE INDEX;control
+008D;RI;abbreviation
+008E;SINGLE SHIFT TWO;control
+008E;SINGLE-SHIFT-2;control
+008E;SS2;abbreviation
+008F;SINGLE SHIFT THREE;control
+008F;SINGLE-SHIFT-3;control
+008F;SS3;abbreviation
+0090;DEVICE CONTROL STRING;control
+0090;DCS;abbreviation
+0091;PRIVATE USE ONE;control
+0091;PRIVATE USE-1;control
+0091;PU1;abbreviation
+0092;PRIVATE USE TWO;control
+0092;PRIVATE USE-2;control
+0092;PU2;abbreviation
+0093;SET TRANSMIT STATE;control
+0093;STS;abbreviation
+0094;CANCEL CHARACTER;control
+0094;CCH;abbreviation
+0095;MESSAGE WAITING;control
+0095;MW;abbreviation
+0096;START OF GUARDED AREA;control
+0096;START OF PROTECTED AREA;control
+0096;SPA;abbreviation
+0097;END OF GUARDED AREA;control
+0097;END OF PROTECTED AREA;control
+0097;EPA;abbreviation
+0098;START OF STRING;control
+0098;SOS;abbreviation
+0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
+0099;SGC;abbreviation
+009A;SINGLE CHARACTER INTRODUCER;control
+009A;SCI;abbreviation
+009B;CONTROL SEQUENCE INTRODUCER;control
+009B;CSI;abbreviation
+009C;STRING TERMINATOR;control
+009C;ST;abbreviation
+009D;OPERATING SYSTEM COMMAND;control
+009D;OSC;abbreviation
+009E;PRIVACY MESSAGE;control
+009E;PM;abbreviation
+009F;APPLICATION PROGRAM COMMAND;control
+009F;APC;abbreviation
+00A0;NBSP;abbreviation
+00AD;SHY;abbreviation
+200B;ZWSP;abbreviation
+200C;ZWNJ;abbreviation
+200D;ZWJ;abbreviation
+200E;LRM;abbreviation
+200F;RLM;abbreviation
+202A;LRE;abbreviation
+202B;RLE;abbreviation
+202C;PDF;abbreviation
+202D;LRO;abbreviation
+202E;RLO;abbreviation
+FEFF;BYTE ORDER MARK;alternate
+FEFF;BOM;abbreviation
+FEFF;ZWNBSP;abbreviation
+END
+
+ if ($v_version ge v3.0.0) {
+ push @return, split /\n/, <<'END';
+180B; FVS1; abbreviation
+180C; FVS2; abbreviation
+180D; FVS3; abbreviation
+180E; MVS; abbreviation
+202F; NNBSP; abbreviation
+END
+ }
+
+ if ($v_version ge v3.2.0) {
+ push @return, split /\n/, <<'END';
+034F; CGJ; abbreviation
+205F; MMSP; abbreviation
+2060; WJ; abbreviation
+END
+ # Add in VS1..VS16
+ my $cp = 0xFE00 - 1;
+ for my $i (1..16) {
+ push @return, sprintf("%04X; VS%d; abbreviation", $cp + $i, $i);
+ }
+ }
+ if ($v_version ge v4.0.0) { # Add in VS17..VS256
+ my $cp = 0xE0100 - 17;
+ for my $i (17..256) {
+ push @return, sprintf("%04X; VS%d; abbreviation", $cp + $i, $i);
+ }
+ }
+
+ return @return;
+}
+
sub filter_later_version_name_alias_line {
# This file has an extra entry per line for the alias type. This is
@@ -11688,8 +11901,10 @@ sub filter_later_version_name_alias_line {
sub filter_early_version_name_alias_line {
# Early versions did not have the trailing alias type field; implicitly it
- # was 'correction'
- $_ .= "; correction";
+ # was 'correction'. But our synthetic lines we add in this program do
+ # have it, so test for the type field.
+ $_ .= "; correction" if $_ !~ /;.*;/;
+
filter_later_version_name_alias_line;
return;
}
@@ -12537,11 +12752,11 @@ sub compile_perl() {
}
my $alias_sentence = "";
+ my %abbreviations;
my $alias = property_ref('Name_Alias');
if (defined $alias) {
push @composition, 'Name_Alias';
$perl_charname->set_proxy_for('Name_Alias');
- my %abbreviations;
# Add each entry in Name_Alias to Perl_Charnames. Where these go with
# respect to any existing entry depends on the entry type.
@@ -12587,12 +12802,15 @@ sub compile_perl() {
# above.
$perl_charname->add_duplicate($code_point, $value, Replace => $replace_type);
}
+ }
+
+ # Now add the Unicode_1 names for the controls. The Unicode_1 names had
+ # precedence before 6.1, so should be first in the file; the other names
+ # have precedence starting in 6.1, so the Unicode_1 names go after them.
+ my $before_or_after = ($v_version lt v6.1.0)
+ ? $MULTIPLE_BEFORE
+ : $MULTIPLE_AFTER;
- # Now add the Unicode_1 names for the controls. These come after the
- # official names, as they are only recommended (by TR18; unclear as of
- # this writing if that recommendation will be withdrawn, but if it is,
- # we want to add them anyway for backwards compatibility). Only a few
- # differ from the official names.
foreach my $range (property_ref('Unicode_1_Name')->ranges) {
my $code_point = $range->start;
my $unicode_1_value = $range->value;
@@ -12611,7 +12829,7 @@ sub compile_perl() {
# This won't add an exact duplicate.
$perl_charname->add_duplicate($code_point, $unicode_1_value,
- Replace => $MULTIPLE_AFTER);
+ Replace => $before_or_after);
}
# Now that have everything added, add in abbreviations after