diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-03-16 10:54:24 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-03-19 16:51:25 -0600 |
commit | 21a1aff7e1eddd8acb5f096dd264e2f967ad5401 (patch) | |
tree | ab3a1b13368036259e7df9720cf984871dd5da63 | |
parent | 6901521e3ffe762ff8e3f268762b0d2f667771a7 (diff) | |
download | perl-21a1aff7e1eddd8acb5f096dd264e2f967ad5401.tar.gz |
mktables: Backport name_alias changes to earlier Unicode versions
This allows mktables to be compiled for earlier Unicode versions and
work for them.
-rw-r--r-- | lib/unicore/mktables | 242 |
1 files changed, 230 insertions, 12 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables index f0813cf97c..fa3e66e9f1 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -11655,15 +11655,228 @@ sub filter_script_extensions_line { } sub setup_early_name_alias { + my $file= shift; + Carp::carp_extra_args(\@_) if main::DEBUG && @_; + my $aliases = property_ref('Name_Alias'); $aliases = Property->new('Name_Alias') if ! defined $aliases; + $file->insert_lines(get_old_name_aliases()); - # Before 6.0, this wasn't a problem, and after it, this alias is part of - # the Unicode-delivered file. - $aliases->add_map(7, 7, "ALERT: control") if $v_version eq v6.0.0; return; } +sub get_old_name_aliases () { + my @return = split /\n/, <<'END'; +0000;NULL;control +0000;NUL;abbreviation +0001;START OF HEADING;control +0001;SOH;abbreviation +0002;START OF TEXT;control +0002;STX;abbreviation +0003;END OF TEXT;control +0003;ETX;abbreviation +0004;END OF TRANSMISSION;control +0004;EOT;abbreviation +0005;ENQUIRY;control +0005;ENQ;abbreviation +0006;ACKNOWLEDGE;control +0006;ACK;abbreviation +0007;ALERT;control +0007;BELL;control +0007;BEL;abbreviation +0008;BACKSPACE;control +0008;BS;abbreviation +0009;CHARACTER TABULATION;control +0009;HORIZONTAL TABULATION;control +0009;HT;abbreviation +0009;TAB;abbreviation +000A;LINE FEED;control +000A;LINE FEED (LF);control +000A;NEW LINE;control +000A;END OF LINE;control +000A;LF;abbreviation +000A;NL;abbreviation +000A;EOL;abbreviation +000B;LINE TABULATION;control +000B;VERTICAL TABULATION;control +000B;VT;abbreviation +000C;FORM FEED;control +000C;FORM FEED (FF);control +000C;FF;abbreviation +000D;CARRIAGE RETURN;control +000D;CARRIAGE RETURN (CR);control +000D;CR;abbreviation +000E;SHIFT OUT;control +000E;LOCKING-SHIFT ONE;control +000E;SO;abbreviation +000F;SHIFT IN;control +000F;LOCKING-SHIFT ZERO;control +000F;SI;abbreviation +0010;DATA LINK ESCAPE;control +0010;DLE;abbreviation +0011;DEVICE CONTROL ONE;control +0011;DC1;abbreviation +0012;DEVICE CONTROL 
TWO;control +0012;DC2;abbreviation +0013;DEVICE CONTROL THREE;control +0013;DC3;abbreviation +0014;DEVICE CONTROL FOUR;control +0014;DC4;abbreviation +0015;NEGATIVE ACKNOWLEDGE;control +0015;NAK;abbreviation +0016;SYNCHRONOUS IDLE;control +0016;SYN;abbreviation +0017;END OF TRANSMISSION BLOCK;control +0017;ETB;abbreviation +0018;CANCEL;control +0018;CAN;abbreviation +0019;END OF MEDIUM;control +0019;EOM;abbreviation +001A;SUBSTITUTE;control +001A;SUB;abbreviation +001B;ESCAPE;control +001B;ESC;abbreviation +001C;INFORMATION SEPARATOR FOUR;control +001C;FILE SEPARATOR;control +001C;FS;abbreviation +001D;INFORMATION SEPARATOR THREE;control +001D;GROUP SEPARATOR;control +001D;GS;abbreviation +001E;INFORMATION SEPARATOR TWO;control +001E;RECORD SEPARATOR;control +001E;RS;abbreviation +001F;INFORMATION SEPARATOR ONE;control +001F;UNIT SEPARATOR;control +001F;US;abbreviation +0020;SP;abbreviation +007F;DELETE;control +007F;DEL;abbreviation +0080;PADDING CHARACTER;figment +0080;PAD;abbreviation +0081;HIGH OCTET PRESET;figment +0081;HOP;abbreviation +0082;BREAK PERMITTED HERE;control +0082;BPH;abbreviation +0083;NO BREAK HERE;control +0083;NBH;abbreviation +0084;INDEX;control +0084;IND;abbreviation +0085;NEXT LINE;control +0085;NEXT LINE (NEL);control +0085;NEL;abbreviation +0086;START OF SELECTED AREA;control +0086;SSA;abbreviation +0087;END OF SELECTED AREA;control +0087;ESA;abbreviation +0088;CHARACTER TABULATION SET;control +0088;HORIZONTAL TABULATION SET;control +0088;HTS;abbreviation +0089;CHARACTER TABULATION WITH JUSTIFICATION;control +0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control +0089;HTJ;abbreviation +008A;LINE TABULATION SET;control +008A;VERTICAL TABULATION SET;control +008A;VTS;abbreviation +008B;PARTIAL LINE FORWARD;control +008B;PARTIAL LINE DOWN;control +008B;PLD;abbreviation +008C;PARTIAL LINE BACKWARD;control +008C;PARTIAL LINE UP;control +008C;PLU;abbreviation +008D;REVERSE LINE FEED;control +008D;REVERSE INDEX;control +008D;RI;abbreviation 
+008E;SINGLE SHIFT TWO;control +008E;SINGLE-SHIFT-2;control +008E;SS2;abbreviation +008F;SINGLE SHIFT THREE;control +008F;SINGLE-SHIFT-3;control +008F;SS3;abbreviation +0090;DEVICE CONTROL STRING;control +0090;DCS;abbreviation +0091;PRIVATE USE ONE;control +0091;PRIVATE USE-1;control +0091;PU1;abbreviation +0092;PRIVATE USE TWO;control +0092;PRIVATE USE-2;control +0092;PU2;abbreviation +0093;SET TRANSMIT STATE;control +0093;STS;abbreviation +0094;CANCEL CHARACTER;control +0094;CCH;abbreviation +0095;MESSAGE WAITING;control +0095;MW;abbreviation +0096;START OF GUARDED AREA;control +0096;START OF PROTECTED AREA;control +0096;SPA;abbreviation +0097;END OF GUARDED AREA;control +0097;END OF PROTECTED AREA;control +0097;EPA;abbreviation +0098;START OF STRING;control +0098;SOS;abbreviation +0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment +0099;SGC;abbreviation +009A;SINGLE CHARACTER INTRODUCER;control +009A;SCI;abbreviation +009B;CONTROL SEQUENCE INTRODUCER;control +009B;CSI;abbreviation +009C;STRING TERMINATOR;control +009C;ST;abbreviation +009D;OPERATING SYSTEM COMMAND;control +009D;OSC;abbreviation +009E;PRIVACY MESSAGE;control +009E;PM;abbreviation +009F;APPLICATION PROGRAM COMMAND;control +009F;APC;abbreviation +00A0;NBSP;abbreviation +00AD;SHY;abbreviation +200B;ZWSP;abbreviation +200C;ZWNJ;abbreviation +200D;ZWJ;abbreviation +200E;LRM;abbreviation +200F;RLM;abbreviation +202A;LRE;abbreviation +202B;RLE;abbreviation +202C;PDF;abbreviation +202D;LRO;abbreviation +202E;RLO;abbreviation +FEFF;BYTE ORDER MARK;alternate +FEFF;BOM;abbreviation +FEFF;ZWNBSP;abbreviation +END + + if ($v_version ge v3.0.0) { + push @return, split /\n/, <<'END'; +180B; FVS1; abbreviation +180C; FVS2; abbreviation +180D; FVS3; abbreviation +180E; MVS; abbreviation +202F; NNBSP; abbreviation +END + } + + if ($v_version ge v3.2.0) { + push @return, split /\n/, <<'END'; +034F; CGJ; abbreviation +205F; MMSP; abbreviation +2060; WJ; abbreviation +END + # Add in VS1..VS16 + my $cp = 0xFE00 - 1; + 
for my $i (1..16) { + push @return, sprintf("%04X; VS%d; abbreviation", $cp + $i, $i); + } + } + if ($v_version ge v4.0.0) { # Add in VS17..VS256 + my $cp = 0xE0100 - 17; + for my $i (17..256) { + push @return, sprintf("%04X; VS%d; abbreviation", $cp + $i, $i); + } + } + + return @return; +} + sub filter_later_version_name_alias_line { # This file has an extra entry per line for the alias type. This is @@ -11688,8 +11901,10 @@ sub filter_later_version_name_alias_line { sub filter_early_version_name_alias_line { # Early versions did not have the trailing alias type field; implicitly it - # was 'correction' - $_ .= "; correction"; + # was 'correction'. But our synthetic lines we add in this program do + # have it, so test for the type field. + $_ .= "; correction" if $_ !~ /;.*;/; + filter_later_version_name_alias_line; return; } @@ -12537,11 +12752,11 @@ sub compile_perl() { } my $alias_sentence = ""; + my %abbreviations; my $alias = property_ref('Name_Alias'); if (defined $alias) { push @composition, 'Name_Alias'; $perl_charname->set_proxy_for('Name_Alias'); - my %abbreviations; # Add each entry in Name_Alias to Perl_Charnames. Where these go with # respect to any existing entry depends on the entry type. @@ -12587,12 +12802,15 @@ sub compile_perl() { # above. $perl_charname->add_duplicate($code_point, $value, Replace => $replace_type); } + } + + # Now add the Unicode_1 names for the controls. The Unicode_1 names had + # precedence before 6.1, so should be first in the file; the other names + # have precedence starting in 6.1, + my $before_or_after = ($v_version lt v6.1.0) + ? $MULTIPLE_BEFORE + : $MULTIPLE_AFTER; - # Now add the Unicode_1 names for the controls. These come after the - # official names, as they are only recommended (by TR18; unclear as of - # this writing if that recommendation will be withdrawn, but if it is, - # we want to add them anyway for backwards compatibility). Only a few - # differ from the official names. 
foreach my $range (property_ref('Unicode_1_Name')->ranges) { my $code_point = $range->start; my $unicode_1_value = $range->value; @@ -12611,7 +12829,7 @@ sub compile_perl() { # This won't add an exact duplicate. $perl_charname->add_duplicate($code_point, $unicode_1_value, - Replace => $MULTIPLE_AFTER); + Replace => $before_or_after); } # Now that have everything added, add in abbreviations after |