summaryrefslogtreecommitdiff
path: root/codepage
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2008-06-16 00:07:23 -0700
committerH. Peter Anvin <hpa@zytor.com>2008-06-16 00:07:23 -0700
commit52ff420fba565c4803cd43260c2d36189ff3d71a (patch)
treec7ad33a0acb5bd9e1c2c5c2453fbb0ec28dd7ea4 /codepage
parent08844f9e811b9ad77b281d8608e90c140c76e44f (diff)
downloadsyslinux-52ff420fba565c4803cd43260c2d36189ff3d71a.tar.gz
codepage: include case variant characters in UnicodeData
Adjust the gensubset.pl script so that all case variants are explicitly included in UnicodeData.
Diffstat (limited to 'codepage')
-rw-r--r--codepage/UnicodeData12
-rwxr-xr-xcodepage/gensubset.pl19
2 files changed, 31 insertions, 0 deletions
diff --git a/codepage/UnicodeData b/codepage/UnicodeData
index 3eb8eb7a..9b2209da 100644
--- a/codepage/UnicodeData
+++ b/codepage/UnicodeData
@@ -223,19 +223,31 @@
00FE;LATIN SMALL LETTER THORN;Ll;0;L;;;;;N;;Icelandic;00DE;;00DE
00FF;LATIN SMALL LETTER Y WITH DIAERESIS;Ll;0;L;0079 0308;;;;N;LATIN SMALL LETTER Y DIAERESIS;;0178;;0178
0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049
+0178;LATIN CAPITAL LETTER Y WITH DIAERESIS;Lu;0;L;0059 0308;;;;N;LATIN CAPITAL LETTER Y DIAERESIS;;;00FF;
+0191;LATIN CAPITAL LETTER F WITH HOOK;Lu;0;L;;;;;N;LATIN CAPITAL LETTER F HOOK;;;0192;
0192;LATIN SMALL LETTER F WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER SCRIPT F;;0191;;0191
+0391;GREEK CAPITAL LETTER ALPHA;Lu;0;L;;;;;N;;;;03B1;
0393;GREEK CAPITAL LETTER GAMMA;Lu;0;L;;;;;N;;;;03B3;
+0394;GREEK CAPITAL LETTER DELTA;Lu;0;L;;;;;N;;;;03B4;
+0395;GREEK CAPITAL LETTER EPSILON;Lu;0;L;;;;;N;;;;03B5;
0398;GREEK CAPITAL LETTER THETA;Lu;0;L;;;;;N;;;;03B8;
+039C;GREEK CAPITAL LETTER MU;Lu;0;L;;;;;N;;;;03BC;
+03A0;GREEK CAPITAL LETTER PI;Lu;0;L;;;;;N;;;;03C0;
03A3;GREEK CAPITAL LETTER SIGMA;Lu;0;L;;;;;N;;;;03C3;
+03A4;GREEK CAPITAL LETTER TAU;Lu;0;L;;;;;N;;;;03C4;
03A6;GREEK CAPITAL LETTER PHI;Lu;0;L;;;;;N;;;;03C6;
03A9;GREEK CAPITAL LETTER OMEGA;Lu;0;L;;;;;N;;;;03C9;
03B1;GREEK SMALL LETTER ALPHA;Ll;0;L;;;;;N;;;0391;;0391
+03B3;GREEK SMALL LETTER GAMMA;Ll;0;L;;;;;N;;;0393;;0393
03B4;GREEK SMALL LETTER DELTA;Ll;0;L;;;;;N;;;0394;;0394
03B5;GREEK SMALL LETTER EPSILON;Ll;0;L;;;;;N;;;0395;;0395
+03B8;GREEK SMALL LETTER THETA;Ll;0;L;;;;;N;;;0398;;0398
+03BC;GREEK SMALL LETTER MU;Ll;0;L;;;;;N;;;039C;;039C
03C0;GREEK SMALL LETTER PI;Ll;0;L;;;;;N;;;03A0;;03A0
03C3;GREEK SMALL LETTER SIGMA;Ll;0;L;;;;;N;;;03A3;;03A3
03C4;GREEK SMALL LETTER TAU;Ll;0;L;;;;;N;;;03A4;;03A4
03C6;GREEK SMALL LETTER PHI;Ll;0;L;;;;;N;;;03A6;;03A6
+03C9;GREEK SMALL LETTER OMEGA;Ll;0;L;;;;;N;;;03A9;;03A9
2017;DOUBLE LOW LINE;Po;0;ON;<compat> 0020 0333;;;;N;SPACING DOUBLE UNDERSCORE;;;;
207F;SUPERSCRIPT LATIN SMALL LETTER N;Ll;0;L;<super> 006E;;;;N;;;;;
20A7;PESETA SIGN;Sc;0;ET;;;;;N;;;;;
diff --git a/codepage/gensubset.pl b/codepage/gensubset.pl
index 5fde460f..4dd7f2c1 100755
--- a/codepage/gensubset.pl
+++ b/codepage/gensubset.pl
@@ -9,6 +9,7 @@
%need_these = ();
+# Mark as needed all the characters mentioned in the relevant files
foreach $file (@ARGV) {
open(F, '<', $file) or die;
while (defined($line = <F>)) {
@@ -20,9 +21,27 @@ foreach $file (@ARGV) {
close(F);
}
+# Also mark as needed any case variants of those: the uppercase, lowercase
+# and titlecase mappings held in UnicodeData fields 12, 13 and 14.
+# (Note: this doesn't necessarily provide the full transitive closure,
+# but we shouldn't need it.)
+while (defined($line = <STDIN>)) {
+    # Strip the line ending before splitting.  Otherwise an empty final
+    # field is read as "\n", passes the emptiness test below, and hex()
+    # turns it into 0 -- wrongly marking U+0000 as needed.
+    $line =~ s/[\r\n]+$//;
+    @f = split(/;/, $line);
+    if ($f[0] =~ /^([0-9a-f]+)$/i) {
+        $r = hex $f[0];
+        if ($need_these{$r}) {
+            foreach my $i (12 .. 14) {
+                # split() drops trailing empty fields, so a mapping that
+                # is absent may come back undefined rather than ''.
+                my $variant = $f[$i];
+                $need_these{hex $variant}++
+                    if (defined($variant) && $variant ne '');
+            }
+        }
+    }
+}
+
+# Finally, write out the subset.  STDIN has already been read to EOF by the
+# pass above, so it has to be rewound; that only works when STDIN is a
+# seekable file.  Fail loudly instead of silently emitting an empty subset.
+seek(STDIN, 0, 0) or die "$0: cannot rewind STDIN (is it a pipe?): $!\n";
while (defined($line = <STDIN>)) {
($v, $l) = split(/;/, $line, 2);
if ($v =~ /^([0-9a-f]+)\-([0-9a-f]+)$/i) {
+ # This isn't actually the format... fix that if it ever matters
$r1 = hex $1;
$r2 = hex $2;
} elsif ($v =~ /^([0-9a-f]+)$/i) {