summary | refs | log | tree | commit | diff
path: root/codepage
diff options
context:
space:
mode:
author: H. Peter Anvin <hpa@zytor.com>, 2008-06-09 12:21:05 -0700
committer: H. Peter Anvin <hpa@zytor.com>, 2008-06-09 16:14:44 -0700
commit: e74dc5c074f683d6b8ab71f9434533a6c74bdf43 (patch)
tree: d8794c12d0a25df935f49c8da81bc9c26ca40f86 /codepage
parent: 78de189bb80b89f63d9bcac8c26c69ab4f913c89 (diff)
download: syslinux-e74dc5c074f683d6b8ab71f9434533a6c74bdf43.tar.gz
FAT: make all codepage data derived from the same place
Make the codepage data all derive from the Unicode tables, and create files that could be dynamically loaded in the future.
Diffstat (limited to 'codepage')
-rw-r--r-- codepage/Makefile 8
-rwxr-xr-x codepage/cptable.pl 52
2 files changed, 44 insertions, 16 deletions
diff --git a/codepage/Makefile b/codepage/Makefile
index d426eaad..5495a6a7 100644
--- a/codepage/Makefile
+++ b/codepage/Makefile
@@ -1,16 +1,16 @@
PERL = perl
CPSRC = $(wildcard *.txt)
-GENFILES = $(patsubst %.txt,%.bin,$(CPSRC))
+GENFILES = $(patsubst %.txt,%.cp,$(CPSRC))
-.SUFFIXES: .txt .bin
+.SUFFIXES: .txt .cp
all: $(GENFILES)
-%.bin: %.txt cptable.pl UnicodeData
+%.cp: %.txt cptable.pl UnicodeData
$(PERL) cptable.pl UnicodeData $< $@
tidy:
- rm -f $(GENFILES)
+ rm -f *.cp *.bin
clean: tidy
diff --git a/codepage/cptable.pl b/codepage/cptable.pl
index c183d08c..44c710ce 100755
--- a/codepage/cptable.pl
+++ b/codepage/cptable.pl
@@ -10,25 +10,23 @@
($ucd, $cpin, $cpout) = @ARGV;
-%altcase = ();
+%ucase = ();
+%lcase = ();
+%tcase = ();
open(UCD, '<', $ucd) or die;
while (defined($line = <UCD>)) {
chomp $line;
@f = split(/;/, $line);
- if ($f[12] ne '') {
- $altcase{hex $f[0]} = hex $f[12]; # Upper case equivalent
- } elsif ($f[13] ne '') {
- $altcase{hex $f[0]} = hex $f[13]; # Lower case equivalent
- } elsif ($f[14] ne '') {
- $altcase{hex $f[0]} = hex $f[14]; # Title case, would be unusual
- } else {
- $altcase{hex $f[0]} = hex $f[0];
- }
+ $n = hex $f[0];
+ $ucase{$n} = hex $f[12] if ($f[12] ne '');
+ $lcase{$n} = hex $f[13] if ($f[13] ne '');
+ $tcase{$n} = hex $f[14] if ($f[14] ne '');
}
close(UCD);
@xtab = (undef) x 256;
+%tabx = ();
open(CPIN, '<', $cpin) or die;
while (defined($line = <CPIN>)) {
@@ -36,17 +34,47 @@ while (defined($line = <CPIN>)) {
@f = split(/\s+/, $line);
next if (scalar @f != 2);
next if (hex $f[0] > 255);
- $xtab[hex $f[0]] = hex $f[1];
+ $xtab[hex $f[0]] = hex $f[1]; # Codepage -> Unicode
+ $tabx{hex $f[1]} = hex $f[0]; # Unicode -> Codepage
}
close(CPIN);
open(CPOUT, '>', $cpout) or die;
+#
+# Magic number, in anticipation of being able to load these
+# files dynamically...
+#
+print CPOUT pack("VV", 0x8fad232b, 0x9c295319);
+
+# Header fields available for future use...
+print CPOUT pack("VVVVVV", 0, 0, 0, 0, 0, 0);
+
+#
+# Self (shortname) uppercase table
+#
+for ($i = 0; $i < 256; $i++) {
+ $u = $tabx{$ucase{$xtab[$i]}};
+ $u = $i unless (defined($u));
+ print CPOUT pack("C", $u);
+}
+
+#
+# Unicode (longname) matching table
+#
for ($i = 0; $i < 256; $i++) {
if (!defined($xtab[$i])) {
$p0 = $p1 = 0xffff;
} else {
$p0 = $xtab[$i];
- $p1 = defined($altcase{$p0}) ? $altcase{$p0} : $p0;
+ if (defined($ucase{$p0})) {
+ $p1 = $ucase{$p0};
+ } elsif (defined($lcase{$p0})) {
+ $p1 = $lcase{$p0};
+ } elsif (defined($tcase{$p0})) {
+ $p1 = $tcase{$p0};
+ } else {
+ $p1 = $p0;
+ }
}
# Only the BMP is supported...
$p0 = 0xffff if ($p0 > 0xffff);