FAT: make all codepage data derived from the same place

Derive all of the codepage data from the Unicode tables, and generate output files that could be loaded dynamically in the future.
author: H. Peter Anvin <hpa@zytor.com> 2008-06-09 12:21:05 -0700
committer: H. Peter Anvin <hpa@zytor.com> 2008-06-09 16:14:44 -0700
commit: e74dc5c074f683d6b8ab71f9434533a6c74bdf43 (patch)
tree: d8794c12d0a25df935f49c8da81bc9c26ca40f86 /codepage
parent: 78de189bb80b89f63d9bcac8c26c69ab4f913c89 (diff)
download: syslinux-e74dc5c074f683d6b8ab71f9434533a6c74bdf43.tar.gz
2 files changed, 44 insertions, 16 deletions
diff --git a/codepage/Makefile b/codepage/Makefile
index d426eaad..5495a6a7 100644
--- a/codepage/Makefile
+++ b/codepage/Makefile
@@ -1,16 +1,16 @@
 PERL		= perl
 CPSRC		= $(wildcard *.txt)
-GENFILES	= $(patsubst %.txt,%.bin,$(CPSRC))
+GENFILES	= $(patsubst %.txt,%.cp,$(CPSRC))
 
-.SUFFIXES: .txt .bin
+.SUFFIXES: .txt .cp
 
 all: $(GENFILES)
 
-%.bin: %.txt cptable.pl UnicodeData
+%.cp: %.txt cptable.pl UnicodeData
 	$(PERL) cptable.pl UnicodeData $< $@
 
 tidy:
-	rm -f $(GENFILES)
+	rm -f *.cp *.bin
 
 clean: tidy
 
diff --git a/codepage/cptable.pl b/codepage/cptable.pl
index c183d08c..44c710ce 100755
--- a/codepage/cptable.pl
+++ b/codepage/cptable.pl
@@ -10,25 +10,23 @@
 
 ($ucd, $cpin, $cpout) = @ARGV;
 
-%altcase = ();
+%ucase   = ();
+%lcase   = ();
+%tcase   = ();
 
 open(UCD, '<', $ucd) or die;
 while (defined($line = <UCD>)) {
     chomp $line;
     @f = split(/;/, $line);
-    if ($f[12] ne '') {
-	$altcase{hex $f[0]} = hex $f[12]; # Upper case equivalent
-    } elsif ($f[13] ne '') {
-	$altcase{hex $f[0]} = hex $f[13]; # Lower case equivalent
-    } elsif ($f[14] ne '') {
-	$altcase{hex $f[0]} = hex $f[14]; # Title case, would be unusual
-    } else {
-	$altcase{hex $f[0]} = hex $f[0];
-    }
+    $n = hex $f[0];
+    $ucase{$n} = hex $f[12] if ($f[12] ne '');
+    $lcase{$n} = hex $f[13] if ($f[13] ne '');
+    $tcase{$n} = hex $f[14] if ($f[14] ne '');
 }
 close(UCD);
 
 @xtab = (undef) x 256;
+%tabx = ();
 
 open(CPIN, '<', $cpin) or die;
 while (defined($line = <CPIN>)) {
@@ -36,17 +34,47 @@ while (defined($line = <CPIN>)) {
     @f = split(/\s+/, $line);
     next if (scalar @f != 2);
     next if (hex $f[0] > 255);
-    $xtab[hex $f[0]] = hex $f[1];
+    $xtab[hex $f[0]] = hex $f[1]; # Codepage -> Unicode
+    $tabx{hex $f[1]} = hex $f[0]; # Unicode -> Codepage
 }
 close(CPIN);
 
 open(CPOUT, '>', $cpout) or die;
+#
+# Magic number, in anticipation of being able to load these
+# files dynamically...
+#
+print CPOUT pack("VV", 0x8fad232b, 0x9c295319);
+
+# Header fields available for future use...
+print CPOUT pack("VVVVVV", 0, 0, 0, 0, 0, 0);
+
+#
+# Self (shortname) uppercase table
+#
+for ($i = 0; $i < 256; $i++) {
+    $u = $tabx{$ucase{$xtab[$i]}};
+    $u = $i unless (defined($u));
+    print CPOUT pack("C", $u);
+}
+
+#
+# Unicode (longname) matching table
+#
 for ($i = 0; $i < 256; $i++) {
     if (!defined($xtab[$i])) {
 	$p0 = $p1 = 0xffff;
     } else {
 	$p0 = $xtab[$i];
-	$p1 = defined($altcase{$p0}) ? $altcase{$p0} : $p0;
+	if (defined($ucase{$p0})) {
+	    $p1 = $ucase{$p0};
+	} elsif (defined($lcase{$p0})) {
+	    $p1 = $lcase{$p0};
+	} elsif (defined($tcase{$p0})) {
+	    $p1 = $tcase{$p0};
+	} else {
+	    $p1 = $p0;
+	}
     }
     # Only the BMP is supported...
     $p0 = 0xffff if ($p0 > 0xffff);
author	H. Peter Anvin <hpa@zytor.com>	2008-06-09 12:21:05 -0700
committer	H. Peter Anvin <hpa@zytor.com>	2008-06-09 16:14:44 -0700
commit	e74dc5c074f683d6b8ab71f9434533a6c74bdf43 (patch)
tree	d8794c12d0a25df935f49c8da81bc9c26ca40f86 /codepage
parent	78de189bb80b89f63d9bcac8c26c69ab4f913c89 (diff)
download	syslinux-e74dc5c074f683d6b8ab71f9434533a6c74bdf43.tar.gz