summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xcodepage/cptable.pl40
-rw-r--r--core/include/codepage.h3
2 files changed, 37 insertions, 6 deletions
diff --git a/codepage/cptable.pl b/codepage/cptable.pl
index 05cfc3eb..e29cf006 100755
--- a/codepage/cptable.pl
+++ b/codepage/cptable.pl
@@ -82,7 +82,7 @@ open(CPOUT, '>', $cpout)
# Magic number, in anticipation of being able to load these
# files dynamically...
#
-print CPOUT pack("VV", 0x8fad232b, 0x9c295319);
+print CPOUT pack("VV", 0x58a8b3d4, 0x51d21eb1);
# Header fields available for future use...
print CPOUT pack("VVVVVV", 0, 0, 0, 0, 0, 0);
@@ -97,6 +97,7 @@ print CPOUT pack("VVVVVV", 0, 0, 0, 0, 0, 0);
# ... where @ytab is console codepage -> Unicode and
# %tabx is Unicode -> filesystem codepage.
#
+@uctab = (undef) x 256;
for ($i = 0; $i < 256; $i++) {
$uuc = $ucase{$ytab[$i]}; # Unicode upper case
if (defined($tabx{$uuc})) {
@@ -106,15 +107,44 @@ for ($i = 0; $i < 256; $i++) {
# Upper case equivalent stripped of accents
$u = $tabx{${$decomp{$uuc}}[0]};
} else {
- # No equivalent at all found. Set this to zero, which should
- # prevent shortname matching altogether (still making longname
- # matching possible, of course.)
- $u = 0;
+ # No equivalent at all found. Assume it is a lower-case-only
+ # character, like greek alpha in CP437.
+ $u = $i;
}
+ $uctab[$i] = $u;
print CPOUT pack("C", $u);
}
#
+# Self (shortname) lowercase table.
+# This depends both on the console codepage and the filesystem codepage;
+# the logical transcoding operation is:
+#
+# $taby{$lcase{$xtab[$i]}}
+#
+# ... where @xtab is filesystem codepage -> Unicode and
+# %taby is Unicode -> console codepage.
+#
+@lctab = (undef) x 256;		# One entry per byte value; inverse of @uctab
+for ($i = 0; $i < 256; $i++) {
+    $llc = $lcase{$xtab[$i]};	# Unicode lower case
+    if (defined($l = $taby{$llc}) && $uctab[$l] == $i) {
+        # Straight-forward conversion that round-trips through @uctab
+    } elsif (defined($l = $taby{${$decomp{$llc}}[0]}) && $uctab[$l] == $i) {
+        # Lower case equivalent stripped of accents; %taby, as above, since $l indexes @uctab
+    } else {
+        # No equivalent at all found.  Find *anything* that matches the
+        # bijection criterion, i.e. the first $l with $uctab[$l] == $i...
+        for ($l = 0; $l < 256; $l++) {
+            last if ($uctab[$l] == $i);
+        }
+        $l = $i if ($l == 256);	# Nothing upper-cases to $i: map it to itself
+    }
+    $lctab[$i] = $l;
+    print CPOUT pack("C", $l);	# One unsigned byte per table entry
+}
+
+#
# Unicode (longname) matching table.
# This only depends on the console codepage.
#
diff --git a/core/include/codepage.h b/core/include/codepage.h
index e2f78aae..a24d90f5 100644
--- a/core/include/codepage.h
+++ b/core/include/codepage.h
@@ -6,13 +6,14 @@
#include <stdint.h>
-#define CODEPAGE_MAGIC UINT64_C(0x9c2953198fad232b)
+#define CODEPAGE_MAGIC UINT64_C(0x51d21eb158a8b3d4)
struct codepage {
uint64_t magic;
uint32_t reserved[6];
uint8_t upper[256]; /* Codepage upper case table */
+ uint8_t lower[256]; /* Codepage lower case table */
/*
* The primary Unicode match is the same case, i.e. A -> A,