summary refs log tree commit diff
path: root/utfebcdic.h
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2010-11-12 09:05:19 -0700
committer Father Chrysostomos <sprout@cpan.org> 2010-11-22 13:32:51 -0800
commit d02f4dad561ba81f934560f8eab051147a45e09e (patch)
tree 55beb313f848843ad63aca8c05d7162b04686b82 /utfebcdic.h
parent 6d8e7a01cde4803f9769fb51a041fbd8a839e0b2 (diff)
download perl-d02f4dad561ba81f934560f8eab051147a45e09e.tar.gz
PL_fold wrong for EBCDIC platforms.
The PL_fold table on EBCDIC platforms maps only the ASCII-subrange characters correctly, not the full native Latin1 range. To fix this, I moved the table to utfebcdic.h for EBCDIC platforms and split it into three tables, one for each of the code pages known to Perl. There is no EBCDIC platform available to test on, so I hacked together a program from existing code that does EBCDIC transforms. I ran it in ASCII mode and verified that the generated table was identical to the Latin1 table I had previously constructed by hand and extensively tested. I then ran it on each of the three EBCDIC transforms and verified that each result matched the original table in the places I knew were correct: all the ASCII alphabetics, the controls, and a few other code points. So these tables are at least as correct as the existing one, as they are identical to it for [A-Z] and [a-z].
Diffstat (limited to 'utfebcdic.h')
-rw-r--r-- utfebcdic.h 113
1 files changed, 113 insertions, 0 deletions
diff --git a/utfebcdic.h b/utfebcdic.h
index c3fe6036ee..2fb5b9e4e2 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -317,6 +317,42 @@ EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) *
0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
};
+
+EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC (IBM-1047) case folding
+ table, indexed by native code point: 'A' => 'a'; 'a' => 'A' */
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 98, 99, 100, 101, 102, 103,
+ 104, 105, 74, 75, 76, 77, 78, 79,
+ 80, 113, 114, 115, 116, 117, 118, 119,
+ 120, 89, 90, 91, 92, 93, 94, 95,
+ 96, 97, 66, 67, 68, 69, 70, 71,
+ 72, 73, 106, 107, 108, 109, 110, 111,
+ 128, 81, 82, 83, 84, 85, 86, 87,
+ 88, 121, 122, 123, 124, 125, 126, 127,
+ 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
+ 'H', 'I', 138, 139, 172, 186, 174, 143,
+ 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+ 'Q', 'R', 154, 155, 158, 157, 156, 159,
+ 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
+ 'Y', 'Z', 170, 171, 140, 173, 142, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 141, 187, 188, 189, 190, 191,
+ 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
+ 'h', 'i', 202, 235, 236, 237, 238, 239,
+ 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
+ 'q', 'r', 218, 251, 252, 253, 254, 223,
+ 224, 225, 's', 't', 'u', 'v', 'w', 'x',
+ 'y', 'z', 234, 203, 204, 205, 206, 207,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 219, 220, 221, 222, 255
+};
#endif /* 1047 */
#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
@@ -361,6 +397,42 @@ EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (POSIX-BC) to ASCII (ISO8859-1) */
0xD9, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0x7B, 0xDC, 0x7D, 0xDA, 0x7E
};
+
+EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC (POSIX-BC) case folding
+ table, indexed by native code point: 'A' => 'a'; 'a' => 'A' */
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 98, 99, 100, 101, 102, 103,
+ 104, 105, 74, 75, 76, 77, 78, 79,
+ 80, 113, 114, 115, 116, 117, 118, 119,
+ 120, 89, 90, 91, 92, 93, 94, 95,
+ 96, 97, 66, 67, 68, 69, 70, 71,
+ 72, 73, 106, 107, 108, 109, 110, 111,
+ 128, 81, 82, 83, 84, 85, 86, 87,
+ 88, 121, 122, 123, 124, 125, 126, 127,
+ 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
+ 'H', 'I', 138, 139, 172, 173, 174, 143,
+ 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+ 'Q', 'R', 154, 155, 158, 157, 156, 159,
+ 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
+ 'Y', 'Z', 170, 171, 140, 141, 142, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 186, 187, 188, 189, 190, 191,
+ 224, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
+ 'h', 'i', 202, 235, 236, 237, 238, 239,
+ 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
+ 'q', 'r', 218, 221, 252, 219, 254, 223,
+ 192, 225, 's', 't', 'u', 'v', 'w', 'x',
+ 'y', 'z', 234, 203, 204, 205, 206, 207,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 220, 253, 222, 255
+};
#endif /* POSIX-BC */
#if '^' == 176 /* if defined(??) (OS/400?) 037 */
@@ -406,6 +478,42 @@ EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-037) to ASCII (ISO8859-1) */
0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
};
+
+EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC (IBM-037) case folding
+ table, indexed by native code point: 'A' => 'a'; 'a' => 'A' */
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 98, 99, 100, 101, 102, 103,
+ 104, 105, 74, 75, 76, 77, 78, 79,
+ 80, 113, 114, 115, 116, 117, 118, 119,
+ 120, 89, 90, 91, 92, 93, 94, 95,
+ 96, 97, 66, 67, 68, 69, 70, 71,
+ 72, 73, 106, 107, 108, 109, 110, 111,
+ 128, 81, 82, 83, 84, 85, 86, 87,
+ 88, 121, 122, 123, 124, 125, 126, 127,
+ 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
+ 'H', 'I', 138, 139, 172, 173, 174, 143,
+ 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+ 'Q', 'R', 154, 155, 158, 157, 156, 159,
+ 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
+ 'Y', 'Z', 170, 171, 140, 141, 142, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 186, 187, 188, 189, 190, 191,
+ 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
+ 'h', 'i', 202, 235, 236, 237, 238, 239,
+ 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
+ 'q', 'r', 218, 251, 252, 253, 254, 223,
+ 224, 225, 's', 't', 'u', 'v', 'w', 'x',
+ 'y', 'z', 234, 203, 204, 205, 206, 207,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 219, 220, 221, 222, 255
+};
#endif /* 037 */
#else
@@ -414,8 +522,13 @@ EXTCONST unsigned char PL_e2utf[];
EXTCONST unsigned char PL_utf2e[];
EXTCONST unsigned char PL_e2a[];
EXTCONST unsigned char PL_a2e[];
+EXTCONST unsigned char PL_fold[];
#endif
+/* Since the EBCDIC code pages are isomorphic to Latin1, the Latin1 fold table
+ * is merely a duplicate of PL_fold, so just point at it */
+EXTCONST unsigned char * PL_fold_latin1 = PL_fold;
+
END_EXTERN_C
/* EBCDIC-happy ways of converting native code to UTF-8 */