utfebcdic.h: Add comment

author: Karl Williamson <khw@cpan.org> 2014-12-31 16:49:35 -0700
committer: Karl Williamson <khw@cpan.org> 2015-03-05 22:13:14 -0700
commit: ff982d0036bfe69b869b03451f0561fa790d4378 (patch)
tree: 7ca3eda23d8d523c9f31d94fc425235cfa1d67b2 /utfebcdic.h
parent: b2a691af8242035dab97bd4c05b77ec3ad88955b (diff)
download: perl-ff982d0036bfe69b869b03451f0561fa790d4378.tar.gz
1 files changed, 14 insertions, 0 deletions
diff --git a/utfebcdic.h b/utfebcdic.h
index 23b4ae2014..a4f5fb5cb2 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -86,6 +86,20 @@
  *
  * tr16 also says that NEL and LF be swapped.  We don't do that.
  *
+ * Because of the way UTF-EBCDIC is constructed, the lowest 32 code points that
+ * aren't equivalent to ASCII characters or C1 controls form the set of
+ * continuation bytes; the remaining 64 non-ASCII, non-control code points form
+ * the potential start bytes, in order.  (However, the first 5 of these lead to
+ * malformed overlongs, so there really are only 59 start bytes.) Hence the
+ * UTF-EBCDIC for the smallest variant code point, 0xA0, will likely have 0x41
+ * as its continuation byte, provided 0x41 isn't an ASCII or C1 equivalent.
+ * And its start byte will be the code point that is 37 (32+5) non-ASCII,
+ * non-control code points past it.  (0 - 3F are controls, and 40 is SPACE,
+ * leaving 41 as the first potentially available one.)  In contrast, on ASCII
+ * platforms, the first 64 (not 32) non-ASCII code points are the continuation
+ * bytes.  And the first 2 (not 5) potential start bytes form overlong
+ * malformed sequences.
+ *
  * EBCDIC characters above 0xFF are the same as Unicode in Perl's
  * implementation of all 3 encodings, so for those Step 1 is trivial.
  *
author	Karl Williamson <khw@cpan.org>	2014-12-31 16:49:35 -0700
committer	Karl Williamson <khw@cpan.org>	2015-03-05 22:13:14 -0700
commit	ff982d0036bfe69b869b03451f0561fa790d4378 (patch)
tree	7ca3eda23d8d523c9f31d94fc425235cfa1d67b2 /utfebcdic.h
parent	b2a691af8242035dab97bd4c05b77ec3ad88955b (diff)
download	perl-ff982d0036bfe69b869b03451f0561fa790d4378.tar.gz