summary | refs | log | tree | commit | diff
path: root/utf8.h
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2013-03-16 22:41:15 -0600
committer: Karl Williamson <public@khwilliamson.com> 2013-08-29 09:56:03 -0600
commit: 155d27387eb109415eee4636d05bb5fe666b84c2 (patch)
tree: b8daa257df84e68aac2e525a7b47f04f08486c31 /utf8.h
parent: f1658f694f23e5d9fc5c8762b40b2d30ae7ea07b (diff)
download: perl-155d27387eb109415eee4636d05bb5fe666b84c2.tar.gz
Fix EBCDIC bugs in UTF8_ACCUMULATE and utf8.c
Diffstat (limited to 'utf8.h')
-rw-r--r-- utf8.h 12
1 files changed, 8 insertions, 4 deletions
diff --git a/utf8.h b/utf8.h
index 4fc513b7da..4037a6a501 100644
--- a/utf8.h
+++ b/utf8.h
@@ -289,9 +289,13 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
/* Adds a UTF8 continuation byte 'new' of information to a running total code
* point 'old' of all the continuation bytes so far. This is designed to be
- * used in a loop to convert from UTF-8 to the code point represented */
-#define UTF8_ACCUMULATE(old, new) (((old) << UTF_ACCUMULATION_SHIFT) \
- | (((U8)new) & UTF_CONTINUATION_MASK))
+ * used in a loop to convert from UTF-8 to the code point represented. Note
+ * that this is asymmetric on EBCDIC platforms, in that the 'new' parameter is
+ * the UTF-EBCDIC byte, whereas the 'old' parameter is a Unicode (not EBCDIC)
+ * code point in process of being generated */
+#define UTF8_ACCUMULATE(old, new) (((old) << UTF_ACCUMULATION_SHIFT) \
+ | ((NATIVE_UTF8_TO_I8((U8)new)) \
+ & UTF_CONTINUATION_MASK))
/* This works in the face of malformed UTF-8. */
#define UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, e) (UTF8_IS_DOWNGRADEABLE_START(*s) \
@@ -314,7 +318,7 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
* downgradable */
#define TWO_BYTE_UTF8_TO_NATIVE(HI, LO) \
UNI_TO_NATIVE(UTF8_ACCUMULATE((NATIVE_UTF8_TO_I8(HI) & UTF_START_MASK(2)), \
- NATIVE_UTF8_TO_I8(LO)))
+ (LO)))
/* Should never be used, and be deprecated */
#define TWO_BYTE_UTF8_TO_UNI(HI, LO) NATIVE_TO_UNI(TWO_BYTE_UTF8_TO_NATIVE(HI, LO))