summaryrefslogtreecommitdiff
path: root/utf8.h
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2013-02-17 12:46:05 -0700
committer: Karl Williamson <public@khwilliamson.com> 2013-08-29 09:55:52 -0600
commit: 94bb8c36d9e11dd4825e43d06f0832f01a7e5045 (patch)
tree: 199060dba004dca25ce3d6d0555baf8f5ebeeef9 /utf8.h
parent: 22bd7dd23a9a8ac6942486d524260b846313e61a (diff)
download: perl-94bb8c36d9e11dd4825e43d06f0832f01a7e5045.tar.gz
Add and use macro to return the native (e.g. EBCDIC) code point
The conversion from UTF-8 to a code point should generally yield the native code point. This adds a macro to do that, and changes the core's calls of the existing macro to use the new one instead. The old macro is retained for possible backwards compatibility, though it should probably be deprecated.
Diffstat (limited to 'utf8.h')
-rw-r--r-- utf8.h 11
1 files changed, 7 insertions, 4 deletions
diff --git a/utf8.h b/utf8.h
index 473864816e..bbbefdef70 100644
--- a/utf8.h
+++ b/utf8.h
@@ -302,14 +302,17 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
&& ( (e) - (s) > 1) \
&& UTF8_IS_CONTINUATION(*((s)+1)))
-/* Convert a two (not one) byte utf8 character to a unicode code point value.
+/* Convert a two (not one) byte utf8 character to a native code point value.
* Needs just one iteration of accumulate. Should not be used unless it is
* known that the two bytes are legal: 1) two-byte start, and 2) continuation.
* Note that the result can be larger than 255 if the input character is not
* downgradable */
-#define TWO_BYTE_UTF8_TO_UNI(HI, LO) \
- UTF8_ACCUMULATE((NATIVE_TO_UTF(HI) & UTF_START_MASK(2)), \
- NATIVE_TO_UTF(LO))
+#define TWO_BYTE_UTF8_TO_NATIVE(HI, LO) \
+ UNI_TO_NATIVE(UTF8_ACCUMULATE((NATIVE_UTF8_TO_I8(HI) & UTF_START_MASK(2)), \
+ NATIVE_UTF8_TO_I8(LO)))
+
+/* Should never be used, and should be deprecated */
+#define TWO_BYTE_UTF8_TO_UNI(HI, LO) NATIVE_TO_UNI(TWO_BYTE_UTF8_TO_NATIVE(HI, LO))
/* How many bytes in the UTF-8 encoded character whose first (perhaps only)
* byte is pointed to by 's' */