From c512ce4f7f4a9bd0f491f91cb5a15fcb65ee37d9 Mon Sep 17 00:00:00 2001
From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Fri, 29 Dec 2000 07:08:32 +0000
Subject: (Retracted by #8264)  Externally: join() was still quite
 UTF-8-unaware. Internally: sv_catsv() wasn't quite okay on UTF-8; it assumed
 that the only cases to care about are byte+byte and byte+character.

TODO: See how well pp_concat() could be implemented in terms
of sv_catsv().

p4raw-id: //depot/perl@8248
---
 utf8.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'utf8.h')

diff --git a/utf8.h b/utf8.h
index 26ef7236ee..e9598b8759 100644
--- a/utf8.h
+++ b/utf8.h
@@ -62,15 +62,18 @@ END_EXTERN_C
 
 #define UTF8_QUAD_MAX	UINT64_C(0x1000000000)
 
-#define UTF8_IS_ASCII(c) 		((c) <  0x80)
-#define UTF8_IS_START(c)		((c) >= 0xc0 && ((c) <= 0xfd))
-#define UTF8_IS_CONTINUATION(c)		((c) >= 0x80 && ((c) <= 0xbf))
-#define UTF8_IS_CONTINUED(c) 		((c) &  0x80)
+#define UTF8_IS_ASCII(c) 		(((U8)(c)) <  0x80)	/* high bit clear */
+#define UTF8_IS_START(c)		(((U8)(c)) >= 0xc0 && (((U8)(c)) <= 0xfd))	/* 0xc0..0xfd; c is evaluated twice */
+#define UTF8_IS_CONTINUATION(c)		(((U8)(c)) >= 0x80 && (((U8)(c)) <= 0xbf))	/* 0x80..0xbf; c is evaluated twice */
+#define UTF8_IS_CONTINUED(c) 		(((U8)(c)) &  0x80)	/* high bit set; (c) is parenthesized so the U8 cast covers the whole argument */
 
-#define UTF8_CONTINUATION_MASK		0x3f
+#define UTF8_CONTINUATION_MASK		((U8)0x3f)	/* low six bits; applied to the new byte in UTF8_ACCUMULATE */
 #define UTF8_ACCUMULATION_SHIFT		6
 #define UTF8_ACCUMULATE(old, new)	((old) << UTF8_ACCUMULATION_SHIFT | ((new) & UTF8_CONTINUATION_MASK))
 
+#define UTF8_EIGHT_BIT_HI(c)	( (((U8)(c))>>6)      |0xc0)	/* first byte: top two bits of c, tagged 0xc0 */
+#define UTF8_EIGHT_BIT_LO(c)	( (((U8)(c))&0x3f)    |0x80)	/* second byte: low six bits of c (no shift), tagged 0x80 */
+
 #ifdef HAS_QUAD
 #define UNISKIP(uv) ( (uv) < 0x80           ? 1 : \
 		      (uv) < 0x800          ? 2 : \
-- 
cgit v1.2.1