From c512ce4f7f4a9bd0f491f91cb5a15fcb65ee37d9 Mon Sep 17 00:00:00 2001
From: Jarkko Hietaniemi
Date: Fri, 29 Dec 2000 07:08:32 +0000
Subject: (Retracted by #8264) Externally: join() was still quite UTF-8-unaware.

Internally: sv_catsv() wasn't quite okay on UTF-8, it assumed that the only cases to care about are byte+byte and byte+character.
TODO: See how well pp_concat() could be implemented in terms of sv_catsv().

p4raw-id: //depot/perl@8248
---
 utf8.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'utf8.h')

diff --git a/utf8.h b/utf8.h
index 26ef7236ee..e9598b8759 100644
--- a/utf8.h
+++ b/utf8.h
@@ -62,15 +62,18 @@ END_EXTERN_C
 
 #define UTF8_QUAD_MAX UINT64_C(0x1000000000)
 
-#define UTF8_IS_ASCII(c) ((c) < 0x80)
-#define UTF8_IS_START(c) ((c) >= 0xc0 && ((c) <= 0xfd))
-#define UTF8_IS_CONTINUATION(c) ((c) >= 0x80 && ((c) <= 0xbf))
-#define UTF8_IS_CONTINUED(c) ((c) & 0x80)
+#define UTF8_IS_ASCII(c) (((U8)c) < 0x80)
+#define UTF8_IS_START(c) (((U8)c) >= 0xc0 && (((U8)c) <= 0xfd))
+#define UTF8_IS_CONTINUATION(c) (((U8)c) >= 0x80 && (((U8)c) <= 0xbf))
+#define UTF8_IS_CONTINUED(c) (((U8)c) & 0x80)
 
-#define UTF8_CONTINUATION_MASK 0x3f
+#define UTF8_CONTINUATION_MASK ((U8)0x3f)
 #define UTF8_ACCUMULATION_SHIFT 6
 #define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | ((new) & UTF8_CONTINUATION_MASK))
 
+#define UTF8_EIGHT_BIT_HI(c) ( (((U8)c)>>6) |0xc0)
+#define UTF8_EIGHT_BIT_LO(c) (((((U8)c)>>6)&0x3f)|0x80)
+
 #ifdef HAS_QUAD
 #define UNISKIP(uv) ( (uv) < 0x80 ? 1 : \
 (uv) < 0x800 ? 2 : \
-- 
cgit v1.2.1