summary refs log tree commit diff
path: root/utf8.h
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2000-12-29 07:08:32 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2000-12-29 07:08:32 +0000
commit c512ce4f7f4a9bd0f491f91cb5a15fcb65ee37d9 (patch)
tree faecae30ec45ee9fe9180f727c600de58a3a50d2 /utf8.h
parent a7514e1ec900a5b60cda6ed25728476973d26ae0 (diff)
download perl-c512ce4f7f4a9bd0f491f91cb5a15fcb65ee37d9.tar.gz
(Retracted by #8264) Externally: join() was still quite UTF-8-unaware.
Internally: sv_catsv() wasn't quite okay on UTF-8, it assumed that the only cases to care about are byte+byte and byte+character. TODO: See how well pp_concat() could be implemented in terms of sv_catsv(). p4raw-id: //depot/perl@8248
Diffstat (limited to 'utf8.h')
-rw-r--r-- utf8.h 13
1 files changed, 8 insertions, 5 deletions
diff --git a/utf8.h b/utf8.h
index 26ef7236ee..e9598b8759 100644
--- a/utf8.h
+++ b/utf8.h
@@ -62,15 +62,18 @@ END_EXTERN_C
#define UTF8_QUAD_MAX UINT64_C(0x1000000000)
-#define UTF8_IS_ASCII(c) ((c) < 0x80)
-#define UTF8_IS_START(c) ((c) >= 0xc0 && ((c) <= 0xfd))
-#define UTF8_IS_CONTINUATION(c) ((c) >= 0x80 && ((c) <= 0xbf))
-#define UTF8_IS_CONTINUED(c) ((c) & 0x80)
+#define UTF8_IS_ASCII(c) (((U8)(c)) < 0x80) /* true when the byte is below 0x80 */
+#define UTF8_IS_START(c) (((U8)(c)) >= 0xc0 && (((U8)(c)) <= 0xfd)) /* true for bytes 0xc0..0xfd; evaluates c twice */
+#define UTF8_IS_CONTINUATION(c) (((U8)(c)) >= 0x80 && (((U8)(c)) <= 0xbf)) /* true for bytes 0x80..0xbf; evaluates c twice */
+#define UTF8_IS_CONTINUED(c) (((U8)(c)) & 0x80) /* nonzero when the high bit is set */
-#define UTF8_CONTINUATION_MASK 0x3f
+#define UTF8_CONTINUATION_MASK ((U8)0x3f)
#define UTF8_ACCUMULATION_SHIFT 6
#define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | ((new) & UTF8_CONTINUATION_MASK))
+#define UTF8_EIGHT_BIT_HI(c) ( (((U8)(c))>>6) |0xc0) /* first byte of the two-byte form: top two bits of c OR'ed with 0xc0 */
+#define UTF8_EIGHT_BIT_LO(c) ((((U8)(c))&0x3f)|0x80) /* second byte: low six bits of c OR'ed with 0x80 (no shift, or the low bits are lost) */
+
#ifdef HAS_QUAD
#define UNISKIP(uv) ( (uv) < 0x80 ? 1 : \
(uv) < 0x800 ? 2 : \