summaryrefslogtreecommitdiff
path: root/strings/ctype-mb.inl
diff options
context:
space:
mode:
Diffstat (limited to 'strings/ctype-mb.inl')
-rw-r--r--strings/ctype-mb.inl263
1 files changed, 263 insertions, 0 deletions
diff --git a/strings/ctype-mb.inl b/strings/ctype-mb.inl
new file mode 100644
index 00000000000..6cde31a34ad
--- /dev/null
+++ b/strings/ctype-mb.inl
@@ -0,0 +1,263 @@
+/*
+ Copyright (c) 2015, MariaDB Foundation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+
+#if !defined(MY_FUNCTION_NAME)
+#error MY_FUNCTION_NAME is not defined
+#endif
+/* IS_MB3_CHAR requires IS_MB2_CHAR, and IS_MB4_CHAR requires IS_MB3_CHAR */
+#if !defined(IS_MB2_CHAR) && defined(IS_MB3_CHAR)
+#error IS_MB3_CHAR is defined, while IS_MB2_CHAR is not!
+#endif
+
+#if !defined(IS_MB3_CHAR) && defined(IS_MB4_CHAR)
+#error IS_MB4_CHAR is defined, while IS_MB3_CHAR is not!
+#endif
+
+/* DEFINE_ASIAN_ROUTINES is shorthand for enabling the three routines below */
+#if defined(DEFINE_ASIAN_ROUTINES)
+#define DEFINE_CHARLEN
+#define DEFINE_WELL_FORMED_CHAR_LENGTH
+#define DEFINE_NATIVE_TO_MB_VARLEN
+#endif
+
+
+#ifdef DEFINE_CHARLEN
+/**
+  Determine the byte length of the first character in [b, e).
+  @param cs - charset, asserted to have mbminlen==1 and mbmaxlen<=4
+  @param b - the beginning of the string
+  @param e - the end of the string
+  @return 1..4 if a valid character starts at b
+  @return MY_CS_TOOSMALL if the string is empty (b >= e)
+  @return MY_CS_TOOSMALLN(n) if fewer than n bytes remain when an n-byte character is tried
+  @return MY_CS_ILSEQ if the bytes at b do not form a character
+*/
+static int
+MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)),
+                          const uchar *b, const uchar *e)
+{
+  DBUG_ASSERT(cs->mbminlen == 1);
+  DBUG_ASSERT(cs->mbmaxlen <= 4);
+
+  if (e <= b)
+    return MY_CS_TOOSMALL;
+  /* One-byte characters: ASCII, then charset-specific 8-bit ones */
+  if (b[0] <= 0x7F)
+    return 1;
+#ifdef IS_8BIT_CHAR
+  if (IS_8BIT_CHAR(b[0]))
+    return 1;                   /* e.g. half width kana in sjis */
+#endif
+
+  /* Two-byte characters */
+  if (e - b < 2)
+    return MY_CS_TOOSMALLN(2);
+  if (IS_MB2_CHAR(b[0], b[1]))
+    return 2;
+
+#ifdef IS_MB3_CHAR
+  /* Three-byte characters */
+  if (e - b < 3)
+  {
+#ifdef IS_MB_PREFIX2
+    /* Exactly two bytes left: fail now if IS_MB_PREFIX2() rejects them */
+    if (!IS_MB_PREFIX2(b[0], b[1]))
+      return MY_CS_ILSEQ;
+#endif
+    return MY_CS_TOOSMALLN(3);
+  }
+  if (IS_MB3_CHAR(b[0], b[1], b[2]))
+    return 3;
+#endif
+
+#ifdef IS_MB4_CHAR
+  /* Four-byte characters */
+  if (e - b < 4)
+    return MY_CS_TOOSMALLN(4);
+  if (IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
+    return 4;
+#endif
+
+  return MY_CS_ILSEQ;           /* nothing matched */
+}
+#endif /* DEFINE_CHARLEN */
+
+
+#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH
+/**
+  Count the well formed characters at the start of a string,
+  measured in characters (rather than in bytes).
+  Variant for character sets that define IS_MB?_CHAR(), e.g. big5.
+
+  @param cs     - charset (not used)
+  @param b      - the beginning of the string
+  @param e      - the end of the string; scanning never goes past it
+  @param nchars - stop after this many characters
+  @param status - receives m_source_end_pos (where scanning stopped) and
+                  m_well_formed_error_pos (the bad position, or NULL)
+
+  @return the number of well formed characters scanned
+*/
+static size_t
+MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)),
+                                         const char *b, const char *e,
+                                         size_t nchars,
+                                         MY_STRCOPY_STATUS *status)
+{
+  size_t seen;
+
+  for (seen= 0; seen < nchars && b < e; seen++)
+  {
+    /*
+      Try the candidate widths in a fixed order; the first test that
+      accepts the bytes at b decides how far to advance.
+    */
+    if ((uchar) b[0] < 128)
+      b++;                      /* Single byte ASCII character */
+    else if (e - b >= 2 && IS_MB2_CHAR(b[0], b[1]))
+      b+= 2;                    /* Double byte character */
+#ifdef IS_MB3_CHAR
+    else if (e - b >= 3 && IS_MB3_CHAR(b[0], b[1], b[2]))
+      b+= 3;                    /* Three-byte character */
+#endif
+#ifdef IS_MB4_CHAR
+    else if (e - b >= 4 && IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
+      b+= 4;                    /* Four-byte character */
+#endif
+#ifdef IS_8BIT_CHAR
+    else if (IS_8BIT_CHAR(b[0]))
+      b++;                      /* e.g. half width kana in sjis */
+#endif
+    else
+    {
+      /* Wrong byte sequence: stop and report where it starts */
+      status->m_well_formed_error_pos= b;
+      status->m_source_end_pos= b;
+      return seen;
+    }
+  }
+
+  /* Either the input ended or nchars characters were scanned */
+  status->m_well_formed_error_pos= NULL;
+  status->m_source_end_pos= b;
+  return seen;
+}
+#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH */
+
+
+#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
+#if !defined(CHARLEN)
+#error CHARLEN is not defined
+#endif
+/**
+  Count well formed characters in a string, up to nchars of them.
+  Variant for character sets that provide CHARLEN(), e.g. utf8mb3.
+  CHARLEN(cs,b,e) must follow the return code convention of mb_wc():
+  - a positive number in the range [1-mbmaxlen] for a valid character
+  - MY_CS_ILSEQ (0) for a bad byte sequence
+  - MY_CS_TOOSMALLxx for an incomplete sequence
+  Fills status with the stop position and the error position (or NULL).
+*/
+static size_t
+MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)),
+                                         const char *b, const char *e,
+                                         size_t nchars,
+                                         MY_STRCOPY_STATUS *status)
+{
+  size_t seen;
+  for (seen= 0; seen < nchars; seen++)
+  {
+    int chlen= CHARLEN(cs, (uchar*) b, (uchar*) e);
+    if (chlen <= 0)             /* bad or incomplete sequence, or end of input */
+    {
+      status->m_source_end_pos= b;
+      status->m_well_formed_error_pos= (b < e) ? b : NULL;
+      return seen;
+    }
+    b+= chlen;
+  }
+  status->m_source_end_pos= b;
+  status->m_well_formed_error_pos= NULL;
+  return seen;
+}
+#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */
+
+
+#ifdef DEFINE_NATIVE_TO_MB_VARLEN
+/*
+  Store a native 2-byte character at s, high byte first.
+  The first byte is stored even when the second one does not fit before e.
+*/
+static inline int
+my_native_to_mb_fixed2(my_wc_t wc, uchar *s, uchar *e)
+{
+  DBUG_ASSERT(s < e);           /* the caller must ensure room for one byte */
+  *s++= (uchar) (wc >> 8);
+  /* s now points at the slot for the second byte */
+  if (s >= e)
+    return MY_CS_TOOSMALL2;
+  *s= (uchar) (wc & 0xFF);
+  return 2;
+}
+
+
+/*
+  Store a native 3-byte character at s, most significant byte first.
+  Bytes are stored one at a time, so the leading ones remain written
+  when the rest does not fit before e.
+*/
+static inline int
+my_native_to_mb_fixed3(my_wc_t wc, uchar *s, uchar *e)
+{
+  DBUG_ASSERT(s < e);           /* the caller must ensure room for one byte */
+  *s++= (uchar) (wc >> 16);
+  if (s >= e)
+    return MY_CS_TOOSMALL2;
+  *s++= (uchar) ((wc >> 8) & 0xFF);
+  if (s >= e)
+    return MY_CS_TOOSMALL3;
+  *s= (uchar) (wc & 0xFF);
+  return 3;
+}
+
+
+/*
+  Store a native character as 1 or 2 bytes, or as 3 bytes when
+  IS_MB3_HEAD is defined and wc > 0xFFFF.
+*/
+static int
+MY_FUNCTION_NAME(native_to_mb)(CHARSET_INFO *cs __attribute__((unused)),
+                               my_wc_t wc, uchar *s, uchar *e)
+{
+  if (e <= s)
+    return MY_CS_TOOSMALL;
+  if ((int) wc > 0xFF)  /* NOTE(review): signed compare; confirm for wc > INT_MAX */
+  {
+#ifdef IS_MB3_HEAD
+    if (wc > 0xFFFF)
+      return my_native_to_mb_fixed3(wc, s, e);
+#endif
+    return my_native_to_mb_fixed2(wc, s, e);
+  }
+  s[0]= (uchar) wc;
+  return 1;
+}
+#endif /* DEFINE_NATIVE_TO_MB_VARLEN */
+
+/* Drop the includer-supplied name macro (presumably so this file can be re-included) */
+#undef MY_FUNCTION_NAME