/* Copyright (c) 2015, MariaDB Foundation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ #ifndef MY_FUNCTION_NAME #error MY_FUNCTION_NAME is not defined #endif #if defined(IS_MB3_CHAR) && !defined(IS_MB2_CHAR) #error IS_MB3_CHAR is defined, while IS_MB2_CHAR is not! #endif #if defined(IS_MB4_CHAR) && !defined(IS_MB3_CHAR) #error IS_MB4_CHAR is defined, while IS_MB3_CHAR is not! #endif #ifdef DEFINE_ASIAN_ROUTINES #define DEFINE_WELL_FORMED_CHAR_LENGTH #define DEFINE_CHARLEN #define DEFINE_NATIVE_TO_MB_VARLEN #endif #ifdef DEFINE_CHARLEN /** Returns length of the left-most character of a string. @param cs - charset with mbminlen==1 and mbmaxlen<=4 @param b - the beginning of the string @param e - the end of the string @return MY_CS_ILSEQ if a bad byte sequence was found @return MY_CS_TOOSMALL(N) if the string ended unexpectedly @return >0 if a valid character was found */ static int MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)), const uchar *b, const uchar *e) { DBUG_ASSERT(cs->mbminlen == 1); DBUG_ASSERT(cs->mbmaxlen <= 4); if (b >= e) return MY_CS_TOOSMALL; if ((uchar) b[0] < 128) return 1; /* Single byte ASCII character */ #ifdef IS_8BIT_CHAR if (IS_8BIT_CHAR(b[0])) { /* Single byte non-ASCII character, e.g. half width kana in sjis */ return 1; } #endif if (b + 2 > e) return MY_CS_TOOSMALLN(2); if (IS_MB2_CHAR(b[0], b[1])) return 2; /* Double byte character */ #ifdef IS_MB3_CHAR if (b + 3 > e) { #ifdef IS_MB_PREFIX2 if (!IS_MB_PREFIX2(b[0], b[1])) return MY_CS_ILSEQ; #endif return MY_CS_TOOSMALLN(3); } if (IS_MB3_CHAR(b[0], b[1], b[2])) return 3; /* Three-byte character */ #endif #ifdef IS_MB4_CHAR if (b + 4 > e) return MY_CS_TOOSMALLN(4); if (IS_MB4_CHAR(b[0], b[1], b[2], b[3])) return 4; /* Four-byte character */ #endif /* Wrong byte sequence */ return MY_CS_ILSEQ; } #endif /* DEFINE_CHARLEN */ #ifdef DEFINE_WELL_FORMED_CHAR_LENGTH /** Returns well formed length of a string measured in characters (rather than in bytes). Version for character sets that define IS_MB?_CHAR(), e.g. big5. */ static size_t MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)), const char *b, const char *e, size_t nchars, MY_STRCOPY_STATUS *status) { size_t nchars0= nchars; for ( ; b < e && nchars ; nchars--) { if ((uchar) b[0] < 128) { b++; /* Single byte ASCII character */ continue; } if (b + 2 <= e && IS_MB2_CHAR(b[0], b[1])) { b+= 2; /* Double byte character */ continue; } #ifdef IS_MB3_CHAR if (b + 3 <= e && IS_MB3_CHAR(b[0], b[1], b[2])) { b+= 3; /* Three-byte character */ continue; } #endif #ifdef IS_MB4_CHAR if (b + 4 <= e && IS_MB4_CHAR(b[0], b[1], b[2], b[3])) { b+= 4; /* Four-byte character */ continue; } #endif #ifdef IS_8BIT_CHAR if (IS_8BIT_CHAR(b[0])) { b++; /* Single byte non-ASCII character, e.g. half width kana in sjis */ continue; } #endif /* Wrong byte sequence */ status->m_source_end_pos= status->m_well_formed_error_pos= b; return nchars0 - nchars; } status->m_source_end_pos= b; status->m_well_formed_error_pos= NULL; return nchars0 - nchars; } #endif /* DEFINE_WELL_FORMED_CHAR_LENGTH */ #ifdef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN #ifndef CHARLEN #error CHARLEN is not defined #endif /** Returns well formed length of a string measured in characters (rather than in bytes). Version for character sets that define CHARLEN(), e.g. utf8mb3. CHARLEN(cs,b,e) must use the same return code convension that mb_wc() does: - a positive number in the range [1-mbmaxlen] if a valid single-byte or multi-byte character was found - MY_CS_ILSEQ (0) on a bad byte sequence - MY_CS_TOOSMALLxx if the incoming sequence is incomplete */ static size_t MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)), const char *b, const char *e, size_t nchars, MY_STRCOPY_STATUS *status) { size_t nchars0= nchars; int chlen; for ( ; nchars ; nchars--, b+= chlen) { if ((chlen= CHARLEN(cs, (uchar*) b, (uchar*) e)) <= 0) { status->m_well_formed_error_pos= b < e ? b : NULL; status->m_source_end_pos= b; return nchars0 - nchars; } } status->m_well_formed_error_pos= NULL; status->m_source_end_pos= b; return nchars0 - nchars; } #endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */ #ifdef DEFINE_NATIVE_TO_MB_VARLEN /* Write a native 2-byte character. If the full character does not fit, only the first byte is written. */ static inline int my_native_to_mb_fixed2(my_wc_t wc, uchar *s, uchar *e) { /* The caller must insure there is a space for at least one byte */ DBUG_ASSERT(s < e); s[0]= (uchar) (wc >> 8); if (s + 2 > e) return MY_CS_TOOSMALL2; s[1]= wc & 0xFF; return 2; } /* Write a native 3-byte character. If the full character does not fit, only the leading bytes are written. */ static inline int my_native_to_mb_fixed3(my_wc_t wc, uchar *s, uchar *e) { /* The caller must insure there is a space for at least one byte */ DBUG_ASSERT(s < e); s[0]= (uchar) (wc >> 16); if (s + 2 > e) return MY_CS_TOOSMALL2; s[1]= (wc >> 8) & 0xFF; if (s + 3 > e) return MY_CS_TOOSMALL3; s[2]= wc & 0xFF; return 3; } /* Write a native 1-byte or 2-byte or 3-byte character. */ static int MY_FUNCTION_NAME(native_to_mb)(CHARSET_INFO *cs __attribute__((unused)), my_wc_t wc, uchar *s, uchar *e) { if (s >= e) return MY_CS_TOOSMALL; if ((int) wc <= 0xFF) { s[0]= (uchar) wc; return 1; } #ifdef IS_MB3_HEAD if (wc > 0xFFFF) return my_native_to_mb_fixed3(wc, s, e); #endif return my_native_to_mb_fixed2(wc, s, e); } #endif /* DEFINE_NATIVE_TO_MB_VARLEN */ #undef MY_FUNCTION_NAME