/* Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
   Copyright (c) 2009, 2020, MariaDB Corporation.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */

/* Some useful string utility functions used by the MySQL server */

#include "mariadb.h"
#include "sql_priv.h"
#include "unireg.h"
#include "strfunc.h"
#include "sql_class.h"
#include "typelib.h"                            // TYPELIB
#include "m_ctype.h"                            // my_charset_latin1
#include "mysqld.h"                             // system_charset_info

/*
  Return bitmap for strings used in a set

  SYNOPSIS
    find_set()
    lib          Strings in set
    str          Strings of set-strings separated by ','
    length       Length of str in bytes
    cs           Character set/collation used to split and compare the
                 elements. May be NULL: then the string is scanned byte
                 by byte and elements are looked up with find_type()
                 (exact match only).
    err_pos      If error, set to point to start of wrong set string.
                 Always reset to 0 on entry.
    err_len      If error, set to the length of wrong set string.
                 Always reset to 0 on entry.
    set_warning  Set to 1 if some string in set couldn't be used.
                 It is never cleared here, so the caller has to
                 initialize it.

  NOTE
    Trailing spaces of str (as measured by lengthsp() of cs, or of
    latin1 when cs is NULL) are ignored.

    An unknown element is reported through err_pos/err_len only while
    *err_len is still 0, i.e. an earlier unknown element of length 0
    is replaced by a later unknown element.

    Elements found at a position above sizeof(longlong) * 8 are
    silently ignored as they can't be represented in the bitmap.

  RETURN
    bitmap of all sets found in str: bit (N-1) is set if the element at
    position N (1-based) of lib was found.
*/

static const char field_separator=',';

ulonglong find_set(const TYPELIB *lib,
                   const char *str, size_t length, CHARSET_INFO *cs,
                   char **err_pos, uint *err_len, bool *set_warning)
{
  CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
  const char *end= str + strip->lengthsp(str, length);
  ulonglong found= 0;
  *err_pos= 0;                                  // No error yet
  *err_len= 0;
  if (str != end)
  {
    const char *start= str;
    for (;;)
    {
      const char *pos= start;
      uint var_len;
      int mblen= 1;                             // Byte length of separator

      if (cs && cs->mbminlen > 1)
      {
        /* The separator itself is multi-byte: decode char by char */
        for ( ; pos < end; pos+= mblen)
        {
          my_wc_t wc;
          if ((mblen= cs->mb_wc(&wc, (const uchar *) pos,
                                (const uchar *) end)) < 1)
            mblen= 1; // Not to hang on a wrong multibyte sequence
          else if (wc == (my_wc_t) field_separator)
            break;
        }
      }
      else
        for (; pos != end && *pos != field_separator; pos++) ;
      var_len= (uint) (pos - start);
      uint find= cs ? find_type2(lib, start, var_len, cs) :
                      find_type(lib, start, var_len, (bool) 0);
      if (unlikely(!find))
      {
        if (*err_len == 0)
        {
          // report the first error with length > 0
          *err_pos= (char*) start;
          *err_len= var_len;
          *set_warning= 1;
        }
      }
      else if (find <= sizeof(longlong) * 8)
        found|= 1ULL << (find - 1);
      if (pos >= end)
        break;
      start= pos + mblen;                       // Skip the separator
    }
  }
  return found;
}

/*
  Function to find a string in a TYPELIB
  (similar to find_type() of mysys/typelib.c)

  SYNOPSIS
   find_type()
   lib                  TYPELIB (struct of pointer to values + count).
                        type_names must be terminated by a NULL pointer.
   find                 String to find
   length               Length of string to find
   part_match           Allow part matching of value

  NOTE
   Characters are compared after my_toupper() in system_charset_info.
   An exact match is returned at once. A prefix match is only
   returned if part_match is set and exactly one value has 'find'
   as prefix.

 RETURN
  0 error
  > 0 position in TYPELIB->type_names +1
*/

uint find_type(const TYPELIB *lib, const char *find, size_t length,
               bool part_match)
{
  uint found_count=0, found_pos=0;
  const char *end= find+length;
  const char *i;
  const char *j;
  for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
  {
    for (i=find ; i != end &&
           my_toupper(system_charset_info,*i) ==
           my_toupper(system_charset_info,*j) ; i++, j++) ;
    if (i == end)
    {
      if (! *j)
        return(pos);                            // Exact match
      found_count++;                            // 'find' is a prefix
      found_pos= pos;
    }
  }
  return(found_count == 1 && part_match ? found_pos : 0);
}


/*
  Find a string in a list of strings according to collation

  SYNOPSIS
   find_type2()
   lib                  TYPELIB (struct of pointer to values + count)
   x                    String to find
   length               String length
   cs                   Character set + collation to use for comparison

  NOTES
   Each value is compared with cs->strnncoll() using the length stored
   in type_lengths. The first value that compares equal wins.

  RETURN
    0   No matching value (also when the typelib has no values)
    >0  Offset+1 in typelib for matched string
*/

uint find_type2(const TYPELIB *typelib, const char *x, size_t length,
                CHARSET_INFO *cs)
{
  int pos;
  const char *j;
  DBUG_ENTER("find_type2");
  DBUG_PRINT("enter",("x: '%.*s'  lib: %p", (int)length, x, typelib));

  if (!typelib->count)
  {
    DBUG_PRINT("exit",("no count"));
    DBUG_RETURN(0);
  }

  for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
  {
    if (!cs->strnncoll(x, length, j, typelib->type_lengths[pos]))
      DBUG_RETURN(pos+1);
  }
  DBUG_PRINT("exit",("Couldn't find type"));
  DBUG_RETURN(0);
} /* find_type2 */


/*
  Un-hex all elements in a typelib

  SYNOPSIS
   unhex_type2()
   interval       TYPELIB (struct of pointer to values + lengths + count)

  NOTES
   The conversion is done in place: every pair of hex digits is
   replaced by one byte and type_lengths[] is halved.
   No terminator is written at the new end of a value, so the new
   length has to be taken from type_lengths[].
   A trailing hex digit without a partner is ignored.

  RETURN
    N/A
*/

void unhex_type2(TYPELIB *interval)
{
  for (uint pos= 0; pos < interval->count; pos++)
  {
    char *from, *to;
    /*
      Require a complete pair before decoding. Testing only from[0] would,
      for a value of odd length, consume the terminator as second digit
      and then continue reading behind the end of the value.
    */
    for (from= to= (char*) interval->type_names[pos]; from[0] && from[1]; )
    {
      /*
        Note, hexchar_to_int(*from++) doesn't work on some compilers,
        e.g. IRIX. Looks like a compiler bug in inline functions in
        combination with arguments that have a side effect. So, let's use
        from[0] and from[1] and increment 'from' by two later.
      */

      *to++= (char) ((hexchar_to_int(from[0]) << 4) +
                     hexchar_to_int(from[1]));
      from+= 2;
    }
    interval->type_lengths[pos] /= 2;
  }
}


/*
  Check if the first word in a string is one of the ones in TYPELIB

  SYNOPSIS
    check_word()
    lib         TYPELIB
    val         String to check
    end         End of input
    end_of_word Store value of last used byte here if we found word

  NOTES
    The word is the leading run of characters for which
    my_isalpha() in latin1 is true. It is looked up with find_type()
    with part matching enabled, so a unique prefix is accepted.

  RETURN
    0    No matching value. end_of_word is not changed.
    > 0  lib->type_names[#-1] matched
         end_of_word will point to separator character/end in 'val'
*/

uint check_word(TYPELIB *lib, const char *val, const char *end,
                const char **end_of_word)
{
  uint res;
  const char *ptr;

  /* Find end of word */
  for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++)
    ;
  if ((res= find_type(lib, val, (uint) (ptr - val), 1)) > 0)
    *end_of_word= ptr;
  return res;
}


/*
  Converts a string between character sets

  SYNOPSIS
    strconvert()
    from_cs       source character set
    from          source string. Conversion stops at the first '\0'
                  character or after from_length bytes, whichever
                  comes first.
    from_length   length of the source string in bytes
    to_cs         destination character set
    to            destination buffer
    to_length     destination buffer length, including the room for
                  the terminating '\0'
    errors        out: number of characters that were replaced by '?'
                  as they were either an illegal sequence in from_cs or
                  not representable in to_cs

  NOTES
    'to' is always terminated with a '\0' character, unless to_length
    is 0, in which case nothing is written to 'to'.
    If there is no enough space to convert whole string,
    only prefix is converted, and terminated with '\0'.

  RETURN VALUES
    result string length
*/


uint strconvert(CHARSET_INFO *from_cs, const char *from, size_t from_length,
                CHARSET_INFO *to_cs, char *to, size_t to_length, uint *errors)
{
  /*
    Without any room there is no place for the terminator either.
    Bail out before 'to + to_length - 1' would point before the buffer.
  */
  if (!to_length)
  {
    *errors= 0;
    return 0;
  }

  int cnvres;
  my_wc_t wc;
  char *to_start= to;
  uchar *to_end= (uchar*) to + to_length - 1;   // Keep room for '\0'
  const uchar *from_end= (const uchar*) from + from_length;
  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
  uint error_count= 0;

  while (1)
  {
    if ((cnvres= (*mb_wc)(from_cs, &wc,
                          (uchar*) from, from_end)) > 0)
    {
      if (!wc)
        break;                                  // End of source string
      from+= cnvres;
    }
    else if (cnvres == MY_CS_ILSEQ)
    {
      /* Skip one byte of the bad sequence and emit '?' instead */
      error_count++;
      from++;
      wc= '?';
    }
    else
      break; // Impossible char.

outp:

    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
      to+= cnvres;
    else if (cnvres == MY_CS_ILUNI && wc != '?')
    {
      /* Not representable in to_cs: retry once with '?' */
      error_count++;
      wc= '?';
      goto outp;
    }
    else
      break;                                    // Destination is full
  }
  *to= '\0';
  *errors= error_count;
  return (uint32) (to - to_start);

}

/*
  Searches for a LEX_STRING in an LEX_STRING array.

  SYNOPSIS
    find_string_in_array()
      haystack  The array
      needle    The string to search for
      cs        Collation used for the comparison (strnncollsp())

  NOTE
    The last LEX_STRING in the array should have str member set to NULL

  RETURN VALUES
    -1   Not found
    >=0  Ordinal position
*/

int find_string_in_array(LEX_CSTRING * const haystack, LEX_CSTRING * const needle,
                         CHARSET_INFO * const cs)
{
  const LEX_CSTRING *pos;
  for (pos= haystack; pos->str; pos++)
    if (!cs->strnncollsp(pos->str, pos->length,
                         needle->str, needle->length))
    {
      return (int)(pos - haystack);
    }
  return -1;
}


/*
  Convert a set bitmap to a comma separated list of names

  SYNOPSIS
    set_to_string()
      thd       Thread handle; the result is allocated with thd->strmake()
      result    out: the resulting string and its length. May be NULL
                if the caller only needs the returned pointer.
      set       Bitmap; bit N selects lib[N]
      lib       Array of names. It must have an entry for every bit
                that is set in 'set'.

  RETURN VALUES
    The names of all set bits separated by ',' (no trailing comma),
    or an empty string if no bit is set.
*/

const char *set_to_string(THD *thd, LEX_CSTRING *result, ulonglong set,
                          const char *lib[])
{
  char buff[STRING_BUFFER_USUAL_SIZE*8];
  String tmp(buff, sizeof(buff), &my_charset_latin1);
  LEX_CSTRING unused;

  if (!result)
    result= &unused;

  tmp.length(0);

  for (uint i= 0; set; i++, set >>= 1)
    if (set & 1) {
      tmp.append(lib[i], strlen(lib[i]));
      tmp.append(',');
    }

  if (tmp.length())
  {
    /* length()-1 drops the trailing ',' */
    result->str=    thd->strmake(tmp.ptr(), tmp.length()-1);
    result->length= tmp.length()-1;
  }
  else
  {
    result->str= const_cast<char*>("");
    result->length= 0;
  }
  return result->str;
}


/*
  Convert a flag set bitmap to a list of name=on / name=off pairs

  SYNOPSIS
    flagset_to_string()
      thd       Thread handle; the result is allocated with thd->strmake()
      result    out: the resulting string and its length. May be NULL
                if the caller only needs the returned pointer.
      set       Bitmap; bit N gives the state of lib[N]
      lib       NULL terminated array of names. The last name is
                always "default" and is not included in the result.

  RETURN VALUES
    "name=on" or "name=off" for every flag, separated by ','
    (no trailing comma), or an empty string if lib has no flags
    besides the last element.
*/

const char *flagset_to_string(THD *thd, LEX_CSTRING *result, ulonglong set,
                              const char *lib[])
{
  char buff[STRING_BUFFER_USUAL_SIZE*8];
  String tmp(buff, sizeof(buff), &my_charset_latin1);
  LEX_CSTRING unused;

  if (!result)
    result= &unused;

  tmp.length(0);

  // note that the last element is always "default", and it's ignored below
  for (uint i= 0; lib[i+1]; i++, set >>= 1)
  {
    tmp.append(lib[i], strlen(lib[i]));
    if (set & 1)
      tmp.append(STRING_WITH_LEN("=on,"));
    else
      tmp.append(STRING_WITH_LEN("=off,"));
  }

  if (tmp.length())
  {
    /* length()-1 drops the trailing ',' */
    result->str=    thd->strmake(tmp.ptr(), tmp.length()-1);
    result->length= tmp.length()-1;
  }
  else
  {
    /*
      Nothing was appended. 'length()-1' would wrap around to a huge
      unsigned value, so return an empty string like set_to_string().
    */
    result->str= const_cast<char*>("");
    result->length= 0;
  }

  return result->str;
}