diff options
Diffstat (limited to 'sql/item_strfunc.cc')
-rw-r--r-- | sql/item_strfunc.cc | 433 |
1 files changed, 413 insertions, 20 deletions
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index eb9c59d31f7..d4bf28a9c21 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -55,6 +55,11 @@ C_MODE_END #include <sql_repl.h> #include "sql_statistics.h" +/* fmtlib include (https://fmt.dev/). */ +#define FMT_STATIC_THOUSANDS_SEPARATOR ',' +#define FMT_HEADER_ONLY 1 +#include "fmt/format-inl.h" + size_t username_char_length= USERNAME_CHAR_LENGTH; /* @@ -1303,6 +1308,138 @@ bool Item_func_replace::fix_length_and_dec() return FALSE; } +/* + this is done in the constructor to be in the same memroot as + the item itself +*/ +Item_func_sformat::Item_func_sformat(THD *thd, List<Item> &list) + : Item_str_func(thd, list) +{ + val_arg= new (thd->mem_root) String[arg_count]; +} + + +bool Item_func_sformat::fix_length_and_dec() +{ + if (!val_arg) + return TRUE; + + ulonglong char_length= 0; + + uint flags= MY_COLL_ALLOW_SUPERSET_CONV | + MY_COLL_ALLOW_COERCIBLE_CONV | + MY_COLL_ALLOW_NUMERIC_CONV; + + if (Type_std_attributes::agg_item_collations(collation, func_name_cstring(), + args, arg_count, flags, 1)) + return TRUE; + + DTCollation c= collation; + if (c.collation->mbminlen > 1) + c.collation= &my_charset_utf8mb4_bin; + + for (uint i=0 ; i < arg_count ; i++) + { + char_length+= args[i]->max_char_length(); + if (args[i]->result_type() == STRING_RESULT && + Type_std_attributes::agg_item_set_converter(c, func_name_cstring(), + args+i, 1, flags, 1)) + return TRUE; + } + + fix_char_length_ulonglong(char_length); + return FALSE; +} + +/* + allow fmt to take String arguments directly. + Inherit from string_view, so all string formatting works. + but {:p} doesn't, because it's not char*, not a pointer. +*/ +namespace fmt { + template <> struct formatter<String>: formatter<string_view> { + template <typename FormatContext> + auto format(String c, FormatContext& ctx) -> decltype(ctx.out()) { + string_view name = { c.ptr(), c.length() }; + return formatter<string_view>::format(name, ctx); + }; + }; +}; + +/* + SFORMAT(format_string, ...) + This function receives a formatting specification string and N parameters + (N >= 0), and it returns string formatted using the rules the user passed + in the specification. It uses fmtlib (https://fmt.dev/). +*/ +String *Item_func_sformat::val_str(String *res) +{ + DBUG_ASSERT(fixed()); + using ctx= fmt::format_context; + String *fmt_arg= NULL; + String *parg= NULL; + fmt::format_args::format_arg *vargs= NULL; + + null_value= true; + if (!(fmt_arg= args[0]->val_str(res))) + return NULL; + + if (!(vargs= new fmt::format_args::format_arg[arg_count - 1])) + return NULL; + + /* Creates the array of arguments for vformat */ + for (uint carg= 1; carg < arg_count; carg++) + { + switch (args[carg]->result_type()) + { + case INT_RESULT: + vargs[carg-1]= fmt::detail::make_arg<ctx>(args[carg]->val_int()); + break; + case DECIMAL_RESULT: // TODO + case REAL_RESULT: + if (args[carg]->field_type() == MYSQL_TYPE_FLOAT) + vargs[carg-1]= fmt::detail::make_arg<ctx>((float)args[carg]->val_real()); + else + vargs[carg-1]= fmt::detail::make_arg<ctx>(args[carg]->val_real()); + break; + case STRING_RESULT: + if (!(parg= args[carg]->val_str(&val_arg[carg-1]))) + { + delete [] vargs; + return NULL; + } + vargs[carg-1]= fmt::detail::make_arg<ctx>(*parg); + break; + case TIME_RESULT: // TODO + case ROW_RESULT: // TODO + default: + DBUG_ASSERT(0); + delete [] vargs; + return NULL; + } + } + + null_value= false; + /* Create the string output */ + try + { + auto text = fmt::vformat(fmt_arg->c_ptr_safe(), + fmt::format_args(vargs, arg_count-1)); + res->length(0); + res->set_charset(collation.collation); + res->append(text.c_str(), text.size(), fmt_arg->charset()); + } + catch (const fmt::format_error &ex) + { + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + WARN_SFORMAT_ERROR, + ER_THD(thd, WARN_SFORMAT_ERROR), ex.what()); + null_value= true; + } + delete [] vargs; + return null_value ? NULL : res; +} /*********************************************************************/ bool Item_func_regexp_replace::fix_length_and_dec() @@ -4379,26 +4516,6 @@ err: #endif -String *Item_func_uuid::val_str(String *str) -{ - DBUG_ASSERT(fixed()); - uchar guid[MY_UUID_SIZE]; - size_t length= (without_separators ? - MY_UUID_ORACLE_STRING_LENGTH : - MY_UUID_STRING_LENGTH); - - str->alloc(length+1); - str->length(length); - str->set_charset(system_charset_info); - my_uuid(guid); - if (without_separators) - my_uuid2str_oracle(guid, (char *)str->ptr()); - else - my_uuid2str(guid, (char *)str->ptr()); - return str; -} - - Item_func_dyncol_create::Item_func_dyncol_create(THD *thd, List<Item> &args, DYNCALL_CREATE_DEF *dfs): Item_str_func(thd, args), defs(dfs), vals(0), keys_num(NULL), keys_str(NULL), @@ -5304,6 +5421,282 @@ String *Item_temptable_rowid::val_str(String *str) return &str_value; } +/** + Helper routine to encode length prefix + in natsort_encode_numeric_string(). + + The idea is so that bigger input numbers correspond + lexicographically bigger output strings. + + Note, that in real use the number would typically + small, as it only computes variable *length prefixes*. + + @param[in] n - the number + @param[in] s - output string + + @return - length of encoding + + Here is how encoding works + + - n is from 0 to 8 + Output string calculated as '0'+n (range '0' - '8') + + - n is from 9 to 17 + Output calculated as concat('9', '0' + n -9)' + Output range: '90'-'98' + + -n is from 18 to 26 + Output calculated as concat('99', '0' + n -18)' + Output range '990'-'998' + + - n is from 27 to SIZE_T_MAX + Output starts with '999', + then log10(n) is encoded as 2-digit decimal number + then the number itself is added. + Example : for 28 key is concat('999', '01' , '28') + i.e '9990128' + + Key length is 5 + ceil(log10(n)) + + Output range is + (64bit)'9990128' - '9991918446744073709551615' + (32bit)'9990128' - '999094294967295' +*/ + +/* Largest length of encoded string.*/ +static size_t natsort_encode_length_max(size_t n) +{ + return (n < 27) ? n/9+1 : 26; +} + +static void natsort_encode_length(size_t n, String* out) +{ + if (n < 27) + { + if (n >= 9) + out->fill(out->length() + n/9,'9'); + out->append(char(n % 9 + '0')); + return; + } + + size_t log10n= 0; + for (size_t tmp= n / 10; tmp; tmp/= 10) + log10n++; + out->fill(out->length() + 3, '9'); + out->append('0' + (char) (log10n / 10)); + out->append('0' + (char) (log10n % 10)); + out->append_ulonglong(n); +} + +enum class NATSORT_ERR +{ + SUCCESS= 0, + KEY_TOO_LARGE= 1, + ALLOC_ERROR= 2 +}; + +/* + Encode numeric string for natural sorting. + + @param[in] in - start of the numeric string + skipping leading zeros + + @param[in] n_digits - length of the string, + in characters, not counting leading zeros. + + @param[out] out - String to write to. The string should + have enough preallocated space to fit the encoded key. + + @return + NATSORT_ERR::SUCCESS - success + NATSORT_ERR::KEY_TOO_LARGE - out string does not have enough + space left to accomodate the key. + + + The resulting encoding of the numeric string is then + + CONCAT(natsort_encode_length(n_digits), in) +*/ +static NATSORT_ERR natsort_encode_numeric_string(const char *in, + size_t n_digits, + String *out) +{ + DBUG_ASSERT(in); + DBUG_ASSERT(n_digits); + + if (out->length() + natsort_encode_length_max(n_digits - 1) + n_digits > + out->alloced_length()) + return NATSORT_ERR::KEY_TOO_LARGE; + + natsort_encode_length(n_digits - 1, out); + out->append(in, n_digits); + return NATSORT_ERR::SUCCESS; +} + +/* + Calculate max size of the natsort key. + + A digit in string expands to 2 chars length_prefix , and the digit + + With even length L=2N, the largest key corresponds to input string + in form REPEAT(<digit><letter>,N) and the length of a key is + 2N + N = 3N + + With odd input length L=2N+1, largest key is built by appending + a digit at the end, with key length 3N+2 + +*/ +static size_t natsort_max_key_size(size_t input_size) +{ + return input_size + (input_size + 1)/2 ; +} + +/** + Convert a string to natural sort key. + @param[in] in - input string + @param[out] out - output string + @param[in] max_key_size - the maximum size of the output + key, in bytes. + @return NATSORT_ERR::SUCCESS - successful completion + NATSORT_ERR::ALLOC_ERROR - memory allocation error + NATSORT_ERR::KEY_TOO_LARGE - resulting key would exceed max_key_size +*/ +static NATSORT_ERR to_natsort_key(const String *in, String *out, + size_t max_key_size) +{ + size_t n_digits= 0; + size_t n_lead_zeros= 0; + size_t num_start; + size_t reserve_length= std::min( + natsort_max_key_size(in->length()) + MAX_BIGINT_WIDTH + 2, max_key_size); + + out->length(0); + out->set_charset(in->charset()); + + if (out->alloc((uint32) reserve_length)) + return NATSORT_ERR::ALLOC_ERROR; + + for (size_t pos= 0;; pos++) + { + char c= pos < in->length() ? (*in)[pos] : 0; + bool is_digit= (c >= '0' && c <= '9'); + if (!is_digit && (n_digits || n_lead_zeros)) + { + /* Handle end of digits run.*/ + if (!n_digits) + { + /*We only have zeros.*/ + n_lead_zeros--; + num_start= pos - 1; + n_digits= 1; + } + NATSORT_ERR err= natsort_encode_numeric_string( + in->ptr() + num_start, n_digits, out); + if (err != NATSORT_ERR::SUCCESS) + return err; + + /* Reset state.*/ + n_digits= 0; + num_start= size_t(-1); + n_lead_zeros= 0; + } + + if (pos == in->length()) + break; + + if (!is_digit) + { + if (out->length() == max_key_size) + return NATSORT_ERR::KEY_TOO_LARGE; + out->append(c); + } + else if (c == '0' && !n_digits) + n_lead_zeros++; + else if (!n_digits++) + num_start= pos; + } + return NATSORT_ERR::SUCCESS; +} + +String *Item_func_natural_sort_key::val_str(String *out) +{ + String *in= args[0]->val_str(); + if (args[0]->null_value || !in) + { + null_value= true; + return nullptr; + } + NATSORT_ERR err= NATSORT_ERR::SUCCESS; + CHARSET_INFO *cs= in->charset(); + ulong max_allowed_packet= current_thd->variables.max_allowed_packet; + uint errs; + String tmp; + /* + to_natsort_key() only support charsets where digits are represented by + a single byte in range 0x30-0x39. Almost everything is OK, just utf16/32 + won't do. Full ASCII compatibility is not required, so that SJIS and SWE7 + are fine. + */ + if (cs->mbminlen != 1) + { + if (tmp.copy(in, &my_charset_utf8mb4_bin, &errs)) + goto error_exit; + in= &tmp; + } + + err= to_natsort_key(in, out, max_allowed_packet / cs->mbminlen); + + if (err != NATSORT_ERR::SUCCESS) + { + if (err == NATSORT_ERR::KEY_TOO_LARGE) + { + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_ALLOWED_PACKET_OVERFLOWED, + ER(ER_WARN_ALLOWED_PACKET_OVERFLOWED), func_name(), + max_allowed_packet); + } + goto error_exit; + } + + if (cs->mbminlen != 1) + { + /* output string is now utf8, convert to input charset.*/ + if (tmp.copy(out, cs, &errs) || out->copy(tmp)) + goto error_exit; + } + null_value= false; + return out; + +error_exit: + null_value= true; + return nullptr; +} + +bool Item_func_natural_sort_key::fix_length_and_dec(void) +{ + if (agg_arg_charsets_for_string_result(collation, args, 1)) + return true; + DBUG_ASSERT(collation.collation != NULL); + uint32 max_char_len= + (uint32) natsort_max_key_size(args[0]->max_char_length()); + fix_char_length(max_char_len); + + set_maybe_null(args[0]->maybe_null() || + max_char_len * collation.collation->mbmaxlen > + current_thd->variables.max_allowed_packet); + return false; +} + +/** + Disable use in stored virtual functions. Temporarily(?), until + the encoding is stable. +*/ +bool Item_func_natural_sort_key::check_vcol_func_processor(void *arg) +{ + return mark_unsupported_function(func_name(), "()", arg, + VCOL_NON_DETERMINISTIC); +} + #ifdef WITH_WSREP #include "wsrep_mysqld.h" |