summaryrefslogtreecommitdiff
path: root/sql/item_strfunc.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/item_strfunc.cc')
-rw-r--r--sql/item_strfunc.cc433
1 files changed, 413 insertions, 20 deletions
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index eb9c59d31f7..d4bf28a9c21 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -55,6 +55,11 @@ C_MODE_END
#include <sql_repl.h>
#include "sql_statistics.h"
+/* fmtlib include (https://fmt.dev/). */
+#define FMT_STATIC_THOUSANDS_SEPARATOR ','
+#define FMT_HEADER_ONLY 1
+#include "fmt/format-inl.h"
+
size_t username_char_length= USERNAME_CHAR_LENGTH;
/*
@@ -1303,6 +1308,138 @@ bool Item_func_replace::fix_length_and_dec()
return FALSE;
}
+/*
+  Allocate the per-argument String array (arg_count elements) right here
+  in the constructor, with placement new on thd->mem_root, so that it
+  lives in the same memroot as the item itself.
+*/
+Item_func_sformat::Item_func_sformat(THD *thd, List<Item> &list)
+  : Item_str_func(thd, list)
+{
+  auto *item_root= thd->mem_root;
+  val_arg= new (item_root) String[arg_count];
+}
+
+
+/*
+  Aggregate the argument collations, install charset converters on the
+  string arguments and set the maximum result length to the sum of the
+  arguments' maximum character lengths.
+
+  @return TRUE on error, FALSE on success
+*/
+bool Item_func_sformat::fix_length_and_dec()
+{
+  /* val_arg is allocated in the constructor; give up if it is missing. */
+  if (val_arg == NULL)
+    return TRUE;
+
+  const uint conv_flags= MY_COLL_ALLOW_SUPERSET_CONV |
+                         MY_COLL_ALLOW_COERCIBLE_CONV |
+                         MY_COLL_ALLOW_NUMERIC_CONV;
+
+  if (Type_std_attributes::agg_item_collations(collation, func_name_cstring(),
+                                               args, arg_count, conv_flags, 1))
+    return TRUE;
+
+  /*
+    Arguments are converted to the aggregated collation, except that a
+    charset with mbminlen > 1 is replaced by utf8mb4_bin for the arguments.
+  */
+  DTCollation arg_coll= collation;
+  if (arg_coll.collation->mbminlen > 1)
+    arg_coll.collation= &my_charset_utf8mb4_bin;
+
+  ulonglong total_chars= 0;
+  for (Item **arg= args; arg != args + arg_count; arg++)
+  {
+    /* Length is read before a converter may be put in place of the item. */
+    total_chars+= (*arg)->max_char_length();
+    if ((*arg)->result_type() != STRING_RESULT)
+      continue;
+    if (Type_std_attributes::agg_item_set_converter(arg_coll,
+                                                    func_name_cstring(),
+                                                    arg, 1, conv_flags, 1))
+      return TRUE;
+  }
+
+  fix_char_length_ulonglong(total_chars);
+  return FALSE;
+}
+
+/*
+  Allow fmt to take String arguments directly.
+
+  The specialization inherits from formatter<string_view>, so every
+  string format specification works: the String is presented to fmt as a
+  (pointer, length) view over its buffer. {:p} does not work, because the
+  argument is not a char* / pointer.
+
+  The String is taken by const reference so that formatting an argument
+  does not copy the String object.
+*/
+namespace fmt {
+  template <> struct formatter<String>: formatter<string_view> {
+    template <typename FormatContext>
+    auto format(const String &c, FormatContext& ctx) -> decltype(ctx.out()) {
+      string_view name = { c.ptr(), c.length() };
+      return formatter<string_view>::format(name, ctx);
+    }
+  };
+}
+
+/*
+  SFORMAT(format_string, ...)
+  This function receives a formatting specification string and N parameters
+  (N >= 0), and it returns string formatted using the rules the user passed
+  in the specification. It uses fmtlib (https://fmt.dev/).
+
+  @param res  buffer used for the format string and for the result
+
+  @return the formatted string, or NULL (with null_value set) when the
+          format string or a string argument evaluates to NULL, when an
+          argument has an unsupported result type, or when fmtlib throws
+          fmt::format_error (a WARN_SFORMAT_ERROR warning is pushed then).
+*/
+String *Item_func_sformat::val_str(String *res)
+{
+  DBUG_ASSERT(fixed());
+  using ctx= fmt::format_context;
+  String *fmt_arg= NULL;
+  String *parg= NULL;
+  fmt::format_args::format_arg *vargs= NULL;
+
+  null_value= true;
+  if (!(fmt_arg= args[0]->val_str(res)))
+    return NULL;
+
+  /*
+    args[0]->val_str(res) may return `res` itself. Remember the charset of
+    the format string now: once `res` has been re-labelled with the result
+    collation below, fmt_arg->charset() would no longer describe the bytes
+    that fmtlib produced.
+  */
+  CHARSET_INFO *fmt_cs= fmt_arg->charset();
+
+  if (!(vargs= new fmt::format_args::format_arg[arg_count - 1]))
+    return NULL;
+
+  /* Creates the array of arguments for vformat */
+  for (uint carg= 1; carg < arg_count; carg++)
+  {
+    switch (args[carg]->result_type())
+    {
+    case INT_RESULT:
+      vargs[carg-1]= fmt::detail::make_arg<ctx>(args[carg]->val_int());
+      break;
+    case DECIMAL_RESULT: // TODO
+    case REAL_RESULT:
+      if (args[carg]->field_type() == MYSQL_TYPE_FLOAT)
+        vargs[carg-1]= fmt::detail::make_arg<ctx>((float)args[carg]->val_real());
+      else
+        vargs[carg-1]= fmt::detail::make_arg<ctx>(args[carg]->val_real());
+      break;
+    case STRING_RESULT:
+      /* Each string argument gets its own buffer, val_arg[carg-1]. */
+      if (!(parg= args[carg]->val_str(&val_arg[carg-1])))
+      {
+        delete [] vargs;
+        return NULL;
+      }
+      vargs[carg-1]= fmt::detail::make_arg<ctx>(*parg);
+      break;
+    case TIME_RESULT: // TODO
+    case ROW_RESULT: // TODO
+    default:
+      DBUG_ASSERT(0);
+      delete [] vargs;
+      return NULL;
+    }
+  }
+
+  null_value= false;
+  /*
+    Create the string output.
+    NOTE(review): only fmt::format_error is caught here; any other
+    exception leaves this function without reaching the delete [] below.
+  */
+  try
+  {
+    auto text = fmt::vformat(fmt_arg->c_ptr_safe(),
+                             fmt::format_args(vargs, arg_count-1));
+    res->length(0);
+    res->set_charset(collation.collation);
+    res->append(text.c_str(), text.size(), fmt_cs);
+  }
+  catch (const fmt::format_error &ex)
+  {
+    THD *thd= current_thd;
+    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                        WARN_SFORMAT_ERROR,
+                        ER_THD(thd, WARN_SFORMAT_ERROR), ex.what());
+    null_value= true;
+  }
+  delete [] vargs;
+  return null_value ? NULL : res;
+}
/*********************************************************************/
bool Item_func_regexp_replace::fix_length_and_dec()
@@ -4379,26 +4516,6 @@ err:
#endif
-String *Item_func_uuid::val_str(String *str)
-{
- DBUG_ASSERT(fixed());
- uchar guid[MY_UUID_SIZE];
- size_t length= (without_separators ?
- MY_UUID_ORACLE_STRING_LENGTH :
- MY_UUID_STRING_LENGTH);
-
- str->alloc(length+1);
- str->length(length);
- str->set_charset(system_charset_info);
- my_uuid(guid);
- if (without_separators)
- my_uuid2str_oracle(guid, (char *)str->ptr());
- else
- my_uuid2str(guid, (char *)str->ptr());
- return str;
-}
-
-
Item_func_dyncol_create::Item_func_dyncol_create(THD *thd, List<Item> &args,
DYNCALL_CREATE_DEF *dfs):
Item_str_func(thd, args), defs(dfs), vals(0), keys_num(NULL), keys_str(NULL),
@@ -5304,6 +5421,282 @@ String *Item_temptable_rowid::val_str(String *str)
return &str_value;
}
+/**
+ Helper routine to encode length prefix
+ in natsort_encode_numeric_string().
+
+ The idea is so that bigger input numbers correspond
+ lexicographically bigger output strings.
+
+ Note that in real use the number would typically be
+ small, as it only computes variable *length prefixes*.
+
+ @param[in] n - the number
+ @param[in] s - output string
+
+ @return - length of encoding
+
+ Here is how encoding works
+
+ - n is from 0 to 8
+ Output string calculated as '0'+n (range '0' - '8')
+
+ - n is from 9 to 17
+ Output calculated as concat('9', '0' + n - 9)
+ Output range: '90'-'98'
+
+ - n is from 18 to 26
+ Output calculated as concat('99', '0' + n - 18)
+ Output range: '990'-'998'
+
+ - n is from 27 to SIZE_T_MAX
+ Output starts with '999',
+ then log10(n) is encoded as 2-digit decimal number
+ then the number itself is added.
+ Example : for 28 key is concat('999', '01' , '28')
+ i.e '9990128'
+
+ Key length is 5 + (number of decimal digits in n), i.e. 6 + floor(log10(n))
+
+ Output range is
+ (64bit)'9990128' - '9991918446744073709551615'
+ (32bit)'9990128' - '999094294967295'
+*/
+
+/*
+  Largest length of the string appended by natsort_encode_length(n):
+  n/9 + 1 bytes for the short forms (n < 27), a fixed 26 otherwise.
+*/
+static size_t natsort_encode_length_max(size_t n)
+{
+  if (n >= 27)
+    return 26;
+  return n / 9 + 1;
+}
+
+/*
+  Append the order-preserving encoding of n to *out.
+
+  n < 27 : n/9 times '9', followed by the digit '0' + n%9.
+  n >= 27: '999', then the decimal digit count of n minus one as two
+           digits, then n itself in decimal.
+*/
+static void natsort_encode_length(size_t n, String* out)
+{
+  if (n >= 27)
+  {
+    /* Count the decimal digits of n beyond the first one. */
+    size_t log10n= 0;
+    size_t rest= n / 10;
+    while (rest)
+    {
+      log10n++;
+      rest/= 10;
+    }
+    out->fill(out->length() + 3, '9');
+    out->append('0' + (char) (log10n / 10));
+    out->append('0' + (char) (log10n % 10));
+    out->append_ulonglong(n);
+    return;
+  }
+
+  if (n >= 9)
+    out->fill(out->length() + n/9,'9');
+  out->append(char(n % 9 + '0'));
+}
+
+/* Result codes of the natural sort key helpers. */
+enum class NATSORT_ERR
+{
+  SUCCESS,        /* 0: the key was produced */
+  KEY_TOO_LARGE,  /* 1: the key does not fit into the allowed size */
+  ALLOC_ERROR     /* 2: memory allocation error */
+};
+
+/*
+  Encode numeric string for natural sorting.
+
+  @param[in] in        - start of the numeric string,
+                         skipping leading zeros
+
+  @param[in] n_digits  - length of the string, in characters,
+                         not counting leading zeros. Must not be 0.
+
+  @param[out] out      - String to write to. The string should have
+                         enough preallocated space to fit the encoded key.
+
+  @return
+    NATSORT_ERR::SUCCESS       - success
+    NATSORT_ERR::KEY_TOO_LARGE - out string does not have enough
+                                 space left to accommodate the key.
+
+  The resulting encoding of the numeric string is
+
+    CONCAT(natsort_encode_length(n_digits - 1), in)
+*/
+static NATSORT_ERR natsort_encode_numeric_string(const char *in,
+                                                 size_t n_digits,
+                                                 String *out)
+{
+  DBUG_ASSERT(in);
+  DBUG_ASSERT(n_digits);
+
+  /* The length prefix encodes n_digits - 1, not n_digits. */
+  const size_t prefix_value= n_digits - 1;
+  const size_t space_needed=
+    out->length() + natsort_encode_length_max(prefix_value) + n_digits;
+
+  /* Only the space that is already allocated may be used. */
+  if (space_needed > out->alloced_length())
+    return NATSORT_ERR::KEY_TOO_LARGE;
+
+  natsort_encode_length(prefix_value, out);
+  out->append(in, n_digits);
+  return NATSORT_ERR::SUCCESS;
+}
+
+/*
+  Calculate max size of the natsort key.
+
+  A single digit in the input expands to 2 chars in the key: a one-char
+  length prefix and the digit itself.
+
+  For an even input length L=2N the largest key comes from an input of
+  the form REPEAT(<digit><letter>,N); its key is 2N + N = 3N long.
+
+  For an odd input length L=2N+1 the largest key is obtained by appending
+  one more digit, giving a key length of 3N+2.
+*/
+static size_t natsort_max_key_size(size_t input_size)
+{
+  /* At most every other character starts a new run of digits. */
+  const size_t max_digit_runs= (input_size + 1) / 2;
+  return input_size + max_digit_runs;
+}
+
+/**
+  Convert a string to natural sort key.
+
+  The input is scanned byte by byte. Bytes outside '0'..'9' are appended
+  to the key unchanged. Every maximal run of digits is replaced by the
+  output of natsort_encode_numeric_string(), applied to the run with its
+  leading zeros skipped; a run consisting of zeros only is encoded as if
+  it were the single digit 0.
+
+  @param[in] in - input string
+  @param[out] out - output string
+  @param[in] max_key_size - the maximum size of the output
+  key, in bytes.
+  @return NATSORT_ERR::SUCCESS - successful completion
+  NATSORT_ERR::ALLOC_ERROR - memory allocation error
+  NATSORT_ERR::KEY_TOO_LARGE - resulting key would exceed max_key_size
+*/
+static NATSORT_ERR to_natsort_key(const String *in, String *out,
+                                  size_t max_key_size)
+{
+  size_t n_digits= 0;      /* digits in the current run, after leading zeros */
+  size_t n_lead_zeros= 0;  /* leading zeros seen in the current run */
+  size_t num_start;        /* offset of the first non-zero digit of the run */
+  /* Worst-case key size plus MAX_BIGINT_WIDTH + 2, capped at max_key_size. */
+  size_t reserve_length= std::min(
+      natsort_max_key_size(in->length()) + MAX_BIGINT_WIDTH + 2, max_key_size);
+
+  out->length(0);
+  out->set_charset(in->charset());
+
+  if (out->alloc((uint32) reserve_length))
+    return NATSORT_ERR::ALLOC_ERROR;
+
+  /*
+    pos deliberately runs one step past the last byte: in that extra
+    iteration c is 0, which ends (and flushes) a digit run that reaches
+    the end of the input.
+  */
+  for (size_t pos= 0;; pos++)
+  {
+    char c= pos < in->length() ? (*in)[pos] : 0;
+    bool is_digit= (c >= '0' && c <= '9');
+    if (!is_digit && (n_digits || n_lead_zeros))
+    {
+      /* Handle end of digits run.*/
+      if (!n_digits)
+      {
+        /* We only have zeros: encode the last of them as the number. */
+        n_lead_zeros--;
+        num_start= pos - 1;
+        n_digits= 1;
+      }
+      NATSORT_ERR err= natsort_encode_numeric_string(
+          in->ptr() + num_start, n_digits, out);
+      if (err != NATSORT_ERR::SUCCESS)
+        return err;
+
+      /* Reset state.*/
+      n_digits= 0;
+      num_start= size_t(-1);
+      n_lead_zeros= 0;
+    }
+
+    if (pos == in->length())
+      break;
+
+    if (!is_digit)
+    {
+      /* Non-digit bytes are copied as they are, up to max_key_size. */
+      if (out->length() == max_key_size)
+        return NATSORT_ERR::KEY_TOO_LARGE;
+      out->append(c);
+    }
+    else if (c == '0' && !n_digits)
+      n_lead_zeros++;      /* a zero before any other digit of the run */
+    else if (!n_digits++)
+      num_start= pos;      /* first counted digit: the number starts here */
+  }
+  return NATSORT_ERR::SUCCESS;
+}
+
+/*
+  Return the natural sort key of the argument in *out.
+
+  The result is NULL when the argument is NULL, when a charset conversion
+  or an allocation fails, or when the key would be larger than
+  max_allowed_packet (a warning is pushed in that last case).
+*/
+String *Item_func_natural_sort_key::val_str(String *out)
+{
+  /* Every failure path reports SQL NULL in the same way. */
+  auto fail= [this]() -> String *
+  {
+    null_value= true;
+    return nullptr;
+  };
+
+  String *in= args[0]->val_str();
+  if (!in || args[0]->null_value)
+    return fail();
+
+  THD *thd= current_thd;
+  CHARSET_INFO *cs= in->charset();
+  const ulong max_allowed_packet= thd->variables.max_allowed_packet;
+  /*
+    to_natsort_key() only support charsets where digits are represented by
+    a single byte in range 0x30-0x39. Almost everything is OK, just utf16/32
+    won't do. Full ASCII compatibility is not required, so that SJIS and SWE7
+    are fine.
+  */
+  const bool via_utf8= cs->mbminlen != 1;
+  uint errs;
+  String tmp;
+
+  if (via_utf8)
+  {
+    if (tmp.copy(in, &my_charset_utf8mb4_bin, &errs))
+      return fail();
+    in= &tmp;
+  }
+
+  const NATSORT_ERR err=
+    to_natsort_key(in, out, max_allowed_packet / cs->mbminlen);
+
+  if (err == NATSORT_ERR::KEY_TOO_LARGE)
+    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                        ER_WARN_ALLOWED_PACKET_OVERFLOWED,
+                        ER(ER_WARN_ALLOWED_PACKET_OVERFLOWED), func_name(),
+                        max_allowed_packet);
+  if (err != NATSORT_ERR::SUCCESS)
+    return fail();
+
+  /* output string is now utf8, convert to input charset.*/
+  if (via_utf8 && (tmp.copy(out, cs, &errs) || out->copy(tmp)))
+    return fail();
+
+  null_value= false;
+  return out;
+}
+
+/*
+  Set the result collation, the maximum result length and nullability.
+
+  The maximum key length (in characters) is kept in a ulonglong: the
+  worst-case key is larger than the argument, and multiplying it by
+  mbmaxlen in 32-bit arithmetic could wrap around and make the
+  max_allowed_packet comparison below give the wrong answer.
+
+  The result may be NULL if the argument may be NULL, or if the largest
+  possible key (in bytes) exceeds max_allowed_packet.
+
+  @return true on error, false on success
+*/
+bool Item_func_natural_sort_key::fix_length_and_dec(void)
+{
+  if (agg_arg_charsets_for_string_result(collation, args, 1))
+    return true;
+  DBUG_ASSERT(collation.collation != NULL);
+
+  const ulonglong max_char_len=
+    natsort_max_key_size(args[0]->max_char_length());
+  fix_char_length_ulonglong(max_char_len);
+
+  set_maybe_null(args[0]->maybe_null() ||
+                 max_char_len * collation.collation->mbmaxlen >
+                 current_thd->variables.max_allowed_packet);
+  return false;
+}
+
+/**
+  Disable use in stored virtual functions. Temporarily(?), until
+  the encoding is stable.
+
+  The function is reported through mark_unsupported_function() with the
+  VCOL_NON_DETERMINISTIC flag.
+*/
+bool Item_func_natural_sort_key::check_vcol_func_processor(void *arg)
+{
+  const bool res= mark_unsupported_function(func_name(), "()", arg,
+                                            VCOL_NON_DETERMINISTIC);
+  return res;
+}
+
#ifdef WITH_WSREP
#include "wsrep_mysqld.h"