1 files changed, 413 insertions, 20 deletions
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index eb9c59d31f7..d4bf28a9c21 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -55,6 +55,11 @@ C_MODE_END
 #include <sql_repl.h>
 #include "sql_statistics.h"
 
+/* fmtlib include (https://fmt.dev/). */
+#define FMT_STATIC_THOUSANDS_SEPARATOR ','
+#define FMT_HEADER_ONLY 1
+#include "fmt/format-inl.h"
+
 size_t username_char_length= USERNAME_CHAR_LENGTH;
 
 /*
@@ -1303,6 +1308,138 @@ bool Item_func_replace::fix_length_and_dec()
   return FALSE;
 }
 
+/*
+  The per-argument String buffers are allocated here, in the constructor,
+  so that they live in the same memroot as the item itself
+*/
+Item_func_sformat::Item_func_sformat(THD *thd, List<Item> &list)
+  : Item_str_func(thd, list),
+    val_arg(new (thd->mem_root) String[arg_count])
+{
+}
+
+
+bool Item_func_sformat::fix_length_and_dec()
+{
+  /* The argument buffers are allocated by the constructor. */
+  if (val_arg == NULL)
+    return TRUE;
+
+  const uint conv_flags= MY_COLL_ALLOW_SUPERSET_CONV |
+                         MY_COLL_ALLOW_COERCIBLE_CONV |
+                         MY_COLL_ALLOW_NUMERIC_CONV;
+  if (Type_std_attributes::agg_item_collations(collation, func_name_cstring(),
+                                               args, arg_count, conv_flags, 1))
+    return TRUE;
+
+  /* String arguments get converted to this collation. */
+  DTCollation coll= collation;
+  if (coll.collation->mbminlen > 1)
+    coll.collation= &my_charset_utf8mb4_bin;
+
+  /* The result length is estimated as the sum of all argument lengths. */
+  ulonglong total_chars= 0;
+  for (Item **arg= args; arg < args + arg_count; arg++)
+  {
+    total_chars+= (*arg)->max_char_length();
+    if ((*arg)->result_type() == STRING_RESULT &&
+        Type_std_attributes::agg_item_set_converter(coll, func_name_cstring(),
+                                                    arg, 1, conv_flags, 1))
+      return TRUE;
+  }
+  fix_char_length_ulonglong(total_chars);
+  return FALSE;
+}
+
+/*
+  Teach fmt to format String arguments directly.
+  The formatter derives from the string_view one, so all string formatting
+  works; only {:p} doesn't, because a String is not a char*, not a pointer.
+*/
+namespace fmt {
+  template <> struct formatter<String>: formatter<string_view> {
+    template <typename FormatContext>
+    auto format(String str, FormatContext& ctx) -> decltype(ctx.out()) {
+      const string_view view(str.ptr(), str.length());
+      return formatter<string_view>::format(view, ctx);
+    }
+  };
+}
+
+/*
+  SFORMAT(format_string, ...)
+  Formats args[1..] with fmtlib (https://fmt.dev/) as specified by args[0].
+  Returns NULL if any input is NULL or (with a warning) on a format error.
+*/
+String *Item_func_sformat::val_str(String *res)
+{
+  DBUG_ASSERT(fixed());
+  using                         ctx=     fmt::format_context;
+  String                       *fmt_arg= NULL;
+  String                       *parg=    NULL;
+  fmt::format_args::format_arg *vargs=   NULL;
+
+  null_value= true;
+  if (!(fmt_arg= args[0]->val_str(res)))
+    return NULL;
+  if (!(vargs= new fmt::format_args::format_arg[arg_count - 1]))
+    return NULL;
+  /* Creates the array of arguments for vformat */
+  for (uint carg= 1; carg < arg_count; carg++)
+  {
+    bool arg_is_null= false; /* numeric val_*() yield 0 for NULL args */
+    switch (args[carg]->result_type())
+    {
+    case INT_RESULT:
+      vargs[carg-1]= fmt::detail::make_arg<ctx>(args[carg]->val_int());
+      arg_is_null= args[carg]->null_value;
+      break;
+    case DECIMAL_RESULT: // TODO
+    case REAL_RESULT:
+      if (args[carg]->field_type() == MYSQL_TYPE_FLOAT)
+        vargs[carg-1]= fmt::detail::make_arg<ctx>((float)args[carg]->val_real());
+      else
+        vargs[carg-1]= fmt::detail::make_arg<ctx>(args[carg]->val_real());
+      arg_is_null= args[carg]->null_value;
+      break;
+    case STRING_RESULT:
+      if ((parg= args[carg]->val_str(&val_arg[carg-1])))
+        vargs[carg-1]= fmt::detail::make_arg<ctx>(*parg);
+      arg_is_null= !parg;
+      break;
+    case TIME_RESULT: // TODO
+    case ROW_RESULT: // TODO
+    default:
+      DBUG_ASSERT(0);
+      arg_is_null= true;
+    }
+    if (arg_is_null)
+    {
+      delete [] vargs;
+      return NULL;
+    }
+  }
+
+  null_value= false; /* all inputs are non-NULL: create the string output */
+  try
+  {
+    auto text = fmt::vformat(fmt_arg->c_ptr_safe(),
+                             fmt::format_args(vargs, arg_count-1));
+    res->length(0);
+    res->set_charset(collation.collation);
+    res->append(text.c_str(), text.size(), fmt_arg->charset());
+  }
+  catch (const fmt::format_error &ex)
+  {
+    THD *thd= current_thd;
+    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                        WARN_SFORMAT_ERROR,
+                        ER_THD(thd, WARN_SFORMAT_ERROR), ex.what());
+    null_value= true;
+  }
+  delete [] vargs;
+  return null_value ? NULL : res;
+}
 
 /*********************************************************************/
 bool Item_func_regexp_replace::fix_length_and_dec()
@@ -4379,26 +4516,6 @@ err:
 #endif
 
 
-String *Item_func_uuid::val_str(String *str)
-{
-  DBUG_ASSERT(fixed());
-  uchar guid[MY_UUID_SIZE];
-  size_t length= (without_separators ?
-                  MY_UUID_ORACLE_STRING_LENGTH :
-                  MY_UUID_STRING_LENGTH);
-
-  str->alloc(length+1);
-  str->length(length);
-  str->set_charset(system_charset_info);
-  my_uuid(guid);
-  if (without_separators)
-    my_uuid2str_oracle(guid, (char *)str->ptr());
-  else
-    my_uuid2str(guid, (char *)str->ptr());
-  return str;
-}
-
-
 Item_func_dyncol_create::Item_func_dyncol_create(THD *thd, List<Item> &args,
                                                  DYNCALL_CREATE_DEF *dfs):
   Item_str_func(thd, args), defs(dfs), vals(0), keys_num(NULL), keys_str(NULL),
@@ -5304,6 +5421,282 @@ String *Item_temptable_rowid::val_str(String *str)
   return &str_value;
 }
 
+/**
+  Helper routine to encode length prefix
+  in natsort_encode_numeric_string().
+
+  The idea is so that bigger input numbers correspond
+  lexicographically bigger output strings.
+
+  Note that in real use the number would typically be
+  small, as it only encodes variable *length prefixes*.
+
+  @param[in] n - the number
+  @param[out] out - output string, the encoding is appended to it
+
+  @return - nothing; the encoding is appended to out
+
+  Here is how encoding works
+
+  - n is from 0 to 8
+   Output string calculated as '0'+n (range '0' - '8')
+
+  - n is from 9 to 17
+    Output calculated as concat('9', '0' + n - 9)
+    Output range: '90'-'98'
+
+  - n is from 18 to 26
+    Output calculated as concat('99', '0' + n - 18)
+    Output range '990'-'998'
+
+  - n is from 27 to SIZE_T_MAX
+    Output starts with '999',
+    then floor(log10(n)) is encoded as 2-digit decimal number
+    then the number itself is added.
+    Example : for 28 key is concat('999', '01' , '28')
+    i.e '9990128'
+
+    Key length is 6 + floor(log10(n))
+
+   Output range is
+     (64bit)'9990127' - '9991918446744073709551615'
+     (32bit)'9990127' - '999094294967295'
+*/
+
+/* Upper bound on the number of bytes natsort_encode_length(n) appends. */
+static size_t natsort_encode_length_max(size_t n)
+{
+  return (n >= 27) ? 26 : 1 + n / 9;
+}
+
+static void natsort_encode_length(size_t n, String* out)
+{
+  /* n >= 27: '999', 2-digit log10(n), n itself; else n/9 nines and a digit */
+  if (n >= 27)
+  {
+    size_t magnitude= 0;
+    for (size_t rest= n / 10; rest != 0; rest/= 10)
+      magnitude++;
+    out->fill(out->length() + 3, '9');
+    out->append('0' + (char) (magnitude / 10));
+    out->append('0' + (char) (magnitude % 10));
+    out->append_ulonglong(n);
+    return;
+  }
+  if (n >= 9)
+    out->fill(out->length() + n / 9, '9');
+  out->append(char('0' + n % 9));
+}
+
+enum class NATSORT_ERR
+{
+  SUCCESS,        /* = 0, the key was produced */
+  KEY_TOO_LARGE,  /* = 1, the key does not fit into the allowed size */
+  ALLOC_ERROR     /* = 2, memory allocation failed */
+};
+
+/*
+   Append the natural-sort encoding of one run of digits to out.
+
+   @param[in] in - start of the numeric string,
+   with the leading zeros already skipped
+
+   @param[in] n_digits - length of the string,
+   in characters, not counting leading zeros. Must not be 0.
+
+   @param[out] out - String to append to. The string should
+   have enough preallocated space to fit the encoded key.
+
+   @return
+     NATSORT_ERR::SUCCESS  - success
+     NATSORT_ERR::KEY_TOO_LARGE  - out string does not have enough
+     preallocated space left to accommodate the key.
+
+
+   The bytes appended to out are
+
+   CONCAT(natsort_encode_length(n_digits - 1), in)
+*/
+static NATSORT_ERR natsort_encode_numeric_string(const char *in,
+                                                 size_t n_digits,
+                                                 String *out)
+{
+  DBUG_ASSERT(in);
+  DBUG_ASSERT(n_digits);
+
+  const size_t needed= natsort_encode_length_max(n_digits - 1) + n_digits;
+  if (out->alloced_length() < out->length() + needed)
+    return NATSORT_ERR::KEY_TOO_LARGE;
+
+  natsort_encode_length(n_digits - 1, out);
+  out->append(in, n_digits);
+  return NATSORT_ERR::SUCCESS;
+}
+
+/*
+  Upper bound on the size of a natsort key for a given input size.
+
+  In the worst case a digit expands to 2 chars: a length prefix and
+  the digit itself. All other characters are copied unchanged.
+
+  With even length L=2N, the largest key corresponds to an input string
+  of the form REPEAT(<digit><letter>,N), and the key length is 2N + N = 3N.
+
+  With odd input length L=2N+1, the largest key is built by appending
+  one more digit at the end, and the key length is 3N+2.
+*/
+static size_t natsort_max_key_size(size_t input_size)
+{
+  const size_t max_digit_runs= (input_size + 1) / 2;
+  return input_size + max_digit_runs;
+}
+
+/**
+  Build the natural sort key of a string: non-digit bytes are copied as
+  is, each run of digits (minus leading zeros) gets a length prefix.
+  @param[in]   in - input string; digits must be single bytes '0'..'9'
+  @param[out]  out - receives the key, in the charset of in
+  @param[in]   max_key_size - upper limit for the key size, in bytes
+  @return NATSORT_ERR::SUCCESS - successful completion
+          NATSORT_ERR::ALLOC_ERROR - memory allocation error
+          NATSORT_ERR::KEY_TOO_LARGE - resulting key would exceed max_key_size
+*/
+static NATSORT_ERR to_natsort_key(const String *in, String *out,
+                                  size_t max_key_size)
+{
+  size_t zero_count= 0;  /* leading zeros of the current digit run */
+  size_t run_digits= 0;  /* digits of the run after its leading zeros */
+  size_t run_start;      /* offset of the first of those digits */
+  const size_t key_estimate=
+      natsort_max_key_size(in->length()) + MAX_BIGINT_WIDTH + 2;
+
+  out->length(0);
+  out->set_charset(in->charset());
+
+  if (out->alloc((uint32) std::min(key_estimate, max_key_size)))
+    return NATSORT_ERR::ALLOC_ERROR;
+
+  /* The extra iteration at pos == length flushes a trailing digit run. */
+  for (size_t pos= 0;; pos++)
+  {
+    const char c= pos < in->length() ? (*in)[pos] : 0;
+    const bool is_digit= (c >= '0' && c <= '9');
+    if (!is_digit && (run_digits != 0 || zero_count != 0))
+    {
+      /* A digit run has just ended: emit its encoding. */
+      if (run_digits == 0)
+      {
+        /* The run was all zeros: its last zero becomes the number. */
+        zero_count--;
+        run_start= pos - 1;
+        run_digits= 1;
+      }
+      const NATSORT_ERR err= natsort_encode_numeric_string(
+          in->ptr() + run_start, run_digits, out);
+      if (err != NATSORT_ERR::SUCCESS)
+        return err;
+      run_digits= 0;
+      run_start= size_t(-1);
+      zero_count= 0;
+    }
+
+    if (pos == in->length())
+      break;
+
+    if (!is_digit)
+    {
+      if (out->length() == max_key_size)
+        return NATSORT_ERR::KEY_TOO_LARGE;
+      out->append(c);
+      continue;
+    }
+    if (c == '0' && run_digits == 0)
+      zero_count++;
+    else if (run_digits++ == 0)
+      run_start= pos;
+  }
+  return NATSORT_ERR::SUCCESS;
+}
+
+/*
+  Compute the natural sort key of args[0] into out.
+  The result is NULL if the argument is NULL, if a charset conversion
+  or allocation fails, or (with a warning) if the key would exceed
+  max_allowed_packet.
+*/
+String *Item_func_natural_sort_key::val_str(String *out)
+{
+  String *in= args[0]->val_str();
+  /* Every early return below yields SQL NULL. */
+  null_value= true;
+  if (args[0]->null_value || !in)
+    return nullptr;
+
+  THD *thd= current_thd;
+  const ulong max_allowed_packet= thd->variables.max_allowed_packet;
+  CHARSET_INFO *in_cs= in->charset();
+  /*
+    to_natsort_key() only supports charsets where digits are represented by
+    a single byte in range 0x30-0x39. Almost everything is OK (full ASCII
+    compatibility is not required, so SJIS and SWE7 are fine), only
+    utf16/32 won't do: such input makes a round trip through utf8mb4.
+  */
+  const bool via_utf8= in_cs->mbminlen != 1;
+  String conv_buf;
+  uint conv_errors;
+  if (via_utf8)
+  {
+    if (conv_buf.copy(in, &my_charset_utf8mb4_bin, &conv_errors))
+      return nullptr;
+    in= &conv_buf;
+  }
+
+  const NATSORT_ERR status=
+      to_natsort_key(in, out, max_allowed_packet / in_cs->mbminlen);
+  if (status == NATSORT_ERR::KEY_TOO_LARGE)
+  {
+    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                        ER_WARN_ALLOWED_PACKET_OVERFLOWED,
+                        ER(ER_WARN_ALLOWED_PACKET_OVERFLOWED), func_name(),
+                        max_allowed_packet);
+  }
+  if (status != NATSORT_ERR::SUCCESS)
+    return nullptr;
+
+  /* The key is in utf8mb4 now: convert it to the input charset. */
+  if (via_utf8 && (conv_buf.copy(out, in_cs, &conv_errors) ||
+                   out->copy(conv_buf)))
+    return nullptr;
+
+  null_value= false;
+  return out;
+}
+
+bool Item_func_natural_sort_key::fix_length_and_dec(void)
+{
+  if (agg_arg_charsets_for_string_result(collation, args, 1))
+    return true;
+  DBUG_ASSERT(collation.collation != NULL);
+  /* 64-bit math: the key is up to 1.5x the input and may overflow uint32. */
+  const ulonglong max_chars= natsort_max_key_size(args[0]->max_char_length());
+  fix_char_length_ulonglong(max_chars);
+  /* An oversized key makes val_str() return NULL even for NOT NULL input. */
+  set_maybe_null(args[0]->maybe_null() ||
+                 max_chars * collation.collation->mbmaxlen >
+                     current_thd->variables.max_allowed_packet);
+  return false;
+}
+
+/**
+  Mark the function as unsupported in virtual column expressions, by
+  flagging it VCOL_NON_DETERMINISTIC. Temporarily(?), until the encoding
+  is stable.
+*/
+bool Item_func_natural_sort_key::check_vcol_func_processor(void *arg)
+{
+  const char *name= func_name();
+  return mark_unsupported_function(name, "()", arg, VCOL_NON_DETERMINISTIC);
+}
+
 #ifdef WITH_WSREP
 
 #include "wsrep_mysqld.h"