summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/main/natural_sort_key.result185
-rw-r--r--mysql-test/main/natural_sort_key.test76
-rw-r--r--sql/item_strfunc.cc87
-rw-r--r--sql/item_strfunc.h2
4 files changed, 227 insertions, 123 deletions
diff --git a/mysql-test/main/natural_sort_key.result b/mysql-test/main/natural_sort_key.result
index 1a6cf1cc3ea..a99fa7a71b8 100644
--- a/mysql-test/main/natural_sort_key.result
+++ b/mysql-test/main/natural_sort_key.result
@@ -2,14 +2,6 @@ SET NAMES utf8mb4;
SELECT NATURAL_SORT_KEY(NULL);
NATURAL_SORT_KEY(NULL)
NULL
-SELECT '' c WHERE 0 UNION VALUES('a10'),('a9'),('a1000'), ('a0'),('b'),('b0') ORDER BY NATURAL_SORT_KEY(c);
-c
-a0
-a9
-a10
-a1000
-b
-b0
SELECT NATURAL_SORT_KEY(repeat('a1',@@max_allowed_packet/2-1));
NATURAL_SORT_KEY(repeat('a1',@@max_allowed_packet/2-1))
NULL
@@ -22,14 +14,10 @@ Warnings:
Warning 1301 Result of natural_sort_key() was larger than max_allowed_packet (16777216) - truncated
CREATE TABLE t1(
c VARCHAR(30) CHARACTER SET latin1 COLLATE latin1_bin,
-k VARCHAR(45) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci AS (NATURAL_SORT_KEY(CONVERT(c USING utf8mb4))) INVISIBLE,
-KEY(k,c)) ENGINE=InnoDB;
+k VARCHAR(60) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci AS (NATURAL_SORT_KEY(CONVERT(c USING utf8mb4))) VIRTUAL INVISIBLE);
INSERT INTO t1 values
('A1'),('a1'),('A100'),('a100'),('A2'),('ä2'),('a2'),('A99'),
('äb'),('B1'),('B100'),('B9'),('C'),('100');
-EXPLAIN SELECT c FROM t1 ORDER BY k,c;
-id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 index NULL k 216 NULL 14 Using index
#Natural sort order.
SELECT c FROM t1 ORDER BY k,c;
c
@@ -69,55 +57,136 @@ SHOW CREATE TABLE t2;
Table Create Table
t2 CREATE TABLE `t2` (
`c` varchar(30) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
- `NATURAL_SORT_KEY(c)` varchar(60) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL
+ `NATURAL_SORT_KEY(c)` varchar(45) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1,t2;
-SELECT RPAD(val,28,' ') value , RPAD(NATURAL_SORT_KEY(val),35,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead
+CREATE TABLE t1(c VARCHAR(1), k VARCHAR(2) AS (NATURAL_SORT_KEY(c)) STORED);
+ERROR HY000: Function or expression 'natural_sort_key()' cannot be used in the GENERATED ALWAYS AS clause of `k`
+SELECT RPAD(val,28,' ') value , RPAD(NATURAL_SORT_KEY(val),36,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead
FROM
(
-SELECT 0 val
-UNION VALUES ('1'),('01'),('0001')
+SELECT '0' val
+UNION SELECT seq FROM seq_1_to_9
UNION SELECT CONCAT('1',repeat('0',seq)) FROM seq_1_to_27
) AS numbers ORDER BY sortkey;
value sortkey encoding_overhead
-0 000 2
-1 010 2
-01 011 1
-0001 013 -1
-10 1100 2
-100 21000 2
-1000 310000 2
-10000 4100000 2
-100000 51000000 2
-1000000 610000000 2
-10000000 7100000000 2
-100000000 81000000000 2
-1000000000 9010000000000 3
-10000000000 91100000000000 3
-100000000000 921000000000000 3
-1000000000000 9310000000000000 3
-10000000000000 94100000000000000 3
-100000000000000 951000000000000000 3
-1000000000000000 9610000000000000000 3
-10000000000000000 97100000000000000000 3
-100000000000000000 981000000000000000000 3
-1000000000000000000 99010000000000000000000 4
-10000000000000000000 991100000000000000000000 4
-100000000000000000000 9921000000000000000000000 4
-1000000000000000000000 99310000000000000000000000 4
-10000000000000000000000 994100000000000000000000000 4
-100000000000000000000000 9951000000000000000000000000 4
-1000000000000000000000000 99610000000000000000000000000 4
-10000000000000000000000000 997100000000000000000000000000 4
-100000000000000000000000000 9981000000000000000000000000000 4
-1000000000000000000000000000 99901271000000000000000000000000000 8
-SELECT val
-FROM
-(
-SELECT 0 val WHERE 0
-UNION VALUES ('1/'),('01'),('1a')
-) AS strings ORDER BY NATURAL_SORT_KEY(val);
-val
-1/
-1a
-01
+0 00 1
+1 01 1
+2 02 1
+3 03 1
+4 04 1
+5 05 1
+6 06 1
+7 07 1
+8 08 1
+9 09 1
+10 110 1
+100 2100 1
+1000 31000 1
+10000 410000 1
+100000 5100000 1
+1000000 61000000 1
+10000000 710000000 1
+100000000 8100000000 1
+1000000000 901000000000 2
+10000000000 9110000000000 2
+100000000000 92100000000000 2
+1000000000000 931000000000000 2
+10000000000000 9410000000000000 2
+100000000000000 95100000000000000 2
+1000000000000000 961000000000000000 2
+10000000000000000 9710000000000000000 2
+100000000000000000 98100000000000000000 2
+1000000000000000000 9901000000000000000000 3
+10000000000000000000 99110000000000000000000 3
+100000000000000000000 992100000000000000000000 3
+1000000000000000000000 9931000000000000000000000 3
+10000000000000000000000 99410000000000000000000000 3
+100000000000000000000000 995100000000000000000000000 3
+1000000000000000000000000 9961000000000000000000000000 3
+10000000000000000000000000 99710000000000000000000000000 3
+100000000000000000000000000 998100000000000000000000000000 3
+1000000000000000000000000000 99901271000000000000000000000000000 7
+SELECT natural_sort_key('1') = natural_sort_key('0001');
+natural_sort_key('1') = natural_sort_key('0001')
+1
+SELECT natural_sort_key('1.1') = natural_sort_key('1.00001');
+natural_sort_key('1.1') = natural_sort_key('1.00001')
+1
+SELECT RPAD(val,20,' ') value, NATURAL_SORT_KEY(val) FROM
+(SELECT '' val WHERE 0 UNION VALUES
+('fred'),
+('pic2'),
+('pic100a'),
+('pic120'),
+('pic121'),
+('jane'),
+('tom'),
+('pic02a'),
+('pic3'),
+('pic4'),
+('1-20'),
+('pic100'),
+('pic02000'),
+('10-20'),
+('1-02'),
+('1-2'),
+('pic01'),
+('pic02'),
+('pic 6'),
+('pic 7'),
+('pic 5'),
+('pic05'),
+('pic 5 '),
+('pic 5 something'),
+('pic 4 else'),
+('2000-1-10'),
+('1999-12-25'),
+('1999-3-3'),
+('2000-3-23'),
+('2000-1-2'),
+('100.200.300.400'),
+('100.50.60.70'),
+('100.8.9.0'),
+('a1b1'),
+('a01b2'),
+('a1b2'),
+('a01b3')
+)AS data ORDER BY 2,1;
+value NATURAL_SORT_KEY(val)
+1-02 01-02
+1-2 01-02
+1-20 01-120
+10-20 110-120
+100.8.9.0 2100.08.09.00
+100.50.60.70 2100.150.160.170
+100.200.300.400 2100.2200.2300.2400
+1999-3-3 31999-03-03
+1999-12-25 31999-112-125
+2000-1-2 32000-01-02
+2000-1-10 32000-01-110
+2000-3-23 32000-03-123
+a1b1 a01b01
+a01b2 a01b02
+a1b2 a01b02
+a01b3 a01b03
+fred fred
+jane jane
+pic 7 pic 07
+pic 4 else pic 04 else
+pic 5 pic 05
+pic 5 something pic 05 something
+pic 6 pic 06
+pic01 pic01
+pic02 pic02
+pic2 pic02
+pic02a pic02a
+pic3 pic03
+pic4 pic04
+pic05 pic05
+pic100 pic2100
+pic100a pic2100a
+pic120 pic2120
+pic121 pic2121
+pic02000 pic32000
+tom tom
diff --git a/mysql-test/main/natural_sort_key.test b/mysql-test/main/natural_sort_key.test
index f918c0e9bdf..dda25c9e1f8 100644
--- a/mysql-test/main/natural_sort_key.test
+++ b/mysql-test/main/natural_sort_key.test
@@ -1,25 +1,21 @@
---source include/have_innodb.inc
--source include/have_sequence.inc
SET NAMES utf8mb4;
SELECT NATURAL_SORT_KEY(NULL);
-# Sort without tables
-SELECT '' c WHERE 0 UNION VALUES('a10'),('a9'),('a1000'), ('a0'),('b'),('b0') ORDER BY NATURAL_SORT_KEY(c);
-
#Test that max packet overflow produces NULL plus warning
SELECT NATURAL_SORT_KEY(repeat('a1',@@max_allowed_packet/2-1));
SELECT NATURAL_SORT_KEY(repeat('1',@@max_allowed_packet-1));
-#Test with virtual(index only) key
+#Test with virtual
CREATE TABLE t1(
c VARCHAR(30) CHARACTER SET latin1 COLLATE latin1_bin,
- k VARCHAR(45) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci AS (NATURAL_SORT_KEY(CONVERT(c USING utf8mb4))) INVISIBLE,
- KEY(k,c)) ENGINE=InnoDB;
+ k VARCHAR(60) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci AS (NATURAL_SORT_KEY(CONVERT(c USING utf8mb4))) VIRTUAL INVISIBLE);
+
INSERT INTO t1 values
('A1'),('a1'),('A100'),('a100'),('A2'),('ä2'),('a2'),('A99'),
('äb'),('B1'),('B100'),('B9'),('C'),('100');
-EXPLAIN SELECT c FROM t1 ORDER BY k,c;
+
-- echo #Natural sort order.
# We sort by 2 colums, for stable sort,as we do not currenly have a case and accent insensitive Unicode collation.
SELECT c FROM t1 ORDER BY k,c;
@@ -30,21 +26,63 @@ CREATE TABLE t2 AS SELECT c, NATURAL_SORT_KEY(c) FROM t1 WHERE 0;
SHOW CREATE TABLE t2;
DROP TABLE t1,t2;
-#Show encoding of numbers, with some leading whitespace.
-SELECT RPAD(val,28,' ') value , RPAD(NATURAL_SORT_KEY(val),35,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead
+#Virtual STORED is temporarily disabled
+--error ER_GENERATED_COLUMN_FUNCTION_IS_NOT_ALLOWED
+CREATE TABLE t1(c VARCHAR(1), k VARCHAR(2) AS (NATURAL_SORT_KEY(c)) STORED);
+
+#Show encoding of numbers.
+SELECT RPAD(val,28,' ') value , RPAD(NATURAL_SORT_KEY(val),36,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead
FROM
(
-SELECT 0 val
-UNION VALUES ('1'),('01'),('0001')
+SELECT '0' val
+UNION SELECT seq FROM seq_1_to_9
UNION SELECT CONCAT('1',repeat('0',seq)) FROM seq_1_to_27
) AS numbers ORDER BY sortkey;
+# leading zeros ignored
+SELECT natural_sort_key('1') = natural_sort_key('0001');
+SELECT natural_sort_key('1.1') = natural_sort_key('1.00001');
-#Test that sort order with leading zeros (lead zeros sort larger)
-SELECT val
-FROM
-(
-SELECT 0 val WHERE 0
-UNION VALUES ('1/'),('01'),('1a')
-) AS strings ORDER BY NATURAL_SORT_KEY(val);
+# Some examples from https://github.com/sourcefrog/natsort/
+# words
+SELECT RPAD(val,20,' ') value, NATURAL_SORT_KEY(val) FROM
+(SELECT '' val WHERE 0 UNION VALUES
+('fred'),
+('pic2'),
+('pic100a'),
+('pic120'),
+('pic121'),
+('jane'),
+('tom'),
+('pic02a'),
+('pic3'),
+('pic4'),
+('1-20'),
+('pic100'),
+('pic02000'),
+('10-20'),
+('1-02'),
+('1-2'),
+('pic01'),
+('pic02'),
+('pic 6'),
+('pic 7'),
+('pic 5'),
+('pic05'),
+('pic 5 '),
+('pic 5 something'),
+('pic 4 else'),
+('2000-1-10'),
+('1999-12-25'),
+('1999-3-3'),
+('2000-3-23'),
+('2000-1-2'),
+('100.200.300.400'),
+('100.50.60.70'),
+('100.8.9.0'),
+('a1b1'),
+('a01b2'),
+('a1b2'),
+('a01b3')
+)AS data ORDER BY 2,1;
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index b3fed8ceb4f..cc6c030d5bc 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -5304,7 +5304,7 @@ String *Item_temptable_rowid::val_str(String *str)
small, as it only computes variable *length prefixes*.
@param[in] n - the number
- @param[in] s - output buffer (should be at least 26 bytes large)
+ @param[in] s - output string
@return - length of encoding
@@ -5321,14 +5321,14 @@ String *Item_temptable_rowid::val_str(String *str)
Output calculated as concat('99', '0' + n -18)'
Output range '990'-'998'
- - n is from 28 to SIZE_T_MAX
+ - n is from 27 to SIZE_T_MAX
Output starts with '999',
then log10(n) is encoded as 2-digit decimal number
then the number itself is added.
Example : for 28 key is concat('999', '01' , '28')
i.e '9990128'
- Key legth is 5 + ceil(log10(n))
+ Key length is 5 + ceil(log10(n))
Output range is
(64bit)'9990128' - '9991918446744073709551615'
@@ -5336,26 +5336,28 @@ String *Item_temptable_rowid::val_str(String *str)
*/
/* Largest length of encoded string.*/
-#define NATSORT_BUFSIZE 26
-static size_t natsort_encode_length(size_t n, char *s)
+static size_t natsort_encode_length_max(size_t n)
+{
+ return (n < 27) ? n/9+1 : 26;
+}
+
+static void natsort_encode_length(size_t n, String* out)
{
if (n < 27)
{
- size_t n_nines= n / 9;
- if (n_nines)
- memset(s, '9', n_nines);
- s[n_nines]= char(n % 9 + '0');
- s[n_nines + 1]= 0;
- return n_nines + 1;
+ if (n >= 9)
+ out->fill(out->length() + n/9,'9');
+ out->append(char(n % 9 + '0'));
+ return;
}
- memset(s, '9', 3);
size_t log10n= 0;
for (size_t tmp= n / 10; tmp; tmp/= 10)
log10n++;
- s[3]= '0' + (char) (log10n / 10);
- s[4]= '0' + (char) (log10n % 10);
- return longlong10_to_str(n, s + 5, 10) - s;
+ out->fill(out->length() + 3, '9');
+ out->append('0' + (char) (log10n / 10));
+ out->append('0' + (char) (log10n % 10));
+ out->append_ulonglong(n);
}
enum class NATSORT_ERR
@@ -5374,8 +5376,6 @@ enum class NATSORT_ERR
@param[in] n_digits - length of the string,
in characters, not counting leading zeros.
- @param[in] n_lead_zeros - leading zeros count.
-
@param[out] out - String to write to. The string should
have enough preallocated space to fit the encoded key.
@@ -5384,58 +5384,43 @@ enum class NATSORT_ERR
NATSORT_ERR::KEY_TOO_LARGE - out string does not have enough
space left to accomodate the key.
- Note:
- Special case, where there are only leading zeros
- n_digits == 0 and n_leading_zeros > 0
- will treated as-if in = "0", n_digits = 1, n_leading_zeros
- is decremented.
The resulting encoding of the numeric string is then
- CONCAT(natsort_encode_length(n_digits), in,
- natsort_encode_length(n_leading_zeros))
+ CONCAT(natsort_encode_length(n_digits), in)
*/
static NATSORT_ERR natsort_encode_numeric_string(const char *in,
size_t n_digits,
- size_t n_leading_zeros,
String *out)
{
- char buf[NATSORT_BUFSIZE];
DBUG_ASSERT(in);
DBUG_ASSERT(n_digits);
- size_t len;
- len= natsort_encode_length(n_digits - 1, buf);
-
- if (out->length() + len + n_digits > out->alloced_length())
+ if (out->length() + natsort_encode_length_max(n_digits - 1) + n_digits >
+ out->alloced_length())
return NATSORT_ERR::KEY_TOO_LARGE;
- out->append(buf, len);
+ natsort_encode_length(n_digits - 1, out);
out->append(in, n_digits);
-
- len= natsort_encode_length(n_leading_zeros, buf);
- if (out->length() + len > out->alloced_length())
- return NATSORT_ERR::KEY_TOO_LARGE;
- out->append(buf, len);
return NATSORT_ERR::SUCCESS;
}
/*
Calculate max size of the natsort key.
- A digit expands to 3 chars - length_prefix, the digit, lead_zero_cnt(0)
+ A digit in string expands to 2 chars length_prefix , and the digit
With even length L=2N, the largest key corresponds to input string
- in form REPEAT('<letter><digit>',N) and the length of a key is
- 3N + N = 4*N = 2*L
+ in form REPEAT(<digit><letter>,N) and the length of a key is
+ 2N + N = 3N
+
+ With odd input length L=2N+1, largest key is built by appending
+ a digit at the end, with key length 3N+2
- With odd input length L=2*N+1, largest key corresponds to
- CONCAT(<digit>,REPEAT('<letter><digit>', N)
- with key length is 3 + 4*N = 2*L+1
*/
static size_t natsort_max_key_size(size_t input_size)
{
- return input_size * 2 + input_size % 2;
+ return input_size + (input_size + 1)/2 ;
}
/**
@@ -5454,8 +5439,8 @@ static NATSORT_ERR to_natsort_key(const String *in, String *out,
size_t n_digits= 0;
size_t n_lead_zeros= 0;
size_t num_start;
- size_t reserve_length= std::min(natsort_max_key_size(in->length()),
- max_key_size);
+ size_t reserve_length= std::min(
+ natsort_max_key_size(in->length()) + MAX_BIGINT_WIDTH + 2, max_key_size);
out->length(0);
out->set_charset(in->charset());
@@ -5478,7 +5463,7 @@ static NATSORT_ERR to_natsort_key(const String *in, String *out,
n_digits= 1;
}
NATSORT_ERR err= natsort_encode_numeric_string(
- in->ptr() + num_start, n_digits, n_lead_zeros, out);
+ in->ptr() + num_start, n_digits, out);
if (err != NATSORT_ERR::SUCCESS)
return err;
@@ -5574,6 +5559,16 @@ bool Item_func_natural_sort_key::fix_length_and_dec(void)
return false;
}
+/**
+ Disable use in stored virtual functions. Temporarily(?), until
+ the encoding is stable.
+*/
+bool Item_func_natural_sort_key::check_vcol_func_processor(void *arg)
+{
+ return mark_unsupported_function(func_name(), "()", arg,
+ VCOL_NON_DETERMINISTIC);
+}
+
#ifdef WITH_WSREP
#include "wsrep_mysqld.h"
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index 3f739aae99b..0b444a12809 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -287,6 +287,8 @@ public:
{
return get_item_copy<Item_func_natural_sort_key>(thd, this);
}
+
+ bool check_vcol_func_processor(void *arg) override;
};
class Item_func_concat :public Item_str_func