summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVarun Gupta <varun.gupta@mariadb.com>2020-11-27 22:06:54 +0530
committerVarun Gupta <varun.gupta@mariadb.com>2020-11-27 22:20:49 +0530
commitfa17c8b9c0a51525160c5adeefa9d68c1d3f3a4c (patch)
tree5f8bfc4ab888064f15defef93329dcea7f4d05e1
parent08b0b70daa43a539d911238e25998f7450bab9e6 (diff)
downloadmariadb-git-bb-10.3-21265.tar.gz
MDEV-21265: IN predicate conversion to IN subquery should be allowed for a broader set of datatype comparisonbb-10.3-21265
Allow materialization strategy when collations on the inner and outer sides of an IN subquery are the same and the character set of the inner side is a proper subset of the character set on the outer side. This allows conversion from utf8mb3 to utf8mb4 as the former is a subset of the later. This is only allowed when IN predicate is converted to an IN subquery Backported part of the patch (d6a00d9b18f) of MDEV-17905.
-rw-r--r--mysql-test/main/subselect4.result34
-rw-r--r--mysql-test/main/subselect4.test37
-rw-r--r--sql/item_subselect.cc3
-rw-r--r--sql/item_subselect.h8
-rw-r--r--sql/opt_subselect.cc9
-rw-r--r--sql/sql_string.h18
-rw-r--r--sql/sql_tvc.cc6
-rw-r--r--sql/sql_type.cc90
-rw-r--r--sql/sql_type.h38
9 files changed, 215 insertions, 28 deletions
diff --git a/mysql-test/main/subselect4.result b/mysql-test/main/subselect4.result
index d069f71601f..23b6e268d91 100644
--- a/mysql-test/main/subselect4.result
+++ b/mysql-test/main/subselect4.result
@@ -2718,3 +2718,37 @@ Warning 1931 Query execution was interrupted. The query examined at least 3020 r
SET join_cache_level= @save_join_cache_level;
DROP TABLE t1,t2,t3,t4;
# End of 10.2 tests
+#
+# MDEV-21265: IN predicate conversion to IN subquery should be allowed for a broader set of datatype comparison
+#
+CREATE TABLE t1(a VARCHAR(50) collate utf8_general_ci, b INT);
+INSERT INTO t1 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5);
+CREATE TABLE t2(a VARCHAR(50) collate utf8mb4_general_ci, b INT);
+INSERT INTO t2 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5);
+set @save_in_predicate_conversion_threshold= @@in_predicate_conversion_threshold;
+set in_predicate_conversion_threshold=2;
+set names 'utf8mb4';
+#
+# IN predicate to IN subquery is not allowed as materialization is not allowed
+# The character set on the inner side is not equal to or a proper subset of the outer side
+#
+EXPLAIN
+SELECT * FROM t1 WHERE (t1.a,t1.b) IN (('abx',1),('def',2), ('abc', 3));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using where
+set names 'utf8';
+#
+# IN predicate to IN subquery is performed as materialization is llowed
+# The character set on the inner side is a proper subset of the outer side
+#
+EXPLAIN
+SELECT * FROM t2 WHERE (t2.a,t2.b) IN (('abx',1),('def',2), ('abc', 3));
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t2 ALL NULL NULL NULL NULL 5
+1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 16 func,func 1 Using where
+2 MATERIALIZED <derived3> ALL NULL NULL NULL NULL 3
+3 DERIVED NULL NULL NULL NULL NULL NULL NULL No tables used
+set names default;
+set @@in_predicate_conversion_threshold= @save_in_predicate_conversion_threshold;
+DROP TABLE t1,t2;
+# End of 10.3 tests
diff --git a/mysql-test/main/subselect4.test b/mysql-test/main/subselect4.test
index 6f5eb1f2985..f264b3857ff 100644
--- a/mysql-test/main/subselect4.test
+++ b/mysql-test/main/subselect4.test
@@ -2238,3 +2238,40 @@ SET join_cache_level= @save_join_cache_level;
DROP TABLE t1,t2,t3,t4;
--echo # End of 10.2 tests
+
+--echo #
+--echo # MDEV-21265: IN predicate conversion to IN subquery should be allowed for a broader set of datatype comparison
+--echo #
+
+CREATE TABLE t1(a VARCHAR(50) collate utf8_general_ci, b INT);
+INSERT INTO t1 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5);
+
+CREATE TABLE t2(a VARCHAR(50) collate utf8mb4_general_ci, b INT);
+INSERT INTO t2 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5);
+
+set @save_in_predicate_conversion_threshold= @@in_predicate_conversion_threshold;
+set in_predicate_conversion_threshold=2;
+
+set names 'utf8mb4';
+--echo #
+--echo # IN predicate to IN subquery is not allowed as materialization is not allowed
+--echo # The character set on the inner side is not equal to or a proper subset of the outer side
+--echo #
+
+EXPLAIN
+SELECT * FROM t1 WHERE (t1.a,t1.b) IN (('abx',1),('def',2), ('abc', 3));
+
+set names 'utf8';
+--echo #
+--echo # IN predicate to IN subquery is performed as materialization is llowed
+--echo # The character set on the inner side is a proper subset of the outer side
+--echo #
+
+EXPLAIN
+SELECT * FROM t2 WHERE (t2.a,t2.b) IN (('abx',1),('def',2), ('abc', 3));
+
+set names default;
+set @@in_predicate_conversion_threshold= @save_in_predicate_conversion_threshold;
+DROP TABLE t1,t2;
+
+--echo # End of 10.3 tests
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 3e49f893ee3..d882918de5c 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -1454,7 +1454,8 @@ Item_in_subselect::Item_in_subselect(THD *thd, Item * left_exp,
pushed_cond_guards(NULL), do_not_convert_to_sj(FALSE), is_jtbm_merged(FALSE),
is_jtbm_const_tab(FALSE), is_flattenable_semijoin(FALSE),
is_registered_semijoin(FALSE),
- upper_item(0)
+ upper_item(0),
+ converted_from_in_predicate(FALSE)
{
DBUG_ENTER("Item_in_subselect::Item_in_subselect");
DBUG_PRINT("info", ("in_strategy: %u", (uint)in_strategy));
diff --git a/sql/item_subselect.h b/sql/item_subselect.h
index 1cea7291c9e..fdc39f1f05e 100644
--- a/sql/item_subselect.h
+++ b/sql/item_subselect.h
@@ -601,12 +601,18 @@ public:
Item_func_not_all *upper_item; // point on NOT/NOP before ALL/SOME subquery
+ /*
+ SET to TRUE if IN subquery is converted from an IN predicate
+ */
+ bool converted_from_in_predicate;
+
Item_in_subselect(THD *thd_arg, Item * left_expr, st_select_lex *select_lex);
Item_in_subselect(THD *thd_arg):
Item_exists_subselect(thd_arg), left_expr_cache(0), first_execution(TRUE),
in_strategy(SUBS_NOT_TRANSFORMED),
pushed_cond_guards(NULL), func(NULL), do_not_convert_to_sj(FALSE),
- is_jtbm_merged(FALSE), is_jtbm_const_tab(FALSE), upper_item(0) {}
+ is_jtbm_merged(FALSE), is_jtbm_const_tab(FALSE), upper_item(0),
+ converted_from_in_predicate(FALSE) {}
void cleanup();
subs_type substype() { return IN_SUBS; }
void reset()
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 9ee51074854..d65e00f4b97 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -836,6 +836,7 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
bool all_are_fields= TRUE;
uint32 total_key_length = 0;
+ bool converted_from_in_predicate= in_subs->converted_from_in_predicate;
for (uint i= 0; i < elements; i++)
{
Item *outer= in_subs->left_expr->element_index(i);
@@ -843,8 +844,12 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
all_are_fields &= (outer->real_item()->type() == Item::FIELD_ITEM &&
inner->real_item()->type() == Item::FIELD_ITEM);
total_key_length += inner->max_length;
- if (!inner->type_handler()->subquery_type_allows_materialization(inner,
- outer))
+
+ if (!inner->
+ type_handler()->
+ subquery_type_allows_materialization(inner,
+ outer,
+ converted_from_in_predicate))
DBUG_RETURN(FALSE);
}
diff --git a/sql/sql_string.h b/sql/sql_string.h
index 11c3f0cd573..4f9a68acbb4 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -131,6 +131,24 @@ uint convert_to_printable(char *to, size_t to_len,
const char *from, size_t from_len,
CHARSET_INFO *from_cs, size_t nbytes= 0);
+class Charset
+{
+ CHARSET_INFO *m_charset;
+public:
+ Charset() :m_charset(&my_charset_bin) { }
+ Charset(CHARSET_INFO *cs) :m_charset(cs) { }
+
+ CHARSET_INFO *charset() const { return m_charset; }
+ /*
+ Collation name without the character set name.
+ For example, in case of "latin1_swedish_ci",
+ this method returns "_swedish_ci".
+ */
+ LEX_CSTRING collation_specific_name() const;
+ bool encoding_allows_reinterpret_as(CHARSET_INFO *cs) const;
+ bool eq_collation_specific_names(CHARSET_INFO *cs) const;
+};
+
class String : public Sql_alloc
{
char *Ptr;
diff --git a/sql/sql_tvc.cc b/sql/sql_tvc.cc
index def519035d9..10a279b92ed 100644
--- a/sql/sql_tvc.cc
+++ b/sql/sql_tvc.cc
@@ -829,7 +829,8 @@ static bool cmp_row_types(Item* item1, Item* item2)
Item *inner= item1->element_index(i);
Item *outer= item2->element_index(i);
if (!inner->type_handler()->subquery_type_allows_materialization(inner,
- outer))
+ outer,
+ true))
return true;
}
return false;
@@ -895,7 +896,7 @@ Item *Item_func_in::in_predicate_to_in_subs_transformer(THD *thd,
for (uint i=1; i < arg_count; i++)
{
- if (!args[i]->const_item() || cmp_row_types(args[0], args[i]))
+ if (!args[i]->const_item() || cmp_row_types(args[i], args[0]))
return this;
}
@@ -975,6 +976,7 @@ Item *Item_func_in::in_predicate_to_in_subs_transformer(THD *thd,
if (!(in_subs=
new (thd->mem_root) Item_in_subselect(thd, args[0], sq_select)))
goto err;
+ in_subs->converted_from_in_predicate= TRUE;
sq= in_subs;
if (negated)
sq= negate_expression(thd, in_subs);
diff --git a/sql/sql_type.cc b/sql/sql_type.cc
index a25aa236b16..0bae1e55145 100644
--- a/sql/sql_type.cc
+++ b/sql/sql_type.cc
@@ -19,6 +19,7 @@
#include "sql_const.h"
#include "sql_class.h"
#include "sql_time.h"
+#include "sql_string.h"
#include "item.h"
#include "log.h"
@@ -5112,7 +5113,8 @@ uint Type_handler_timestamp_common::Item_decimal_precision(const Item *item) con
bool Type_handler_real_result::
subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == REAL_RESULT);
return outer->cmp_type() == REAL_RESULT;
@@ -5121,7 +5123,8 @@ bool Type_handler_real_result::
bool Type_handler_int_result::
subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == INT_RESULT);
return outer->cmp_type() == INT_RESULT;
@@ -5130,7 +5133,8 @@ bool Type_handler_int_result::
bool Type_handler_decimal_result::
subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == DECIMAL_RESULT);
return outer->cmp_type() == DECIMAL_RESULT;
@@ -5139,23 +5143,37 @@ bool Type_handler_decimal_result::
bool Type_handler_string_result::
subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == STRING_RESULT);
- return outer->cmp_type() == STRING_RESULT &&
- outer->collation.collation == inner->collation.collation &&
- /*
- Materialization also is unable to work when create_tmp_table() will
- create a blob column because item->max_length is too big.
- The following test is copied from varstring_type_handler().
- */
- !inner->too_big_for_varchar();
+ if (outer->cmp_type() == STRING_RESULT &&
+ /*
+ Materialization also is unable to work when create_tmp_table() will
+ create a blob column because item->max_length is too big.
+ The following test is copied from varstring_type_handler().
+ */
+ !inner->too_big_for_varchar())
+ {
+ if (outer->collation.collation == inner->collation.collation)
+ return true;
+ if (is_in_predicate)
+ {
+ Charset inner_col(inner->collation.collation);
+ if (inner_col.encoding_allows_reinterpret_as(outer->
+ collation.collation) &&
+ inner_col.eq_collation_specific_names(outer->collation.collation))
+ return true;
+ }
+ }
+ return false;
}
bool Type_handler_temporal_result::
subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == TIME_RESULT);
return mysql_timestamp_type() ==
@@ -5973,3 +5991,49 @@ bool Type_handler_general_purpose_string::
}
/***************************************************************************/
+
+LEX_CSTRING Charset::collation_specific_name() const
+{
+ /*
+ User defined collations can provide arbitrary names
+ for character sets and collations, so a collation
+ name not necessarily starts with the character set name.
+ */
+ size_t csname_length= strlen(m_charset->csname);
+ if (strncmp(m_charset->name, m_charset->csname, csname_length))
+ return {NULL, 0};
+ const char *ptr= m_charset->name + csname_length;
+ return {ptr, strlen(ptr) };
+}
+
+
+bool
+Charset::encoding_allows_reinterpret_as(const CHARSET_INFO *cs) const
+{
+ if (!strcmp(m_charset->csname, cs->csname))
+ return true;
+
+ if (!strcmp(m_charset->csname, MY_UTF8MB3) &&
+ !strcmp(cs->csname, MY_UTF8MB4))
+ return true;
+
+ /*
+ Originally we allowed here instat ALTER for ASCII-to-LATIN1
+ and UCS2-to-UTF16, but this was wrong:
+ - MariaDB's ascii is not a subset for 8-bit character sets
+ like latin1, because it allows storing bytes 0x80..0xFF as
+ "unassigned" characters (see MDEV-19285).
+ - MariaDB's ucs2 (as in Unicode-1.1) is not a subset for UTF16,
+ because they treat surrogate codes differently (MDEV-19284).
+ */
+ return false;
+}
+
+
+bool
+Charset::eq_collation_specific_names(CHARSET_INFO *cs) const
+{
+ LEX_CSTRING name0= collation_specific_name();
+ LEX_CSTRING name1= Charset(cs).collation_specific_name();
+ return name0.length && !cmp(&name0, &name1);
+}
diff --git a/sql/sql_type.h b/sql/sql_type.h
index 907225b7c50..4a7a7b5a9b8 100644
--- a/sql/sql_type.h
+++ b/sql/sql_type.h
@@ -1291,9 +1291,21 @@ public:
Item *target_expr, Item *target_value,
Item_bool_func2 *source,
Item *source_expr, Item *source_const) const= 0;
+
+ /*
+ @brief
+ Check if an IN subquery allows materialization or not
+ @param
+ inner expression on the inner side of the IN subquery
+ outer expression on the outer side of the IN subquery
+ is_in_predicate SET to true if IN subquery was converted from an
+ IN predicate or we are checking if materialization
+ strategy can be used for an IN predicate
+ */
virtual bool
subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const= 0;
+ const Item *outer,
+ bool is_in_predicate) const= 0;
/**
Make a simple constant replacement item for a constant "src",
so the new item can futher be used for comparison with "cmp", e.g.:
@@ -1470,7 +1482,8 @@ public:
}
const Type_handler *type_handler_for_comparison() const;
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
DBUG_ASSERT(0);
return false;
@@ -1788,7 +1801,8 @@ public:
virtual ~Type_handler_real_result() {}
const Type_handler *type_handler_for_comparison() const;
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const;
+ const Item *outer,
+ bool is_in_predicate) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
void sortlength(THD *thd,
@@ -1857,7 +1871,8 @@ public:
virtual ~Type_handler_decimal_result() {};
const Type_handler *type_handler_for_comparison() const;
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const;
+ const Item *outer,
+ bool is_in_predicate) const;
Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
@@ -2060,7 +2075,8 @@ public:
virtual ~Type_handler_int_result() {}
const Type_handler *type_handler_for_comparison() const;
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const;
+ const Item *outer,
+ bool is_in_predicate) const;
Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
@@ -2156,7 +2172,8 @@ public:
Item_bool_func2 *source,
Item *source_expr, Item *source_const) const;
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const;
+ const Item *outer,
+ bool is_in_predicate) const;
bool Item_func_min_max_fix_attributes(THD *thd, Item_func_min_max *func,
Item **items, uint nitems) const;
bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const;
@@ -2266,7 +2283,8 @@ public:
Item_bool_func2 *source,
Item *source_expr, Item *source_const) const;
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const;
+ const Item *outer,
+ bool is_in_predicate) const;
Item *make_const_item_for_comparison(THD *, Item *src, const Item *cmp) const;
Item_cache *Item_get_cache(THD *thd, const Item *item) const;
bool set_comparator_func(Arg_comparator *cmp) const;
@@ -3236,7 +3254,8 @@ public:
return blob_type_handler(item);
}
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
return false; // Materialization does not work with BLOB columns
}
@@ -3341,7 +3360,8 @@ public:
return true;
}
bool subquery_type_allows_materialization(const Item *inner,
- const Item *outer) const
+ const Item *outer,
+ bool is_in_predicate) const
{
return false; // Materialization does not work with GEOMETRY columns
}