diff options
author | Varun Gupta <varun.gupta@mariadb.com> | 2020-11-27 22:06:54 +0530 |
---|---|---|
committer | Varun Gupta <varun.gupta@mariadb.com> | 2020-11-27 22:20:49 +0530 |
commit | fa17c8b9c0a51525160c5adeefa9d68c1d3f3a4c (patch) | |
tree | 5f8bfc4ab888064f15defef93329dcea7f4d05e1 | |
parent | 08b0b70daa43a539d911238e25998f7450bab9e6 (diff) | |
download | mariadb-git-bb-10.3-21265.tar.gz |
MDEV-21265: IN predicate conversion to IN subquery should be allowed for a broader set of datatype comparisonbb-10.3-21265
Allow materialization strategy when collations on the
inner and outer sides of an IN subquery are the same and the
character set of the inner side is a proper subset of the character
set on the outer side.
This allows conversion from utf8mb3 to utf8mb4
as the former is a subset of the later.
This is only allowed when IN predicate is converted to an IN subquery
Backported part of the patch (d6a00d9b18f) of MDEV-17905.
-rw-r--r-- | mysql-test/main/subselect4.result | 34 | ||||
-rw-r--r-- | mysql-test/main/subselect4.test | 37 | ||||
-rw-r--r-- | sql/item_subselect.cc | 3 | ||||
-rw-r--r-- | sql/item_subselect.h | 8 | ||||
-rw-r--r-- | sql/opt_subselect.cc | 9 | ||||
-rw-r--r-- | sql/sql_string.h | 18 | ||||
-rw-r--r-- | sql/sql_tvc.cc | 6 | ||||
-rw-r--r-- | sql/sql_type.cc | 90 | ||||
-rw-r--r-- | sql/sql_type.h | 38 |
9 files changed, 215 insertions, 28 deletions
diff --git a/mysql-test/main/subselect4.result b/mysql-test/main/subselect4.result index d069f71601f..23b6e268d91 100644 --- a/mysql-test/main/subselect4.result +++ b/mysql-test/main/subselect4.result @@ -2718,3 +2718,37 @@ Warning 1931 Query execution was interrupted. The query examined at least 3020 r SET join_cache_level= @save_join_cache_level; DROP TABLE t1,t2,t3,t4; # End of 10.2 tests +# +# MDEV-21265: IN predicate conversion to IN subquery should be allowed for a broader set of datatype comparison +# +CREATE TABLE t1(a VARCHAR(50) collate utf8_general_ci, b INT); +INSERT INTO t1 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5); +CREATE TABLE t2(a VARCHAR(50) collate utf8mb4_general_ci, b INT); +INSERT INTO t2 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5); +set @save_in_predicate_conversion_threshold= @@in_predicate_conversion_threshold; +set in_predicate_conversion_threshold=2; +set names 'utf8mb4'; +# +# IN predicate to IN subquery is not allowed as materialization is not allowed +# The character set on the inner side is not equal to or a proper subset of the outer side +# +EXPLAIN +SELECT * FROM t1 WHERE (t1.a,t1.b) IN (('abx',1),('def',2), ('abc', 3)); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using where +set names 'utf8'; +# +# IN predicate to IN subquery is performed as materialization is llowed +# The character set on the inner side is a proper subset of the outer side +# +EXPLAIN +SELECT * FROM t2 WHERE (t2.a,t2.b) IN (('abx',1),('def',2), ('abc', 3)); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 +1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 16 func,func 1 Using where +2 MATERIALIZED <derived3> ALL NULL NULL NULL NULL 3 +3 DERIVED NULL NULL NULL NULL NULL NULL NULL No tables used +set names default; +set @@in_predicate_conversion_threshold= @save_in_predicate_conversion_threshold; +DROP TABLE t1,t2; +# End of 10.3 tests diff --git a/mysql-test/main/subselect4.test b/mysql-test/main/subselect4.test index 6f5eb1f2985..f264b3857ff 100644 --- a/mysql-test/main/subselect4.test +++ b/mysql-test/main/subselect4.test @@ -2238,3 +2238,40 @@ SET join_cache_level= @save_join_cache_level; DROP TABLE t1,t2,t3,t4; --echo # End of 10.2 tests + +--echo # +--echo # MDEV-21265: IN predicate conversion to IN subquery should be allowed for a broader set of datatype comparison +--echo # + +CREATE TABLE t1(a VARCHAR(50) collate utf8_general_ci, b INT); +INSERT INTO t1 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5); + +CREATE TABLE t2(a VARCHAR(50) collate utf8mb4_general_ci, b INT); +INSERT INTO t2 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5); + +set @save_in_predicate_conversion_threshold= @@in_predicate_conversion_threshold; +set in_predicate_conversion_threshold=2; + +set names 'utf8mb4'; +--echo # +--echo # IN predicate to IN subquery is not allowed as materialization is not allowed +--echo # The character set on the inner side is not equal to or a proper subset of the outer side +--echo # + +EXPLAIN +SELECT * FROM t1 WHERE (t1.a,t1.b) IN (('abx',1),('def',2), ('abc', 3)); + +set names 'utf8'; +--echo # +--echo # IN predicate to IN subquery is performed as materialization is llowed +--echo # The character set on the inner side is a proper subset of the outer side +--echo # + +EXPLAIN +SELECT * FROM t2 WHERE (t2.a,t2.b) IN (('abx',1),('def',2), ('abc', 3)); + +set names default; +set @@in_predicate_conversion_threshold= @save_in_predicate_conversion_threshold; +DROP TABLE t1,t2; + +--echo # End of 10.3 tests diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 3e49f893ee3..d882918de5c 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -1454,7 +1454,8 @@ Item_in_subselect::Item_in_subselect(THD *thd, Item * left_exp, pushed_cond_guards(NULL), do_not_convert_to_sj(FALSE), is_jtbm_merged(FALSE), is_jtbm_const_tab(FALSE), is_flattenable_semijoin(FALSE), is_registered_semijoin(FALSE), - upper_item(0) + upper_item(0), + converted_from_in_predicate(FALSE) { DBUG_ENTER("Item_in_subselect::Item_in_subselect"); DBUG_PRINT("info", ("in_strategy: %u", (uint)in_strategy)); diff --git a/sql/item_subselect.h b/sql/item_subselect.h index 1cea7291c9e..fdc39f1f05e 100644 --- a/sql/item_subselect.h +++ b/sql/item_subselect.h @@ -601,12 +601,18 @@ public: Item_func_not_all *upper_item; // point on NOT/NOP before ALL/SOME subquery + /* + SET to TRUE if IN subquery is converted from an IN predicate + */ + bool converted_from_in_predicate; + Item_in_subselect(THD *thd_arg, Item * left_expr, st_select_lex *select_lex); Item_in_subselect(THD *thd_arg): Item_exists_subselect(thd_arg), left_expr_cache(0), first_execution(TRUE), in_strategy(SUBS_NOT_TRANSFORMED), pushed_cond_guards(NULL), func(NULL), do_not_convert_to_sj(FALSE), - is_jtbm_merged(FALSE), is_jtbm_const_tab(FALSE), upper_item(0) {} + is_jtbm_merged(FALSE), is_jtbm_const_tab(FALSE), upper_item(0), + converted_from_in_predicate(FALSE) {} void cleanup(); subs_type substype() { return IN_SUBS; } void reset() diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 9ee51074854..d65e00f4b97 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -836,6 +836,7 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs) bool all_are_fields= TRUE; uint32 total_key_length = 0; + bool converted_from_in_predicate= in_subs->converted_from_in_predicate; for (uint i= 0; i < elements; i++) { Item *outer= in_subs->left_expr->element_index(i); @@ -843,8 +844,12 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs) all_are_fields &= (outer->real_item()->type() == Item::FIELD_ITEM && inner->real_item()->type() == Item::FIELD_ITEM); total_key_length += inner->max_length; - if (!inner->type_handler()->subquery_type_allows_materialization(inner, - outer)) + + if (!inner-> + type_handler()-> + subquery_type_allows_materialization(inner, + outer, + converted_from_in_predicate)) DBUG_RETURN(FALSE); } diff --git a/sql/sql_string.h b/sql/sql_string.h index 11c3f0cd573..4f9a68acbb4 100644 --- a/sql/sql_string.h +++ b/sql/sql_string.h @@ -131,6 +131,24 @@ uint convert_to_printable(char *to, size_t to_len, const char *from, size_t from_len, CHARSET_INFO *from_cs, size_t nbytes= 0); +class Charset +{ + CHARSET_INFO *m_charset; +public: + Charset() :m_charset(&my_charset_bin) { } + Charset(CHARSET_INFO *cs) :m_charset(cs) { } + + CHARSET_INFO *charset() const { return m_charset; } + /* + Collation name without the character set name. + For example, in case of "latin1_swedish_ci", + this method returns "_swedish_ci". + */ + LEX_CSTRING collation_specific_name() const; + bool encoding_allows_reinterpret_as(CHARSET_INFO *cs) const; + bool eq_collation_specific_names(CHARSET_INFO *cs) const; +}; + class String : public Sql_alloc { char *Ptr; diff --git a/sql/sql_tvc.cc b/sql/sql_tvc.cc index def519035d9..10a279b92ed 100644 --- a/sql/sql_tvc.cc +++ b/sql/sql_tvc.cc @@ -829,7 +829,8 @@ static bool cmp_row_types(Item* item1, Item* item2) Item *inner= item1->element_index(i); Item *outer= item2->element_index(i); if (!inner->type_handler()->subquery_type_allows_materialization(inner, - outer)) + outer, + true)) return true; } return false; @@ -895,7 +896,7 @@ Item *Item_func_in::in_predicate_to_in_subs_transformer(THD *thd, for (uint i=1; i < arg_count; i++) { - if (!args[i]->const_item() || cmp_row_types(args[0], args[i])) + if (!args[i]->const_item() || cmp_row_types(args[i], args[0])) return this; } @@ -975,6 +976,7 @@ Item *Item_func_in::in_predicate_to_in_subs_transformer(THD *thd, if (!(in_subs= new (thd->mem_root) Item_in_subselect(thd, args[0], sq_select))) goto err; + in_subs->converted_from_in_predicate= TRUE; sq= in_subs; if (negated) sq= negate_expression(thd, in_subs); diff --git a/sql/sql_type.cc b/sql/sql_type.cc index a25aa236b16..0bae1e55145 100644 --- a/sql/sql_type.cc +++ b/sql/sql_type.cc @@ -19,6 +19,7 @@ #include "sql_const.h" #include "sql_class.h" #include "sql_time.h" +#include "sql_string.h" #include "item.h" #include "log.h" @@ -5112,7 +5113,8 @@ uint Type_handler_timestamp_common::Item_decimal_precision(const Item *item) con bool Type_handler_real_result:: subquery_type_allows_materialization(const Item *inner, - const Item *outer) const + const Item *outer, + bool is_in_predicate) const { DBUG_ASSERT(inner->cmp_type() == REAL_RESULT); return outer->cmp_type() == REAL_RESULT; @@ -5121,7 +5123,8 @@ bool Type_handler_real_result:: bool Type_handler_int_result:: subquery_type_allows_materialization(const Item *inner, - const Item *outer) const + const Item *outer, + bool is_in_predicate) const { DBUG_ASSERT(inner->cmp_type() == INT_RESULT); return outer->cmp_type() == INT_RESULT; @@ -5130,7 +5133,8 @@ bool Type_handler_int_result:: bool Type_handler_decimal_result:: subquery_type_allows_materialization(const Item *inner, - const Item *outer) const + const Item *outer, + bool is_in_predicate) const { DBUG_ASSERT(inner->cmp_type() == DECIMAL_RESULT); return outer->cmp_type() == DECIMAL_RESULT; @@ -5139,23 +5143,37 @@ bool Type_handler_decimal_result:: bool Type_handler_string_result:: subquery_type_allows_materialization(const Item *inner, - const Item *outer) const + const Item *outer, + bool is_in_predicate) const { DBUG_ASSERT(inner->cmp_type() == STRING_RESULT); - return outer->cmp_type() == STRING_RESULT && - outer->collation.collation == inner->collation.collation && - /* - Materialization also is unable to work when create_tmp_table() will - create a blob column because item->max_length is too big. - The following test is copied from varstring_type_handler(). - */ - !inner->too_big_for_varchar(); + if (outer->cmp_type() == STRING_RESULT && + /* + Materialization also is unable to work when create_tmp_table() will + create a blob column because item->max_length is too big. + The following test is copied from varstring_type_handler(). + */ + !inner->too_big_for_varchar()) + { + if (outer->collation.collation == inner->collation.collation) + return true; + if (is_in_predicate) + { + Charset inner_col(inner->collation.collation); + if (inner_col.encoding_allows_reinterpret_as(outer-> + collation.collation) && + inner_col.eq_collation_specific_names(outer->collation.collation)) + return true; + } + } + return false; } bool Type_handler_temporal_result:: subquery_type_allows_materialization(const Item *inner, - const Item *outer) const + const Item *outer, + bool is_in_predicate) const { DBUG_ASSERT(inner->cmp_type() == TIME_RESULT); return mysql_timestamp_type() == @@ -5973,3 +5991,49 @@ bool Type_handler_general_purpose_string:: } /***************************************************************************/ + +LEX_CSTRING Charset::collation_specific_name() const +{ + /* + User defined collations can provide arbitrary names + for character sets and collations, so a collation + name not necessarily starts with the character set name. + */ + size_t csname_length= strlen(m_charset->csname); + if (strncmp(m_charset->name, m_charset->csname, csname_length)) + return {NULL, 0}; + const char *ptr= m_charset->name + csname_length; + return {ptr, strlen(ptr) }; +} + + +bool +Charset::encoding_allows_reinterpret_as(const CHARSET_INFO *cs) const +{ + if (!strcmp(m_charset->csname, cs->csname)) + return true; + + if (!strcmp(m_charset->csname, MY_UTF8MB3) && + !strcmp(cs->csname, MY_UTF8MB4)) + return true; + + /* + Originally we allowed here instat ALTER for ASCII-to-LATIN1 + and UCS2-to-UTF16, but this was wrong: + - MariaDB's ascii is not a subset for 8-bit character sets + like latin1, because it allows storing bytes 0x80..0xFF as + "unassigned" characters (see MDEV-19285). + - MariaDB's ucs2 (as in Unicode-1.1) is not a subset for UTF16, + because they treat surrogate codes differently (MDEV-19284). + */ + return false; +} + + +bool +Charset::eq_collation_specific_names(CHARSET_INFO *cs) const +{ + LEX_CSTRING name0= collation_specific_name(); + LEX_CSTRING name1= Charset(cs).collation_specific_name(); + return name0.length && !cmp(&name0, &name1); +} diff --git a/sql/sql_type.h b/sql/sql_type.h index 907225b7c50..4a7a7b5a9b8 100644 --- a/sql/sql_type.h +++ b/sql/sql_type.h @@ -1291,9 +1291,21 @@ public: Item *target_expr, Item *target_value, Item_bool_func2 *source, Item *source_expr, Item *source_const) const= 0; + + /* + @brief + Check if an IN subquery allows materialization or not + @param + inner expression on the inner side of the IN subquery + outer expression on the outer side of the IN subquery + is_in_predicate SET to true if IN subquery was converted from an + IN predicate or we are checking if materialization + strategy can be used for an IN predicate + */ virtual bool subquery_type_allows_materialization(const Item *inner, - const Item *outer) const= 0; + const Item *outer, + bool is_in_predicate) const= 0; /** Make a simple constant replacement item for a constant "src", so the new item can futher be used for comparison with "cmp", e.g.: @@ -1470,7 +1482,8 @@ public: } const Type_handler *type_handler_for_comparison() const; bool subquery_type_allows_materialization(const Item *inner, - const Item *outer) const + const Item *outer, + bool is_in_predicate) const { DBUG_ASSERT(0); return false; @@ -1788,7 +1801,8 @@ public: virtual ~Type_handler_real_result() {} const Type_handler *type_handler_for_comparison() const; bool subquery_type_allows_materialization(const Item *inner, - const Item *outer) const; + const Item *outer, + bool is_in_predicate) const; void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, Sort_param *param) const; void sortlength(THD *thd, @@ -1857,7 +1871,8 @@ public: virtual ~Type_handler_decimal_result() {}; const Type_handler *type_handler_for_comparison() const; bool subquery_type_allows_materialization(const Item *inner, - const Item *outer) const; + const Item *outer, + bool is_in_predicate) const; Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const; void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, Sort_param *param) const; @@ -2060,7 +2075,8 @@ public: virtual ~Type_handler_int_result() {} const Type_handler *type_handler_for_comparison() const; bool subquery_type_allows_materialization(const Item *inner, - const Item *outer) const; + const Item *outer, + bool is_in_predicate) const; Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const; void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field, Sort_param *param) const; @@ -2156,7 +2172,8 @@ public: Item_bool_func2 *source, Item *source_expr, Item *source_const) const; bool subquery_type_allows_materialization(const Item *inner, - const Item *outer) const; + const Item *outer, + bool is_in_predicate) const; bool Item_func_min_max_fix_attributes(THD *thd, Item_func_min_max *func, Item **items, uint nitems) const; bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const; @@ -2266,7 +2283,8 @@ public: Item_bool_func2 *source, Item *source_expr, Item *source_const) const; bool subquery_type_allows_materialization(const Item *inner, - const Item *outer) const; + const Item *outer, + bool is_in_predicate) const; Item *make_const_item_for_comparison(THD *, Item *src, const Item *cmp) const; Item_cache *Item_get_cache(THD *thd, const Item *item) const; bool set_comparator_func(Arg_comparator *cmp) const; @@ -3236,7 +3254,8 @@ public: return blob_type_handler(item); } bool subquery_type_allows_materialization(const Item *inner, - const Item *outer) const + const Item *outer, + bool is_in_predicate) const { return false; // Materialization does not work with BLOB columns } @@ -3341,7 +3360,8 @@ public: return true; } bool subquery_type_allows_materialization(const Item *inner, - const Item *outer) const + const Item *outer, + bool is_in_predicate) const { return false; // Materialization does not work with GEOMETRY columns } |