diff options
author | Alexander Barkov <bar@mariadb.org> | 2016-03-21 11:21:44 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2016-03-21 11:21:44 +0400 |
commit | 94768542115289272996f6450b7d3cafa75dcead (patch) | |
tree | 58ca682ea232bc83371ece1992137db8292827b2 | |
parent | 7cb16dc2a369058b31df1b3999989f6beff94d07 (diff) | |
download | mariadb-git-94768542115289272996f6450b7d3cafa75dcead.tar.gz |
MDEV-9369 IN operator with ( num, NULL ) gives inconsistent result
Based on this commit into MySQL-5.7:
> commit 8e51b845aafc8b4cdebd763c8aebda262ac2d4cd
> Author: Guilhem Bichot <guilhem.bichot@oracle.com>
> Date: Mon Nov 4 15:44:55 2013 +0100
>
> Bug#13944462 'NULL IN (XX)' RETURNS WRONG RESULTS
-rw-r--r-- | mysql-test/r/row.result | 14 | ||||
-rw-r--r-- | mysql-test/t/row.test | 11 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 119 | ||||
-rw-r--r-- | sql/item_cmpfunc.h | 86 | ||||
-rw-r--r-- | sql/item_row.h | 11 |
5 files changed, 173 insertions, 68 deletions
diff --git a/mysql-test/r/row.result b/mysql-test/r/row.result index 59a606128f5..9a19c3b0604 100644 --- a/mysql-test/r/row.result +++ b/mysql-test/r/row.result @@ -50,6 +50,12 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL No tables used Warnings: Note 1003 select ((1,2,(3,4)) in ((3,2,(3,4)),(1,2,(3,NULL)))) AS `row(1,2,row(3,4)) IN (row(3,2,row(3,4)), row(1,2,row(3,NULL)))` +select row(1,2,row(3,null)) IN (row(3,2,row(3,4)), row(1,2,row(4,5))); +row(1,2,row(3,null)) IN (row(3,2,row(3,4)), row(1,2,row(4,5))) +0 +select row(1,2,row(3,null)) IN (row(3,2,row(3,4)), row(1,2,row(3,5))); +row(1,2,row(3,null)) IN (row(3,2,row(3,4)), row(1,2,row(3,5))) +NULL SELECT (1,2,3)=(0,NULL,3); (1,2,3)=(0,NULL,3) 0 @@ -130,7 +136,7 @@ ROW(c,2,3) IN(row(1,b,a), row(2,3,1)) 0 0 1 -NULL +0 select ROW(a,b,c) IN(row(1,2,3), row(3,2,1)) from t1; ROW(a,b,c) IN(row(1,2,3), row(3,2,1)) 1 @@ -509,5 +515,11 @@ Warnings: Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where ((`test`.`t1`.`a` = 10) and (`test`.`t1`.`b` = 10)) DROP TABLE t1; # +# MDEV-9369 IN operator with ( num, NULL ) gives inconsistent result +# +SELECT (1,null) NOT IN ((2,2),(3,3)), (1,null) NOT IN ((2,2)), (1,null) NOT IN ((3,3)); +(1,null) NOT IN ((2,2),(3,3)) (1,null) NOT IN ((2,2)) (1,null) NOT IN ((3,3)) +1 1 1 +# # End of 10.1 tests # diff --git a/mysql-test/t/row.test b/mysql-test/t/row.test index 9268a3e1086..1c1d1b00910 100644 --- a/mysql-test/t/row.test +++ b/mysql-test/t/row.test @@ -19,10 +19,12 @@ select (1,2,(3,4)) IN ((3,2,(3,4)), (1,2,(3,4))); select row(1,2,row(3,4)) IN (row(3,2,row(3,4)), row(1,2,4)); select row(1,2,row(3,4)) IN (row(3,2,row(3,4)), row(1,2,row(3,NULL))); explain extended select row(1,2,row(3,4)) IN (row(3,2,row(3,4)), row(1,2,row(3,NULL))); +select row(1,2,row(3,null)) IN (row(3,2,row(3,4)), row(1,2,row(4,5))); +select row(1,2,row(3,null)) IN (row(3,2,row(3,4)), row(1,2,row(3,5))); + SELECT (1,2,3)=(0,NULL,3); SELECT (1,2,3)=(1,NULL,3); -# here's something for Sanja to fix :) SELECT (1,2,3)=(1,NULL,0); SELECT ROW(1,2,3)=ROW(1,2,3); @@ -300,6 +302,13 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=10 AND b=10 AND a>=10; EXPLAIN EXTENDED SELECT * FROM t1 WHERE (a,b)=(10,10) AND a>=10; DROP TABLE t1; + +--echo # +--echo # MDEV-9369 IN operator with ( num, NULL ) gives inconsistent result +--echo # +SELECT (1,null) NOT IN ((2,2),(3,3)), (1,null) NOT IN ((2,2)), (1,null) NOT IN ((3,3)); + + --echo # --echo # End of 10.1 tests --echo # diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 4819a7a9086..1ce8bad933b 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -2914,7 +2914,7 @@ Item *Item_func_case::find_item(String *str) return else_expr_num != -1 ? args[else_expr_num] : 0; value_added_map|= 1U << (uint)cmp_type; } - if (!cmp_items[(uint)cmp_type]->cmp(args[i]) && !args[i]->null_value) + if (cmp_items[(uint)cmp_type]->cmp(args[i]) == FALSE) return args[i + 1]; } } @@ -3545,11 +3545,11 @@ static int cmp_decimal(void *cmp_arg, my_decimal *a, my_decimal *b) } -int in_vector::find(Item *item) +bool in_vector::find(Item *item) { uchar *result=get_value(item); if (!result || !used_count) - return 0; // Null value + return false; // Null value uint start,end; start=0; end=used_count-1; @@ -3558,13 +3558,13 @@ int in_vector::find(Item *item) uint mid=(start+end+1)/2; int res; if ((res=(*compare)(collation, base+mid*size, result)) == 0) - return 1; + return true; if (res < 0) start=mid; else end=mid-1; } - return (int) ((*compare)(collation, base+start*size, result) == 0); + return ((*compare)(collation, base+start*size, result) == 0); } in_string::in_string(uint elements,qsort2_cmp cmp_func, CHARSET_INFO *cs) @@ -3894,14 +3894,20 @@ int cmp_item_row::cmp(Item *arg) arg->bring_value(); for (uint i=0; i < n; i++) { - if (comparators[i]->cmp(arg->element_index(i))) + const int rc= comparators[i]->cmp(arg->element_index(i)); + switch (rc) { - if (!arg->element_index(i)->null_value) - return 1; - was_null= 1; + case UNKNOWN: + was_null= true; + break; + case TRUE: + return TRUE; + case FALSE: + break; // elements #i are equal } + arg->null_value|= arg->element_index(i)->null_value; } - return (arg->null_value= was_null); + return was_null ? UNKNOWN : FALSE; } @@ -3924,15 +3930,15 @@ void cmp_item_decimal::store_value(Item *item) /* val may be zero if item is nnull */ if (val && val != &value) my_decimal2decimal(val, &value); + m_null_value= item->null_value; } int cmp_item_decimal::cmp(Item *arg) { my_decimal tmp_buf, *tmp= arg->val_decimal(&tmp_buf); - if (arg->null_value) - return 1; - return my_decimal_cmp(&value, tmp); + return (m_null_value || arg->null_value) ? + UNKNOWN : (my_decimal_cmp(&value, tmp) != 0); } @@ -3956,12 +3962,14 @@ void cmp_item_datetime::store_value(Item *item) enum_field_types f_type= tmp_item[0]->field_type_for_temporal_comparison(warn_item); value= get_datetime_value(thd, &tmp_item, &lval_cache, f_type, &is_null); + m_null_value= item->null_value; } int cmp_item_datetime::cmp(Item *arg) { - return value != arg->val_temporal_packed(warn_item); + const bool rc= value != arg->val_temporal_packed(warn_item); + return (m_null_value || arg->null_value) ? UNKNOWN : rc; } @@ -3985,10 +3993,10 @@ bool Item_func_in::count_sargable_conds(uchar *arg) } -bool Item_func_in::nulls_in_row() +bool Item_func_in::list_contains_null() { Item **arg,**arg_end; - for (arg= args+1, arg_end= args+arg_count; arg != arg_end ; arg++) + for (arg= args + 1, arg_end= args+arg_count; arg != arg_end ; arg++) { if ((*arg)->null_inside()) return 1; @@ -4103,6 +4111,32 @@ void Item_func_in::fix_length_and_dec() } } + /* + First conditions for bisection to be possible: + 1. All types are similar, and + 2. All expressions in <in value list> are const + */ + bool bisection_possible= + type_cnt == 1 && // 1 + const_itm; // 2 + if (bisection_possible) + { + /* + In the presence of NULLs, the correct result of evaluating this item + must be UNKNOWN or FALSE. To achieve that: + - If type is scalar, we can use bisection and the "have_null" boolean. + - If type is ROW, we will need to scan all of <in value list> when + searching, so bisection is impossible. Unless: + 3. UNKNOWN and FALSE are equivalent results + 4. Neither left expression nor <in value list> contain any NULL value + */ + + if (m_compare_type == ROW_RESULT && + ((!is_top_level_item() || negated) && // 3 + (list_contains_null() || args[0]->maybe_null))) // 4 + bisection_possible= false; + } + if (type_cnt == 1) { if (m_compare_type == STRING_RESULT && @@ -4115,7 +4149,7 @@ void Item_func_in::fix_length_and_dec() uint cols= args[0]->cols(); cmp_item_row *cmp= 0; - if (const_itm && !nulls_in_row()) + if (bisection_possible) { array= new (thd->mem_root) in_row(thd, arg_count-1, 0); cmp= &((in_row*)array)->tmp; @@ -4144,11 +4178,8 @@ void Item_func_in::fix_length_and_dec() } } } - /* - Row item with NULLs inside can return NULL or FALSE => - they can't be processed as static - */ - if (type_cnt == 1 && const_itm && !nulls_in_row()) + + if (bisection_possible) { /* IN must compare INT columns and constants as int values (the same @@ -4203,20 +4234,25 @@ void Item_func_in::fix_length_and_dec() array= new (thd->mem_root) in_datetime(date_arg, arg_count - 1); break; } - if (array && !(thd->is_fatal_error)) // If not EOM + if (!array || thd->is_fatal_error) // OOM + return; + uint j=0; + for (uint i=1 ; i < arg_count ; i++) { - uint j=0; - for (uint i=1 ; i < arg_count ; i++) + array->set(j,args[i]); + if (!args[i]->null_value) + j++; // include this cell in the array. + else { - array->set(j,args[i]); - if (!args[i]->null_value) // Skip NULL values - j++; - else - have_null= 1; + /* + We don't put NULL values in array, to avoid erronous matches in + bisection. + */ + have_null= 1; } - if ((array->used_count= j)) - array->sort(); } + if ((array->used_count= j)) + array->sort(); } else { @@ -4284,7 +4320,14 @@ longlong Item_func_in::val_int() uint value_added_map= 0; if (array) { - int tmp=array->find(args[0]); + bool tmp=array->find(args[0]); + /* + NULL on left -> UNKNOWN. + Found no match, and NULL on right -> UNKNOWN. + NULL on right can never give a match, as it is not stored in + array. + See also the 'bisection_possible' variable in fix_length_and_dec(). + */ null_value=args[0]->null_value || (!tmp && have_null); return (longlong) (!null_value && tmp != negated); } @@ -4306,13 +4349,12 @@ longlong Item_func_in::val_int() if (!(value_added_map & (1U << (uint)cmp_type))) { in_item->store_value(args[0]); - if ((null_value= args[0]->null_value)) - return 0; value_added_map|= 1U << (uint)cmp_type; } - if (!in_item->cmp(args[i]) && !args[i]->null_value) + const int rc= in_item->cmp(args[i]); + if (rc == FALSE) return (longlong) (!negated); - have_null|= args[i]->null_value; + have_null|= (rc == UNKNOWN); } null_value= have_null; @@ -6434,7 +6476,8 @@ longlong Item_equal::val_int() /* Skip fields of tables that has not been read yet */ if (!field->table->status || (field->table->status & STATUS_NULL_ROW)) { - if (eval_item->cmp(item) || (null_value= item->null_value)) + const int rc= eval_item->cmp(item); + if ((rc == TRUE) || (null_value= (rc == UNKNOWN))) return 0; } } diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index cd19ba72c07..39f2cf5590d 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -796,6 +796,7 @@ public: public: inline void negate() { negated= !negated; } inline void top_level_item() { pred_level= 1; } + bool is_top_level_item() const { return pred_level; } Item *neg_transformer(THD *thd) { negated= !negated; @@ -1075,7 +1076,7 @@ public: { my_qsort2(base,used_count,size,compare,(void*)collation); } - int find(Item *item); + bool find(Item *item); /* Create an instance of Item_{type} (e.g. Item_decimal) constant object @@ -1243,6 +1244,10 @@ public: cmp_item() { cmp_charset= &my_charset_bin; } virtual ~cmp_item() {} virtual void store_value(Item *item)= 0; + /** + @returns result (TRUE, FALSE or UNKNOWN) of + "stored argument's value <> item's value" + */ virtual int cmp(Item *item)= 0; // for optimized IN with row virtual int compare(cmp_item *item)= 0; @@ -1255,7 +1260,14 @@ public: } }; -class cmp_item_string :public cmp_item +/// cmp_item which stores a scalar (i.e. non-ROW). +class cmp_item_scalar : public cmp_item +{ +protected: + bool m_null_value; ///< If stored value is NULL +}; + +class cmp_item_string : public cmp_item_scalar { protected: String *value_res; @@ -1281,14 +1293,20 @@ public: void store_value(Item *item) { value_res= item->val_str(&value); + m_null_value= item->null_value; } int cmp(Item *arg) { char buff[STRING_BUFFER_USUAL_SIZE]; - String tmp(buff, sizeof(buff), cmp_charset), *res; - res= arg->val_str(&tmp); - return (value_res ? (res ? sortcmp(value_res, res, cmp_charset) : 1) : - (res ? -1 : 0)); + String tmp(buff, sizeof(buff), cmp_charset), *res= arg->val_str(&tmp); + if (m_null_value || arg->null_value) + return UNKNOWN; + if (value_res && res) + return sortcmp(value_res, res, cmp_charset) != 0; + else if (!value_res && !res) + return FALSE; + else + return TRUE; } int compare(cmp_item *ci) { @@ -1303,7 +1321,7 @@ public: } }; -class cmp_item_int :public cmp_item +class cmp_item_int : public cmp_item_scalar { longlong value; public: @@ -1311,10 +1329,12 @@ public: void store_value(Item *item) { value= item->val_int(); + m_null_value= item->null_value; } int cmp(Item *arg) { - return value != arg->val_int(); + const bool rc= value != arg->val_int(); + return (m_null_value || arg->null_value) ? UNKNOWN : rc; } int compare(cmp_item *ci) { @@ -1330,7 +1350,7 @@ public: If the left item is a constant one then its value is cached in the lval_cache variable. */ -class cmp_item_datetime :public cmp_item +class cmp_item_datetime : public cmp_item_scalar { longlong value; public: @@ -1348,7 +1368,7 @@ public: cmp_item *make_same(); }; -class cmp_item_real :public cmp_item +class cmp_item_real : public cmp_item_scalar { double value; public: @@ -1356,10 +1376,12 @@ public: void store_value(Item *item) { value= item->val_real(); + m_null_value= item->null_value; } int cmp(Item *arg) { - return value != arg->val_real(); + const bool rc= value != arg->val_real(); + return (m_null_value || arg->null_value) ? UNKNOWN : rc; } int compare(cmp_item *ci) { @@ -1370,7 +1392,7 @@ public: }; -class cmp_item_decimal :public cmp_item +class cmp_item_decimal : public cmp_item_scalar { my_decimal value; public: @@ -1397,12 +1419,13 @@ public: void store_value(Item *item) { value_res= item->val_str(&value); + m_null_value= item->null_value; } int cmp(Item *item) { // Should never be called - DBUG_ASSERT(0); - return 1; + DBUG_ASSERT(false); + return TRUE; } int compare(cmp_item *ci) { @@ -1467,33 +1490,43 @@ public: }; /* - The Item_func_in class implements the in_expr IN(values_list) function. + The Item_func_in class implements + in_expr IN (<in value list>) + and + in_expr NOT IN (<in value list>) The current implementation distinguishes 2 cases: - 1) all items in the value_list are constants and have the same + 1) all items in <in value list> are constants and have the same result type. This case is handled by in_vector class. - 2) items in the value_list have different result types or there is some - non-constant items. - In this case Item_func_in employs several cmp_item objects to performs - comparisons of in_expr and an item from the values_list. One cmp_item + 2) otherwise Item_func_in employs several cmp_item objects to perform + comparisons of in_expr and an item from <in value list>. One cmp_item object for each result type. Different result types are collected in the fix_length_and_dec() member function by means of collect_cmp_types() function. */ class Item_func_in :public Item_func_opt_neg { + /** + Usable if <in value list> is made only of constants. Returns true if one + of these constants contains a NULL. Example: + IN ( (-5, (12,NULL)), ... ). + */ + bool list_contains_null(); protected: SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Field *field, Item *value); public: - /* - an array of values when the right hand arguments of IN - are all SQL constant and there are no nulls - */ + /// An array of values, created when the bisection lookup method is used in_vector *array; + /** + If there is some NULL among <in value list>, during a val_int() call; for + example + IN ( (1,(3,'col')), ... ), where 'col' is a column which evaluates to + NULL. + */ bool have_null; - /* - true when all arguments of the IN clause are of compatible types + /** + true when all arguments of the IN list are of compatible types and can be used safely as comparisons for key conditions */ bool arg_types_compatible; @@ -1547,7 +1580,6 @@ public: virtual void print(String *str, enum_query_type query_type); enum Functype functype() const { return IN_FUNC; } const char *func_name() const { return " IN "; } - bool nulls_in_row(); bool eval_not_null_tables(uchar *opt_arg); void fix_after_pullout(st_select_lex *new_parent, Item **ref); bool count_sargable_conds(uchar *arg); diff --git a/sql/item_row.h b/sql/item_row.h index cf55eddc19c..25c9ec7915b 100644 --- a/sql/item_row.h +++ b/sql/item_row.h @@ -2,7 +2,7 @@ #define ITEM_ROW_INCLUDED /* - Copyright (c) 2002, 2010, Oracle and/or its affiliates. + Copyright (c) 2002, 2013, Oracle and/or its affiliates. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,11 +28,20 @@ @endverbatim */ + +/** + Item which stores (x,y,...) and ROW(x,y,...). + Note that this can be recursive: ((x,y),(z,t)) is a ROW of ROWs. +*/ class Item_row: public Item, private Item_args, private Used_tables_and_const_cache { table_map not_null_tables_cache; + /** + If elements are made only of constants, of which one or more are + NULL. For example, this item is (1,2,NULL), or ( (1,NULL), (2,3) ). + */ bool with_null; public: Item_row(THD *thd, List<Item> &list): |