MDEV-26278 Add functionality to eliminate derived tables from the querybb-10.9-MDEV-26278

Elimination of unnecessary tables from SQL queries is already present in MariaDB. But it only works for regular tables and not for derived ones. Imagine we have a view: CREATE VIEW v1 AS SELECT a, b, max(c) AS maxc FROM t1 GROUP BY a, b Due to "GROUP BY a, b" the values of combinations {a, b} are unique, and this fact can be treated as like derived table "v1" has a unique key on fields {a, b}. Suppose we have a SQL query: SELECT t2.* FROM t2 LEFT JOIN v1 ON t2.a=v1.a and t2.b=v1.b 1. Since {v1.a, v1.b} is unique and both these fields are bound to t2, "v1" is functionally dependent on t2. This means every record of "t2" will be either joined with a single record of "v1" or NULL-complemented. 2. No fields of "v1" are present on the SELECT list These two facts allow the server to completely exclude (eliminate) the derived table "v1" from the query.
author: Oleg Smirnov <olernov@gmail.com> 2022-02-17 22:53:37 +0700
committer: Oleg Smirnov <olernov@gmail.com> 2022-06-13 09:35:18 +0400
commit: 1f1b7cc5f1ef9a828ed1bf9cec89fff04e622921 (patch)
tree: 839f683dd1f1679cef8279adbc9b7433c3610995
parent: a9e00a014d52587299ee473a3939c7ba0a26873e (diff)
download: mariadb-git-bb-10.9-MDEV-26278.tar.gz
5 files changed, 650 insertions, 20 deletions
diff --git a/mysql-test/main/table_elim.result b/mysql-test/main/table_elim.result
index deff0623370..4f648f45895 100644
--- a/mysql-test/main/table_elim.result
+++ b/mysql-test/main/table_elim.result
@@ -704,3 +704,284 @@ LIMIT 1;
 PostID	Voted
 1	NULL
 DROP TABLE t1,t2;
+#
+# MDEV-26278: Table elimination does not work across derived tables
+#
+create table t1 (a int, b int);
+insert into t1 select seq, seq+10 from seq_1_to_10;
+create table t11 (
+a int not null,
+b int,
+key(a)
+);
+insert into t11 select A.seq, A.seq+B.seq
+from
+seq_1_to_10 A,
+seq_1_to_100 B;
+create table t12 (
+pk int primary key,
+col1 int
+);
+insert into t12 select seq, seq from seq_1_to_1000;
+create view v2b as 
+select t11.a as a, count(*) as b
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a;
+# The whole v2b is eliminated
+explain select t1.* from t1 left join v2b on v2b.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+# Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2b on t1.a=v2b.a;
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "const_condition": "1",
+    "nested_loop": [
+      {
+        "table": {
+          "table_name": "t1",
+          "access_type": "ALL",
+          "rows": 10,
+          "filtered": 100
+        }
+      }
+    ]
+  }
+}
+# Elimination of a whole subquery 
+explain select t1.* from t1 left join 
+(select t11.a as a, count(*) as b
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a) v2b on v2b.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+# In this case v2b cannot be eliminated (since v2b.b is not unique)!
+explain select t1.* from t1 left join v2b on t1.a=v2b.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	PRIMARY	<derived2>	ref	key0	key0	8	test.t1.a	10	Using where
+2	DERIVED	t11	ALL	NULL	NULL	NULL	NULL	1000	Using temporary; Using filesort
+# Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2b on t1.a=v2b.b;
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "const_condition": "1",
+    "nested_loop": [
+      {
+        "table": {
+          "table_name": "t1",
+          "access_type": "ALL",
+          "rows": 10,
+          "filtered": 100
+        }
+      },
+      {
+        "table": {
+          "table_name": "<derived2>",
+          "access_type": "ref",
+          "possible_keys": ["key0"],
+          "key": "key0",
+          "key_length": "8",
+          "used_key_parts": ["b"],
+          "ref": ["test.t1.a"],
+          "rows": 10,
+          "filtered": 100,
+          "attached_condition": "trigcond(t1.a = v2b.b and trigcond(t1.a is not null))",
+          "materialized": {
+            "query_block": {
+              "select_id": 2,
+              "const_condition": "1",
+              "filesort": {
+                "sort_key": "t11.a",
+                "temporary_table": {
+                  "nested_loop": [
+                    {
+                      "table": {
+                        "table_name": "t11",
+                        "access_type": "ALL",
+                        "rows": 1000,
+                        "filtered": 100
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          }
+        }
+      }
+    ]
+  }
+}
+create view v2c as 
+select t11.a as a, max(t12.col1) as b
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a;
+# The whole v2c is eliminated
+explain select t1.* from t1 left join v2c on v2c.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+# Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2c on v2c.a=t1.a;
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "const_condition": "1",
+    "nested_loop": [
+      {
+        "table": {
+          "table_name": "t1",
+          "access_type": "ALL",
+          "rows": 10,
+          "filtered": 100
+        }
+      }
+    ]
+  }
+}
+# In this case v2c cannot be eliminated (since v2c.b is not unique)!
+explain select t1.* from t1 left join v2c on t1.a=v2c.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	10	Using where
+2	DERIVED	t11	ALL	NULL	NULL	NULL	NULL	1000	Using temporary; Using filesort
+2	DERIVED	t12	eq_ref	PRIMARY	PRIMARY	4	test.t11.b	1	Using where
+# Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2c on t1.a=v2c.b;
+EXPLAIN
+{
+  "query_block": {
+    "select_id": 1,
+    "const_condition": "1",
+    "nested_loop": [
+      {
+        "table": {
+          "table_name": "t1",
+          "access_type": "ALL",
+          "rows": 10,
+          "filtered": 100
+        }
+      },
+      {
+        "table": {
+          "table_name": "<derived2>",
+          "access_type": "ref",
+          "possible_keys": ["key0"],
+          "key": "key0",
+          "key_length": "5",
+          "used_key_parts": ["b"],
+          "ref": ["test.t1.a"],
+          "rows": 10,
+          "filtered": 100,
+          "attached_condition": "trigcond(trigcond(t1.a is not null))",
+          "materialized": {
+            "query_block": {
+              "select_id": 2,
+              "const_condition": "1",
+              "filesort": {
+                "sort_key": "t11.a",
+                "temporary_table": {
+                  "nested_loop": [
+                    {
+                      "table": {
+                        "table_name": "t11",
+                        "access_type": "ALL",
+                        "rows": 1000,
+                        "filtered": 100
+                      }
+                    },
+                    {
+                      "table": {
+                        "table_name": "t12",
+                        "access_type": "eq_ref",
+                        "possible_keys": ["PRIMARY"],
+                        "key": "PRIMARY",
+                        "key_length": "4",
+                        "used_key_parts": ["pk"],
+                        "ref": ["test.t11.b"],
+                        "rows": 1,
+                        "filtered": 100,
+                        "attached_condition": "trigcond(trigcond(t11.b is not null))"
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          }
+        }
+      }
+    ]
+  }
+}
+# Create a view with multiple fields in the GROUP BY clause:
+create view v2d as 
+select t11.a as a, t11.b as b, max(t12.col1) as max_col1
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a, t11.b;
+# This one must not be eliminated since only one of the GROUP BY fields is bound:
+explain select t1.* from t1 left join v2d on v2d.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	10	Using where
+2	DERIVED	t11	ALL	a	NULL	NULL	NULL	1000	Using temporary; Using filesort
+2	DERIVED	t12	eq_ref	PRIMARY	PRIMARY	4	test.t11.b	1	Using where
+# This must be eliminated since both fields are bound:
+explain select t1.* from t1 left join v2d on v2d.a=t1.a and v2d.b=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+create table t13 (dt date, b int);
+# Function year() in the GROUP BY list prevents treating this field 
+# as a unique key
+create view v2e as
+select year(t13.dt) as yyy, max(t12.col1) as max_col1
+from t13 join t12 on t12.pk=t13.b
+group by yyy;
+# No elimination here since function year() is used
+explain select t1.* from t1 left join v2e on v2e.yyy=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	2	Using where
+2	DERIVED	NULL	NULL	NULL	NULL	NULL	NULL	NULL	no matching row in const table
+create table t2 (a int, b int, c int);
+insert into t2 select A.seq, B.seq, 123 from seq_1_to_3 A, seq_1_to_3 B;
+# No elimination here since not all fields of the derived table's 
+# GROUP BY are on the SELECT list so D.a is not unique
+explain select t1.* from t1 left join 
+(select a, count(*) as cnt from t2 group by a, b) D on D.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	2	Using where
+2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	9	Using temporary; Using filesort
+# Still no elimination 'cause field D.b is just an alias for t2.a
+explain select t1.* from t1 left join 
+(select a, a as b, count(*) as cnt from t2 group by a, b) D on D.a=t1.a and D.b=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	PRIMARY	<derived2>	ref	key0	key0	10	test.t1.a,test.t1.b	2	Using where
+2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	9	Using temporary; Using filesort
+Warnings:
+Warning	1052	Column 'b' in group statement is ambiguous
+# Now both a and b fields are on the SELECT list and they are bound to t1
+# so derived D must be eliminated
+explain select t1.* from t1 left join 
+(select a as a1, b as b1, count(*) as cnt from t2 group by a, b) D 
+on D.a1=t1.a and D.b1=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+# Different order of fields in GROUP BY and SELECT lists
+# must not hamper the elimination
+explain select t1.* from t1 left join 
+(select count(*) as cnt, b, a from t2 group by a, b) D on D.a=t1.a and D.b=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
+drop view v2b, v2c, v2d, v2e;
+drop table t1, t11, t12, t13, t2;
+#
+# End of MDEV-26278: Table elimination does not work across derived tables
+#
diff --git a/mysql-test/main/table_elim.test b/mysql-test/main/table_elim.test
index 8de4743b9fd..a1f7ef91522 100644
--- a/mysql-test/main/table_elim.test
+++ b/mysql-test/main/table_elim.test
@@ -1,6 +1,7 @@
 #
 # Table elimination (MWL#17) tests
 #
+--source include/have_sequence.inc
 --disable_warnings
 drop table if exists t0, t1, t2, t3, t4, t5, t6;
 drop view if exists v1, v2;
@@ -641,3 +642,120 @@ LIMIT 1;
 
 DROP TABLE t1,t2;
 
+--echo #
+--echo # MDEV-26278: Table elimination does not work across derived tables
+--echo #
+create table t1 (a int, b int);
+insert into t1 select seq, seq+10 from seq_1_to_10;
+
+create table t11 (
+  a int not null,
+  b int,
+  key(a)
+);
+
+insert into t11 select A.seq, A.seq+B.seq
+from
+  seq_1_to_10 A,
+  seq_1_to_100 B;
+create table t12 (
+  pk int primary key,
+  col1 int
+);
+
+insert into t12 select seq, seq from seq_1_to_1000;
+
+create view v2b as 
+select t11.a as a, count(*) as b
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a;
+
+--echo # The whole v2b is eliminated
+explain select t1.* from t1 left join v2b on v2b.a=t1.a;
+
+--echo # Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2b on t1.a=v2b.a;
+
+--echo # Elimination of a whole subquery 
+explain select t1.* from t1 left join 
+  (select t11.a as a, count(*) as b
+   from t11 left join t12 on t12.pk=t11.b
+   group by t11.a) v2b on v2b.a=t1.a;
+
+--echo # In this case v2b cannot be eliminated (since v2b.b is not unique)!
+explain select t1.* from t1 left join v2b on t1.a=v2b.b;
+
+--echo # Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2b on t1.a=v2b.b;
+
+create view v2c as 
+select t11.a as a, max(t12.col1) as b
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a;
+
+--echo # The whole v2c is eliminated
+explain select t1.* from t1 left join v2c on v2c.a=t1.a;
+
+--echo # Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2c on v2c.a=t1.a;
+
+--echo # In this case v2c cannot be eliminated (since v2c.b is not unique)!
+explain select t1.* from t1 left join v2c on t1.a=v2c.b;
+
+--echo # Check format JSON as well
+explain format=JSON select t1.* from t1 left join v2c on t1.a=v2c.b;
+
+--echo # Create a view with multiple fields in the GROUP BY clause:
+create view v2d as 
+select t11.a as a, t11.b as b, max(t12.col1) as max_col1
+from t11 left join t12 on t12.pk=t11.b
+group by t11.a, t11.b;
+
+--echo # This one must not be eliminated since only one of the GROUP BY fields is bound:
+explain select t1.* from t1 left join v2d on v2d.a=t1.a;
+
+--echo # This must be eliminated since both fields are bound:
+explain select t1.* from t1 left join v2d on v2d.a=t1.a and v2d.b=t1.b;
+
+create table t13 (dt date, b int);
+
+--echo # Function year() in the GROUP BY list prevents treating this field 
+--echo # as a unique key
+create view v2e as
+select year(t13.dt) as yyy, max(t12.col1) as max_col1
+from t13 join t12 on t12.pk=t13.b
+group by yyy;
+
+--echo # No elimination here since function year() is used
+explain select t1.* from t1 left join v2e on v2e.yyy=t1.a;
+
+create table t2 (a int, b int, c int);
+insert into t2 select A.seq, B.seq, 123 from seq_1_to_3 A, seq_1_to_3 B;
+
+--echo # No elimination here since not all fields of the derived table's 
+--echo # GROUP BY are on the SELECT list so D.a is not unique
+explain select t1.* from t1 left join 
+  (select a, count(*) as cnt from t2 group by a, b) D on D.a=t1.a;
+  
+--echo # Still no elimination 'cause field D.b is just an alias for t2.a
+explain select t1.* from t1 left join 
+  (select a, a as b, count(*) as cnt from t2 group by a, b) D on D.a=t1.a and D.b=t1.b;
+
+--echo # Now both a and b fields are on the SELECT list and they are bound to t1
+--echo # so derived D must be eliminated
+explain select t1.* from t1 left join 
+  (select a as a1, b as b1, count(*) as cnt from t2 group by a, b) D 
+  on D.a1=t1.a and D.b1=t1.b;
+  
+--echo # Different order of fields in GROUP BY and SELECT lists
+--echo # must not hamper the elimination
+explain select t1.* from t1 left join 
+  (select count(*) as cnt, b, a from t2 group by a, b) D on D.a=t1.a and D.b=t1.b;
+  
+
+drop view v2b, v2c, v2d, v2e;
+drop table t1, t11, t12, t13, t2;
+
+--echo #
+--echo # End of MDEV-26278: Table elimination does not work across derived tables
+--echo #
diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc
index 8c4720bdec4..cb8ebe9c167 100644
--- a/sql/opt_table_elimination.cc
+++ b/sql/opt_table_elimination.cc
@@ -33,6 +33,7 @@
 #include "sql_select.h"
 #include "opt_trace.h"
 #include "my_json_writer.h"
+#include <set>
 
 /*
   OVERVIEW
@@ -134,6 +135,11 @@
    - Nodes representing unique keys. Unique key has
       = incoming edges from key component value modules
       = outgoing edge to key's table module
+   - Nodes representing unique pseudo-keys for derived tables.
+     Unique pseudo-keys are composed as a result of GROUP BY expressions.
+     Like normal unique keys, they have:
+      = incoming edges from key component value modules
+      = outgoing edge to key's table module
    - Inner side of outer join module. Outer join module has
       = incoming edges from table value modules
       = No outgoing edges. Once we reach it, we know we can eliminate the 
@@ -205,6 +211,7 @@ class Dep_module;
   class Dep_module_expr;
   class Dep_module_goal;
   class Dep_module_key;
+  class Dep_module_pseudo_key;
 
 class Dep_analysis_context;
 
@@ -278,6 +285,8 @@ private:
     Dep_module_key *key_dep;
     /* Otherwise, this and advance */
     uint equality_no;
+    /* Or this one and advance */
+    Dep_module_pseudo_key *pseudo_key_dep;
   };
   friend class Dep_analysis_context;
   friend class Field_dependency_recorder; 
@@ -302,12 +311,20 @@ class Dep_value_table : public Dep_value
 {
 public:
   Dep_value_table(TABLE *table_arg) : 
-    table(table_arg), fields(NULL), keys(NULL)
+    table(table_arg), fields(NULL), keys(NULL), pseudo_key(NULL)
   {}
   TABLE *table;  /* Table this object is representing */
   /* Ordered list of fields that belong to this table */
   Dep_value_field *fields;
-  Dep_module_key *keys; /* Ordered list of Unique keys in this table */
+
+  /* Ordered list of Unique keys in this table */
+  Dep_module_key *keys;
+
+  /*
+    Possible unique pseudo-key applicable for this table
+    (only none or a single one is possible)
+  */
+  Dep_module_pseudo_key *pseudo_key;
 
   /* Iteration over unbound modules that are our dependencies */
   Iterator init_unbound_modules_iter(char *buf);
@@ -443,9 +460,62 @@ private:
 const size_t Dep_module_key::iterator_size= 
   ALIGN_SIZE(sizeof(Dep_module_key::Value_iter));
 
-const size_t Dep_module::iterator_size=
-  MY_MAX(Dep_module_expr::iterator_size, Dep_module_key::iterator_size);
 
+/*
+  A unique pseudo-key module for a derived table.
+  For example, a derived table
+  "SELECT a, count(*) from t1 GROUP BY a"
+  has unique values in its first field "a" due to GROUP BY expression
+  so this can be considered as a unique key for this derived table
+*/
+
+class Dep_module_pseudo_key : public Dep_module
+{
+public:
+  Dep_module_pseudo_key(Dep_value_table *table_arg,
+                        std::set<field_index_t>&& field_indexes)
+      : table(table_arg), derived_table_field_indexes(field_indexes)
+  {
+    unbound_args= static_cast<uint>(field_indexes.size());
+  }
+
+  Dep_value_table *table;
+
+  Iterator init_unbound_values_iter(char *buf) override;
+
+  Dep_value *get_next_unbound_value(Dep_analysis_context *dac,
+                                    Iterator iter) override;
+
+  bool covers_field(int field_index);
+
+  static const size_t iterator_size;
+
+private:
+  /*
+    Set of field numbers (indexes) in the derived table's SELECT list
+    which are included in the GROUP BY expression.
+    For example, unique pseudo-key for SQL
+    "SELECT count(*), b, a FROM t1 GROUP BY a, b"
+    will include two elements: {2} and {1}, since "a" and "b" are on the
+    GROUP BY list and also are present on the SELECT list with indexes 2 and 1
+    (numeration starts from 0).
+  */
+  std::set<field_index_t> derived_table_field_indexes;
+
+  class Value_iter
+  {
+  public:
+    Dep_value_table *table;
+  };
+};
+
+const size_t Dep_module_pseudo_key::iterator_size=
+  ALIGN_SIZE(sizeof(Dep_module_pseudo_key::Value_iter));
+
+const size_t Dep_module::iterator_size=
+  MY_MAX(Dep_module_expr::iterator_size,
+         MY_MAX(Dep_module_key::iterator_size,
+                Dep_module_pseudo_key::iterator_size));
 
 /*
   A module that represents outer join that we're trying to eliminate. If we 
@@ -508,12 +578,17 @@ public:
   */
   MY_BITMAP expr_deps;
   
-  Dep_value_table *create_table_value(TABLE *table);
+  Dep_value_table *create_table_value(TABLE_LIST *table_list);
   Dep_value_field *get_field_value(Field *field);
 
 #ifndef DBUG_OFF
   void dbug_print_deps();
 #endif 
+
+private:
+  void create_unique_pseudo_key_if_needed(TABLE_LIST *table_list,
+                                          Dep_value_table *tbl_dep);
+  int find_field_in_list(List<Item> &fields_list, Item *field);
 };
 
 
@@ -851,7 +926,7 @@ bool check_func_dependency(JOIN *join,
   /* Create Dep_value_table objects for all tables we're trying to eliminate */
   if (oj_tbl)
   {
-    if (!dac.create_table_value(oj_tbl->table))
+    if (!dac.create_table_value(oj_tbl))
       return FALSE; /* purecov: inspected */
   }
   else
@@ -861,7 +936,7 @@ bool check_func_dependency(JOIN *join,
     {
       if (tbl->table && (tbl->table->map & dep_tables))
       {
-        if (!dac.create_table_value(tbl->table))
+        if (!dac.create_table_value(tbl))
           return FALSE; /* purecov: inspected */
       }
     }
@@ -1577,33 +1652,139 @@ void add_module_expr(Dep_analysis_context *ctx, Dep_module_expr **eq_mod,
   DESCRIPTION
     Create a Dep_value_table object for the given table. Also create
     Dep_module_key objects for all unique keys in the table.
+    Create a unique pseudo-key if this table is derived and has
+    a GROUP BY expression.
 
   RETURN
     Created table value object
     NULL if out of memory
 */
 
-Dep_value_table *Dep_analysis_context::create_table_value(TABLE *table)
+Dep_value_table *
+Dep_analysis_context::create_table_value(TABLE_LIST *table_list)
 {
   Dep_value_table *tbl_dep;
-  if (!(tbl_dep= new Dep_value_table(table)))
+  if (!(tbl_dep= new Dep_value_table(table_list->table)))
     return NULL; /* purecov: inspected */
 
   Dep_module_key **key_list= &(tbl_dep->keys);
   /* Add dependencies for unique keys */
-  for (uint i=0; i < table->s->keys; i++)
+  for (uint i= 0; i < table_list->table->s->keys; i++)
   {
-    KEY *key= table->key_info + i; 
+    KEY *key= table_list->table->key_info + i;
     if (key->flags & HA_NOSAME)
     {
       Dep_module_key *key_dep;
-      if (!(key_dep= new Dep_module_key(tbl_dep, i, key->user_defined_key_parts)))
+      if (!(key_dep= new Dep_module_key(tbl_dep, i,
+                                        key->user_defined_key_parts)))
         return NULL;
       *key_list= key_dep;
       key_list= &(key_dep->next_table_key);
     }
   }
-  return table_deps[table->tablenr]= tbl_dep;
+
+  create_unique_pseudo_key_if_needed(table_list, tbl_dep);
+  return table_deps[table_list->table->tablenr]= tbl_dep;
+}
+
+
+/*
+  @brief
+    Check if we can create a unique pseudo-key for the passed table.
+    If we can, create a dependency for it
+
+  @detail
+    Currently, pseudo-key is created for the list of GROUP BY columns.
+
+    TODO: also it can be created if the query uses
+     - SELECT DISTINCT
+     - UNION DISTINCT (not UNION ALL)
+*/
+
+void Dep_analysis_context::create_unique_pseudo_key_if_needed(
+    TABLE_LIST *table_list, Dep_value_table *tbl_dep)
+{
+  auto select_unit= table_list->get_unit();
+  SELECT_LEX *first_select= nullptr;
+  if (select_unit)
+  {
+    first_select= select_unit->first_select();
+
+    /*
+      Exclude UNION (ALL) queries from consideration by checking
+      next_select() == nullptr
+    */
+    if (unlikely(select_unit->first_select()->next_select()))
+      first_select= nullptr;
+  }
+
+  /*
+    GROUP BY expression is considered as a unique pseudo-key
+    for the derived table. Add this pseudo key as a dependency
+  */
+  if (first_select && first_select->group_list.elements > 0)
+  {
+    bool valid= true;
+    std::set<field_index_t> exposed_fields_indexes;
+    for (auto cur_group= first_select->group_list.first;
+         cur_group;
+         cur_group= cur_group->next)
+    {
+      auto elem= *(cur_group->item);
+      /*
+        Make sure GROUP BY elements contain only fields
+        and no functions or other expressions
+      */
+      if (elem->type() != Item::FIELD_ITEM)
+      {
+        valid= false;
+        break;
+      }
+      auto field_idx= find_field_in_list(first_select->join->fields_list, elem);
+      if (field_idx == -1)
+      {
+        /*
+          This GROUP BY element is not present in the select list. This is a
+          case like this:
+             (SELECT a FROM t1 GROUP by a,b) as TBL
+          Here, the combination of (a,b) is unique, but the select doesn't
+          include "b". "a" alone is not unique, so TBL doesn't have a unique
+          pseudo-key.
+        */
+        valid= false;
+        break;
+      }
+      exposed_fields_indexes.insert(field_idx);
+    }
+    if (valid)
+    {
+      Dep_module_pseudo_key *pseudo_key;
+      pseudo_key= new Dep_module_pseudo_key(tbl_dep,
+                                            std::move(exposed_fields_indexes));
+      tbl_dep->pseudo_key= pseudo_key;
+    }
+  }
+}
+
+
+/*
+  Iterate the list of fields and look for the given field.
+  Returns the index of the field if it is found on the list
+  and -1 otherwise
+*/
+
+int Dep_analysis_context::find_field_in_list(List<Item> &fields_list,
+                                             Item *field)
+{
+  List_iterator<Item> it(fields_list);
+  int field_idx= 0;
+  while (auto next_field= it++)
+  {
+    if (next_field->eq(field, false))
+      return field_idx;
+    field_idx++;
+  }
+  return -1; /*not found*/
 }
 
 
@@ -1746,11 +1927,39 @@ Dep_value* Dep_module_key::get_next_unbound_value(Dep_analysis_context *dac,
 }
 
 
+char *Dep_module_pseudo_key::init_unbound_values_iter(char *buf)
+{
+  Value_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Value_iter);
+  iter->table= table;
+  return (char *) iter;
+}
+
+Dep_value *
+Dep_module_pseudo_key::get_next_unbound_value(Dep_analysis_context *dac,
+                                                  Dep_module::Iterator iter)
+{
+  Dep_value *res= ((Value_iter *) iter)->table;
+  ((Value_iter *) iter)->table= NULL;
+  return res;
+}
+
+
+/*
+  Check if column number field_index is covered by the pseudo-key.
+*/
+
+bool Dep_module_pseudo_key::covers_field(int field_index)
+{
+  return derived_table_field_indexes.count(field_index) > 0;
+}
+
+
 Dep_value::Iterator Dep_value_field::init_unbound_modules_iter(char *buf)
 {
   Module_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Module_iter);
   iter->key_dep= table->keys;
   iter->equality_no= 0;
+  iter->pseudo_key_dep= table->pseudo_key;
   return (char*)iter;
 }
 
@@ -1758,7 +1967,8 @@ Dep_value::Iterator Dep_value_field::init_unbound_modules_iter(char *buf)
 void 
 Dep_value_field::make_unbound_modules_iter_skip_keys(Dep_value::Iterator iter)
 {
-  ((Module_iter*)iter)->key_dep= NULL;
+  ((Module_iter*) iter)->key_dep= NULL;
+  ((Module_iter*) iter)->pseudo_key_dep= NULL;
 }
 
 
@@ -1786,6 +1996,16 @@ Dep_module* Dep_value_field::get_next_unbound_module(Dep_analysis_context *dac,
   }
   else 
     di->key_dep= NULL;
+
+  Dep_module_pseudo_key *pseudo_key_dep= di->pseudo_key_dep;
+  if (pseudo_key_dep && !pseudo_key_dep->is_applicable() &&
+      pseudo_key_dep->covers_field(field->field_index))
+  {
+    di->pseudo_key_dep= NULL;
+    return pseudo_key_dep;
+  }
+  else
+    di->pseudo_key_dep= NULL;
   
   /*
     Then walk through [multi]equalities and find those that
@@ -1819,7 +2039,7 @@ static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl,
   TABLE *table;
   /*
     NOTE: there are TABLE_LIST object that have
-    tbl->table!= NULL && tbl->nested_join!=NULL and 
+    tbl->table!= NULL && tbl->nested_join!=NULL and
     tbl->table == tbl->nested_join->join_list->element(..)->table
   */
   if (tbl->nested_join)
@@ -1848,7 +2068,6 @@ static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl,
     tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL);
 }
 
-
 #ifndef DBUG_OFF
 /* purecov: begin inspected */
 void Dep_analysis_context::dbug_print_deps()
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index 3bf6d9abf57..e26de308768 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -3419,7 +3419,7 @@ bool st_select_lex::test_limit()
 
 
 
-st_select_lex* st_select_lex_unit::outer_select()
+st_select_lex* st_select_lex_unit::outer_select() const
 {
   return (st_select_lex*) master;
 }
@@ -11786,15 +11786,24 @@ bool SELECT_LEX_UNIT::explainable() const
     EXPLAIN/ANALYZE unit, when:
     (1) if it's a subquery - it's not part of eliminated WHERE/ON clause.
     (2) if it's a CTE - it's not hanging (needed for execution)
-    (3) if it's a derived - it's not merged
+    (3) if it's a derived - it's not merged or eliminated
     if it's not 1/2/3 - it's some weird internal thing, ignore it
   */
+
   return item ?
            !item->eliminated :                        // (1)
            with_element ?
              derived && derived->derived_result &&
                !with_element->is_hanging_recursive(): // (2)
              derived ?
-               derived->is_materialized_derived() :   // (3)
+               derived->is_materialized_derived() && // (3)
+                 !is_derived_eliminated() :
                false;
 }
+
+bool SELECT_LEX_UNIT::is_derived_eliminated() const
+{
+  if (!derived)
+    return false;
+  return derived->table->map & outer_select()->join->eliminated_tables;
+}
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 79d48528574..5afb5f04cda 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -971,7 +971,7 @@ public:
   };
 
   void init_query();
-  st_select_lex* outer_select();
+  st_select_lex* outer_select() const;
   const st_select_lex* first_select() const
   {
     return reinterpret_cast<const st_select_lex*>(slave);
@@ -1039,6 +1039,9 @@ public:
   bool set_lock_to_the_last_select(Lex_select_lock l);
 
   friend class st_select_lex;
+
+private:
+  bool is_derived_eliminated() const;
 };
 
 typedef class st_select_lex_unit SELECT_LEX_UNIT;
author	Oleg Smirnov <olernov@gmail.com>	2022-02-17 22:53:37 +0700
committer	Oleg Smirnov <olernov@gmail.com>	2022-06-13 09:35:18 +0400
commit	1f1b7cc5f1ef9a828ed1bf9cec89fff04e622921 (patch)
tree	839f683dd1f1679cef8279adbc9b7433c3610995
parent	a9e00a014d52587299ee473a3939c7ba0a26873e (diff)
download	mariadb-git-bb-10.9-MDEV-26278.tar.gz