/* Copyright (c) 2010, 2019, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ /* Semi-join subquery optimization code definitions */ #ifdef USE_PRAGMA_INTERFACE #pragma interface /* gcc class implementation */ #endif int check_and_do_in_subquery_rewrites(JOIN *join); bool convert_join_subqueries_to_semijoins(JOIN *join); int pull_out_semijoin_tables(JOIN *join); bool optimize_semijoin_nests(JOIN *join, table_map all_table_map); bool setup_degenerate_jtbm_semi_joins(JOIN *join, List *join_list, List &eq_list); bool setup_jtbm_semi_joins(JOIN *join, List *join_list, List &eq_list); void cleanup_empty_jtbm_semi_joins(JOIN *join, List *join_list); // used by Loose_scan_opt ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, table_map remaining_tables); /* This is a class for considering possible loose index scan optimizations. It's usage pattern is as follows: best_access_path() { Loose_scan_opt opt; opt.init() for each index we can do ref access with { opt.next_ref_key(); for each keyuse opt.add_keyuse(); opt.check_ref_access(); } if (some criteria for range scans) opt.check_range_access(); opt.get_best_option(); } */ class Loose_scan_opt { /* All methods must check this before doing anything else */ bool try_loosescan; /* If we consider (oe1, .. oeN) IN (SELECT ie1, .. ieN) then ieK=oeK is called sj-equality. If oeK depends only on preceding tables then such equality is called 'bound'. */ ulonglong bound_sj_equalities; /* Accumulated properties of ref access we're now considering: */ ulonglong handled_sj_equalities; key_part_map loose_scan_keyparts; uint max_loose_keypart; bool part1_conds_met; /* Use of quick select is a special case. Some of its properties: */ uint quick_uses_applicable_index; uint quick_max_loose_keypart; /* Best loose scan method so far */ uint best_loose_scan_key; double best_loose_scan_cost; double best_loose_scan_records; KEYUSE *best_loose_scan_start_key; uint best_max_loose_keypart; table_map best_ref_depend_map; public: Loose_scan_opt(): try_loosescan(false), bound_sj_equalities(0), quick_uses_applicable_index(0), quick_max_loose_keypart(0), best_loose_scan_key(0), best_loose_scan_cost(0), best_loose_scan_records(0), best_loose_scan_start_key(NULL), best_max_loose_keypart(0), best_ref_depend_map(0) { } void init(JOIN *join, JOIN_TAB *s, table_map remaining_tables) { /* Discover the bound equalities. We need to do this if 1. The next table is an SJ-inner table, and 2. It is the first table from that semijoin, and 3. We're not within a semi-join range (i.e. all semi-joins either have all or none of their tables in join_table_map), except s->emb_sj_nest (which we've just entered, see #2). 4. All non-IN-equality correlation references from this sj-nest are bound 5. But some of the IN-equalities aren't (so this can't be handled by FirstMatch strategy) */ best_loose_scan_cost= DBL_MAX; if (!join->emb_sjm_nest && s->emb_sj_nest && // (1) s->emb_sj_nest->sj_in_exprs < 64 && ((remaining_tables & s->emb_sj_nest->sj_inner_tables) == // (2) s->emb_sj_nest->sj_inner_tables) && // (2) join->cur_sj_inner_tables == 0 && // (3) !(remaining_tables & s->emb_sj_nest->nested_join->sj_corr_tables) && // (4) remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on &&// (5) optimizer_flag(join->thd, OPTIMIZER_SWITCH_LOOSE_SCAN)) { /* This table is an LooseScan scan candidate */ bound_sj_equalities= get_bound_sj_equalities(s->emb_sj_nest, remaining_tables); try_loosescan= TRUE; DBUG_PRINT("info", ("Will try LooseScan scan, bound_map=%llx", (longlong)bound_sj_equalities)); } } void next_ref_key() { handled_sj_equalities=0; loose_scan_keyparts= 0; max_loose_keypart= 0; part1_conds_met= FALSE; } void add_keyuse(table_map remaining_tables, KEYUSE *keyuse) { if (try_loosescan && keyuse->sj_pred_no != UINT_MAX && (keyuse->table->file->index_flags(keyuse->key, 0, 1 ) & HA_READ_ORDER)) { if (!(remaining_tables & keyuse->used_tables)) { /* This allows to use equality propagation to infer that some sj-equalities are bound. */ bound_sj_equalities |= 1ULL << keyuse->sj_pred_no; } else { handled_sj_equalities |= 1ULL << keyuse->sj_pred_no; loose_scan_keyparts |= ((key_part_map)1) << keyuse->keypart; set_if_bigger(max_loose_keypart, keyuse->keypart); } } } bool have_a_case() { return MY_TEST(handled_sj_equalities); } void check_ref_access_part1(JOIN_TAB *s, uint key, KEYUSE *start_key, table_map found_part) { /* Check if we can use LooseScan semi-join strategy. We can if 1. This is the right table at right location 2. All IN-equalities are either - "bound", ie. the outer_expr part refers to the preceding tables - "handled", ie. covered by the index we're considering 3. Index order allows to enumerate subquery's duplicate groups in order. This happens when the index definition matches this pattern: (handled_col|bound_col)* (other_col|bound_col) */ if (try_loosescan && // (1) (handled_sj_equalities | bound_sj_equalities) == // (2) PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) && // (2) (PREV_BITS(key_part_map, max_loose_keypart+1) & // (3) (found_part | loose_scan_keyparts)) == // (3) PREV_BITS(key_part_map, max_loose_keypart+1) && // (3) !key_uses_partial_cols(s->table->s, key)) { if (s->quick && s->quick->index == key && s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) { quick_uses_applicable_index= TRUE; quick_max_loose_keypart= max_loose_keypart; } DBUG_PRINT("info", ("Can use LooseScan scan")); if (found_part & 1) { /* Can use LooseScan on ref access if the first key part is bound */ part1_conds_met= TRUE; } /* Check if this is a special case where there are no usable bound IN-equalities, i.e. we have outer_expr IN (SELECT innertbl.key FROM ...) and outer_expr cannot be evaluated yet, so it's actually full index scan and not a ref access. We can do full index scan if it uses index-only. */ if (!(found_part & 1 ) && /* no usable ref access for 1st key part */ s->table->covering_keys.is_set(key)) { double records, read_time; part1_conds_met= TRUE; handler *file= s->table->file; DBUG_PRINT("info", ("Can use full index scan for LooseScan")); /* Calculate the cost of complete loose index scan. */ records= rows2double(file->stats.records); /* The cost is entire index scan cost (divided by 2) */ read_time= file->cost(file->ha_keyread_and_copy_time(key, 1, (ha_rows) records, 0)); /* Now find out how many different keys we will get (for now we ignore the fact that we have "keypart_i=const" restriction for some key components, that may make us think think that loose scan will produce more distinct records than it actually will) */ ulong rpc; if ((rpc= s->table->key_info[key].rec_per_key[max_loose_keypart])) records= records / rpc; // TODO: previous version also did /2 if (read_time < best_loose_scan_cost) { best_loose_scan_key= key; best_loose_scan_cost= read_time; best_loose_scan_records= records; best_max_loose_keypart= max_loose_keypart; best_loose_scan_start_key= start_key; best_ref_depend_map= 0; } } } } void check_ref_access_part2(uint key, KEYUSE *start_key, double records, double read_time, table_map ref_depend_map_arg) { if (part1_conds_met && read_time < best_loose_scan_cost) { /* TODO use rec-per-key-based fanout calculations */ best_loose_scan_key= key; best_loose_scan_cost= read_time; best_loose_scan_records= records; best_max_loose_keypart= max_loose_keypart; best_loose_scan_start_key= start_key; best_ref_depend_map= ref_depend_map_arg; } } void check_range_access(JOIN *join, uint idx, QUICK_SELECT_I *quick) { /* TODO: this the right part restriction: */ if (quick_uses_applicable_index && idx == join->const_tables && quick->read_time < best_loose_scan_cost) { best_loose_scan_key= quick->index; best_loose_scan_cost= quick->read_time; /* this is ok because idx == join->const_tables */ best_loose_scan_records= rows2double(quick->records); best_max_loose_keypart= quick_max_loose_keypart; best_loose_scan_start_key= NULL; best_ref_depend_map= 0; } } void save_to_position(JOIN_TAB *tab, double record_count, double records_out, POSITION *pos) { pos->read_time= best_loose_scan_cost; if (best_loose_scan_cost != DBL_MAX) { /* Make sure LooseScan plan doesn't produce more rows than the records_out of other table access method. */ set_if_smaller(best_loose_scan_records, records_out); pos->loops= record_count; pos->records_read= best_loose_scan_records; pos->records_init= pos->records_read; pos->records_out= best_loose_scan_records; pos->key= best_loose_scan_start_key; pos->cond_selectivity= 1.0; pos->loosescan_picker.loosescan_key= best_loose_scan_key; pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1; pos->use_join_buffer= FALSE; pos->table= tab; pos->range_rowid_filter_info= tab->range_rowid_filter_info; pos->ref_depend_map= best_ref_depend_map; DBUG_PRINT("info", ("Produced a LooseScan plan, key %s, %s", tab->table->key_info[best_loose_scan_key].name.str, best_loose_scan_start_key? "(ref access)": "(range/index access)")); } } }; void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx, double *current_record_count, double *current_read_time, POSITION *loose_scan_pos); void update_sj_state(JOIN *join, const JOIN_TAB *new_tab, uint idx, table_map remaining_tables); void restore_prev_sj_state(const table_map remaining_tables, const JOIN_TAB *tab, uint idx); void fix_semijoin_strategies_for_picked_join_order(JOIN *join); bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab); bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab); uint get_number_of_tables_at_top_level(JOIN *join); /* Temporary table used by semi-join DuplicateElimination strategy This consists of the temptable itself and data needed to put records into it. The table's DDL is as follows: CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col)); where the primary key can be replaced with unique constraint if n exceeds the limit (as it is always done for query execution-time temptables). The record value is a concatenation of rowids of tables from the join we're executing. If a join table is on the inner side of the outer join, we assume that its rowid can be NULL and provide means to store this rowid in the tuple. */ class SJ_TMP_TABLE : public Sql_alloc { public: /* Array of pointers to tables whose rowids compose the temporary table record. */ class TAB { public: JOIN_TAB *join_tab; uint rowid_offset; ushort null_byte; uchar null_bit; }; TAB *tabs; TAB *tabs_end; /* is_degenerate==TRUE means this is a special case where the temptable record has zero length (and presence of a unique key means that the temptable can have either 0 or 1 records). In this case we don't create the physical temptable but instead record its state in SJ_TMP_TABLE::have_degenerate_row. */ bool is_degenerate; /* When is_degenerate==TRUE: the contents of the table (whether it has the record or not). */ bool have_degenerate_row; /* table record parameters */ uint null_bits; uint null_bytes; uint rowid_len; /* The temporary table itself (NULL means not created yet) */ TABLE *tmp_table; /* These are the members we got from temptable creation code. We'll need them if we'll need to convert table from HEAP to MyISAM/Maria. */ TMP_ENGINE_COLUMNDEF *start_recinfo; TMP_ENGINE_COLUMNDEF *recinfo; SJ_TMP_TABLE *next_flush_table; int sj_weedout_delete_rows(); int sj_weedout_check_row(THD *thd); bool create_sj_weedout_tmp_table(THD *thd); }; int setup_semijoin_loosescan(JOIN *join); int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, uint no_jbuf_after); void destroy_sj_tmp_tables(JOIN *join); int clear_sj_tmp_tables(JOIN *join); int rewrite_to_index_subquery_engine(JOIN *join); void get_delayed_table_estimates(TABLE *table, ha_rows *out_rows, double *scan_time, double *startup_cost); enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab);