Diffstat (limited to 'storage/innobase/dict/dict0stats.cc')
-rw-r--r--  storage/innobase/dict/dict0stats.cc  3181
1 files changed, 3181 insertions, 0 deletions
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
new file mode 100644
index 00000000000..aa587f54a0d
--- /dev/null
+++ b/storage/innobase/dict/dict0stats.cc
@@ -0,0 +1,3181 @@
+/*****************************************************************************
+
+Copyright (c) 2009, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0stats.cc
+Code used for calculating and manipulating table statistics.
+
+Created Jan 06, 2010 Vasil Dimov
+*******************************************************/
+
+#ifndef UNIV_HOTBACKUP
+
+#include "univ.i"
+
+#include "btr0btr.h" /* btr_get_size() */
+#include "btr0cur.h" /* btr_estimate_number_of_different_key_vals() */
+#include "dict0dict.h" /* dict_table_get_first_index() */
+#include "dict0mem.h" /* DICT_TABLE_MAGIC_N */
+#include "dict0stats.h"
+#include "data0type.h" /* dtype_t */
+#include "db0err.h" /* db_err */
+#include "dyn0dyn.h" /* dyn_array* */
+#include "pars0pars.h" /* pars_info_create() */
+#include "pars0types.h" /* pars_info_t */
+#include "que0que.h" /* que_eval_sql() */
+#include "rem0cmp.h" /* REC_MAX_N_FIELDS,cmp_rec_rec_with_match() */
+#include "row0sel.h" /* sel_node_struct */
+#include "row0types.h" /* sel_node_t */
+#include "trx0trx.h" /* trx_create() */
+#include "trx0roll.h" /* trx_rollback_to_savepoint() */
+#include "ut0rnd.h" /* ut_rnd_interval() */
+
+#include "ha_prototypes.h" /* innobase_strcasecmp() */
+
+/* Sampling algorithm description @{
+
+The algorithm is controlled by one number - srv_stats_persistent_sample_pages,
+let it be A, which is the number of leaf pages to analyze for a given index
+for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
+analyzed).
+
+Let the total number of leaf pages in the table be T.
+Level 0 - leaf pages, level H - root.
+
+Definition: an n-prefix-boring record is a record on a non-leaf page that
+equals the next (to the right, crossing page boundaries, skipping the
+supremum and infimum) record on the same level when looking at the first
+n-prefix columns.
+The last (user) record on a level is not boring (it does not match the
+non-existent user record to the right). We call the records boring because all
+the records on the page below a boring record are equal to that boring record.
+
+We avoid diving below boring records when searching for a leaf page to
+estimate the number of distinct records because we know that such a leaf
+page will contain exactly one distinct record.
+
+For each n-prefix: start from the root level and fully scan subsequent lower
+levels until a level that contains at least A*10 distinct records is found.
+Let's call this level LA.
+As an optimization the search is canceled if it has reached level 1 (we never
+descend to level 0, the leaf level) and also if the next level to be scanned
+would contain more than A pages. The latter is because the user has asked
+to analyze A leaf pages and it does not make sense to scan much more than
+A non-leaf pages with the sole purpose of finding a good sample of A leaf
+pages.
+
+After finding the appropriate level LA with >= A*10 distinct records (or
+fewer, in the exceptions described above), divide it into groups of equal
+records and
+pick A such groups. Then pick the last record from each group. For example,
+let the level be:
+
+index: 0,1,2,3,4,5,6,7,8,9,10
+record: 1,1,1,2,2,7,7,7,7,7,9
+
+There are 4 groups of distinct records and if A=2 random ones are selected,
+e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected.
+
+After selecting A records as described above, dive below them to find A leaf
+pages and analyze them, finding the total number of distinct records. The
+dive to the leaf level is performed by selecting a non-boring record from
+each page and diving below it.
+
+This way, a total of A leaf pages are analyzed for the given n-prefix.
+
+Let the number of different key values found in each leaf page i be Pi (i=1..A).
+Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A.
+Let the number of different key values on level LA be N_DIFF_LA.
+Let the total number of records on level LA be TOTAL_LA.
+Let R be N_DIFF_LA / TOTAL_LA; we assume this ratio is the same on the
+leaf level.
+Let the number of leaf pages be N.
+Then the total number of different key values on the leaf level is:
+N * R * N_DIFF_AVG_LEAF.
+See REF01 for the implementation; a short sketch also follows this description.
+
+The above describes how to calculate the cardinality of an index.
+This algorithm is executed for each n-prefix of a multi-column index
+where n=1..n_uniq.
+@} */
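+
+/* A minimal illustrative sketch of the REF01 estimate above, written as
+plain C inside this comment (the helper name and the plain unsigned long
+long type are assumptions for illustration; this is not part of the real
+code path). It shows the order of operations: divisions are interleaved
+with the multiplications to keep intermediate values from overflowing
+64-bit integers:
+
+	static unsigned long long
+	n_diff_estimate_sketch(
+		unsigned long long	n_leaf_pages,	// N
+		unsigned long long	n_diff_la,	// N_DIFF_LA
+		unsigned long long	total_la,	// TOTAL_LA
+		unsigned long long	n_diff_sum,	// P1 + ... + PA
+		unsigned long long	a)		// A
+	{
+		// N * R * N_DIFF_AVG_LEAF
+		// == N * (N_DIFF_LA / TOTAL_LA) * ((P1 + ... + PA) / A)
+		return(n_leaf_pages * n_diff_la / total_la
+		       * n_diff_sum / a);
+	}
+*/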
+
+/* names of the tables from the persistent statistics storage */
+#define TABLE_STATS_NAME "mysql/innodb_table_stats"
+#define TABLE_STATS_NAME_PRINT "mysql.innodb_table_stats"
+#define INDEX_STATS_NAME "mysql/innodb_index_stats"
+#define INDEX_STATS_NAME_PRINT "mysql.innodb_index_stats"
+
+#ifdef UNIV_STATS_DEBUG
+#define DEBUG_PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__)
+#else /* UNIV_STATS_DEBUG */
+#define DEBUG_PRINTF(fmt, ...) /* noop */
+#endif /* UNIV_STATS_DEBUG */
+
+/* number of distinct records on a given level that are required to stop
+descending to lower levels and fetch
+srv_stats_persistent_sample_pages records from that level */
+#define N_DIFF_REQUIRED (srv_stats_persistent_sample_pages * 10)
+
+/** Open handles on the stats tables. Currently this is used to increase the
+reference count of the stats tables. */
+typedef struct dict_stats_struct {
+ dict_table_t* table_stats; /*!< Handle to open TABLE_STATS_NAME */
+ dict_table_t* index_stats; /*!< Handle to open INDEX_STATS_NAME */
+} dict_stats_t;
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively quick and is used to calculate transient statistics that
+are not saved on disk.
+This was the only way to calculate statistics before the
+Persistent Statistics feature was introduced.
+dict_stats_update_transient() @{ */
+static
+void
+dict_stats_update_transient(
+/*========================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ dict_index_t* index;
+ ulint sum_of_index_sizes = 0;
+
+	/* Find out the sizes of the indexes and approximately how many
+	different values they have for the key */
+
+ index = dict_table_get_first_index(table);
+
+ if (index == NULL) {
+ /* Table definition is corrupt */
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: table %s has no indexes. "
+ "Cannot calculate statistics.\n", table->name);
+ return;
+ }
+
+ do {
+
+ if (index->type & DICT_FTS) {
+ index = dict_table_get_next_index(index);
+ continue;
+ }
+
+ if (UNIV_LIKELY
+ (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE
+ || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO
+ && dict_index_is_clust(index)))) {
+ mtr_t mtr;
+ ulint size;
+
+ mtr_start(&mtr);
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+ size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
+
+ if (size != ULINT_UNDEFINED) {
+ index->stat_index_size = size;
+
+ size = btr_get_size(
+ index, BTR_N_LEAF_PAGES, &mtr);
+ }
+
+ mtr_commit(&mtr);
+
+ switch (size) {
+ case ULINT_UNDEFINED:
+ goto fake_statistics;
+ case 0:
+ /* The root node of the tree is a leaf */
+ size = 1;
+ }
+
+ sum_of_index_sizes += index->stat_index_size;
+
+ index->stat_n_leaf_pages = size;
+
+ btr_estimate_number_of_different_key_vals(index);
+ } else {
+			/* If we have set a high innodb_force_recovery
+			level, do not calculate statistics, as a badly
+			corrupted index can cause a crash in the
+			calculation code.
+			Initialize some bogus index cardinality
+			statistics, so that the data can be queried in
+			various ways, also via secondary indexes. */
+ ulint i;
+
+fake_statistics:
+ sum_of_index_sizes++;
+ index->stat_index_size = index->stat_n_leaf_pages = 1;
+
+ for (i = dict_index_get_n_unique(index); i; ) {
+ index->stat_n_diff_key_vals[i--] = 1;
+ }
+
+ memset(index->stat_n_non_null_key_vals, 0,
+ (1 + dict_index_get_n_unique(index))
+ * sizeof(*index->stat_n_non_null_key_vals));
+ }
+
+ index = dict_table_get_next_index(index);
+ } while (index);
+
+ index = dict_table_get_first_index(table);
+
+ table->stat_n_rows = index->stat_n_diff_key_vals[
+ dict_index_get_n_unique(index)];
+
+ table->stat_clustered_index_size = index->stat_index_size;
+
+ table->stat_sum_of_other_index_sizes = sum_of_index_sizes
+ - index->stat_index_size;
+
+ table->stat_modified_counter = 0;
+
+ table->stat_initialized = TRUE;
+}
+/* @} */
+
+/*********************************************************************//**
+Checks whether the persistent statistics storage exists and that all
+tables have the proper structure.
+dict_stats_persistent_storage_check() @{
+@return TRUE if exists and all tables are ok */
+static
+ibool
+dict_stats_persistent_storage_check(
+/*================================*/
+ ibool caller_has_dict_sys_mutex) /*!< in: TRUE if the caller
+ owns dict_sys->mutex */
+{
+ /* definition for the table TABLE_STATS_NAME */
+ dict_col_meta_t table_stats_columns[] = {
+ {"database_name", DATA_VARMYSQL,
+ DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+
+ {"table_name", DATA_VARMYSQL,
+ DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+
+ {"last_update", DATA_FIXBINARY,
+ DATA_NOT_NULL, 4},
+
+ {"n_rows", DATA_INT,
+ DATA_NOT_NULL | DATA_UNSIGNED, 8},
+
+ {"clustered_index_size", DATA_INT,
+ DATA_NOT_NULL | DATA_UNSIGNED, 8},
+
+ {"sum_of_other_index_sizes", DATA_INT,
+ DATA_NOT_NULL | DATA_UNSIGNED, 8}
+ };
+ dict_table_schema_t table_stats_schema = {
+ TABLE_STATS_NAME,
+ UT_ARR_SIZE(table_stats_columns),
+ table_stats_columns
+ };
+
+ /* definition for the table INDEX_STATS_NAME */
+ dict_col_meta_t index_stats_columns[] = {
+ {"database_name", DATA_VARMYSQL,
+ DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+
+ {"table_name", DATA_VARMYSQL,
+ DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+
+ {"index_name", DATA_VARMYSQL,
+ DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+
+ {"last_update", DATA_FIXBINARY,
+ DATA_NOT_NULL, 4},
+
+ {"stat_name", DATA_VARMYSQL,
+ DATA_NOT_NULL, 64*3},
+
+ {"stat_value", DATA_INT,
+ DATA_NOT_NULL | DATA_UNSIGNED, 8},
+
+ {"sample_size", DATA_INT,
+ DATA_UNSIGNED, 8},
+
+ {"stat_description", DATA_VARMYSQL,
+ DATA_NOT_NULL, 1024*3}
+ };
+ dict_table_schema_t index_stats_schema = {
+ INDEX_STATS_NAME,
+ UT_ARR_SIZE(index_stats_columns),
+ index_stats_columns
+ };
+
+ char errstr[512];
+ enum db_err ret;
+
+ if (!caller_has_dict_sys_mutex) {
+ mutex_enter(&(dict_sys->mutex));
+ }
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ /* first check table_stats */
+ ret = dict_table_schema_check(&table_stats_schema, errstr,
+ sizeof(errstr));
+ if (ret == DB_SUCCESS) {
+ /* if it is ok, then check index_stats */
+ ret = dict_table_schema_check(&index_stats_schema, errstr,
+ sizeof(errstr));
+ }
+
+ if (!caller_has_dict_sys_mutex) {
+ mutex_exit(&(dict_sys->mutex));
+ }
+
+ if (ret != DB_SUCCESS && ret != DB_TABLE_NOT_FOUND) {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: %s\n", errstr);
+ }
+	/* We return silently if some of the tables are not present because
+	this code is executed during table open. By design we check whether
+	the persistent statistics storage is present and whether there are
+	stats for the table being opened; if so, we use them, otherwise we
+	silently fall back to using the transient stats. */
+
+ return(ret == DB_SUCCESS);
+}
+/* @} */
+
+/* @{ Pseudo code about the relation between the following functions
+
+let N = srv_stats_persistent_sample_pages
+
+dict_stats_analyze_index()
+ for each n_prefix
+ search for good enough level:
+ dict_stats_analyze_index_level() // only called if level has <= N pages
+ // full scan of the level in one mtr
+ collect statistics about the given level
+ if we are not satisfied with the level, search next lower level
+ we have found a good enough level here
+ dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
+ // full scan of the level in one mtr
+ dive below some records and analyze the leaf page there:
+ dict_stats_analyze_index_below_cur()
+@} */
+
+/*********************************************************************//**
+Find the total number of records and the number of distinct keys on a given
+level in an index. Each of the 1..n_uniq prefixes is looked up and the
+results are saved in the array n_diff[]. Notice that n_diff[] must be able
+to store n_uniq+1 numbers because the results are saved in
+n_diff[1] .. n_diff[n_uniq]. The total number of records on the level is
+saved in total_recs.
+Also, the index of the last record in each group of equal records is saved
+in n_diff_boundaries[1..n_uniq]; record indexing starts from 0 at the
+leftmost record on the level and continues across page boundaries. A worked
+example follows the function.
+dict_stats_analyze_index_level() @{ */
+static
+void
+dict_stats_analyze_index_level(
+/*===========================*/
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: level */
+ ib_uint64_t* n_diff, /*!< out: array for number of
+ distinct keys for all prefixes */
+ ib_uint64_t* total_recs, /*!< out: total number of records */
+ ib_uint64_t* total_pages, /*!< out: total number of pages */
+ dyn_array_t* n_diff_boundaries)/*!< out: boundaries of the groups
+ of distinct keys */
+{
+ ulint n_uniq;
+ mem_heap_t* heap;
+ dtuple_t* dtuple;
+ btr_pcur_t pcur;
+ mtr_t mtr;
+ const page_t* page;
+ const rec_t* rec;
+ const rec_t* prev_rec;
+ byte* prev_rec_buf = NULL;
+ ulint prev_rec_buf_size = 0;
+ ulint i;
+
+ DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu)\n", __func__,
+ index->table->name, index->name, level);
+
+ n_uniq = dict_index_get_n_unique(index);
+
+ /* elements in the n_diff array are 1..n_uniq (inclusive) */
+ memset(n_diff, 0x0, (n_uniq + 1) * sizeof(*n_diff));
+
+ heap = mem_heap_create(256);
+
+ /* reset the dynamic arrays n_diff_boundaries[1..n_uniq];
+ n_diff_boundaries[0] is ignored to follow the same convention
+ as n_diff[] */
+ if (n_diff_boundaries != NULL) {
+ for (i = 1; i <= n_uniq; i++) {
+ dyn_array_free(&n_diff_boundaries[i]);
+
+ dyn_array_create(&n_diff_boundaries[i]);
+ }
+ }
+
+	/* craft a record that is always smaller than the others;
+	this way we are sure that the cursor pcur will be positioned
+	on the leftmost record on the leftmost page on the desired level */
+ dtuple = dtuple_create(heap, dict_index_get_n_unique(index));
+ dict_table_copy_types(dtuple, index->table);
+ dtuple_set_info_bits(dtuple, REC_INFO_MIN_REC_FLAG);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open_low(index, level, dtuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
+ &pcur, __FILE__, __LINE__, &mtr);
+
+ page = btr_pcur_get_page(&pcur);
+
+ /* check that we are indeed on the desired level */
+ ut_a(btr_page_get_level(page, &mtr) == level);
+
+ /* there should not be any pages on the left */
+ ut_a(btr_page_get_prev(page, &mtr) == FIL_NULL);
+
+	/* check whether the first record on the leftmost page is marked
+	as the minimum record, if we are on a non-leaf level */
+ ut_a(level == 0
+ || (REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+ page_rec_get_next_const(page_get_infimum_rec(page)),
+ page_is_comp(page))));
+
+ if (btr_pcur_is_before_first_on_page(&pcur)) {
+ btr_pcur_move_to_next_on_page(&pcur);
+ }
+
+ if (btr_pcur_is_after_last_on_page(&pcur)) {
+ btr_pcur_move_to_prev_on_page(&pcur);
+ }
+
+ prev_rec = NULL;
+
+ /* no records by default */
+ *total_recs = 0;
+
+ *total_pages = 0;
+
+	/* iterate over all user records on this level
+	and compare each pair of adjacent records, even the last on page
+	X and the first on page X+1 */
+ for (;
+ btr_pcur_is_on_user_rec(&pcur);
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr)) {
+
+ ulint matched_fields = 0;
+ ulint matched_bytes = 0;
+ ulint offsets_rec_onstack[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets_rec;
+
+ rec_offs_init(offsets_rec_onstack);
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ /* increment the pages counter at the end of each page */
+ if (page_rec_is_supremum(page_rec_get_next_const(rec))) {
+
+ (*total_pages)++;
+ }
+
+ /* skip delete-marked records */
+ if (rec_get_deleted_flag(rec, page_is_comp(
+ btr_pcur_get_page(&pcur)))) {
+
+ continue;
+ }
+
+ offsets_rec = rec_get_offsets(rec, index, offsets_rec_onstack,
+ n_uniq, &heap);
+
+ (*total_recs)++;
+
+ if (prev_rec != NULL) {
+
+ ulint offsets_prev_rec_onstack[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets_prev_rec;
+
+ rec_offs_init(offsets_prev_rec_onstack);
+
+ offsets_prev_rec = rec_get_offsets(
+ prev_rec, index, offsets_prev_rec_onstack,
+ n_uniq, &heap);
+
+ cmp_rec_rec_with_match(rec,
+ prev_rec,
+ offsets_rec,
+ offsets_prev_rec,
+ index,
+ FALSE,
+ &matched_fields,
+ &matched_bytes);
+
+ for (i = matched_fields + 1; i <= n_uniq; i++) {
+
+ if (n_diff_boundaries != NULL) {
+ /* push the index of the previous
+ record, that is - the last one from
+ a group of equal keys */
+
+ void* p;
+ ib_uint64_t idx;
+
+ /* the index of the current record
+ is total_recs - 1, the index of the
+ previous record is total_recs - 2;
+ we know that idx is not going to
+ become negative here because if we
+ are in this branch then there is a
+ previous record and thus
+ total_recs >= 2 */
+ idx = *total_recs - 2;
+
+ p = dyn_array_push(
+ &n_diff_boundaries[i],
+ sizeof(ib_uint64_t));
+
+ memcpy(p, &idx, sizeof(ib_uint64_t));
+ }
+
+ /* increment the number of different keys
+ for n_prefix=i */
+ n_diff[i]++;
+ }
+ } else {
+			/* this is the first non-delete-marked record */
+ for (i = 1; i <= n_uniq; i++) {
+ n_diff[i] = 1;
+ }
+ }
+
+ if (page_rec_is_supremum(page_rec_get_next_const(rec))) {
+ /* end of a page has been reached */
+
+ /* we need to copy the record instead of assigning
+ like prev_rec = rec; because when we traverse the
+ records on this level at some point we will jump from
+ one page to the next and then rec and prev_rec will
+ be on different pages and
+ btr_pcur_move_to_next_user_rec() will release the
+ latch on the page that prev_rec is on */
+ prev_rec = rec_copy_prefix_to_buf(
+ rec, index, rec_offs_n_fields(offsets_rec),
+ &prev_rec_buf, &prev_rec_buf_size);
+
+ } else {
+ /* still on the same page, the next call to
+ btr_pcur_move_to_next_user_rec() will not jump
+ on the next page, we can simply assign pointers
+ instead of copying the records like above */
+
+ prev_rec = rec;
+ }
+ }
+
+ /* if *total_pages is left untouched then the above loop was not
+ entered at all and there is one page in the whole tree which is
+ empty */
+ if (*total_pages == 0) {
+
+ ut_ad(level == 0);
+ ut_ad(*total_recs == 0);
+
+ *total_pages = 1;
+ }
+
+ /* if there are records on this level and boundaries
+ should be saved */
+ if (*total_recs > 0 && n_diff_boundaries != NULL) {
+
+ /* remember the index of the last record on the level as the
+ last one from the last group of equal keys; this holds for
+ all possible prefixes */
+ for (i = 1; i <= n_uniq; i++) {
+ void* p;
+ ib_uint64_t idx;
+
+ idx = *total_recs - 1;
+
+ p = dyn_array_push(&n_diff_boundaries[i],
+ sizeof(ib_uint64_t));
+
+ memcpy(p, &idx, sizeof(ib_uint64_t));
+ }
+ }
+
+ /* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
+ for i=1..n_uniq */
+
+#ifdef UNIV_STATS_DEBUG
+ for (i = 1; i <= n_uniq; i++) {
+
+ DEBUG_PRINTF(" %s(): total recs: " UINT64PF
+ ", total pages: " UINT64PF
+ ", n_diff[%lu]: " UINT64PF "\n",
+ __func__, *total_recs,
+ *total_pages,
+ i, n_diff[i]);
+
+#if 0
+ if (n_diff_boundaries != NULL) {
+ ib_uint64_t j;
+
+ DEBUG_PRINTF(" %s(): boundaries[%lu]: ",
+ __func__, i);
+
+ for (j = 0; j < n_diff[i]; j++) {
+ ib_uint64_t idx;
+
+ idx = *(ib_uint64_t*) dyn_array_get_element(
+ &n_diff_boundaries[i],
+ j * sizeof(ib_uint64_t));
+
+ DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
+ j, idx);
+ }
+ DEBUG_PRINTF("\n");
+ }
+#endif
+ }
+#endif /* UNIV_STATS_DEBUG */
+
+ btr_pcur_close(&pcur);
+
+ mtr_commit(&mtr);
+
+ if (prev_rec_buf != NULL) {
+
+ mem_free(prev_rec_buf);
+ }
+
+ mem_heap_free(heap);
+}
+/* @} */
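+
+/* A worked example of what dict_stats_analyze_index_level() reports for the
+sample level from the algorithm description at the top of this file (an
+illustration only). For n_uniq=1 and the level
+
+	index:  0,1,2,3,4,5,6,7,8,9,10
+	record: 1,1,1,2,2,7,7,7,7,7,9
+
+the function sets total_recs=11, n_diff[1]=4 and fills n_diff_boundaries[1]
+with {2, 4, 9, 10}: the index of the last record in each group of equal
+keys, the last record on the level always closing the last group. */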
+
+/* aux enum for controlling the behavior of dict_stats_scan_page() @{ */
+typedef enum page_scan_method_enum {
+ COUNT_ALL_NON_BORING, /* scan all records on the given page
+ and count the number of distinct ones */
+ QUIT_ON_FIRST_NON_BORING/* quit when the first record that differs
+ from its right neighbor is found */
+} page_scan_method_t;
+/* @} */
+
+/*********************************************************************//**
+Scan a page, reading records from left to right and counting the number
+of distinct records on that page (looking only at the first n_prefix
+columns). If scan_method is QUIT_ON_FIRST_NON_BORING then the function
+will return as soon as it finds a record that does not match its neighbor
+to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
+returned n_diff can be 0 (empty page), 1 (the whole page has all keys
+equal), or 2 (the function found a non-boring record and returned). A short
+sketch of these semantics follows the function.
+@return offsets1 or offsets2 (the offsets of *out_rec),
+or NULL if the page is empty and does not contain user records.
+dict_stats_scan_page() @{ */
+UNIV_INLINE __attribute__((nonnull))
+ulint*
+dict_stats_scan_page(
+/*=================*/
+ const rec_t** out_rec, /*!< out: record, or NULL */
+ ulint* offsets1, /*!< out: rec_get_offsets()
+ working space (must be big
+ enough) */
+ ulint* offsets2, /*!< out: rec_get_offsets()
+ working space (must be big
+ enough) */
+ dict_index_t* index, /*!< in: index of the page */
+ const page_t* page, /*!< in: the page to scan */
+ ulint n_prefix, /*!< in: look at the first
+ n_prefix columns */
+ page_scan_method_t scan_method, /*!< in: scan to the end of
+ the page or not */
+ ib_uint64_t* n_diff) /*!< out: number of distinct
+ records encountered */
+{
+ ulint* offsets_rec = offsets1;
+ ulint* offsets_next_rec = offsets2;
+ const rec_t* rec;
+ const rec_t* next_rec;
+ /* A dummy heap, to be passed to rec_get_offsets().
+ Because offsets1,offsets2 should be big enough,
+ this memory heap should never be used. */
+ mem_heap_t* heap = NULL;
+
+ rec = page_rec_get_next_const(page_get_infimum_rec(page));
+
+ if (page_rec_is_supremum(rec)) {
+ /* the page is empty */
+ *n_diff = 0;
+ *out_rec = NULL;
+ return(NULL);
+ }
+
+ offsets_rec = rec_get_offsets(rec, index, offsets_rec,
+ ULINT_UNDEFINED, &heap);
+
+ next_rec = page_rec_get_next_const(rec);
+
+ *n_diff = 1;
+
+ while (!page_rec_is_supremum(next_rec)) {
+
+ ulint matched_fields = 0;
+ ulint matched_bytes = 0;
+
+ offsets_next_rec = rec_get_offsets(next_rec, index,
+ offsets_next_rec,
+ ULINT_UNDEFINED,
+ &heap);
+
+ /* check whether rec != next_rec when looking at
+ the first n_prefix fields */
+ cmp_rec_rec_with_match(rec, next_rec,
+ offsets_rec, offsets_next_rec,
+ index, FALSE, &matched_fields,
+ &matched_bytes);
+
+ if (matched_fields < n_prefix) {
+ /* rec != next_rec, => rec is non-boring */
+
+ (*n_diff)++;
+
+ if (scan_method == QUIT_ON_FIRST_NON_BORING) {
+ goto func_exit;
+ }
+ }
+
+ rec = next_rec;
+ {
+ /* Assign offsets_rec = offsets_next_rec
+ so that offsets_rec matches with rec which
+ was just assigned rec = next_rec above.
+ Also need to point offsets_next_rec to the
+ place where offsets_rec was pointing before
+ because we have just 2 placeholders where
+ data is actually stored:
+			offsets1 and offsets2, and we
+ are using them in circular fashion
+ (offsets[_next]_rec are just pointers to
+ those placeholders). */
+ ulint* offsets_tmp;
+ offsets_tmp = offsets_rec;
+ offsets_rec = offsets_next_rec;
+ offsets_next_rec = offsets_tmp;
+ }
+ next_rec = page_rec_get_next_const(next_rec);
+ }
+
+func_exit:
+ /* offsets1,offsets2 should have been big enough */
+ ut_a(heap == NULL);
+ *out_rec = rec;
+ return(offsets_rec);
+}
+/* @} */
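+
+/* A self-contained sketch of the scan semantics documented above, using a
+plain int array in place of page records (a hypothetical helper for
+illustration, not used by the real code):
+
+	static ib_uint64_t
+	scan_sketch(
+		const int*		keys,
+		int			n_keys,
+		page_scan_method_t	scan_method)
+	{
+		ib_uint64_t	n_diff;
+		int		i;
+
+		if (n_keys == 0) {
+			return(0);	// empty page
+		}
+
+		n_diff = 1;
+
+		for (i = 0; i < n_keys - 1; i++) {
+			if (keys[i] != keys[i + 1]) {
+				// keys[i] is non-boring
+				n_diff++;
+				if (scan_method
+				    == QUIT_ON_FIRST_NON_BORING) {
+					return(n_diff);	// always 2 here
+				}
+			}
+		}
+
+		return(n_diff);	// 1 when all keys on the page are equal
+	}
+*/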
+
+/*********************************************************************//**
+Dive below the current position of a cursor and calculate the number of
+distinct records on the leaf page, when looking at the first n_prefix
+columns.
+dict_stats_analyze_index_below_cur() @{
+@return number of distinct records on the leaf page */
+static
+ib_uint64_t
+dict_stats_analyze_index_below_cur(
+/*===============================*/
+ const btr_cur_t*cur, /*!< in: cursor */
+ ulint n_prefix, /*!< in: look at the first n_prefix
+ columns when comparing records */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ dict_index_t* index;
+ ulint space;
+ ulint zip_size;
+ buf_block_t* block;
+ ulint page_no;
+ const page_t* page;
+ mem_heap_t* heap;
+ const rec_t* rec;
+ ulint* offsets1;
+ ulint* offsets2;
+ ulint* offsets_rec;
+ ulint root_height;
+ ib_uint64_t n_diff; /* the result */
+ ulint size;
+
+ index = btr_cur_get_index(cur);
+
+ /* Allocate offsets for the record and the node pointer, for
+ node pointer records. In a secondary index, the node pointer
+ record will consist of all index fields followed by a child
+ page number.
+ Allocate space for the offsets header (the allocation size at
+ offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
+ so that this will never be less than the size calculated in
+ rec_get_offsets_func(). */
+ size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index);
+
+ heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2));
+
+ offsets1 = static_cast<ulint*>(mem_heap_alloc(
+ heap, size * sizeof *offsets1));
+
+ offsets2 = static_cast<ulint*>(mem_heap_alloc(
+ heap, size * sizeof *offsets2));
+
+ rec_offs_set_n_alloc(offsets1, size);
+ rec_offs_set_n_alloc(offsets2, size);
+
+ root_height = btr_page_get_level(btr_root_get(index, mtr), mtr);
+
+ space = dict_index_get_space(index);
+ zip_size = dict_table_zip_size(index->table);
+
+ rec = btr_cur_get_rec(cur);
+
+ offsets_rec = rec_get_offsets(rec, index, offsets1,
+ ULINT_UNDEFINED, &heap);
+
+ page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);
+
+ /* descend to the leaf level on the B-tree */
+ for (;;) {
+
+ block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH,
+ NULL /* no guessed block */,
+ BUF_GET, __FILE__, __LINE__, mtr);
+
+ page = buf_block_get_frame(block);
+
+ if (btr_page_get_level(page, mtr) == 0) {
+ /* leaf level */
+ break;
+ }
+ /* else */
+
+ /* search for the first non-boring record on the page */
+ offsets_rec = dict_stats_scan_page(
+ &rec, offsets1, offsets2, index, page, n_prefix,
+ QUIT_ON_FIRST_NON_BORING, &n_diff);
+
+ /* pages on level > 0 are not allowed to be empty */
+ ut_a(offsets_rec != NULL);
+ /* if page is not empty (offsets_rec != NULL) then n_diff must
+ be > 0, otherwise there is a bug in dict_stats_scan_page() */
+ ut_a(n_diff > 0);
+
+ if (n_diff == 1) {
+ /* page has all keys equal and the end of the page
+ was reached by dict_stats_scan_page(), no need to
+ descend to the leaf level */
+ mem_heap_free(heap);
+ return(1);
+ }
+ /* else */
+
+ /* when we instruct dict_stats_scan_page() to quit on the
+ first non-boring record it finds, then the returned n_diff
+ can either be 0 (empty page), 1 (page has all keys equal) or
+ 2 (non-boring record was found) */
+ ut_a(n_diff == 2);
+
+ /* we have a non-boring record in rec, descend below it */
+
+ page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);
+ }
+
+ /* make sure we got a leaf page as a result from the above loop */
+ ut_ad(btr_page_get_level(page, mtr) == 0);
+
+ /* scan the leaf page and find the number of distinct keys,
+ when looking only at the first n_prefix columns */
+
+ offsets_rec = dict_stats_scan_page(
+ &rec, offsets1, offsets2, index, page, n_prefix,
+ COUNT_ALL_NON_BORING, &n_diff);
+
+ if (root_height > 0) {
+
+ /* empty pages are allowed only if the whole B-tree is empty
+ and contains a single empty page */
+ ut_a(offsets_rec != NULL);
+ }
+
+#if 0
+ DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n",
+ __func__, page_no, n_diff);
+#endif
+
+ mem_heap_free(heap);
+
+ return(n_diff);
+}
+/* @} */
+
+/*********************************************************************//**
+For a given level in an index select srv_stats_persistent_sample_pages
+(or less) records from that level and dive below them to the corresponding
+leaf pages, then scan those leaf pages and save the sampling results in
+index->stat_n_diff_key_vals[n_prefix] and the number of pages scanned in
+index->stat_n_sample_sizes[n_prefix].
+dict_stats_analyze_index_for_n_prefix() @{ */
+static
+void
+dict_stats_analyze_index_for_n_prefix(
+/*==================================*/
+ dict_index_t* index, /*!< in/out: index */
+ ulint level, /*!< in: level,
+ must be >= 1 */
+ ib_uint64_t total_recs_on_level, /*!< in: total number of
+ records on the given level */
+ ulint n_prefix, /*!< in: look at first
+ n_prefix columns when
+ comparing records */
+ ib_uint64_t n_diff_for_this_prefix, /*!< in: number of distinct
+ records on the given level,
+ when looking at the first
+ n_prefix columns */
+ dyn_array_t* boundaries) /*!< in: array that contains
+ n_diff_for_this_prefix
+ integers each of which
+ represents the index (on the
+ level, counting from
+ left/smallest to right/biggest
+ from 0) of the last record
+ from each group of distinct
+ keys */
+{
+ mem_heap_t* heap;
+ dtuple_t* dtuple;
+ btr_pcur_t pcur;
+ mtr_t mtr;
+ const page_t* page;
+ ib_uint64_t rec_idx;
+ ib_uint64_t last_idx_on_level;
+ ib_uint64_t n_recs_to_dive_below;
+ ib_uint64_t n_diff_sum_of_all_analyzed_pages;
+ ib_uint64_t i;
+
+#if 0
+ DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu, "
+ "n_diff_for_this_prefix=" UINT64PF ")\n",
+ __func__, index->table->name, index->name, level,
+ n_prefix, n_diff_for_this_prefix);
+#endif
+
+	/* if either of those is 0 then there is exactly one page in the
+	B-tree and it is empty; in that case we should have done a full
+	scan and should not be here */
+ ut_ad(total_recs_on_level > 0);
+ ut_ad(n_diff_for_this_prefix > 0);
+
+	/* this is configured to be at least 1; if not, someone has
+	changed the code */
+ ut_ad(srv_stats_persistent_sample_pages > 0);
+
+ heap = mem_heap_create(256);
+
+	/* craft a record that is always smaller than the others;
+	this way we are sure that the cursor pcur will be positioned
+	on the leftmost record on the leftmost page on the desired level */
+ dtuple = dtuple_create(heap, dict_index_get_n_unique(index));
+ dict_table_copy_types(dtuple, index->table);
+ dtuple_set_info_bits(dtuple, REC_INFO_MIN_REC_FLAG);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open_low(index, level, dtuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
+ &pcur, __FILE__, __LINE__, &mtr);
+
+ page = btr_pcur_get_page(&pcur);
+
+ /* check that we are indeed on the desired level */
+ ut_a(btr_page_get_level(page, &mtr) == level);
+
+ /* there should not be any pages on the left */
+ ut_a(btr_page_get_prev(page, &mtr) == FIL_NULL);
+
+ /* check whether the first record on the leftmost page is marked
+ as such, if we are on a non-leaf level */
+ ut_a(level == 0 || REC_INFO_MIN_REC_FLAG
+ & rec_get_info_bits(page_rec_get_next_const(
+ page_get_infimum_rec(page)),
+ page_is_comp(page)));
+
+ if (btr_pcur_is_before_first_on_page(&pcur)) {
+ btr_pcur_move_to_next_on_page(&pcur);
+ }
+
+ if (btr_pcur_is_after_last_on_page(&pcur)) {
+ btr_pcur_move_to_prev_on_page(&pcur);
+ }
+
+ last_idx_on_level = *(ib_uint64_t*) dyn_array_get_element(boundaries,
+ (ulint) ((n_diff_for_this_prefix - 1) * sizeof(ib_uint64_t)));
+
+ rec_idx = 0;
+
+ n_diff_sum_of_all_analyzed_pages = 0;
+
+ n_recs_to_dive_below = ut_min(srv_stats_persistent_sample_pages,
+ n_diff_for_this_prefix);
+
+ for (i = 0; i < n_recs_to_dive_below; i++) {
+ ib_uint64_t left;
+ ib_uint64_t right;
+ ulint rnd;
+ ib_uint64_t dive_below_idx;
+
+ /* there are n_diff_for_this_prefix elements
+ in the array boundaries[] and we divide those elements
+ into n_recs_to_dive_below segments, for example:
+
+ let n_diff_for_this_prefix=100, n_recs_to_dive_below=4, then:
+ segment i=0: [0, 24]
+ segment i=1: [25, 49]
+ segment i=2: [50, 74]
+ segment i=3: [75, 99] or
+
+ let n_diff_for_this_prefix=1, n_recs_to_dive_below=1, then:
+ segment i=0: [0, 0] or
+
+ let n_diff_for_this_prefix=2, n_recs_to_dive_below=2, then:
+ segment i=0: [0, 0]
+ segment i=1: [1, 1] or
+
+ let n_diff_for_this_prefix=13, n_recs_to_dive_below=7, then:
+ segment i=0: [0, 0]
+ segment i=1: [1, 2]
+ segment i=2: [3, 4]
+ segment i=3: [5, 6]
+ segment i=4: [7, 8]
+ segment i=5: [9, 10]
+ segment i=6: [11, 12]
+
+		then we select a random record from each segment and dive
+		below it (see the boundary sketch after this function) */
+ left = n_diff_for_this_prefix * i / n_recs_to_dive_below;
+ right = n_diff_for_this_prefix * (i + 1)
+ / n_recs_to_dive_below - 1;
+
+ ut_a(left <= right);
+ ut_a(right <= last_idx_on_level);
+
+ /* we do not pass (left, right) because we do not want to ask
+ ut_rnd_interval() to work with too big numbers since
+ ib_uint64_t could be bigger than ulint */
+ rnd = ut_rnd_interval(0, (ulint) (right - left));
+
+ dive_below_idx = *(ib_uint64_t*) dyn_array_get_element(
+ boundaries, (ulint) ((left + rnd)
+ * sizeof(ib_uint64_t)));
+
+#if 0
+ DEBUG_PRINTF(" %s(): dive below record with index="
+ UINT64PF "\n", __func__, dive_below_idx);
+#endif
+
+ /* seek to the record with index dive_below_idx */
+ while (rec_idx < dive_below_idx
+ && btr_pcur_is_on_user_rec(&pcur)) {
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ rec_idx++;
+ }
+
+		/* if the level ended before we reached the record we are
+		searching for, this means that the B-tree has changed in
+		the meantime; quit our sampling and use whatever stats
+		we have collected so far */
+ if (rec_idx < dive_below_idx) {
+
+ ut_ad(!btr_pcur_is_on_user_rec(&pcur));
+ break;
+ }
+
+ ut_a(rec_idx == dive_below_idx);
+
+ ib_uint64_t n_diff_on_leaf_page;
+
+ n_diff_on_leaf_page = dict_stats_analyze_index_below_cur(
+ btr_pcur_get_btr_cur(&pcur), n_prefix, &mtr);
+
+ /* We adjust n_diff_on_leaf_page here to avoid counting
+ one record twice - once as the last on some page and once
+ as the first on another page. Consider the following example:
+ Leaf level:
+ page: (2,2,2,2,3,3)
+ ... many pages like (3,3,3,3,3,3) ...
+ page: (3,3,3,3,5,5)
+ ... many pages like (5,5,5,5,5,5) ...
+ page: (5,5,5,5,8,8)
+ page: (8,8,8,8,9,9)
+		our algorithm would (correctly) estimate that there are
+		2 distinct records per page on average. Having 4 pages below
+		non-boring records, it would (wrongly) estimate the number
+		of distinct records as 8. */
+ if (n_diff_on_leaf_page > 0) {
+ n_diff_on_leaf_page--;
+ }
+
+ n_diff_sum_of_all_analyzed_pages += n_diff_on_leaf_page;
+ }
+
+ if (n_diff_sum_of_all_analyzed_pages == 0) {
+ n_diff_sum_of_all_analyzed_pages = 1;
+ }
+
+ /* See REF01 for an explanation of the algorithm */
+ index->stat_n_diff_key_vals[n_prefix]
+ = index->stat_n_leaf_pages
+
+ * n_diff_for_this_prefix
+ / total_recs_on_level
+
+ * n_diff_sum_of_all_analyzed_pages
+ / n_recs_to_dive_below;
+
+ index->stat_n_sample_sizes[n_prefix] = n_recs_to_dive_below;
+
+ DEBUG_PRINTF(" %s(): n_diff=" UINT64PF " for n_prefix=%lu "
+ "(%lu"
+ " * " UINT64PF " / " UINT64PF
+ " * " UINT64PF " / " UINT64PF ")\n",
+ __func__, index->stat_n_diff_key_vals[n_prefix],
+ n_prefix,
+ index->stat_n_leaf_pages,
+ n_diff_for_this_prefix, total_recs_on_level,
+ n_diff_sum_of_all_analyzed_pages, n_recs_to_dive_below);
+
+ btr_pcur_close(&pcur);
+
+ mtr_commit(&mtr);
+
+ mem_heap_free(heap);
+}
+/* @} */
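+
+/* A sketch of the segment boundary arithmetic from the loop above
+(illustrative helper; it reproduces the worked examples given in the
+in-function comment):
+
+	static void
+	segment_sketch(
+		ib_uint64_t	n_diff,		// n_diff_for_this_prefix
+		ib_uint64_t	n_segments,	// n_recs_to_dive_below
+		ib_uint64_t	i,		// segment number, 0-based
+		ib_uint64_t*	left,		// out: first index in segment
+		ib_uint64_t*	right)		// out: last index in segment
+	{
+		*left = n_diff * i / n_segments;
+		*right = n_diff * (i + 1) / n_segments - 1;
+	}
+
+For example segment_sketch(13, 7, 1, &left, &right) gives left=1, right=2,
+matching "segment i=1: [1, 2]" above. */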
+
+/*********************************************************************//**
+Calculates new statistics for a given index and saves them to the index
+members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
+stat_n_leaf_pages. This function could be slow.
+dict_stats_analyze_index() @{ */
+static
+void
+dict_stats_analyze_index(
+/*=====================*/
+ dict_index_t* index) /*!< in/out: index to analyze */
+{
+ ulint root_level;
+ ulint level;
+ ibool level_is_analyzed;
+ ulint n_uniq;
+ ulint n_prefix;
+ ib_uint64_t* n_diff_on_level;
+ ib_uint64_t total_recs;
+ ib_uint64_t total_pages;
+ dyn_array_t* n_diff_boundaries;
+ mtr_t mtr;
+ ulint size;
+ ulint i;
+
+ DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name);
+
+ mtr_start(&mtr);
+
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+ size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
+
+ if (size != ULINT_UNDEFINED) {
+ index->stat_index_size = size;
+ size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
+ }
+
+ switch (size) {
+ case ULINT_UNDEFINED:
+ mtr_commit(&mtr);
+ /* Fake some statistics. */
+ index->stat_index_size = index->stat_n_leaf_pages = 1;
+
+ for (i = dict_index_get_n_unique(index); i; ) {
+ index->stat_n_diff_key_vals[i--] = 1;
+ }
+
+ memset(index->stat_n_non_null_key_vals, 0,
+ (1 + dict_index_get_n_unique(index))
+ * sizeof(*index->stat_n_non_null_key_vals));
+ return;
+ case 0:
+ /* The root node of the tree is a leaf */
+ size = 1;
+ }
+
+ index->stat_n_leaf_pages = size;
+
+ root_level = btr_page_get_level(btr_root_get(index, &mtr), &mtr);
+
+ mtr_commit(&mtr);
+
+ n_uniq = dict_index_get_n_unique(index);
+
+ /* if the tree has just one level (and one page) or if the user
+ has requested to sample too many pages then do full scan */
+ if (root_level == 0
+ /* for each n-column prefix (for n=1..n_uniq)
+ srv_stats_persistent_sample_pages will be sampled, so in total
+ srv_stats_persistent_sample_pages * n_uniq leaf pages will be
+ sampled. If that number is bigger than the total number of leaf
+ pages then do full scan of the leaf level instead since it will
+ be faster and will give better results. */
+ || srv_stats_persistent_sample_pages * n_uniq
+ > index->stat_n_leaf_pages) {
+
+ if (root_level == 0) {
+ DEBUG_PRINTF(" %s(): just one page, "
+ "doing full scan\n", __func__);
+ } else {
+ DEBUG_PRINTF(" %s(): too many pages requested for "
+ "sampling, doing full scan\n", __func__);
+ }
+
+ /* do full scan of level 0; save results directly
+ into the index */
+
+ dict_stats_analyze_index_level(index,
+ 0 /* leaf level */,
+ index->stat_n_diff_key_vals,
+ &total_recs,
+ &total_pages,
+ NULL /*boundaries not needed*/);
+
+ for (i = 1; i <= n_uniq; i++) {
+ index->stat_n_sample_sizes[i] = total_pages;
+ }
+
+ return;
+ }
+ /* else */
+
+ /* set to zero */
+ n_diff_on_level = (ib_uint64_t*) mem_zalloc((n_uniq + 1)
+ * sizeof(ib_uint64_t));
+
+ n_diff_boundaries = (dyn_array_t*) mem_alloc((n_uniq + 1)
+ * sizeof(dyn_array_t));
+
+ for (i = 1; i <= n_uniq; i++) {
+ /* initialize the dynamic arrays, the first one
+ (index=0) is ignored to follow the same indexing
+ scheme as n_diff_on_level[] */
+ dyn_array_create(&n_diff_boundaries[i]);
+ }
+
+ /* total_recs is also used to estimate the number of pages on one
+ level below, so at the start we have 1 page (the root) */
+ total_recs = 1;
+
+ /* Here we use the following optimization:
+ If we find that level L is the first one (searching from the
+ root) that contains at least D distinct keys when looking at
+ the first n_prefix columns, then:
+ if we look at the first n_prefix-1 columns then the first
+ level that contains D distinct keys will be either L or a
+ lower one.
+ So if we find that the first level containing D distinct
+ keys (on n_prefix columns) is L, we continue from L when
+ searching for D distinct keys on n_prefix-1 columns. */
+ level = (long) root_level;
+ level_is_analyzed = FALSE;
+ for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {
+
+ DEBUG_PRINTF(" %s(): searching level with >=%llu "
+ "distinct records, n_prefix=%lu\n",
+ __func__, N_DIFF_REQUIRED, n_prefix);
+
+		/* check whether we should pick the current level;
+		we pick level 1 even if it does not have enough
+		distinct records, because we do not want to scan the
+		leaf level, which may contain too many records */
+ if (level_is_analyzed
+ && (n_diff_on_level[n_prefix] >= N_DIFF_REQUIRED
+ || level == 1)) {
+
+ goto found_level;
+ }
+ /* else */
+
+ /* search for a level that contains enough distinct records */
+
+ if (level_is_analyzed && level > 1) {
+
+ /* if this does not hold we should be on
+ "found_level" instead of here */
+ ut_ad(n_diff_on_level[n_prefix] < N_DIFF_REQUIRED);
+
+ level--;
+ level_is_analyzed = FALSE;
+ }
+
+ for (;;) {
+
+ /* make sure we do not scan the leaf level
+ accidentally, it may contain too many pages */
+ ut_ad(level > 0);
+
+ /* scanning the same level twice is an optimization
+ bug */
+ ut_ad(!level_is_analyzed);
+
+ /* Do not scan if this would read too many pages.
+ Here we use the following fact:
+ the number of pages on level L equals the number
+ of records on level L+1, thus we deduce that the
+ following call would scan total_recs pages, because
+			total_recs is left over from the previous iteration
+			when we scanned one level up, or we have not scanned
+			any levels yet, in which case total_recs is 1. */
+ if (total_recs > srv_stats_persistent_sample_pages) {
+
+ /* if the above cond is true then we are not
+ at the root level since on the root level
+ total_recs == 1 and cannot
+ be > srv_stats_persistent_sample_pages */
+ ut_a(level != root_level);
+
+ /* step one level back and be satisfied with
+ whatever it contains */
+ level++;
+ level_is_analyzed = TRUE;
+
+ break;
+ }
+
+ dict_stats_analyze_index_level(index,
+ level,
+ n_diff_on_level,
+ &total_recs,
+ &total_pages,
+ n_diff_boundaries);
+
+ level_is_analyzed = TRUE;
+
+ if (n_diff_on_level[n_prefix] >= N_DIFF_REQUIRED
+ || level == 1) {
+ /* we found a good level with many distinct
+ records or we have reached the last level we
+ could scan */
+ break;
+ }
+ /* else */
+
+ level--;
+ level_is_analyzed = FALSE;
+ }
+found_level:
+
+ DEBUG_PRINTF(" %s(): found level %lu that has " UINT64PF
+ " distinct records for n_prefix=%lu\n",
+ __func__, level, n_diff_on_level[n_prefix],
+ n_prefix);
+
+ /* here we are either on level 1 or the level that we are on
+ contains >= N_DIFF_REQUIRED distinct keys or we did not scan
+ deeper levels because they would contain too many pages */
+
+ ut_ad(level > 0);
+
+ ut_ad(level_is_analyzed);
+
+ /* pick some records from this level and dive below them for
+ the given n_prefix */
+
+ dict_stats_analyze_index_for_n_prefix(
+ index, level, total_recs, n_prefix,
+ n_diff_on_level[n_prefix],
+ &n_diff_boundaries[n_prefix]);
+ }
+
+ for (i = 1; i <= n_uniq; i++) {
+ dyn_array_free(&n_diff_boundaries[i]);
+ }
+
+ mem_free(n_diff_boundaries);
+
+ mem_free(n_diff_on_level);
+}
+/* @} */
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively slow and is used to calculate persistent statistics that
+will be saved on disk.
+dict_stats_update_persistent() @{
+@return DB_SUCCESS or error code */
+static
+enum db_err
+dict_stats_update_persistent(
+/*=========================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ dict_index_t* index;
+
+ DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);
+
+ /* XXX quit if interrupted, e.g. SIGTERM */
+
+ /* analyze the clustered index first */
+
+ index = dict_table_get_first_index(table);
+
+ if (index == NULL) {
+ /* Table definition is corrupt */
+ return(DB_CORRUPTION);
+ }
+
+ dict_stats_analyze_index(index);
+
+ table->stat_n_rows
+ = index->stat_n_diff_key_vals[dict_index_get_n_unique(index)];
+
+ table->stat_clustered_index_size = index->stat_index_size;
+
+ /* analyze other indexes from the table, if any */
+
+ table->stat_sum_of_other_index_sizes = 0;
+
+ for (index = dict_table_get_next_index(index);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (index->type & DICT_FTS) {
+ continue;
+ }
+
+ dict_stats_analyze_index(index);
+
+ table->stat_sum_of_other_index_sizes
+ += index->stat_index_size;
+ }
+
+ table->stat_modified_counter = 0;
+
+ table->stat_initialized = TRUE;
+
+ return(DB_SUCCESS);
+}
+/* @} */
+
+/*********************************************************************//**
+Save an individual index's statistic into the persistent statistics
+storage.
+dict_stats_save_index_stat() @{
+@return DB_SUCCESS or error code */
+static
+enum db_err
+dict_stats_save_index_stat(
+/*=======================*/
+ dict_index_t* index, /*!< in: index */
+ lint last_update, /*!< in: timestamp of the stat */
+ const char* stat_name, /*!< in: name of the stat */
+ ib_uint64_t stat_value, /*!< in: value of the stat */
+ ib_uint64_t* sample_size, /*!< in: n pages sampled or NULL */
+ const char* stat_description,/*!< in: description of the stat */
+ trx_t* trx, /*!< in/out: transaction to use */
+ ibool caller_has_dict_sys_mutex)/*!< in: TRUE if the caller
+ owns dict_sys->mutex */
+{
+ pars_info_t* pinfo;
+ enum db_err ret;
+
+ pinfo = pars_info_create();
+
+ pars_info_add_literal(pinfo, "database_name", index->table->name,
+ dict_get_db_name_len(index->table->name),
+ DATA_VARCHAR, 0);
+
+ pars_info_add_str_literal(pinfo, "table_name",
+ dict_remove_db_name(index->table->name));
+
+ pars_info_add_str_literal(pinfo, "index_name", index->name);
+
+ pars_info_add_int4_literal(pinfo, "last_update", last_update);
+
+ pars_info_add_str_literal(pinfo, "stat_name", stat_name);
+
+ pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
+
+ if (sample_size != NULL) {
+ pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
+ } else {
+ pars_info_add_literal(pinfo, "sample_size", NULL,
+ UNIV_SQL_NULL, DATA_FIXBINARY, 0);
+ }
+
+ pars_info_add_str_literal(pinfo, "stat_description",
+ stat_description);
+
+ ret = que_eval_sql(pinfo,
+ "PROCEDURE INDEX_STATS_SAVE () IS\n"
+ "dummy CHAR;\n"
+ "BEGIN\n"
+
+ "SELECT database_name INTO dummy\n"
+ "FROM \"" INDEX_STATS_NAME "\"\n"
+ "WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name AND\n"
+ "index_name = :index_name AND\n"
+ "stat_name = :stat_name\n"
+ "FOR UPDATE;\n"
+
+ "IF (SQL % NOTFOUND) THEN\n"
+ " INSERT INTO \"" INDEX_STATS_NAME "\"\n"
+ " VALUES\n"
+ " (\n"
+ " :database_name,\n"
+ " :table_name,\n"
+ " :index_name,\n"
+ " :last_update,\n"
+ " :stat_name,\n"
+ " :stat_value,\n"
+ " :sample_size,\n"
+ " :stat_description\n"
+ " );\n"
+ "ELSE\n"
+ " UPDATE \"" INDEX_STATS_NAME "\" SET\n"
+ " last_update = :last_update,\n"
+ " stat_value = :stat_value,\n"
+ " sample_size = :sample_size,\n"
+ " stat_description = :stat_description\n"
+ " WHERE\n"
+ " database_name = :database_name AND\n"
+ " table_name = :table_name AND\n"
+ " index_name = :index_name AND\n"
+ " stat_name = :stat_name;\n"
+ "END IF;\n"
+ "END;",
+ !caller_has_dict_sys_mutex, trx);
+
+ /* pinfo is freed by que_eval_sql() */
+
+ if (ret != DB_SUCCESS) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error while trying to save index "
+ "statistics for table %s, index %s, "
+ "stat name %s: %s\n",
+ index->table->name, index->name,
+ stat_name, ut_strerr(ret));
+
+ trx->error_state = DB_SUCCESS;
+ }
+
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Save the table's statistics into the persistent statistics storage.
+dict_stats_save() @{
+@return DB_SUCCESS or error code */
+static
+enum db_err
+dict_stats_save(
+/*============*/
+ dict_table_t* table, /*!< in: table */
+ ibool caller_has_dict_sys_mutex)/*!< in: TRUE if the caller
+ owns dict_sys->mutex */
+{
+ trx_t* trx;
+ pars_info_t* pinfo;
+ dict_index_t* index;
+ lint now;
+ enum db_err ret;
+
+	/* MySQL's timestamp is 4 bytes, so we use
+ pars_info_add_int4_literal() which takes a lint arg, so "now" is
+ lint */
+ now = (lint) ut_time();
+
+ trx = trx_allocate_for_background();
+
+ /* Use 'read-uncommitted' so that the SELECTs we execute
+ do not get blocked in case some user has locked the rows we
+ are SELECTing */
+
+ trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
+
+ trx_start_if_not_started(trx);
+
+ pinfo = pars_info_create();
+
+ pars_info_add_literal(pinfo, "database_name", table->name,
+ dict_get_db_name_len(table->name),
+ DATA_VARCHAR, 0);
+
+ pars_info_add_str_literal(pinfo, "table_name",
+ dict_remove_db_name(table->name));
+
+ pars_info_add_int4_literal(pinfo, "last_update", now);
+
+ pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
+
+ pars_info_add_ull_literal(pinfo, "clustered_index_size",
+ table->stat_clustered_index_size);
+
+ pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
+ table->stat_sum_of_other_index_sizes);
+
+ ret = que_eval_sql(pinfo,
+ "PROCEDURE TABLE_STATS_SAVE () IS\n"
+ "dummy CHAR;\n"
+ "BEGIN\n"
+
+ "SELECT database_name INTO dummy\n"
+ "FROM \"" TABLE_STATS_NAME "\"\n"
+ "WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name\n"
+ "FOR UPDATE;\n"
+
+ "IF (SQL % NOTFOUND) THEN\n"
+ " INSERT INTO \"" TABLE_STATS_NAME "\"\n"
+ " VALUES\n"
+ " (\n"
+ " :database_name,\n"
+ " :table_name,\n"
+ " :last_update,\n"
+ " :n_rows,\n"
+ " :clustered_index_size,\n"
+ " :sum_of_other_index_sizes\n"
+ " );\n"
+ "ELSE\n"
+ " UPDATE \"" TABLE_STATS_NAME "\" SET\n"
+ " last_update = :last_update,\n"
+ " n_rows = :n_rows,\n"
+ " clustered_index_size = :clustered_index_size,\n"
+ " sum_of_other_index_sizes = "
+ " :sum_of_other_index_sizes\n"
+ " WHERE\n"
+ " database_name = :database_name AND\n"
+ " table_name = :table_name;\n"
+ "END IF;\n"
+ "END;",
+ !caller_has_dict_sys_mutex, trx);
+
+ /* pinfo is freed by que_eval_sql() */
+
+ if (ret != DB_SUCCESS) {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error while trying to save table "
+ "statistics for table %s: %s\n",
+ table->name, ut_strerr(ret));
+
+ goto end_rollback;
+ }
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ ib_uint64_t stat_n_diff_key_vals[REC_MAX_N_FIELDS];
+ ib_uint64_t stat_n_sample_sizes[REC_MAX_N_FIELDS];
+ ulint n_uniq;
+ ulint i;
+
+ ret = dict_stats_save_index_stat(index, now, "size",
+ index->stat_index_size,
+ NULL,
+ "Number of pages "
+ "in the index",
+ trx,
+ caller_has_dict_sys_mutex);
+ if (ret != DB_SUCCESS) {
+ goto end_rollback;
+ }
+
+ ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
+ index->stat_n_leaf_pages,
+ NULL,
+ "Number of leaf pages "
+ "in the index",
+ trx,
+ caller_has_dict_sys_mutex);
+ if (ret != DB_SUCCESS) {
+ goto end_rollback;
+ }
+
+ n_uniq = dict_index_get_n_unique(index);
+
+ ut_ad(n_uniq + 1 <= UT_ARR_SIZE(stat_n_diff_key_vals));
+
+ memcpy(stat_n_diff_key_vals, index->stat_n_diff_key_vals,
+ (n_uniq + 1) * sizeof(index->stat_n_diff_key_vals[0]));
+
+ ut_ad(n_uniq + 1 <= UT_ARR_SIZE(stat_n_sample_sizes));
+
+ memcpy(stat_n_sample_sizes, index->stat_n_sample_sizes,
+ (n_uniq + 1) * sizeof(index->stat_n_sample_sizes[0]));
+
+ for (i = 1; i <= n_uniq; i++) {
+
+ char stat_name[16];
+ char stat_description[1024];
+ ulint j;
+
+ ut_snprintf(stat_name, sizeof(stat_name),
+ "n_diff_pfx%02lu", i);
+
+			/* craft a string that contains the column names */
+ ut_snprintf(stat_description,
+ sizeof(stat_description),
+ "%s", index->fields[0].name);
+ for (j = 2; j <= i; j++) {
+ size_t len;
+
+ len = strlen(stat_description);
+
+ ut_snprintf(stat_description + len,
+ sizeof(stat_description) - len,
+ ",%s", index->fields[j - 1].name);
+ }
+
+ ret = dict_stats_save_index_stat(
+ index, now, stat_name,
+ stat_n_diff_key_vals[i],
+ &stat_n_sample_sizes[i],
+ stat_description, trx,
+ caller_has_dict_sys_mutex);
+
+ if (ret != DB_SUCCESS) {
+ goto end_rollback;
+ }
+ }
+ }
+
+ trx_commit_for_mysql(trx);
+ ret = DB_SUCCESS;
+ goto end_free;
+
+end_rollback:
+
+ trx->op_info = "rollback of internal transaction on stats tables";
+ trx_rollback_to_savepoint(trx, NULL);
+ trx->op_info = "";
+ ut_a(trx->error_state == DB_SUCCESS);
+
+end_free:
+
+ trx_free_for_background(trx);
+
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Called for the row that is selected by
+SELECT ... FROM mysql.innodb_table_stats WHERE table='...'
+The second argument is a pointer to the table and the fetched stats are
+written to it.
+dict_stats_fetch_table_stats_step() @{
+@return non-NULL dummy */
+static
+ibool
+dict_stats_fetch_table_stats_step(
+/*==============================*/
+ void* node_void, /*!< in: select node */
+ void* table_void) /*!< out: table */
+{
+ sel_node_t* node = (sel_node_t*) node_void;
+ dict_table_t* table = (dict_table_t*) table_void;
+ que_common_t* cnode;
+ int i;
+
+ /* this should loop exactly 3 times - for
+ n_rows,clustered_index_size,sum_of_other_index_sizes */
+ for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
+ cnode != NULL;
+ cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
+ i++) {
+
+ const byte* data;
+ dfield_t* dfield = que_node_get_val(cnode);
+ dtype_t* type = dfield_get_type(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ data = static_cast<const byte*>(dfield_get_data(dfield));
+
+ switch (i) {
+ case 0: /* mysql.innodb_table_stats.n_rows */
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == 8);
+
+ table->stat_n_rows = mach_read_from_8(data);
+
+ break;
+
+ case 1: /* mysql.innodb_table_stats.clustered_index_size */
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == 8);
+
+ table->stat_clustered_index_size
+ = (ulint) mach_read_from_8(data);
+
+ break;
+
+ case 2: /* mysql.innodb_table_stats.sum_of_other_index_sizes */
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == 8);
+
+ table->stat_sum_of_other_index_sizes
+ = (ulint) mach_read_from_8(data);
+
+ break;
+
+ default:
+
+ /* someone changed SELECT
+ n_rows,clustered_index_size,sum_of_other_index_sizes
+ to select more columns from innodb_table_stats without
+ adjusting here */
+ ut_error;
+ }
+ }
+
+ /* if i < 3 this means someone changed the
+ SELECT n_rows,clustered_index_size,sum_of_other_index_sizes
+	to select fewer columns from innodb_table_stats without adjusting here;
+ if i > 3 we would have ut_error'ed earlier */
+ ut_a(i == 3 /*n_rows,clustered_index_size,sum_of_other_index_sizes*/);
+
+ /* XXX this is not used but returning non-NULL is necessary */
+ return(TRUE);
+}
+/* @} */
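+
+/* This callback is meant to be bound to a FETCH cursor in the InnoDB SQL
+parser. A hedged sketch of the expected wiring (the exact caller is outside
+this hunk, so the binding below is an assumption for illustration;
+pars_info_bind_function() is the parser's mechanism for attaching a C
+callback to a cursor):
+
+	pars_info_t*	pinfo = pars_info_create();
+
+	pars_info_bind_function(pinfo, "fetch_table_stats_step",
+				dict_stats_fetch_table_stats_step, table);
+
+	// the PROCEDURE given to que_eval_sql() can then contain e.g.:
+	// FETCH table_stats_cur INTO fetch_table_stats_step;
+*/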
+
+/** Aux struct used to pass a table and a boolean to
+dict_stats_fetch_index_stats_step(). */
+typedef struct index_fetch_struct {
+ dict_table_t* table; /*!< table whose indexes are to be modified */
+	ibool		stats_were_modified; /*!< will be set to TRUE if at
+					least one index's stats were
+					modified */
+} index_fetch_t;
+
+/*********************************************************************//**
+Called for the rows that are selected by
+SELECT ... FROM mysql.innodb_index_stats WHERE table='...'
+The second argument is a pointer to the table and the fetched stats are
+written to its indexes.
+Let a table have N indexes and let each index have Ui unique columns,
+i=1..N; then mysql.innodb_index_stats will have SUM(Ui), i=1..N, rows for
+that table. So this function will be called SUM(Ui) times, where SUM(Ui)
+is of magnitude N*AVG(Ui). In each call it searches for the currently
+fetched index in table->indexes linearly, assuming this list is not
+sorted. Thus, overall,
+fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N
+is the number of indexes.
+This can be improved if we sort table->indexes in a temporary area just once
+and then search in that sorted list. Then the complexity will be O(N*log(N)).
+We assume a table will not have more than 100 indexes, so we go with the
+simpler N^2 algorithm.
+dict_stats_fetch_index_stats_step() @{
+@return non-NULL dummy */
+static
+ibool
+dict_stats_fetch_index_stats_step(
+/*==============================*/
+ void* node_void, /*!< in: select node */
+ void* arg_void) /*!< out: table + a flag that tells if we
+ modified anything */
+{
+ sel_node_t* node = (sel_node_t*) node_void;
+ index_fetch_t* arg = (index_fetch_t*) arg_void;
+ dict_table_t* table = arg->table;
+ dict_index_t* index = NULL;
+ que_common_t* cnode;
+ const char* stat_name = NULL;
+ ulint stat_name_len = ULINT_UNDEFINED;
+ ib_uint64_t stat_value = UINT64_UNDEFINED;
+ ib_uint64_t sample_size = UINT64_UNDEFINED;
+ int i;
+
+ /* this should loop exactly 4 times - for the columns that
+ were selected: index_name,stat_name,stat_value,sample_size */
+ for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
+ cnode != NULL;
+ cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
+ i++) {
+
+ const byte* data;
+ dfield_t* dfield = que_node_get_val(cnode);
+ dtype_t* type = dfield_get_type(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ data = static_cast<const byte*>(dfield_get_data(dfield));
+
+ switch (i) {
+ case 0: /* mysql.innodb_index_stats.index_name */
+
+ ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
+
+ /* search table->indexes for an index whose name
+ matches the fetched one; the fetched index name is
+ in data, is not '\0'-terminated and has length len */
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (strlen(index->name) == len
+ && memcmp(index->name, data, len) == 0) {
+ /* the corresponding index was found */
+ break;
+ }
+ }
+
+ /* if index is NULL here, then
+ mysql.innodb_index_stats contains a row whose
+ index_name does not match any index in the
+ table; this is ok, we just return, silently
+ ignoring such extra rows */
+ if (index == NULL) {
+
+ return(TRUE);
+ }
+
+ break;
+
+ case 1: /* mysql.innodb_index_stats.stat_name */
+
+ ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
+
+ ut_a(index != NULL);
+
+ stat_name = (const char*) data;
+ stat_name_len = len;
+
+ break;
+
+ case 2: /* mysql.innodb_index_stats.stat_value */
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == 8);
+
+ ut_a(index != NULL);
+ ut_a(stat_name != NULL);
+ ut_a(stat_name_len != ULINT_UNDEFINED);
+
+ stat_value = mach_read_from_8(data);
+
+ break;
+
+ case 3: /* mysql.innodb_index_stats.sample_size */
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == 8 || len == UNIV_SQL_NULL);
+
+ ut_a(index != NULL);
+ ut_a(stat_name != NULL);
+ ut_a(stat_name_len != ULINT_UNDEFINED);
+ ut_a(stat_value != UINT64_UNDEFINED);
+
+ if (len == UNIV_SQL_NULL) {
+ break;
+ }
+ /* else */
+
+ sample_size = mach_read_from_8(data);
+
+ break;
+
+ default:
+
+ /* someone changed the query
+ SELECT index_name,stat_name,stat_value,sample_size
+ to fetch more columns from innodb_index_stats
+ without adjusting the code here */
+ ut_error;
+ }
+ }
+
+ /* if i < 4 this means someone changed the
+ SELECT index_name,stat_name,stat_value,sample_size
+ to fetch fewer columns from innodb_index_stats without
+ adjusting the code here; if i > 4 we would have ut_error'ed earlier */
+ ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */);
+
+ ut_a(index != NULL);
+ ut_a(stat_name != NULL);
+ ut_a(stat_name_len != ULINT_UNDEFINED);
+ ut_a(stat_value != UINT64_UNDEFINED);
+ /* sample_size could be UINT64_UNDEFINED here, if it is NULL */
+
+#define PFX "n_diff_pfx"
+#define PFX_LEN 10
+
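+ /* For example (a sketch): a row with stat_name == "n_diff_pfx02" is
+ parsed below into n_pfx == 2 and its stat_value is stored in
+ index->stat_n_diff_key_vals[2], i.e. the number of distinct values
+ in the first 2 columns of the index. */
+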
+ if (stat_name_len == 4 /* strlen("size") */
+ && strncasecmp("size", stat_name, stat_name_len) == 0) {
+ index->stat_index_size = (ulint) stat_value;
+ arg->stats_were_modified = TRUE;
+ } else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
+ && strncasecmp("n_leaf_pages", stat_name, stat_name_len)
+ == 0) {
+ index->stat_n_leaf_pages = (ulint) stat_value;
+ arg->stats_were_modified = TRUE;
+ } else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
+ && strncasecmp(PFX, stat_name, PFX_LEN) == 0) {
+
+ const char* num_ptr;
+ unsigned long n_pfx;
+
+ /* point num_ptr at the "12" in "n_diff_pfx12..." */
+ num_ptr = stat_name + PFX_LEN;
+
+ /* stat_name should have exactly 2 chars appended to PFX
+ and they should be digits */
+ if (stat_name_len != PFX_LEN + 2
+ || num_ptr[0] < '0' || num_ptr[0] > '9'
+ || num_ptr[1] < '0' || num_ptr[1] > '9') {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Ignoring strange row from "
+ "%s WHERE "
+ "database_name = '%.*s' AND "
+ "table_name = '%s' AND "
+ "index_name = '%s' AND "
+ "stat_name = '%.*s'; because stat_name "
+ "is malformed\n",
+ INDEX_STATS_NAME_PRINT,
+ (int) dict_get_db_name_len(table->name),
+ table->name,
+ dict_remove_db_name(table->name),
+ index->name,
+ (int) stat_name_len,
+ stat_name);
+ return(TRUE);
+ }
+ /* else */
+
+ /* extract the 12 from "n_diff_pfx12..." into n_pfx;
+ note that stat_name is not '\0'-terminated */
+ n_pfx = (num_ptr[0] - '0') * 10 + (num_ptr[1] - '0');
+
+ if (n_pfx == 0 || n_pfx > dict_index_get_n_unique(index)) {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Ignoring strange row from "
+ "%s WHERE "
+ "database_name = '%.*s' AND "
+ "table_name = '%s' AND "
+ "index_name = '%s' AND "
+ "stat_name = '%.*s'; because stat_name is "
+ "out of range, the index has %lu unique "
+ "columns\n",
+ INDEX_STATS_NAME_PRINT,
+ (int) dict_get_db_name_len(table->name),
+ table->name,
+ dict_remove_db_name(table->name),
+ index->name,
+ (int) stat_name_len,
+ stat_name,
+ dict_index_get_n_unique(index));
+ return(TRUE);
+ }
+ /* else */
+
+ index->stat_n_diff_key_vals[n_pfx] = stat_value;
+
+ if (sample_size != UINT64_UNDEFINED) {
+ index->stat_n_sample_sizes[n_pfx] = sample_size;
+ } else {
+ /* hmm, strange... the user must have UPDATEd the
+ table manually and SET sample_size = NULL */
+ index->stat_n_sample_sizes[n_pfx] = 0;
+ }
+
+ arg->stats_were_modified = TRUE;
+ } else {
+ /* silently ignore rows with an unknown stat_name;
+ the user may have developed their own stats */
+ }
+
+ /* XXX this is not used but returning non-NULL is necessary */
+ return(TRUE);
+}
+/* @} */
+
+/*********************************************************************//**
+Read table's statistics from the persistent statistics storage.
+dict_stats_fetch_from_ps() @{
+@return DB_SUCCESS or error code */
+static
+enum db_err
+dict_stats_fetch_from_ps(
+/*=====================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool caller_has_dict_sys_mutex)/*!< in: TRUE if the caller
+ owns dict_sys->mutex */
+{
+ index_fetch_t index_fetch_arg;
+ trx_t* trx;
+ pars_info_t* pinfo;
+ enum db_err ret;
+
+ ut_ad(mutex_own(&dict_sys->mutex) == caller_has_dict_sys_mutex);
+
+ trx = trx_allocate_for_background();
+
+ /* Use 'read-uncommitted' so that the SELECTs we execute
+ do not get blocked in case some user has locked the rows we
+ are SELECTing */
+
+ trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
+
+ trx_start_if_not_started(trx);
+
+ pinfo = pars_info_create();
+
+ pars_info_add_literal(pinfo, "database_name", table->name,
+ dict_get_db_name_len(table->name),
+ DATA_VARCHAR, 0);
+
+ pars_info_add_str_literal(pinfo, "table_name",
+ dict_remove_db_name(table->name));
+
+ pars_info_bind_function(pinfo,
+ "fetch_table_stats_step",
+ dict_stats_fetch_table_stats_step,
+ table);
+
+ index_fetch_arg.table = table;
+ index_fetch_arg.stats_were_modified = FALSE;
+ pars_info_bind_function(pinfo,
+ "fetch_index_stats_step",
+ dict_stats_fetch_index_stats_step,
+ &index_fetch_arg);
+
+ ret = que_eval_sql(pinfo,
+ "PROCEDURE FETCH_STATS () IS\n"
+ "found INT;\n"
+ "DECLARE FUNCTION fetch_table_stats_step;\n"
+ "DECLARE FUNCTION fetch_index_stats_step;\n"
+ "DECLARE CURSOR table_stats_cur IS\n"
+ " SELECT\n"
+ /* if you change the selected fields, be
+ sure to adjust
+ dict_stats_fetch_table_stats_step() */
+ " n_rows,\n"
+ " clustered_index_size,\n"
+ " sum_of_other_index_sizes\n"
+ " FROM \"" TABLE_STATS_NAME "\"\n"
+ " WHERE\n"
+ " database_name = :database_name AND\n"
+ " table_name = :table_name;\n"
+ "DECLARE CURSOR index_stats_cur IS\n"
+ " SELECT\n"
+ /* if you change the selected fields, be
+ sure to adjust
+ dict_stats_fetch_index_stats_step() */
+ " index_name,\n"
+ " stat_name,\n"
+ " stat_value,\n"
+ " sample_size\n"
+ " FROM \"" INDEX_STATS_NAME "\"\n"
+ " WHERE\n"
+ " database_name = :database_name AND\n"
+ " table_name = :table_name;\n"
+
+ "BEGIN\n"
+
+ "OPEN table_stats_cur;\n"
+ "FETCH table_stats_cur INTO\n"
+ " fetch_table_stats_step();\n"
+ "IF (SQL % NOTFOUND) THEN\n"
+ " CLOSE table_stats_cur;\n"
+ " RETURN;\n"
+ "END IF;\n"
+ "CLOSE table_stats_cur;\n"
+
+ "OPEN index_stats_cur;\n"
+ "found := 1;\n"
+ "WHILE found = 1 LOOP\n"
+ " FETCH index_stats_cur INTO\n"
+ " fetch_index_stats_step();\n"
+ " IF (SQL % NOTFOUND) THEN\n"
+ " found := 0;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE index_stats_cur;\n"
+
+ "END;",
+ !caller_has_dict_sys_mutex, trx);
+
+ /* pinfo is freed by que_eval_sql() */
+
+ /* XXX If mysql.innodb_index_stats contained fewer rows than the number
+ of indexes in the table, then some of the indexes of the table
+ were left uninitialized. Currently this is ignored and those
+ indexes are left with uninitialized stats until ANALYZE TABLE is
+ run. This condition occurs when the user creates a new index
+ on a table. We could return DB_STATS_DO_NOT_EXIST from here,
+ forcing the usage of transient stats until mysql.innodb_index_stats
+ is complete. */
+
+ trx_commit_for_mysql(trx);
+
+ trx_free_for_background(trx);
+
+ if (!index_fetch_arg.stats_were_modified) {
+ return(DB_STATS_DO_NOT_EXIST);
+ }
+
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization.
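+A rough map from stats_upd_option to behavior follows (a summary only,
+the switch statement in the code below is authoritative):
+- DICT_STATS_RECALC_PERSISTENT[_SILENT]: recalculate the stats and save
+  them in the persistent storage, falling back to transient stats if
+  that storage is missing or corrupted;
+- DICT_STATS_RECALC_TRANSIENT: recalculate quick transient stats only;
+- DICT_STATS_FETCH[_ONLY_IF_NOT_IN_MEMORY]: read the stats from the
+  persistent storage, falling back to transient stats if nothing usable
+  is stored there.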
+dict_stats_update() @{
+@return DB_* error code or DB_SUCCESS */
+UNIV_INTERN
+enum db_err
+dict_stats_update(
+/*==============*/
+ dict_table_t* table, /*!< in/out: table */
+ dict_stats_upd_option_t stats_upd_option,
+ /*!< in: whether to (re) calc
+ the stats or to fetch them from
+ the persistent statistics
+ storage */
+ ibool caller_has_dict_sys_mutex)
+ /*!< in: TRUE if the caller
+ owns dict_sys->mutex */
+{
+ enum db_err ret = DB_ERROR;
+
+ /* check whether caller_has_dict_sys_mutex is set correctly;
+ note that mutex_own() is not implemented in non-debug code so
+ we cannot avoid having this extra param to the current function */
+ ut_ad(caller_has_dict_sys_mutex
+ ? mutex_own(&dict_sys->mutex)
+ : !mutex_own(&dict_sys->mutex));
+
+ if (table->ibd_file_missing) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: cannot calculate statistics for table %s "
+ "because the .ibd file is missing. For help, please "
+ "refer to " REFMAN "innodb-troubleshooting.html\n",
+ table->name);
+
+ return(DB_TABLESPACE_DELETED);
+ }
+
+ /* If we have set a high innodb_force_recovery level, do not calculate
+ statistics, as a badly corrupted index can make the calculation crash. */
+
+ if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
+
+ return(DB_SUCCESS);
+ }
+
+ switch (stats_upd_option) {
+ case DICT_STATS_RECALC_PERSISTENT:
+ case DICT_STATS_RECALC_PERSISTENT_SILENT:
+ /* Persistent recalculation requested, called from
+ ANALYZE TABLE or from TRUNCATE TABLE */
+
+ /* FTS auxiliary tables do not need persistent stats */
+ if ((ut_strcount(table->name, "FTS") > 0
+ && (ut_strcount(table->name, "CONFIG") > 0
+ || ut_strcount(table->name, "INDEX") > 0
+ || ut_strcount(table->name, "DELETED") > 0
+ || ut_strcount(table->name, "DOC_ID") > 0
+ || ut_strcount(table->name, "ADDED") > 0))) {
+ goto transient;
+ }
+
+ /* check if the persistent statistics storage exists
+ before calling the potentially slow function
+ dict_stats_update_persistent(); that is a
+ prerequisite for dict_stats_save() succeeding */
+ if (dict_stats_persistent_storage_check(
+ caller_has_dict_sys_mutex)) {
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ ret = dict_stats_update_persistent(table);
+
+ /* XXX Currently dict_stats_save() would read the
+ stats from the table without dict_table_stats_lock()
+ which means it could save inconsistent data on the
+ disk. This is because we must call
+ dict_table_stats_lock() after locking dict_sys->mutex.
+ A solution is to copy here the stats to a temporary
+ buffer while holding the _stats_lock(), release it,
+ and pass that buffer to dict_stats_save(). */
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+
+ if (ret == DB_SUCCESS) {
+ ret = dict_stats_save(
+ table,
+ caller_has_dict_sys_mutex);
+ }
+
+ return(ret);
+ }
+ /* else */
+
+ /* Fall back to transient stats since the persistent
+ storage is not present or is corrupted */
+
+ if (stats_upd_option == DICT_STATS_RECALC_PERSISTENT) {
+
+ ut_print_timestamp(stderr);
+ /* XXX add link to the doc about storage
+ creation */
+ fprintf(stderr,
+ " InnoDB: Recalculation of persistent "
+ "statistics requested but the required "
+ "persistent statistics storage is not "
+ "present or is corrupted. "
+ "Using quick transient stats "
+ "instead.\n");
+ }
+
+ goto transient;
+
+ case DICT_STATS_RECALC_TRANSIENT:
+
+ goto transient;
+
+ case DICT_STATS_FETCH:
+ case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
+ /* fetch requested, either fetch from persistent statistics
+ storage or use the old method */
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ if (stats_upd_option == DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY
+ && table->stat_initialized) {
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+ return(DB_SUCCESS);
+ }
+ /* else */
+
+ /* Must unlock because otherwise there is a lock order
+ violation with dict_sys->mutex below. Declare stats to be
+ initialized before unlocking. */
+ table->stat_initialized = TRUE;
+ dict_table_stats_unlock(table, RW_X_LATCH);
+
+ if (strchr(table->name, '/') == NULL
+ || strcmp(table->name, INDEX_STATS_NAME) == 0
+ || strcmp(table->name, TABLE_STATS_NAME) == 0
+ || (ut_strcount(table->name, "FTS") > 0
+ && (ut_strcount(table->name, "CONFIG") > 0
+ || ut_strcount(table->name, "INDEX") > 0
+ || ut_strcount(table->name, "DELETED") > 0
+ || ut_strcount(table->name, "DOC_ID") > 0
+ || ut_strcount(table->name, "ADDED") > 0))) {
+ /* Use the quick transient stats method for
+ InnoDB internal tables, because we know the
+ persistent stats storage does not contain data
+ for them */
+
+ goto transient;
+ }
+ /* else */
+
+ if (dict_stats_persistent_storage_check(
+ caller_has_dict_sys_mutex)) {
+
+ ret = dict_stats_fetch_from_ps(table,
+ caller_has_dict_sys_mutex);
+
+ if (ret == DB_STATS_DO_NOT_EXIST
+ || (ret != DB_SUCCESS && stats_upd_option
+ == DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY)) {
+ /* Stats for this particular table do not
+ exist, or we have been called from open
+ table, which needs to initialize the stats;
+ calculate the quick transient statistics */
+ goto transient;
+ }
+ /* else */
+
+ return(ret);
+ } else {
+ /* persistent statistics storage does not exist,
+ calculate the transient stats */
+ goto transient;
+ }
+
+ break;
+
+ /* no "default:" in order to produce a compilation warning
+ about unhandled enumeration value */
+ }
+
+transient:
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ dict_stats_update_transient(table);
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+
+ return(DB_SUCCESS);
+}
+/* @} */
+
+/*********************************************************************//**
+Close the stats tables. Should always be called after successful
+dict_stats_open(). It will free the dict_stats handle.
+dict_stats_close() @{ */
+UNIV_INLINE
+void
+dict_stats_close(
+/*=============*/
+ dict_stats_t* dict_stats) /*!< in/own: Handle to open
+ statistics tables */
+{
+ if (dict_stats->table_stats != NULL) {
+ dict_table_close(dict_stats->table_stats, FALSE);
+ dict_stats->table_stats = NULL;
+ }
+
+ if (dict_stats->index_stats != NULL) {
+ dict_table_close(dict_stats->index_stats, FALSE);
+ dict_stats->index_stats = NULL;
+ }
+
+ mem_free(dict_stats);
+}
+/* @} */
+
+/*********************************************************************//**
+Open stats tables to prevent these tables from being DROPped.
+Also check whether they have the correct structure. The caller
+must call dict_stats_close() when done issuing DML against the tables.
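+A minimal usage sketch (it mirrors dict_stats_delete_index_stats() below;
+the DML itself is elided):
+  dict_stats_t*  dict_stats = dict_stats_open();
+  if (dict_stats == NULL) {
+    return(DB_SUCCESS);  -- stats tables missing or with wrong structure
+  }
+  ... DELETE FROM the stats tables via que_eval_sql() ...
+  dict_stats_close(dict_stats);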
+dict_stats_open() @{
+@return pointer to open tables or NULL on failure */
+UNIV_INLINE
+dict_stats_t*
+dict_stats_open(void)
+/*=================*/
+{
+ dict_stats_t* dict_stats;
+
+ dict_stats = static_cast<dict_stats_t*>(
+ mem_zalloc(sizeof(*dict_stats)));
+
+ dict_stats->table_stats = dict_table_open_on_name_no_stats(
+ TABLE_STATS_NAME, FALSE, DICT_ERR_IGNORE_NONE);
+
+ dict_stats->index_stats = dict_table_open_on_name_no_stats(
+ INDEX_STATS_NAME, FALSE, DICT_ERR_IGNORE_NONE);
+
+ /* Check whether the tables have the correct structure. If so, then
+ after this function we can safely DELETE from them without worrying
+ that they may get DROPped or DDLed, because the open above has
+ increased their reference counts. */
+
+ if (dict_stats->table_stats == NULL
+ || dict_stats->index_stats == NULL
+ || !dict_stats_persistent_storage_check(FALSE)) {
+
+ /* There was an error, close the tables and free the handle. */
+ dict_stats_close(dict_stats);
+ dict_stats = NULL;
+ }
+
+ return(dict_stats);
+}
+/* @} */
+
+/*********************************************************************//**
+Removes the information for a particular index's stats from the persistent
+storage if it exists and if there is data stored for this index.
+The transaction is not committed here; it must not be committed in this
+function because it is the user trx that is running DROP INDEX, and it
+will be committed at the very end of dropping the index.
+A note from Marko on why we cannot edit user and sys_* tables in one trx:
+marko: The problem is that ibuf merges should be disabled while we are
+rolling back dict transactions.
+marko: If ibuf merges are not disabled, we need to scan the *.ibd files.
+But we shouldn't open *.ibd files before we have rolled back dict
+transactions and opened the SYS_* records for the *.ibd files.
+dict_stats_delete_index_stats() @{
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+enum db_err
+dict_stats_delete_index_stats(
+/*==========================*/
+ dict_index_t* index, /*!< in: index */
+ trx_t* trx, /*!< in: transaction to use */
+ char* errstr, /*!< out: error message if != DB_SUCCESS
+ is returned */
+ ulint errstr_sz)/*!< in: size of the errstr buffer */
+{
+ char database_name[MAX_DATABASE_NAME_LEN + 1];
+ const char* table_name;
+ pars_info_t* pinfo;
+ enum db_err ret;
+ dict_stats_t* dict_stats;
+ void* mysql_thd;
+
+ /* skip indexes whose table names do not contain a database name
+ e.g. if we are dropping an index from SYS_TABLES */
+ if (strchr(index->table_name, '/') == NULL) {
+
+ return(DB_SUCCESS);
+ }
+
+ /* Increment table reference count to prevent the tables from
+ being DROPped just before que_eval_sql(). */
+ dict_stats = dict_stats_open();
+
+ if (dict_stats == NULL) {
+ /* stats tables do not exist or have unexpected structure */
+ return(DB_SUCCESS);
+ }
+
+ /* the stats tables cannot be DROPped now */
+
+ ut_snprintf(database_name, sizeof(database_name), "%.*s",
+ (int) dict_get_db_name_len(index->table_name),
+ index->table_name);
+
+ table_name = dict_remove_db_name(index->table_name);
+
+ pinfo = pars_info_create();
+
+ pars_info_add_str_literal(pinfo, "database_name", database_name);
+
+ pars_info_add_str_literal(pinfo, "table_name", table_name);
+
+ pars_info_add_str_literal(pinfo, "index_name", index->name);
+
+ /* Force lock wait timeout to be instantaneous because the incoming
+ transaction was created via MySQL. */
+
+ mysql_thd = trx->mysql_thd;
+ trx->mysql_thd = NULL;
+
+ ret = que_eval_sql(pinfo,
+ "PROCEDURE DROP_INDEX_STATS () IS\n"
+ "BEGIN\n"
+ "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name AND\n"
+ "index_name = :index_name;\n"
+ "END;\n",
+ TRUE,
+ trx);
+
+ trx->mysql_thd = mysql_thd;
+
+ /* pinfo is freed by que_eval_sql() */
+
+ /* do not commit here, see this function's comment */
+
+ if (ret != DB_SUCCESS) {
+
+ ut_snprintf(errstr, errstr_sz,
+ "Unable to delete statistics for index %s "
+ "from %s%s. They can be deleted later using "
+ "DELETE FROM %s WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s' AND "
+ "index_name = '%s';",
+ index->name,
+ INDEX_STATS_NAME_PRINT,
+ (ret == DB_LOCK_WAIT_TIMEOUT
+ ? " because the rows are locked"
+ : ""),
+ INDEX_STATS_NAME_PRINT,
+ database_name,
+ table_name,
+ index->name);
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: %s\n", errstr);
+
+ trx->error_state = DB_SUCCESS;
+ }
+
+ dict_stats_close(dict_stats);
+
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Removes the statistics for a table and all of its indexes from the
+persistent statistics storage if it exists and if there is data stored for
+the table. This function creates its own transaction and commits it.
+dict_stats_delete_table_stats() @{
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+enum db_err
+dict_stats_delete_table_stats(
+/*==========================*/
+ const char* table_name, /*!< in: table name */
+ char* errstr, /*!< out: error message
+ if != DB_SUCCESS is returned */
+ ulint errstr_sz) /*!< in: size of errstr buffer */
+{
+ char database_name[MAX_DATABASE_NAME_LEN + 1];
+ const char* table_name_strip; /* without leading db name */
+ trx_t* trx;
+ pars_info_t* pinfo;
+ enum db_err ret = DB_ERROR;
+ dict_stats_t* dict_stats;
+
+ /* skip tables that do not contain a database name
+ e.g. if we are dropping SYS_TABLES */
+ if (strchr(table_name, '/') == NULL) {
+
+ return(DB_SUCCESS);
+ }
+
+ /* skip innodb_table_stats and innodb_index_stats themselves */
+ if (strcmp(table_name, TABLE_STATS_NAME) == 0
+ || strcmp(table_name, INDEX_STATS_NAME) == 0) {
+
+ return(DB_SUCCESS);
+ }
+
+ /* Create a new private trx */
+
+ trx = trx_allocate_for_background();
+
+ /* Use 'read-uncommitted' so that the SELECTs we execute
+ do not get blocked in case some user has locked the rows we
+ are SELECTing */
+
+ trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
+
+ trx_start_if_not_started(trx);
+
+ /* Increment table reference count to prevent the tables from
+ being DROPped just before que_eval_sql(). */
+ dict_stats = dict_stats_open();
+
+ if (dict_stats == NULL) {
+ /* stats tables do not exist or have unexpected structure */
+ ret = DB_SUCCESS;
+ goto commit_and_return;
+ }
+
+ ut_snprintf(database_name, sizeof(database_name), "%.*s",
+ (int) dict_get_db_name_len(table_name),
+ table_name);
+
+ table_name_strip = dict_remove_db_name(table_name);
+
+ pinfo = pars_info_create();
+
+ pars_info_add_str_literal(pinfo, "database_name", database_name);
+
+ pars_info_add_str_literal(pinfo, "table_name", table_name_strip);
+
+ ret = que_eval_sql(pinfo,
+ "PROCEDURE DROP_TABLE_STATS () IS\n"
+ "BEGIN\n"
+
+ "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name;\n"
+
+ "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name;\n"
+
+ "END;\n",
+ TRUE,
+ trx);
+
+ /* pinfo is freed by que_eval_sql() */
+
+ if (ret != DB_SUCCESS) {
+
+ ut_snprintf(errstr, errstr_sz,
+ "Unable to delete statistics for table %s.%s "
+ "from %s or %s%s. "
+ "They can be deleted later using "
+
+ "DELETE FROM %s WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s'; "
+
+ "DELETE FROM %s WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s';",
+
+ database_name, table_name_strip,
+ TABLE_STATS_NAME_PRINT, INDEX_STATS_NAME_PRINT,
+
+ (ret == DB_LOCK_WAIT_TIMEOUT
+ ? " because the rows are locked"
+ : ""),
+
+ INDEX_STATS_NAME_PRINT,
+ database_name, table_name_strip,
+
+ TABLE_STATS_NAME_PRINT,
+ database_name, table_name_strip);
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: %s\n", errstr);
+ }
+
+ dict_stats_close(dict_stats);
+
+commit_and_return:
+
+ trx_commit_for_mysql(trx);
+
+ trx_free_for_background(trx);
+
+ return(ret);
+}
+/* @} */
+
+/* tests @{ */
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+/* The following unit tests exercise some of the functions in this file
+individually; such testing cannot be performed via SQL by the mysql-test
+framework. */
+
+/* test_dict_table_schema_check() @{ */
+void
+test_dict_table_schema_check()
+{
+ /*
+ CREATE TABLE tcheck (
+ c01 VARCHAR(123),
+ c02 INT,
+ c03 INT NOT NULL,
+ c04 INT UNSIGNED,
+ c05 BIGINT,
+ c06 BIGINT UNSIGNED NOT NULL,
+ c07 TIMESTAMP
+ ) ENGINE=INNODB;
+ */
+ /* definition for the table 'test/tcheck' */
+ dict_col_meta_t columns[] = {
+ {"c01", DATA_VARCHAR, 0, 123},
+ {"c02", DATA_INT, 0, 4},
+ {"c03", DATA_INT, DATA_NOT_NULL, 4},
+ {"c04", DATA_INT, DATA_UNSIGNED, 4},
+ {"c05", DATA_INT, 0, 8},
+ {"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8},
+ {"c07", DATA_INT, 0, 4},
+ {"c_extra", DATA_INT, 0, 4}
+ };
+ dict_table_schema_t schema = {
+ "test/tcheck",
+ 0 /* will be set individually for each test below */,
+ columns
+ };
+ char errstr[512];
+
+ ut_snprintf(errstr, sizeof(errstr), "Table not found");
+
+ /* prevent any data dictionary modifications while we are checking
+ the tables' structure */
+
+ mutex_enter(&(dict_sys->mutex));
+
+ /* check that a valid table is reported as valid */
+ schema.n_cols = 7;
+ if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+ == DB_SUCCESS) {
+ printf("OK: test.tcheck ok\n");
+ } else {
+ printf("ERROR: %s\n", errstr);
+ printf("ERROR: test.tcheck not present or corrupted\n");
+ goto test_dict_table_schema_check_end;
+ }
+
+ /* check columns with wrong length */
+ schema.columns[1].len = 8;
+ if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+ != DB_SUCCESS) {
+ printf("OK: test.tcheck.c02 has different length and is "
+ "reported as corrupted\n");
+ } else {
+ printf("OK: test.tcheck.c02 has different length but is "
+ "reported as ok\n");
+ goto test_dict_table_schema_check_end;
+ }
+ schema.columns[1].len = 4;
+
+ /* request that c02 be NOT NULL although it actually does not
+ have this flag set */
+ schema.columns[1].prtype_mask |= DATA_NOT_NULL;
+ if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+ != DB_SUCCESS) {
+ printf("OK: test.tcheck.c02 does not have NOT NULL while "
+ "it should and is reported as corrupted\n");
+ } else {
+ printf("ERROR: test.tcheck.c02 does not have NOT NULL while "
+ "it should and is not reported as corrupted\n");
+ goto test_dict_table_schema_check_end;
+ }
+ schema.columns[1].prtype_mask &= ~DATA_NOT_NULL;
+
+ /* check a table that contains some extra columns */
+ schema.n_cols = 6;
+ if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+ == DB_SUCCESS) {
+ printf("ERROR: test.tcheck has more columns but is not "
+ "reported as corrupted\n");
+ goto test_dict_table_schema_check_end;
+ } else {
+ printf("OK: test.tcheck has more columns and is "
+ "reported as corrupted\n");
+ }
+
+ /* check a table that has some columns missing */
+ schema.n_cols = 8;
+ if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+ != DB_SUCCESS) {
+ printf("OK: test.tcheck has missing columns and is "
+ "reported as corrupted\n");
+ } else {
+ printf("ERROR: test.tcheck has missing columns but is "
+ "reported as ok\n");
+ goto test_dict_table_schema_check_end;
+ }
+
+ /* check non-existent table */
+ schema.table_name = "test/tcheck_nonexistent";
+ if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+ != DB_SUCCESS) {
+ printf("OK: test.tcheck_nonexistent is not present\n");
+ } else {
+ printf("ERROR: test.tcheck_nonexistent is present!?\n");
+ goto test_dict_table_schema_check_end;
+ }
+
+test_dict_table_schema_check_end:
+
+ mutex_exit(&(dict_sys->mutex));
+}
+/* @} */
+
+/* save/fetch aux macros @{ */
+#define TEST_DATABASE_NAME "foobardb"
+#define TEST_TABLE_NAME "test_dict_stats"
+
+#define TEST_N_ROWS 111
+#define TEST_CLUSTERED_INDEX_SIZE 222
+#define TEST_SUM_OF_OTHER_INDEX_SIZES 333
+
+#define TEST_IDX1_NAME "tidx1"
+#define TEST_IDX1_COL1_NAME "tidx1_col1"
+#define TEST_IDX1_INDEX_SIZE 123
+#define TEST_IDX1_N_LEAF_PAGES 234
+#define TEST_IDX1_N_DIFF1 50
+#define TEST_IDX1_N_DIFF1_SAMPLE_SIZE 500
+
+#define TEST_IDX2_NAME "tidx2"
+#define TEST_IDX2_COL1_NAME "tidx2_col1"
+#define TEST_IDX2_COL2_NAME "tidx2_col2"
+#define TEST_IDX2_COL3_NAME "tidx2_col3"
+#define TEST_IDX2_COL4_NAME "tidx2_col4"
+#define TEST_IDX2_INDEX_SIZE 321
+#define TEST_IDX2_N_LEAF_PAGES 432
+#define TEST_IDX2_N_DIFF1 60
+#define TEST_IDX2_N_DIFF1_SAMPLE_SIZE 600
+#define TEST_IDX2_N_DIFF2 61
+#define TEST_IDX2_N_DIFF2_SAMPLE_SIZE 610
+#define TEST_IDX2_N_DIFF3 62
+#define TEST_IDX2_N_DIFF3_SAMPLE_SIZE 620
+#define TEST_IDX2_N_DIFF4 63
+#define TEST_IDX2_N_DIFF4_SAMPLE_SIZE 630
+/* @} */
+
+/* test_dict_stats_save() @{ */
+void
+test_dict_stats_save()
+{
+ dict_table_t table;
+ dict_index_t index1;
+ dict_field_t index1_fields[1];
+ ib_uint64_t index1_stat_n_diff_key_vals[2];
+ ib_uint64_t index1_stat_n_sample_sizes[2];
+ dict_index_t index2;
+ dict_field_t index2_fields[4];
+ ib_uint64_t index2_stat_n_diff_key_vals[5];
+ ib_uint64_t index2_stat_n_sample_sizes[5];
+ enum db_err ret;
+
+ /* craft a dummy dict_table_t */
+ table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
+ table.stat_n_rows = TEST_N_ROWS;
+ table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
+ table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
+ UT_LIST_INIT(table.indexes);
+ UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
+ UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
+#ifdef UNIV_DEBUG
+ table.magic_n = DICT_TABLE_MAGIC_N;
+#endif /* UNIV_DEBUG */
+
+ index1.name = TEST_IDX1_NAME;
+ index1.table = &table;
+#ifdef UNIV_DEBUG
+ index1.magic_n = DICT_INDEX_MAGIC_N;
+#endif /* UNIV_DEBUG */
+ index1.to_be_dropped = 0;
+ index1.cached = 1;
+ index1.n_uniq = 1;
+ index1.fields = index1_fields;
+ index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
+ index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
+ index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
+ index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
+ index1_fields[0].name = TEST_IDX1_COL1_NAME;
+ index1_stat_n_diff_key_vals[0] = 1; /* dummy */
+ index1_stat_n_diff_key_vals[1] = TEST_IDX1_N_DIFF1;
+ index1_stat_n_sample_sizes[0] = 0; /* dummy */
+ index1_stat_n_sample_sizes[1] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;
+
+ index2.name = TEST_IDX2_NAME;
+ index2.table = &table;
+#ifdef UNIV_DEBUG
+ index2.magic_n = DICT_INDEX_MAGIC_N;
+#endif /* UNIV_DEBUG */
+ index2.to_be_dropped = 0;
+ index2.cached = 1;
+ index2.n_uniq = 4;
+ index2.fields = index2_fields;
+ index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
+ index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
+ index2.stat_index_size = TEST_IDX2_INDEX_SIZE;
+ index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES;
+ index2_fields[0].name = TEST_IDX2_COL1_NAME;
+ index2_fields[1].name = TEST_IDX2_COL2_NAME;
+ index2_fields[2].name = TEST_IDX2_COL3_NAME;
+ index2_fields[3].name = TEST_IDX2_COL4_NAME;
+ index2_stat_n_diff_key_vals[0] = 1; /* dummy */
+ index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF1;
+ index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF2;
+ index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF3;
+ index2_stat_n_diff_key_vals[4] = TEST_IDX2_N_DIFF4;
+ index2_stat_n_sample_sizes[0] = 0; /* dummy */
+ index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[4] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;
+
+ ret = dict_stats_save(&table, FALSE);
+
+ ut_a(ret == DB_SUCCESS);
+
+ printf("\nOK: stats saved successfully, now go ahead and read "
+ "what's inside %s and %s:\n\n",
+ TABLE_STATS_NAME_PRINT,
+ INDEX_STATS_NAME_PRINT);
+
+ printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n"
+ "FROM %s\n"
+ "WHERE\n"
+ "database_name = '%s' AND\n"
+ "table_name = '%s' AND\n"
+ "n_rows = %d AND\n"
+ "clustered_index_size = %d AND\n"
+ "sum_of_other_index_sizes = %d;\n"
+ "\n",
+ TABLE_STATS_NAME_PRINT,
+ TEST_DATABASE_NAME,
+ TEST_TABLE_NAME,
+ TEST_N_ROWS,
+ TEST_CLUSTERED_INDEX_SIZE,
+ TEST_SUM_OF_OTHER_INDEX_SIZES);
+
+ printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n"
+ "FROM %s\n"
+ "WHERE\n"
+ "database_name = '%s' AND\n"
+ "table_name = '%s' AND\n"
+ "index_name = '%s' AND\n"
+ "(\n"
+ " (stat_name = 'size' AND stat_value = %d AND"
+ " sample_size IS NULL) OR\n"
+ " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
+ " sample_size IS NULL) OR\n"
+ " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s')\n"
+ ");\n"
+ "\n",
+ INDEX_STATS_NAME_PRINT,
+ TEST_DATABASE_NAME,
+ TEST_TABLE_NAME,
+ TEST_IDX1_NAME,
+ TEST_IDX1_INDEX_SIZE,
+ TEST_IDX1_N_LEAF_PAGES,
+ TEST_IDX1_N_DIFF1,
+ TEST_IDX1_N_DIFF1_SAMPLE_SIZE,
+ TEST_IDX1_COL1_NAME);
+
+ printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n"
+ "FROM %s\n"
+ "WHERE\n"
+ "database_name = '%s' AND\n"
+ "table_name = '%s' AND\n"
+ "index_name = '%s' AND\n"
+ "(\n"
+ " (stat_name = 'size' AND stat_value = %d AND"
+ " sample_size IS NULL) OR\n"
+ " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
+ " sample_size IS NULL) OR\n"
+ " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s') OR\n"
+ " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s,%s') OR\n"
+ " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n"
+ " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n"
+ ");\n"
+ "\n",
+ INDEX_STATS_NAME_PRINT,
+ TEST_DATABASE_NAME,
+ TEST_TABLE_NAME,
+ TEST_IDX2_NAME,
+ TEST_IDX2_INDEX_SIZE,
+ TEST_IDX2_N_LEAF_PAGES,
+ TEST_IDX2_N_DIFF1,
+ TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME,
+ TEST_IDX2_N_DIFF2,
+ TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
+ TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME,
+ TEST_IDX2_N_DIFF3,
+ TEST_IDX2_N_DIFF3_SAMPLE_SIZE,
+ TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
+ TEST_IDX2_N_DIFF4,
+ TEST_IDX2_N_DIFF4_SAMPLE_SIZE,
+ TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
+ TEST_IDX2_COL4_NAME);
+}
+/* @} */
+
+/* test_dict_stats_fetch_from_ps() @{ */
+void
+test_dict_stats_fetch_from_ps()
+{
+ dict_table_t table;
+ dict_index_t index1;
+ ib_uint64_t index1_stat_n_diff_key_vals[2];
+ ib_uint64_t index1_stat_n_sample_sizes[2];
+ dict_index_t index2;
+ ib_uint64_t index2_stat_n_diff_key_vals[5];
+ ib_uint64_t index2_stat_n_sample_sizes[5];
+ enum db_err ret;
+
+ /* craft a dummy dict_table_t */
+ table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
+ UT_LIST_INIT(table.indexes);
+ UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
+ UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
+#ifdef UNIV_DEBUG
+ table.magic_n = DICT_TABLE_MAGIC_N;
+#endif /* UNIV_DEBUG */
+
+ index1.name = TEST_IDX1_NAME;
+#ifdef UNIV_DEBUG
+ index1.magic_n = DICT_INDEX_MAGIC_N;
+#endif /* UNIV_DEBUG */
+ index1.cached = 1;
+ index1.n_uniq = 1;
+ index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
+ index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
+
+ index2.name = TEST_IDX2_NAME;
+#ifdef UNIV_DEBUG
+ index2.magic_n = DICT_INDEX_MAGIC_N;
+#endif /* UNIV_DEBUG */
+ index2.cached = 1;
+ index2.n_uniq = 4;
+ index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
+ index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
+
+ ret = dict_stats_fetch_from_ps(&table, FALSE);
+
+ ut_a(ret == DB_SUCCESS);
+
+ ut_a(table.stat_n_rows == TEST_N_ROWS);
+ ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE);
+ ut_a(table.stat_sum_of_other_index_sizes
+ == TEST_SUM_OF_OTHER_INDEX_SIZES);
+
+ ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
+ ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
+ ut_a(index1_stat_n_diff_key_vals[1] == TEST_IDX1_N_DIFF1);
+ ut_a(index1_stat_n_sample_sizes[1] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
+
+ ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
+ ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
+ ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF1);
+ ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF2);
+ ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF3);
+ ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[4] == TEST_IDX2_N_DIFF4);
+ ut_a(index2_stat_n_sample_sizes[4] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);
+
+ printf("OK: fetch successful\n");
+}
+/* @} */
+
+/* test_dict_stats_all() @{ */
+void
+test_dict_stats_all()
+{
+ test_dict_table_schema_check();
+
+ test_dict_stats_save();
+
+ test_dict_stats_fetch_from_ps();
+}
+/* @} */
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
+/* @} */
+
+#endif /* UNIV_HOTBACKUP */
+
+/* vim: set foldmethod=marker foldmarker=@{,@}: */