Diffstat (limited to 'storage/innobase/dict/dict0stats.cc')
-rw-r--r--  storage/innobase/dict/dict0stats.cc  3181
1 files changed, 3181 insertions, 0 deletions
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
new file mode 100644
index 00000000000..aa587f54a0d
--- /dev/null
+++ b/storage/innobase/dict/dict0stats.cc
@@ -0,0 +1,3181 @@
+/*****************************************************************************
+
+Copyright (c) 2009, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0stats.cc
+Code used for calculating and manipulating table statistics.
+
+Created Jan 06, 2010 Vasil Dimov
+*******************************************************/
+
+#ifndef UNIV_HOTBACKUP
+
+#include "univ.i"
+
+#include "btr0btr.h" /* btr_get_size() */
+#include "btr0cur.h" /* btr_estimate_number_of_different_key_vals() */
+#include "dict0dict.h" /* dict_table_get_first_index() */
+#include "dict0mem.h" /* DICT_TABLE_MAGIC_N */
+#include "dict0stats.h"
+#include "data0type.h" /* dtype_t */
+#include "db0err.h" /* db_err */
+#include "dyn0dyn.h" /* dyn_array* */
+#include "pars0pars.h" /* pars_info_create() */
+#include "pars0types.h" /* pars_info_t */
+#include "que0que.h" /* que_eval_sql() */
+#include "rem0cmp.h" /* REC_MAX_N_FIELDS,cmp_rec_rec_with_match() */
+#include "row0sel.h" /* sel_node_struct */
+#include "row0types.h" /* sel_node_t */
+#include "trx0trx.h" /* trx_create() */
+#include "trx0roll.h" /* trx_rollback_to_savepoint() */
+#include "ut0rnd.h" /* ut_rnd_interval() */
+
+#include "ha_prototypes.h" /* innobase_strcasecmp() */
+
+/* Sampling algorithm description @{
+
+The algorithm is controlled by one number - srv_stats_persistent_sample_pages,
+let it be A, which is the number of leaf pages to analyze for a given index
+for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
+analyzed).
+
+Let the total number of leaf pages in the table be T.
+Level 0 - leaf pages, level H - root.
+
+Definition: an n-prefix-boring record is a record on a non-leaf page that
+equals the next (to the right, crossing page boundaries, skipping the
+supremum and infimum) record on the same level when looking at the first
+n-prefix columns.
+The last (user) record on a level is not boring (it does not match the
+non-existent user record to the right). We call the records boring because all
+the records on the page below a boring record are equal to that boring record.
+
+We avoid diving below boring records when searching for a leaf page to
+estimate the number of distinct records because we know that such a leaf
+page will contain exactly one distinct record.
+
+For each n-prefix: start from the root level and fully scan subsequent lower
+levels until a level that contains at least A*10 distinct records is found.
+Let's call this level LA.
+As an optimization the search is canceled if it has reached level 1 (we never
+descend to level 0, the leaf level) and also if the next level to be scanned
+would contain more than A pages. The latter is because the user has asked
+to analyze A leaf pages and it does not make sense to scan much more than
+A non-leaf pages with the sole purpose of finding a good sample of A leaf
+pages.
+
+After finding the appropriate level LA with >= A*10 distinct records (or
+fewer, in the exceptions described above), divide it into groups of equal
+records and
+pick A such groups. Then pick the last record from each group. For example,
+let the level be:
+
+index: 0,1,2,3,4,5,6,7,8,9,10
+record: 1,1,1,2,2,7,7,7,7,7,9
+
+There are 4 groups of distinct records and if A=2 random ones are selected,
+e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected.
+
+After selecting A records as described above, dive below them to find A leaf
+pages and analyze them, finding the total number of distinct records. The
+dive to the leaf level is performed by selecting a non-boring record from
+each page and diving below it.
+
+This way, a total of A leaf pages are analyzed for the given n-prefix.
+
+Let the number of different key values found in each leaf page i be Pi (i=1..A).
+Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A.
+Let the number of different key values on level LA be N_DIFF_LA.
+Let the total number of records on level LA be TOTAL_LA.
+Let R be N_DIFF_LA / TOTAL_LA; we assume this ratio is the same on the
+leaf level.
+Let the number of leaf pages be N.
+Then the total number of different key values on the leaf level is:
+N * R * N_DIFF_AVG_LEAF.
+See REF01 for the implementation; a short sketch also follows this description.
+
+The above describes how to calculate the cardinality of an index.
+This algorithm is executed for each n-prefix of a multi-column index
+where n=1..n_uniq.
+@} */
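+
+/* A minimal illustrative sketch of the REF01 estimate above, written as
+plain C inside this comment (the helper name and the plain unsigned long
+long type are assumptions for illustration; this is not part of the real
+code path). It shows the order of operations: divisions are interleaved
+with the multiplications to keep intermediate values from overflowing
+64-bit integers:
+
+	static unsigned long long
+	n_diff_estimate_sketch(
+		unsigned long long	n_leaf_pages,	// N
+		unsigned long long	n_diff_la,	// N_DIFF_LA
+		unsigned long long	total_la,	// TOTAL_LA
+		unsigned long long	n_diff_sum,	// P1 + ... + PA
+		unsigned long long	a)		// A
+	{
+		// N * R * N_DIFF_AVG_LEAF
+		// == N * (N_DIFF_LA / TOTAL_LA) * ((P1 + ... + PA) / A)
+		return(n_leaf_pages * n_diff_la / total_la
+		       * n_diff_sum / a);
+	}
+*/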
+
+/* names of the tables from the persistent statistics storage */
+#define TABLE_STATS_NAME "mysql/innodb_table_stats"
+#define TABLE_STATS_NAME_PRINT "mysql.innodb_table_stats"
+#define INDEX_STATS_NAME "mysql/innodb_index_stats"
+#define INDEX_STATS_NAME_PRINT "mysql.innodb_index_stats"
+
+#ifdef UNIV_STATS_DEBUG
+#define DEBUG_PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__)
+#else /* UNIV_STATS_DEBUG */
+#define DEBUG_PRINTF(fmt, ...) /* noop */
+#endif /* UNIV_STATS_DEBUG */
+
+/* number of distinct records on a given level that are required to stop
+descending to lower levels and fetch
+srv_stats_persistent_sample_pages records from that level */
+#define N_DIFF_REQUIRED (srv_stats_persistent_sample_pages * 10)
+
+/** Open handles on the stats tables. Currently this is used to increase the
+reference count of the stats tables. */
+typedef struct dict_stats_struct {
+ dict_table_t* table_stats; /*!< Handle to open TABLE_STATS_NAME */
+ dict_table_t* index_stats; /*!< Handle to open INDEX_STATS_NAME */
+} dict_stats_t;
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively quick and is used to calculate transient statistics that
+are not saved on disk.
+This was the only way to calculate statistics before the
+Persistent Statistics feature was introduced.
+dict_stats_update_transient() @{ */
+static
+void
+dict_stats_update_transient(
+/*========================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ dict_index_t* index;
+ ulint sum_of_index_sizes = 0;
+
+	/* Find out the sizes of the indexes and approximately how many
+	different values they have for the key */
+
+ index = dict_table_get_first_index(table);
+
+ if (index == NULL) {
+ /* Table definition is corrupt */
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: table %s has no indexes. "
+ "Cannot calculate statistics.\n", table->name);
+ return;
+ }
+
+ do {
+
+ if (index->type & DICT_FTS) {
+ index = dict_table_get_next_index(index);
+ continue;
+ }
+
+ if (UNIV_LIKELY
+ (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE
+ || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO
+ && dict_index_is_clust(index)))) {
+ mtr_t mtr;
+ ulint size;
+
+ mtr_start(&mtr);
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+ size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
+
+ if (size != ULINT_UNDEFINED) {
+ index->stat_index_size = size;
+
+ size = btr_get_size(
+ index, BTR_N_LEAF_PAGES, &mtr);
+ }
+
+ mtr_commit(&mtr);
+
+ switch (size) {
+ case ULINT_UNDEFINED:
+ goto fake_statistics;
+ case 0:
+ /* The root node of the tree is a leaf */
+ size = 1;
+ }
+
+ sum_of_index_sizes += index->stat_index_size;
+
+ index->stat_n_leaf_pages = size;
+
+ btr_estimate_number_of_different_key_vals(index);
+ } else {
+			/* If we have set a high innodb_force_recovery
+			level, do not calculate statistics, as a badly
+			corrupted index can cause a crash in the
+			calculation code.
+			Initialize some bogus index cardinality
+			statistics, so that the data can be queried in
+			various ways, also via secondary indexes. */
+ ulint i;
+
+fake_statistics:
+ sum_of_index_sizes++;
+ index->stat_index_size = index->stat_n_leaf_pages = 1;
+
+ for (i = dict_index_get_n_unique(index); i; ) {
+ index->stat_n_diff_key_vals[i--] = 1;
+ }
+
+ memset(index->stat_n_non_null_key_vals, 0,
+ (1 + dict_index_get_n_unique(index))
+ * sizeof(*index->stat_n_non_null_key_vals));
+ }
+
+ index = dict_table_get_next_index(index);
+ } while (index);
+
+ index = dict_table_get_first_index(table);
+
+ table->stat_n_rows = index->stat_n_diff_key_vals[
+ dict_index_get_n_unique(index)];
+
+ table->stat_clustered_index_size = index->stat_index_size;
+
+ table->stat_sum_of_other_index_sizes = sum_of_index_sizes
+ - index->stat_index_size;
+
+ table->stat_modified_counter = 0;
+
+ table->stat_initialized = TRUE;
+}
+/* @} */
+
+/*********************************************************************//**
+Checks whether the persistent statistics storage exists and that all
+tables have the proper structure.
+dict_stats_persistent_storage_check() @{
+@return TRUE if exists and all tables are ok */
+static
+ibool
+dict_stats_persistent_storage_check(
+/*================================*/
+ ibool caller_has_dict_sys_mutex) /*!< in: TRUE if the caller
+ owns dict_sys->mutex */
+{
+ /* definition for the table TABLE_STATS_NAME */
+ dict_col_meta_t table_stats_columns[] = {
+ {"database_name", DATA_VARMYSQL,
+ DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+
+ {"table_name", DATA_VARMYSQL,
+ DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+
+ {"last_update", DATA_FIXBINARY,
+ DATA_NOT_NULL, 4},
+
+ {"n_rows", DATA_INT,
+ DATA_NOT_NULL | DATA_UNSIGNED, 8},
+
+ {"clustered_index_size", DATA_INT,
+ DATA_NOT_NULL | DATA_UNSIGNED, 8},
+
+ {"sum_of_other_index_sizes", DATA_INT,
+ DATA_NOT_NULL | DATA_UNSIGNED, 8}
+ };
+ dict_table_schema_t table_stats_schema = {
+ TABLE_STATS_NAME,
+ UT_ARR_SIZE(table_stats_columns),
+ table_stats_columns
+ };
+
+ /* definition for the table INDEX_STATS_NAME */
+ dict_col_meta_t index_stats_columns[] = {
+ {"database_name", DATA_VARMYSQL,
+ DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+
+ {"table_name", DATA_VARMYSQL,
+ DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+
+ {"index_name", DATA_VARMYSQL,
+ DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+
+ {"last_update", DATA_FIXBINARY,
+ DATA_NOT_NULL, 4},
+
+ {"stat_name", DATA_VARMYSQL,
+ DATA_NOT_NULL, 64*3},
+
+ {"stat_value", DATA_INT,
+ DATA_NOT_NULL | DATA_UNSIGNED, 8},
+
+ {"sample_size", DATA_INT,
+ DATA_UNSIGNED, 8},
+
+ {"stat_description", DATA_VARMYSQL,
+ DATA_NOT_NULL, 1024*3}
+ };
+ dict_table_schema_t index_stats_schema = {
+ INDEX_STATS_NAME,
+ UT_ARR_SIZE(index_stats_columns),
+ index_stats_columns
+ };
+
+ char errstr[512];
+ enum db_err ret;
+
+ if (!caller_has_dict_sys_mutex) {
+ mutex_enter(&(dict_sys->mutex));
+ }
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ /* first check table_stats */
+ ret = dict_table_schema_check(&table_stats_schema, errstr,
+ sizeof(errstr));
+ if (ret == DB_SUCCESS) {
+ /* if it is ok, then check index_stats */
+ ret = dict_table_schema_check(&index_stats_schema, errstr,
+ sizeof(errstr));
+ }
+
+ if (!caller_has_dict_sys_mutex) {
+ mutex_exit(&(dict_sys->mutex));
+ }
+
+ if (ret != DB_SUCCESS && ret != DB_TABLE_NOT_FOUND) {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: %s\n", errstr);
+ }
+	/* We return silently if some of the tables are not present because
+	this code is executed during table open. By design we check whether
+	the persistent statistics storage is present and whether there are
+	stats for the table being opened; if so, we use them, otherwise we
+	silently fall back to using the transient stats. */
+
+ return(ret == DB_SUCCESS);
+}
+/* @} */
+
+/* @{ Pseudo code about the relation between the following functions
+
+let N = srv_stats_persistent_sample_pages
+
+dict_stats_analyze_index()
+ for each n_prefix
+ search for good enough level:
+ dict_stats_analyze_index_level() // only called if level has <= N pages
+ // full scan of the level in one mtr
+ collect statistics about the given level
+ if we are not satisfied with the level, search next lower level
+ we have found a good enough level here
+ dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
+ // full scan of the level in one mtr
+ dive below some records and analyze the leaf page there:
+ dict_stats_analyze_index_below_cur()
+@} */
+
+/*********************************************************************//**
+Find the total number of records and the number of distinct keys on a given
+level in an index. Each of the 1..n_uniq prefixes is looked up and the
+results are saved in the array n_diff[]. Notice that n_diff[] must be able
+to store n_uniq+1 numbers because the results are saved in
+n_diff[1] .. n_diff[n_uniq]. The total number of records on the level is
+saved in total_recs.
+Also, the index of the last record in each group of equal records is saved
+in n_diff_boundaries[1..n_uniq]; record indexing starts from 0 at the
+leftmost record on the level and continues across page boundaries. A worked
+example follows the function.
+dict_stats_analyze_index_level() @{ */
+static
+void
+dict_stats_analyze_index_level(
+/*===========================*/
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: level */
+ ib_uint64_t* n_diff, /*!< out: array for number of
+ distinct keys for all prefixes */
+ ib_uint64_t* total_recs, /*!< out: total number of records */
+ ib_uint64_t* total_pages, /*!< out: total number of pages */
+ dyn_array_t* n_diff_boundaries)/*!< out: boundaries of the groups
+ of distinct keys */
+{
+ ulint n_uniq;
+ mem_heap_t* heap;
+ dtuple_t* dtuple;
+ btr_pcur_t pcur;
+ mtr_t mtr;
+ const page_t* page;
+ const rec_t* rec;
+ const rec_t* prev_rec;
+ byte* prev_rec_buf = NULL;
+ ulint prev_rec_buf_size = 0;
+ ulint i;
+
+ DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu)\n", __func__,
+ index->table->name, index->name, level);
+
+ n_uniq = dict_index_get_n_unique(index);
+
+ /* elements in the n_diff array are 1..n_uniq (inclusive) */
+ memset(n_diff, 0x0, (n_uniq + 1) * sizeof(*n_diff));
+
+ heap = mem_heap_create(256);
+
+ /* reset the dynamic arrays n_diff_boundaries[1..n_uniq];
+ n_diff_boundaries[0] is ignored to follow the same convention
+ as n_diff[] */
+ if (n_diff_boundaries != NULL) {
+ for (i = 1; i <= n_uniq; i++) {
+ dyn_array_free(&n_diff_boundaries[i]);
+
+ dyn_array_create(&n_diff_boundaries[i]);
+ }
+ }
+
+	/* craft a record that is always smaller than the others;
+	this way we are sure that the cursor pcur will be positioned
+	on the leftmost record on the leftmost page on the desired level */
+ dtuple = dtuple_create(heap, dict_index_get_n_unique(index));
+ dict_table_copy_types(dtuple, index->table);
+ dtuple_set_info_bits(dtuple, REC_INFO_MIN_REC_FLAG);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open_low(index, level, dtuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
+ &pcur, __FILE__, __LINE__, &mtr);
+
+ page = btr_pcur_get_page(&pcur);
+
+ /* check that we are indeed on the desired level */
+ ut_a(btr_page_get_level(page, &mtr) == level);
+
+ /* there should not be any pages on the left */
+ ut_a(btr_page_get_prev(page, &mtr) == FIL_NULL);
+
+	/* check whether the first record on the leftmost page is marked
+	as the minimum record, if we are on a non-leaf level */
+ ut_a(level == 0
+ || (REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+ page_rec_get_next_const(page_get_infimum_rec(page)),
+ page_is_comp(page))));
+
+ if (btr_pcur_is_before_first_on_page(&pcur)) {
+ btr_pcur_move_to_next_on_page(&pcur);
+ }
+
+ if (btr_pcur_is_after_last_on_page(&pcur)) {
+ btr_pcur_move_to_prev_on_page(&pcur);
+ }
+
+ prev_rec = NULL;
+
+ /* no records by default */
+ *total_recs = 0;
+
+ *total_pages = 0;
+
+	/* iterate over all user records on this level
+	and compare each pair of adjacent records, even the last on page
+	X and the first on page X+1 */
+ for (;
+ btr_pcur_is_on_user_rec(&pcur);
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr)) {
+
+ ulint matched_fields = 0;
+ ulint matched_bytes = 0;
+ ulint offsets_rec_onstack[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets_rec;
+
+ rec_offs_init(offsets_rec_onstack);
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ /* increment the pages counter at the end of each page */
+ if (page_rec_is_supremum(page_rec_get_next_const(rec))) {
+
+ (*total_pages)++;
+ }
+
+ /* skip delete-marked records */
+ if (rec_get_deleted_flag(rec, page_is_comp(
+ btr_pcur_get_page(&pcur)))) {
+
+ continue;
+ }
+
+ offsets_rec = rec_get_offsets(rec, index, offsets_rec_onstack,
+ n_uniq, &heap);
+
+ (*total_recs)++;
+
+ if (prev_rec != NULL) {
+
+ ulint offsets_prev_rec_onstack[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets_prev_rec;
+
+ rec_offs_init(offsets_prev_rec_onstack);
+
+ offsets_prev_rec = rec_get_offsets(
+ prev_rec, index, offsets_prev_rec_onstack,
+ n_uniq, &heap);
+
+ cmp_rec_rec_with_match(rec,
+ prev_rec,
+ offsets_rec,
+ offsets_prev_rec,
+ index,
+ FALSE,
+ &matched_fields,
+ &matched_bytes);
+
+ for (i = matched_fields + 1; i <= n_uniq; i++) {
+
+ if (n_diff_boundaries != NULL) {
+ /* push the index of the previous
+ record, that is - the last one from
+ a group of equal keys */
+
+ void* p;
+ ib_uint64_t idx;
+
+ /* the index of the current record
+ is total_recs - 1, the index of the
+ previous record is total_recs - 2;
+ we know that idx is not going to
+ become negative here because if we
+ are in this branch then there is a
+ previous record and thus
+ total_recs >= 2 */
+ idx = *total_recs - 2;
+
+ p = dyn_array_push(
+ &n_diff_boundaries[i],
+ sizeof(ib_uint64_t));
+
+ memcpy(p, &idx, sizeof(ib_uint64_t));
+ }
+
+ /* increment the number of different keys
+ for n_prefix=i */
+ n_diff[i]++;
+ }
+ } else {
+			/* this is the first non-delete-marked record */
+ for (i = 1; i <= n_uniq; i++) {
+ n_diff[i] = 1;
+ }
+ }
+
+ if (page_rec_is_supremum(page_rec_get_next_const(rec))) {
+ /* end of a page has been reached */
+
+ /* we need to copy the record instead of assigning
+ like prev_rec = rec; because when we traverse the
+ records on this level at some point we will jump from
+ one page to the next and then rec and prev_rec will
+ be on different pages and
+ btr_pcur_move_to_next_user_rec() will release the
+ latch on the page that prev_rec is on */
+ prev_rec = rec_copy_prefix_to_buf(
+ rec, index, rec_offs_n_fields(offsets_rec),
+ &prev_rec_buf, &prev_rec_buf_size);
+
+ } else {
+ /* still on the same page, the next call to
+ btr_pcur_move_to_next_user_rec() will not jump
+ on the next page, we can simply assign pointers
+ instead of copying the records like above */
+
+ prev_rec = rec;
+ }
+ }
+
+ /* if *total_pages is left untouched then the above loop was not
+ entered at all and there is one page in the whole tree which is
+ empty */
+ if (*total_pages == 0) {
+
+ ut_ad(level == 0);
+ ut_ad(*total_recs == 0);
+
+ *total_pages = 1;
+ }
+
+ /* if there are records on this level and boundaries
+ should be saved */
+ if (*total_recs > 0 && n_diff_boundaries != NULL) {
+
+ /* remember the index of the last record on the level as the
+ last one from the last group of equal keys; this holds for
+ all possible prefixes */
+ for (i = 1; i <= n_uniq; i++) {
+ void* p;
+ ib_uint64_t idx;
+
+ idx = *total_recs - 1;
+
+ p = dyn_array_push(&n_diff_boundaries[i],
+ sizeof(ib_uint64_t));
+
+ memcpy(p, &idx, sizeof(ib_uint64_t));
+ }
+ }
+
+ /* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
+ for i=1..n_uniq */
+
+#ifdef UNIV_STATS_DEBUG
+ for (i = 1; i <= n_uniq; i++) {
+
+ DEBUG_PRINTF(" %s(): total recs: " UINT64PF
+ ", total pages: " UINT64PF
+ ", n_diff[%lu]: " UINT64PF "\n",
+ __func__, *total_recs,
+ *total_pages,
+ i, n_diff[i]);
+
+#if 0
+ if (n_diff_boundaries != NULL) {
+ ib_uint64_t j;
+
+ DEBUG_PRINTF(" %s(): boundaries[%lu]: ",
+ __func__, i);
+
+ for (j = 0; j < n_diff[i]; j++) {
+ ib_uint64_t idx;
+
+ idx = *(ib_uint64_t*) dyn_array_get_element(
+ &n_diff_boundaries[i],
+ j * sizeof(ib_uint64_t));
+
+ DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
+ j, idx);
+ }
+ DEBUG_PRINTF("\n");
+ }
+#endif
+ }
+#endif /* UNIV_STATS_DEBUG */
+
+ btr_pcur_close(&pcur);
+
+ mtr_commit(&mtr);
+
+ if (prev_rec_buf != NULL) {
+
+ mem_free(prev_rec_buf);
+ }
+
+ mem_heap_free(heap);
+}
+/* @} */
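+
+/* A worked example of what dict_stats_analyze_index_level() reports for the
+sample level from the algorithm description at the top of this file (an
+illustration only). For n_uniq=1 and the level
+
+	index:  0,1,2,3,4,5,6,7,8,9,10
+	record: 1,1,1,2,2,7,7,7,7,7,9
+
+the function sets total_recs=11, n_diff[1]=4 and fills n_diff_boundaries[1]
+with {2, 4, 9, 10}: the index of the last record in each group of equal
+keys, the last record on the level always closing the last group. */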
+
+/* aux enum for controlling the behavior of dict_stats_scan_page() @{ */
+typedef enum page_scan_method_enum {
+ COUNT_ALL_NON_BORING, /* scan all records on the given page
+ and count the number of distinct ones */
+ QUIT_ON_FIRST_NON_BORING/* quit when the first record that differs
+ from its right neighbor is found */
+} page_scan_method_t;
+/* @} */
+
+/*********************************************************************//**
+Scan a page, reading records from left to right and counting the number
+of distinct records on that page (looking only at the first n_prefix
+columns). If scan_method is QUIT_ON_FIRST_NON_BORING then the function
+will return as soon as it finds a record that does not match its neighbor
+to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
+returned n_diff can be 0 (empty page), 1 (the whole page has all keys
+equal), or 2 (the function found a non-boring record and returned). A short
+sketch of these semantics follows the function.
+@return offsets1 or offsets2 (the offsets of *out_rec),
+or NULL if the page is empty and does not contain user records.
+dict_stats_scan_page() @{ */
+UNIV_INLINE __attribute__((nonnull))
+ulint*
+dict_stats_scan_page(
+/*=================*/
+ const rec_t** out_rec, /*!< out: record, or NULL */
+ ulint* offsets1, /*!< out: rec_get_offsets()
+ working space (must be big
+ enough) */
+ ulint* offsets2, /*!< out: rec_get_offsets()
+ working space (must be big
+ enough) */
+ dict_index_t* index, /*!< in: index of the page */
+ const page_t* page, /*!< in: the page to scan */
+ ulint n_prefix, /*!< in: look at the first
+ n_prefix columns */
+ page_scan_method_t scan_method, /*!< in: scan to the end of
+ the page or not */
+ ib_uint64_t* n_diff) /*!< out: number of distinct
+ records encountered */
+{
+ ulint* offsets_rec = offsets1;
+ ulint* offsets_next_rec = offsets2;
+ const rec_t* rec;
+ const rec_t* next_rec;
+ /* A dummy heap, to be passed to rec_get_offsets().
+ Because offsets1,offsets2 should be big enough,
+ this memory heap should never be used. */
+ mem_heap_t* heap = NULL;
+
+ rec = page_rec_get_next_const(page_get_infimum_rec(page));
+
+ if (page_rec_is_supremum(rec)) {
+ /* the page is empty */
+ *n_diff = 0;
+ *out_rec = NULL;
+ return(NULL);
+ }
+
+ offsets_rec = rec_get_offsets(rec, index, offsets_rec,
+ ULINT_UNDEFINED, &heap);
+
+ next_rec = page_rec_get_next_const(rec);
+
+ *n_diff = 1;
+
+ while (!page_rec_is_supremum(next_rec)) {
+
+ ulint matched_fields = 0;
+ ulint matched_bytes = 0;
+
+ offsets_next_rec = rec_get_offsets(next_rec, index,
+ offsets_next_rec,
+ ULINT_UNDEFINED,
+ &heap);
+
+ /* check whether rec != next_rec when looking at
+ the first n_prefix fields */
+ cmp_rec_rec_with_match(rec, next_rec,
+ offsets_rec, offsets_next_rec,
+ index, FALSE, &matched_fields,
+ &matched_bytes);
+
+ if (matched_fields < n_prefix) {
+ /* rec != next_rec, => rec is non-boring */
+
+ (*n_diff)++;
+
+ if (scan_method == QUIT_ON_FIRST_NON_BORING) {
+ goto func_exit;
+ }
+ }
+
+ rec = next_rec;
+ {
+ /* Assign offsets_rec = offsets_next_rec
+ so that offsets_rec matches with rec which
+ was just assigned rec = next_rec above.
+ Also need to point offsets_next_rec to the
+ place where offsets_rec was pointing before
+ because we have just 2 placeholders where
+ data is actually stored:
+			offsets1 and offsets2, and we
+ are using them in circular fashion
+ (offsets[_next]_rec are just pointers to
+ those placeholders). */
+ ulint* offsets_tmp;
+ offsets_tmp = offsets_rec;
+ offsets_rec = offsets_next_rec;
+ offsets_next_rec = offsets_tmp;
+ }
+ next_rec = page_rec_get_next_const(next_rec);
+ }
+
+func_exit:
+ /* offsets1,offsets2 should have been big enough */
+ ut_a(heap == NULL);
+ *out_rec = rec;
+ return(offsets_rec);
+}
+/* @} */
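+
+/* A self-contained sketch of the scan semantics documented above, using a
+plain int array in place of page records (a hypothetical helper for
+illustration, not used by the real code):
+
+	static ib_uint64_t
+	scan_sketch(
+		const int*		keys,
+		int			n_keys,
+		page_scan_method_t	scan_method)
+	{
+		ib_uint64_t	n_diff;
+		int		i;
+
+		if (n_keys == 0) {
+			return(0);	// empty page
+		}
+
+		n_diff = 1;
+
+		for (i = 0; i < n_keys - 1; i++) {
+			if (keys[i] != keys[i + 1]) {
+				// keys[i] is non-boring
+				n_diff++;
+				if (scan_method
+				    == QUIT_ON_FIRST_NON_BORING) {
+					return(n_diff);	// always 2 here
+				}
+			}
+		}
+
+		return(n_diff);	// 1 when all keys on the page are equal
+	}
+*/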
+
+/*********************************************************************//**
+Dive below the current position of a cursor and calculate the number of
+distinct records on the leaf page, when looking at the first n_prefix
+columns.
+dict_stats_analyze_index_below_cur() @{
+@return number of distinct records on the leaf page */
+static
+ib_uint64_t
+dict_stats_analyze_index_below_cur(
+/*===============================*/
+ const btr_cur_t*cur, /*!< in: cursor */
+ ulint n_prefix, /*!< in: look at the first n_prefix
+ columns when comparing records */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ dict_index_t* index;
+ ulint space;
+ ulint zip_size;
+ buf_block_t* block;
+ ulint page_no;
+ const page_t* page;
+ mem_heap_t* heap;
+ const rec_t* rec;
+ ulint* offsets1;
+ ulint* offsets2;
+ ulint* offsets_rec;
+ ulint root_height;
+ ib_uint64_t n_diff; /* the result */
+ ulint size;
+
+ index = btr_cur_get_index(cur);
+
+ /* Allocate offsets for the record and the node pointer, for
+ node pointer records. In a secondary index, the node pointer
+ record will consist of all index fields followed by a child
+ page number.
+ Allocate space for the offsets header (the allocation size at
+ offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
+ so that this will never be less than the size calculated in
+ rec_get_offsets_func(). */
+ size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index);
+
+ heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2));
+
+ offsets1 = static_cast<ulint*>(mem_heap_alloc(
+ heap, size * sizeof *offsets1));
+
+ offsets2 = static_cast<ulint*>(mem_heap_alloc(
+ heap, size * sizeof *offsets2));
+
+ rec_offs_set_n_alloc(offsets1, size);
+ rec_offs_set_n_alloc(offsets2, size);
+
+ root_height = btr_page_get_level(btr_root_get(index, mtr), mtr);
+
+ space = dict_index_get_space(index);
+ zip_size = dict_table_zip_size(index->table);
+
+ rec = btr_cur_get_rec(cur);
+
+ offsets_rec = rec_get_offsets(rec, index, offsets1,
+ ULINT_UNDEFINED, &heap);
+
+ page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);
+
+ /* descend to the leaf level on the B-tree */
+ for (;;) {
+
+ block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH,
+ NULL /* no guessed block */,
+ BUF_GET, __FILE__, __LINE__, mtr);
+
+ page = buf_block_get_frame(block);
+
+ if (btr_page_get_level(page, mtr) == 0) {
+ /* leaf level */
+ break;
+ }
+ /* else */
+
+ /* search for the first non-boring record on the page */
+ offsets_rec = dict_stats_scan_page(
+ &rec, offsets1, offsets2, index, page, n_prefix,
+ QUIT_ON_FIRST_NON_BORING, &n_diff);
+
+ /* pages on level > 0 are not allowed to be empty */
+ ut_a(offsets_rec != NULL);
+ /* if page is not empty (offsets_rec != NULL) then n_diff must
+ be > 0, otherwise there is a bug in dict_stats_scan_page() */
+ ut_a(n_diff > 0);
+
+ if (n_diff == 1) {
+ /* page has all keys equal and the end of the page
+ was reached by dict_stats_scan_page(), no need to
+ descend to the leaf level */
+ mem_heap_free(heap);
+ return(1);
+ }
+ /* else */
+
+ /* when we instruct dict_stats_scan_page() to quit on the
+ first non-boring record it finds, then the returned n_diff
+ can either be 0 (empty page), 1 (page has all keys equal) or
+ 2 (non-boring record was found) */
+ ut_a(n_diff == 2);
+
+ /* we have a non-boring record in rec, descend below it */
+
+ page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);
+ }
+
+ /* make sure we got a leaf page as a result from the above loop */
+ ut_ad(btr_page_get_level(page, mtr) == 0);
+
+ /* scan the leaf page and find the number of distinct keys,
+ when looking only at the first n_prefix columns */
+
+ offsets_rec = dict_stats_scan_page(
+ &rec, offsets1, offsets2, index, page, n_prefix,
+ COUNT_ALL_NON_BORING, &n_diff);
+
+ if (root_height > 0) {
+
+ /* empty pages are allowed only if the whole B-tree is empty
+ and contains a single empty page */
+ ut_a(offsets_rec != NULL);
+ }
+
+#if 0
+ DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n",
+ __func__, page_no, n_diff);
+#endif
+
+ mem_heap_free(heap);
+
+ return(n_diff);
+}
+/* @} */
+
+/*********************************************************************//**
+For a given level in an index select srv_stats_persistent_sample_pages
+(or less) records from that level and dive below them to the corresponding
+leaf pages, then scan those leaf pages and save the sampling results in
+index->stat_n_diff_key_vals[n_prefix] and the number of pages scanned in
+index->stat_n_sample_sizes[n_prefix].
+dict_stats_analyze_index_for_n_prefix() @{ */
+static
+void
+dict_stats_analyze_index_for_n_prefix(
+/*==================================*/
+ dict_index_t* index, /*!< in/out: index */
+ ulint level, /*!< in: level,
+ must be >= 1 */
+ ib_uint64_t total_recs_on_level, /*!< in: total number of
+ records on the given level */
+ ulint n_prefix, /*!< in: look at first
+ n_prefix columns when
+ comparing records */
+ ib_uint64_t n_diff_for_this_prefix, /*!< in: number of distinct
+ records on the given level,
+ when looking at the first
+ n_prefix columns */
+ dyn_array_t* boundaries) /*!< in: array that contains
+ n_diff_for_this_prefix
+ integers each of which
+ represents the index (on the
+ level, counting from
+ left/smallest to right/biggest
+ from 0) of the last record
+ from each group of distinct
+ keys */
+{
+ mem_heap_t* heap;
+ dtuple_t* dtuple;
+ btr_pcur_t pcur;
+ mtr_t mtr;
+ const page_t* page;
+ ib_uint64_t rec_idx;
+ ib_uint64_t last_idx_on_level;
+ ib_uint64_t n_recs_to_dive_below;
+ ib_uint64_t n_diff_sum_of_all_analyzed_pages;
+ ib_uint64_t i;
+
+#if 0
+ DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu, "
+ "n_diff_for_this_prefix=" UINT64PF ")\n",
+ __func__, index->table->name, index->name, level,
+ n_prefix, n_diff_for_this_prefix);
+#endif
+
+	/* if either of those is 0 then there is exactly one page in the
+	B-tree and it is empty; in that case we should have done a full
+	scan and should not be here */
+ ut_ad(total_recs_on_level > 0);
+ ut_ad(n_diff_for_this_prefix > 0);
+
+	/* this is configured to be at least 1; if not, someone has
+	changed the code */
+ ut_ad(srv_stats_persistent_sample_pages > 0);
+
+ heap = mem_heap_create(256);
+
+	/* craft a record that is always smaller than the others;
+	this way we are sure that the cursor pcur will be positioned
+	on the leftmost record on the leftmost page on the desired level */
+ dtuple = dtuple_create(heap, dict_index_get_n_unique(index));
+ dict_table_copy_types(dtuple, index->table);
+ dtuple_set_info_bits(dtuple, REC_INFO_MIN_REC_FLAG);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open_low(index, level, dtuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
+ &pcur, __FILE__, __LINE__, &mtr);
+
+ page = btr_pcur_get_page(&pcur);
+
+ /* check that we are indeed on the desired level */
+ ut_a(btr_page_get_level(page, &mtr) == level);
+
+ /* there should not be any pages on the left */
+ ut_a(btr_page_get_prev(page, &mtr) == FIL_NULL);
+
+ /* check whether the first record on the leftmost page is marked
+ as such, if we are on a non-leaf level */
+ ut_a(level == 0 || REC_INFO_MIN_REC_FLAG
+ & rec_get_info_bits(page_rec_get_next_const(
+ page_get_infimum_rec(page)),
+ page_is_comp(page)));
+
+ if (btr_pcur_is_before_first_on_page(&pcur)) {
+ btr_pcur_move_to_next_on_page(&pcur);
+ }
+
+ if (btr_pcur_is_after_last_on_page(&pcur)) {
+ btr_pcur_move_to_prev_on_page(&pcur);
+ }
+
+ last_idx_on_level = *(ib_uint64_t*) dyn_array_get_element(boundaries,
+ (ulint) ((n_diff_for_this_prefix - 1) * sizeof(ib_uint64_t)));
+
+ rec_idx = 0;
+
+ n_diff_sum_of_all_analyzed_pages = 0;
+
+ n_recs_to_dive_below = ut_min(srv_stats_persistent_sample_pages,
+ n_diff_for_this_prefix);
+
+ for (i = 0; i < n_recs_to_dive_below; i++) {
+ ib_uint64_t left;
+ ib_uint64_t right;
+ ulint rnd;
+ ib_uint64_t dive_below_idx;
+
+ /* there are n_diff_for_this_prefix elements
+ in the array boundaries[] and we divide those elements
+ into n_recs_to_dive_below segments, for example:
+
+ let n_diff_for_this_prefix=100, n_recs_to_dive_below=4, then:
+ segment i=0: [0, 24]
+ segment i=1: [25, 49]
+ segment i=2: [50, 74]
+ segment i=3: [75, 99] or
+
+ let n_diff_for_this_prefix=1, n_recs_to_dive_below=1, then:
+ segment i=0: [0, 0] or
+
+ let n_diff_for_this_prefix=2, n_recs_to_dive_below=2, then:
+ segment i=0: [0, 0]
+ segment i=1: [1, 1] or
+
+ let n_diff_for_this_prefix=13, n_recs_to_dive_below=7, then:
+ segment i=0: [0, 0]
+ segment i=1: [1, 2]
+ segment i=2: [3, 4]
+ segment i=3: [5, 6]
+ segment i=4: [7, 8]
+ segment i=5: [9, 10]
+ segment i=6: [11, 12]
+
+		then we select a random record from each segment and dive
+		below it (see the boundary sketch after this function) */
+ left = n_diff_for_this_prefix * i / n_recs_to_dive_below;
+ right = n_diff_for_this_prefix * (i + 1)
+ / n_recs_to_dive_below - 1;
+
+ ut_a(left <= right);
+ ut_a(right <= last_idx_on_level);
+
+ /* we do not pass (left, right) because we do not want to ask
+ ut_rnd_interval() to work with too big numbers since
+ ib_uint64_t could be bigger than ulint */
+ rnd = ut_rnd_interval(0, (ulint) (right - left));
+
+ dive_below_idx = *(ib_uint64_t*) dyn_array_get_element(
+ boundaries, (ulint) ((left + rnd)
+ * sizeof(ib_uint64_t)));
+
+#if 0
+ DEBUG_PRINTF(" %s(): dive below record with index="
+ UINT64PF "\n", __func__, dive_below_idx);
+#endif
+
+ /* seek to the record with index dive_below_idx */
+ while (rec_idx < dive_below_idx
+ && btr_pcur_is_on_user_rec(&pcur)) {
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ rec_idx++;
+ }
+
+		/* if the level ended before we reached the record we are
+		searching for, this means that the B-tree has changed in
+		the meantime; quit our sampling and use whatever stats
+		we have collected so far */
+ if (rec_idx < dive_below_idx) {
+
+ ut_ad(!btr_pcur_is_on_user_rec(&pcur));
+ break;
+ }
+
+ ut_a(rec_idx == dive_below_idx);
+
+ ib_uint64_t n_diff_on_leaf_page;
+
+ n_diff_on_leaf_page = dict_stats_analyze_index_below_cur(
+ btr_pcur_get_btr_cur(&pcur), n_prefix, &mtr);
+
+ /* We adjust n_diff_on_leaf_page here to avoid counting
+ one record twice - once as the last on some page and once
+ as the first on another page. Consider the following example:
+ Leaf level:
+ page: (2,2,2,2,3,3)
+ ... many pages like (3,3,3,3,3,3) ...
+ page: (3,3,3,3,5,5)
+ ... many pages like (5,5,5,5,5,5) ...
+ page: (5,5,5,5,8,8)
+ page: (8,8,8,8,9,9)
+		our algorithm would (correctly) estimate that there are
+		2 distinct records per page on average. Having 4 pages below
+		non-boring records, it would (wrongly) estimate the number
+		of distinct records as 8. */
+ if (n_diff_on_leaf_page > 0) {
+ n_diff_on_leaf_page--;
+ }
+
+ n_diff_sum_of_all_analyzed_pages += n_diff_on_leaf_page;
+ }
+
+ if (n_diff_sum_of_all_analyzed_pages == 0) {
+ n_diff_sum_of_all_analyzed_pages = 1;
+ }
+
+ /* See REF01 for an explanation of the algorithm */
+ index->stat_n_diff_key_vals[n_prefix]
+ = index->stat_n_leaf_pages
+
+ * n_diff_for_this_prefix
+ / total_recs_on_level
+
+ * n_diff_sum_of_all_analyzed_pages
+ / n_recs_to_dive_below;
+
+ index->stat_n_sample_sizes[n_prefix] = n_recs_to_dive_below;
+
+ DEBUG_PRINTF(" %s(): n_diff=" UINT64PF " for n_prefix=%lu "
+ "(%lu"
+ " * " UINT64PF " / " UINT64PF
+ " * " UINT64PF " / " UINT64PF ")\n",
+ __func__, index->stat_n_diff_key_vals[n_prefix],
+ n_prefix,
+ index->stat_n_leaf_pages,
+ n_diff_for_this_prefix, total_recs_on_level,
+ n_diff_sum_of_all_analyzed_pages, n_recs_to_dive_below);
+
+ btr_pcur_close(&pcur);
+
+ mtr_commit(&mtr);
+
+ mem_heap_free(heap);
+}
+/* @} */
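+
+/* A sketch of the segment boundary arithmetic from the loop above
+(illustrative helper; it reproduces the worked examples given in the
+in-function comment):
+
+	static void
+	segment_sketch(
+		ib_uint64_t	n_diff,		// n_diff_for_this_prefix
+		ib_uint64_t	n_segments,	// n_recs_to_dive_below
+		ib_uint64_t	i,		// segment number, 0-based
+		ib_uint64_t*	left,		// out: first index in segment
+		ib_uint64_t*	right)		// out: last index in segment
+	{
+		*left = n_diff * i / n_segments;
+		*right = n_diff * (i + 1) / n_segments - 1;
+	}
+
+For example segment_sketch(13, 7, 1, &left, &right) gives left=1, right=2,
+matching "segment i=1: [1, 2]" above. */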
+
+/*********************************************************************//**
+Calculates new statistics for a given index and saves them to the index
+members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
+stat_n_leaf_pages. This function could be slow.
+dict_stats_analyze_index() @{ */
+static
+void
+dict_stats_analyze_index(
+/*=====================*/
+ dict_index_t* index) /*!< in/out: index to analyze */
+{
+ ulint root_level;
+ ulint level;
+ ibool level_is_analyzed;
+ ulint n_uniq;
+ ulint n_prefix;
+ ib_uint64_t* n_diff_on_level;
+ ib_uint64_t total_recs;
+ ib_uint64_t total_pages;
+ dyn_array_t* n_diff_boundaries;
+ mtr_t mtr;
+ ulint size;
+ ulint i;
+
+ DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name);
+
+ mtr_start(&mtr);
+
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+ size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
+
+ if (size != ULINT_UNDEFINED) {
+ index->stat_index_size = size;
+ size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
+ }
+
+ switch (size) {
+ case ULINT_UNDEFINED:
+ mtr_commit(&mtr);
+ /* Fake some statistics. */
+ index->stat_index_size = index->stat_n_leaf_pages = 1;
+
+ for (i = dict_index_get_n_unique(index); i; ) {
+ index->stat_n_diff_key_vals[i--] = 1;
+ }
+
+ memset(index->stat_n_non_null_key_vals, 0,
+ (1 + dict_index_get_n_unique(index))
+ * sizeof(*index->stat_n_non_null_key_vals));
+ return;
+ case 0:
+ /* The root node of the tree is a leaf */
+ size = 1;
+ }
+
+ index->stat_n_leaf_pages = size;
+
+ root_level = btr_page_get_level(btr_root_get(index, &mtr), &mtr);
+
+ mtr_commit(&mtr);
+
+ n_uniq = dict_index_get_n_unique(index);
+
+ /* if the tree has just one level (and one page) or if the user
+ has requested to sample too many pages then do full scan */
+ if (root_level == 0
+ /* for each n-column prefix (for n=1..n_uniq)
+ srv_stats_persistent_sample_pages will be sampled, so in total
+ srv_stats_persistent_sample_pages * n_uniq leaf pages will be
+ sampled. If that number is bigger than the total number of leaf
+ pages then do full scan of the leaf level instead since it will
+ be faster and will give better results. */
+ || srv_stats_persistent_sample_pages * n_uniq
+ > index->stat_n_leaf_pages) {
+
+ if (root_level == 0) {
+ DEBUG_PRINTF(" %s(): just one page, "
+ "doing full scan\n", __func__);
+ } else {
+ DEBUG_PRINTF(" %s(): too many pages requested for "
+ "sampling, doing full scan\n", __func__);
+ }
+
+ /* do full scan of level 0; save results directly
+ into the index */
+
+ dict_stats_analyze_index_level(index,
+ 0 /* leaf level */,
+ index->stat_n_diff_key_vals,
+ &total_recs,
+ &total_pages,
+ NULL /*boundaries not needed*/);
+
+ for (i = 1; i <= n_uniq; i++) {
+ index->stat_n_sample_sizes[i] = total_pages;
+ }
+
+ return;
+ }
+ /* else */
+
+ /* set to zero */
+ n_diff_on_level = (ib_uint64_t*) mem_zalloc((n_uniq + 1)
+ * sizeof(ib_uint64_t));
+
+ n_diff_boundaries = (dyn_array_t*) mem_alloc((n_uniq + 1)
+ * sizeof(dyn_array_t));
+
+ for (i = 1; i <= n_uniq; i++) {
+ /* initialize the dynamic arrays, the first one
+ (index=0) is ignored to follow the same indexing
+ scheme as n_diff_on_level[] */
+ dyn_array_create(&n_diff_boundaries[i]);
+ }
+
+ /* total_recs is also used to estimate the number of pages on one
+ level below, so at the start we have 1 page (the root) */
+ total_recs = 1;
+
+ /* Here we use the following optimization:
+ If we find that level L is the first one (searching from the
+ root) that contains at least D distinct keys when looking at
+ the first n_prefix columns, then:
+ if we look at the first n_prefix-1 columns then the first
+ level that contains D distinct keys will be either L or a
+ lower one.
+ So if we find that the first level containing D distinct
+ keys (on n_prefix columns) is L, we continue from L when
+ searching for D distinct keys on n_prefix-1 columns. */
+ level = (long) root_level;
+ level_is_analyzed = FALSE;
+ for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {
+
+ DEBUG_PRINTF(" %s(): searching level with >=%llu "
+ "distinct records, n_prefix=%lu\n",
+ __func__, N_DIFF_REQUIRED, n_prefix);
+
+		/* check whether we should pick the current level;
+		we pick level 1 even if it does not have enough
+		distinct records, because we do not want to scan the
+		leaf level, which may contain too many records */
+ if (level_is_analyzed
+ && (n_diff_on_level[n_prefix] >= N_DIFF_REQUIRED
+ || level == 1)) {
+
+ goto found_level;
+ }
+ /* else */
+
+ /* search for a level that contains enough distinct records */
+
+ if (level_is_analyzed && level > 1) {
+
+ /* if this does not hold we should be on
+ "found_level" instead of here */
+ ut_ad(n_diff_on_level[n_prefix] < N_DIFF_REQUIRED);
+
+ level--;
+ level_is_analyzed = FALSE;
+ }
+
+ for (;;) {
+
+ /* make sure we do not scan the leaf level
+ accidentally, it may contain too many pages */
+ ut_ad(level > 0);
+
+ /* scanning the same level twice is an optimization
+ bug */
+ ut_ad(!level_is_analyzed);
+
+ /* Do not scan if this would read too many pages.
+ Here we use the following fact:
+ the number of pages on level L equals the number
+ of records on level L+1, thus we deduce that the
+ following call would scan total_recs pages, because
+			total_recs is left over from the previous iteration
+			when we scanned one level up, or we have not scanned
+			any levels yet, in which case total_recs is 1. */
+ if (total_recs > srv_stats_persistent_sample_pages) {
+
+ /* if the above cond is true then we are not
+ at the root level since on the root level
+ total_recs == 1 and cannot
+ be > srv_stats_persistent_sample_pages */
+ ut_a(level != root_level);
+
+ /* step one level back and be satisfied with
+ whatever it contains */
+ level++;
+ level_is_analyzed = TRUE;
+
+ break;
+ }
+
+ dict_stats_analyze_index_level(index,
+ level,
+ n_diff_on_level,
+ &total_recs,
+ &total_pages,
+ n_diff_boundaries);
+
+ level_is_analyzed = TRUE;
+
+ if (n_diff_on_level[n_prefix] >= N_DIFF_REQUIRED
+ || level == 1) {
+ /* we found a good level with many distinct
+ records or we have reached the last level we
+ could scan */
+ break;
+ }
+ /* else */
+
+ level--;
+ level_is_analyzed = FALSE;
+ }
+found_level:
+
+ DEBUG_PRINTF(" %s(): found level %lu that has " UINT64PF
+ " distinct records for n_prefix=%lu\n",
+ __func__, level, n_diff_on_level[n_prefix],
+ n_prefix);
+
+ /* here we are either on level 1 or the level that we are on
+ contains >= N_DIFF_REQUIRED distinct keys or we did not scan
+ deeper levels because they would contain too many pages */
+
+ ut_ad(level > 0);
+
+ ut_ad(level_is_analyzed);
+
+ /* pick some records from this level and dive below them for
+ the given n_prefix */
+
+ dict_stats_analyze_index_for_n_prefix(
+ index, level, total_recs, n_prefix,
+ n_diff_on_level[n_prefix],
+ &n_diff_boundaries[n_prefix]);
+ }
+
+ for (i = 1; i <= n_uniq; i++) {
+ dyn_array_free(&n_diff_boundaries[i]);
+ }
+
+ mem_free(n_diff_boundaries);
+
+ mem_free(n_diff_on_level);
+}
+/* @} */
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively slow and is used to calculate persistent statistics that
+will be saved on disk.
+dict_stats_update_persistent() @{
+@return DB_SUCCESS or error code */
+static
+enum db_err
+dict_stats_update_persistent(
+/*=========================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ dict_index_t* index;
+
+ DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);
+
+ /* XXX quit if interrupted, e.g. SIGTERM */
+
+ /* analyze the clustered index first */
+
+ index = dict_table_get_first_index(table);
+
+ if (index == NULL) {
+ /* Table definition is corrupt */
+ return(DB_CORRUPTION);
+ }
+
+ dict_stats_analyze_index(index);
+
+ table->stat_n_rows
+ = index->stat_n_diff_key_vals[dict_index_get_n_unique(index)];
+
+ table->stat_clustered_index_size = index->stat_index_size;
+
+ /* analyze other indexes from the table, if any */
+
+ table->stat_sum_of_other_index_sizes = 0;
+
+ for (index = dict_table_get_next_index(index);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (index->type & DICT_FTS) {
+ continue;
+ }
+
+ dict_stats_analyze_index(index);
+
+ table->stat_sum_of_other_index_sizes
+ += index->stat_index_size;
+ }
+
+ table->stat_modified_counter = 0;
+
+ table->stat_initialized = TRUE;
+
+ return(DB_SUCCESS);
+}
+/* @} */
+
+/*********************************************************************//**
+Save an individual index's statistic into the persistent statistics
+storage.
+dict_stats_save_index_stat() @{
+@return DB_SUCCESS or error code */
+static
+enum db_err
+dict_stats_save_index_stat(
+/*=======================*/
+ dict_index_t* index, /*!< in: index */
+ lint last_update, /*!< in: timestamp of the stat */
+ const char* stat_name, /*!< in: name of the stat */
+ ib_uint64_t stat_value, /*!< in: value of the stat */
+ ib_uint64_t* sample_size, /*!< in: n pages sampled or NULL */
+ const char* stat_description,/*!< in: description of the stat */
+ trx_t* trx, /*!< in/out: transaction to use */
+ ibool caller_has_dict_sys_mutex)/*!< in: TRUE if the caller
+ owns dict_sys->mutex */
+{
+ pars_info_t* pinfo;
+ enum db_err ret;
+
+ pinfo = pars_info_create();
+
+ pars_info_add_literal(pinfo, "database_name", index->table->name,
+ dict_get_db_name_len(index->table->name),
+ DATA_VARCHAR, 0);
+
+ pars_info_add_str_literal(pinfo, "table_name",
+ dict_remove_db_name(index->table->name));
+
+ pars_info_add_str_literal(pinfo, "index_name", index->name);
+
+ pars_info_add_int4_literal(pinfo, "last_update", last_update);
+
+ pars_info_add_str_literal(pinfo, "stat_name", stat_name);
+
+ pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
+
+ if (sample_size != NULL) {
+ pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
+ } else {
+ pars_info_add_literal(pinfo, "sample_size", NULL,
+ UNIV_SQL_NULL, DATA_FIXBINARY, 0);
+ }
+
+ pars_info_add_str_literal(pinfo, "stat_description",
+ stat_description);
+
+ ret = que_eval_sql(pinfo,
+ "PROCEDURE INDEX_STATS_SAVE () IS\n"
+ "dummy CHAR;\n"
+ "BEGIN\n"
+
+ "SELECT database_name INTO dummy\n"
+ "FROM \"" INDEX_STATS_NAME "\"\n"
+ "WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name AND\n"
+ "index_name = :index_name AND\n"
+ "stat_name = :stat_name\n"
+ "FOR UPDATE;\n"
+
+ "IF (SQL % NOTFOUND) THEN\n"
+ " INSERT INTO \"" INDEX_STATS_NAME "\"\n"
+ " VALUES\n"
+ " (\n"
+ " :database_name,\n"
+ " :table_name,\n"
+ " :index_name,\n"
+ " :last_update,\n"
+ " :stat_name,\n"
+ " :stat_value,\n"
+ " :sample_size,\n"
+ " :stat_description\n"
+ " );\n"
+ "ELSE\n"
+ " UPDATE \"" INDEX_STATS_NAME "\" SET\n"
+ " last_update = :last_update,\n"
+ " stat_value = :stat_value,\n"
+ " sample_size = :sample_size,\n"
+ " stat_description = :stat_description\n"
+ " WHERE\n"
+ " database_name = :database_name AND\n"
+ " table_name = :table_name AND\n"
+ " index_name = :index_name AND\n"
+ " stat_name = :stat_name;\n"
+ "END IF;\n"
+ "END;",
+ !caller_has_dict_sys_mutex, trx);
+
+ /* pinfo is freed by que_eval_sql() */
+
+ if (ret != DB_SUCCESS) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error while trying to save index "
+ "statistics for table %s, index %s, "
+ "stat name %s: %s\n",
+ index->table->name, index->name,
+ stat_name, ut_strerr(ret));
+
+ trx->error_state = DB_SUCCESS;
+ }
+
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Save the table's statistics into the persistent statistics storage.
+dict_stats_save() @{
+@return DB_SUCCESS or error code */
+static
+enum db_err
+dict_stats_save(
+/*============*/
+ dict_table_t* table, /*!< in: table */
+ ibool caller_has_dict_sys_mutex)/*!< in: TRUE if the caller
+ owns dict_sys->mutex */
+{
+ trx_t* trx;
+ pars_info_t* pinfo;
+ dict_index_t* index;
+ lint now;
+ enum db_err ret;
+
+	/* MySQL's timestamp is 4 bytes, so we use
+ pars_info_add_int4_literal() which takes a lint arg, so "now" is
+ lint */
+ now = (lint) ut_time();
+
+ trx = trx_allocate_for_background();
+
+ /* Use 'read-uncommitted' so that the SELECTs we execute
+ do not get blocked in case some user has locked the rows we
+ are SELECTing */
+
+ trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
+
+ trx_start_if_not_started(trx);
+
+ pinfo = pars_info_create();
+
+ pars_info_add_literal(pinfo, "database_name", table->name,
+ dict_get_db_name_len(table->name),
+ DATA_VARCHAR, 0);
+
+ pars_info_add_str_literal(pinfo, "table_name",
+ dict_remove_db_name(table->name));
+
+ pars_info_add_int4_literal(pinfo, "last_update", now);
+
+ pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
+
+ pars_info_add_ull_literal(pinfo, "clustered_index_size",
+ table->stat_clustered_index_size);
+
+ pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
+ table->stat_sum_of_other_index_sizes);
+
+ ret = que_eval_sql(pinfo,
+ "PROCEDURE TABLE_STATS_SAVE () IS\n"
+ "dummy CHAR;\n"
+ "BEGIN\n"
+
+ "SELECT database_name INTO dummy\n"
+ "FROM \"" TABLE_STATS_NAME "\"\n"
+ "WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name\n"
+ "FOR UPDATE;\n"
+
+ "IF (SQL % NOTFOUND) THEN\n"
+ " INSERT INTO \"" TABLE_STATS_NAME "\"\n"
+ " VALUES\n"
+ " (\n"
+ " :database_name,\n"
+ " :table_name,\n"
+ " :last_update,\n"
+ " :n_rows,\n"
+ " :clustered_index_size,\n"
+ " :sum_of_other_index_sizes\n"
+ " );\n"
+ "ELSE\n"
+ " UPDATE \"" TABLE_STATS_NAME "\" SET\n"
+ " last_update = :last_update,\n"
+ " n_rows = :n_rows,\n"
+ " clustered_index_size = :clustered_index_size,\n"
+ " sum_of_other_index_sizes = "
+ " :sum_of_other_index_sizes\n"
+ " WHERE\n"
+ " database_name = :database_name AND\n"
+ " table_name = :table_name;\n"
+ "END IF;\n"
+ "END;",
+ !caller_has_dict_sys_mutex, trx);
+
+ /* pinfo is freed by que_eval_sql() */
+
+ if (ret != DB_SUCCESS) {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error while trying to save table "
+ "statistics for table %s: %s\n",
+ table->name, ut_strerr(ret));
+
+ goto end_rollback;
+ }
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ ib_uint64_t stat_n_diff_key_vals[REC_MAX_N_FIELDS];
+ ib_uint64_t stat_n_sample_sizes[REC_MAX_N_FIELDS];
+ ulint n_uniq;
+ ulint i;
+
+ ret = dict_stats_save_index_stat(index, now, "size",
+ index->stat_index_size,
+ NULL,
+ "Number of pages "
+ "in the index",
+ trx,
+ caller_has_dict_sys_mutex);
+ if (ret != DB_SUCCESS) {
+ goto end_rollback;
+ }
+
+ ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
+ index->stat_n_leaf_pages,
+ NULL,
+ "Number of leaf pages "
+ "in the index",
+ trx,
+ caller_has_dict_sys_mutex);
+ if (ret != DB_SUCCESS) {
+ goto end_rollback;
+ }
+
+ n_uniq = dict_index_get_n_unique(index);
+
+ ut_ad(n_uniq + 1 <= UT_ARR_SIZE(stat_n_diff_key_vals));
+
+ memcpy(stat_n_diff_key_vals, index->stat_n_diff_key_vals,
+ (n_uniq + 1) * sizeof(index->stat_n_diff_key_vals[0]));
+
+ ut_ad(n_uniq + 1 <= UT_ARR_SIZE(stat_n_sample_sizes));
+
+ memcpy(stat_n_sample_sizes, index->stat_n_sample_sizes,
+ (n_uniq + 1) * sizeof(index->stat_n_sample_sizes[0]));
+
+ for (i = 1; i <= n_uniq; i++) {
+
+ char stat_name[16];
+ char stat_description[1024];
+ ulint j;
+
+ ut_snprintf(stat_name, sizeof(stat_name),
+ "n_diff_pfx%02lu", i);
+
+			/* craft a string that contains the column names */
+ ut_snprintf(stat_description,
+ sizeof(stat_description),
+ "%s", index->fields[0].name);
+ for (j = 2; j <= i; j++) {
+ size_t len;
+
+ len = strlen(stat_description);
+
+ ut_snprintf(stat_description + len,
+ sizeof(stat_description) - len,
+ ",%s", index->fields[j - 1].name);
+ }
+
+ ret = dict_stats_save_index_stat(
+ index, now, stat_name,
+ stat_n_diff_key_vals[i],
+ &stat_n_sample_sizes[i],
+ stat_description, trx,
+ caller_has_dict_sys_mutex);
+
+ if (ret != DB_SUCCESS) {
+ goto end_rollback;
+ }
+ }
+ }
+
+ trx_commit_for_mysql(trx);
+ ret = DB_SUCCESS;
+ goto end_free;
+
+end_rollback:
+
+ trx->op_info = "rollback of internal transaction on stats tables";
+ trx_rollback_to_savepoint(trx, NULL);
+ trx->op_info = "";
+ ut_a(trx->error_state == DB_SUCCESS);
+
+end_free:
+
+ trx_free_for_background(trx);
+
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Called for the row that is selected by
+SELECT ... FROM mysql.innodb_table_stats WHERE table='...'
+The second argument is a pointer to the table and the fetched stats are
+written to it.
+dict_stats_fetch_table_stats_step() @{
+@return non-NULL dummy */
+static
+ibool
+dict_stats_fetch_table_stats_step(
+/*==============================*/
+ void* node_void, /*!< in: select node */
+ void* table_void) /*!< out: table */
+{
+ sel_node_t* node = (sel_node_t*) node_void;
+ dict_table_t* table = (dict_table_t*) table_void;
+ que_common_t* cnode;
+ int i;
+
+ /* this should loop exactly 3 times - for
+ n_rows,clustered_index_size,sum_of_other_index_sizes */
+ for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
+ cnode != NULL;
+ cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
+ i++) {
+
+ const byte* data;
+ dfield_t* dfield = que_node_get_val(cnode);
+ dtype_t* type = dfield_get_type(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ data = static_cast<const byte*>(dfield_get_data(dfield));
+
+ switch (i) {
+ case 0: /* mysql.innodb_table_stats.n_rows */
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == 8);
+
+ table->stat_n_rows = mach_read_from_8(data);
+
+ break;
+
+ case 1: /* mysql.innodb_table_stats.clustered_index_size */
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == 8);
+
+ table->stat_clustered_index_size
+ = (ulint) mach_read_from_8(data);
+
+ break;
+
+ case 2: /* mysql.innodb_table_stats.sum_of_other_index_sizes */
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == 8);
+
+ table->stat_sum_of_other_index_sizes
+ = (ulint) mach_read_from_8(data);
+
+ break;
+
+ default:
+
+ /* someone changed SELECT
+ n_rows,clustered_index_size,sum_of_other_index_sizes
+ to select more columns from innodb_table_stats without
+ adjusting here */
+ ut_error;
+ }
+ }
+
+ /* if i < 3 this means someone changed the
+ SELECT n_rows,clustered_index_size,sum_of_other_index_sizes
+	to select fewer columns from innodb_table_stats without adjusting here;
+ if i > 3 we would have ut_error'ed earlier */
+ ut_a(i == 3 /*n_rows,clustered_index_size,sum_of_other_index_sizes*/);
+
+ /* XXX this is not used but returning non-NULL is necessary */
+ return(TRUE);
+}
+/* @} */
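+
+/* This callback is meant to be bound to a FETCH cursor in the InnoDB SQL
+parser. A hedged sketch of the expected wiring (the exact caller is outside
+this hunk, so the binding below is an assumption for illustration;
+pars_info_bind_function() is the parser's mechanism for attaching a C
+callback to a cursor):
+
+	pars_info_t*	pinfo = pars_info_create();
+
+	pars_info_bind_function(pinfo, "fetch_table_stats_step",
+				dict_stats_fetch_table_stats_step, table);
+
+	// the PROCEDURE given to que_eval_sql() can then contain e.g.:
+	// FETCH table_stats_cur INTO fetch_table_stats_step;
+*/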
+
+/** Aux struct used to pass a table and a boolean to
+dict_stats_fetch_index_stats_step(). */
+typedef struct index_fetch_struct {
+ dict_table_t* table; /*!< table whose indexes are to be modified */
+	ibool		stats_were_modified; /*!< will be set to TRUE if at
+					least one index's stats were
+					modified */
+} index_fetch_t;
+
+/*********************************************************************//**
+Called for the rows that are selected by
+SELECT ... FROM mysql.innodb_index_stats WHERE table='...'
+The second argument is a pointer to the table and the fetched stats are
+written to its indexes.
+Let a table have N indexes and let each index have Ui unique columns,
+i=1..N; then mysql.innodb_index_stats will have SUM(Ui), i=1..N, rows for
+that table. So this function will be called SUM(Ui) times, where SUM(Ui)
+is of magnitude N*AVG(Ui). In each call it searches for the currently
+fetched index in table->indexes linearly, assuming this list is not
+sorted. Thus, overall,
+fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N
+is the number of indexes.
+This can be improved if we sort table->indexes in a temporary area just once
+and then search in that sorted list. Then the complexity will be O(N*log(N)).
+We assume a table will not have more than 100 indexes, so we go with the
+simpler N^2 algorithm.
+dict_stats_fetch_index_stats_step() @{
+@return non-NULL dummy */
+static
+ibool
+dict_stats_fetch_index_stats_step(
+/*==============================*/
+ void* node_void, /*!< in: select node */
+ void* arg_void) /*!< out: table + a flag that tells if we
+ modified anything */
+{
+ sel_node_t* node = (sel_node_t*) node_void;
+ index_fetch_t* arg = (index_fetch_t*) arg_void;
+ dict_table_t* table = arg->table;
+ dict_index_t* index = NULL;
+ que_common_t* cnode;
+ const char* stat_name = NULL;
+ ulint stat_name_len = ULINT_UNDEFINED;
+ ib_uint64_t stat_value = UINT64_UNDEFINED;
+ ib_uint64_t sample_size = UINT64_UNDEFINED;
+ int i;
+
+ /* this should loop exactly 4 times - for the columns that
+ were selected: index_name,stat_name,stat_value,sample_size */
+ for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
+ cnode != NULL;
+ cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
+ i++) {
+
+ const byte* data;
+ dfield_t* dfield = que_node_get_val(cnode);
+ dtype_t* type = dfield_get_type(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ data = static_cast<const byte*>(dfield_get_data(dfield));
+
+ switch (i) {
+ case 0: /* mysql.innodb_index_stats.index_name */
+
+ ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
+
+ /* search table->indexes for an index whose name
+ matches the fetched one; the fetched index name is
+ in data, is not '\0'-terminated and has length len */
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (strlen(index->name) == len
+ && memcmp(index->name, data, len) == 0) {
+ /* the corresponding index was found */
+ break;
+ }
+ }
+
+ /* if index is NULL here, then
+ mysql.innodb_index_stats contains a row whose
+ index_name does not match any index in the
+ table; this is ok, we just return, silently
+ ignoring such extra rows */
+ if (index == NULL) {
+
+ return(TRUE);
+ }
+
+ break;
+
+ case 1: /* mysql.innodb_index_stats.stat_name */
+
+ ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
+
+ ut_a(index != NULL);
+
+ stat_name = (const char*) data;
+ stat_name_len = len;
+
+ break;
+
+ case 2: /* mysql.innodb_index_stats.stat_value */
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == 8);
+
+ ut_a(index != NULL);
+ ut_a(stat_name != NULL);
+ ut_a(stat_name_len != ULINT_UNDEFINED);
+
+ stat_value = mach_read_from_8(data);
+
+ break;
+
+ case 3: /* mysql.innodb_index_stats.sample_size */
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == 8 || len == UNIV_SQL_NULL);
+
+ ut_a(index != NULL);
+ ut_a(stat_name != NULL);
+ ut_a(stat_name_len != ULINT_UNDEFINED);
+ ut_a(stat_value != UINT64_UNDEFINED);
+
+ if (len == UNIV_SQL_NULL) {
+ break;
+ }
+ /* else */
+
+ sample_size = mach_read_from_8(data);
+
+ break;
+
+ default:
+
+ /* someone changed the query
+ SELECT index_name,stat_name,stat_value,sample_size
+ to fetch more columns from innodb_index_stats
+ without adjusting the code here */
+ ut_error;
+ }
+ }
+
+ /* if i < 4 this means someone changed the
+ SELECT index_name,stat_name,stat_value,sample_size
+ to fetch fewer columns from innodb_index_stats without
+ adjusting the code here; if i > 4 we would have ut_error'ed earlier */
+ ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */);
+
+ ut_a(index != NULL);
+ ut_a(stat_name != NULL);
+ ut_a(stat_name_len != ULINT_UNDEFINED);
+ ut_a(stat_value != UINT64_UNDEFINED);
+ /* sample_size could be UINT64_UNDEFINED here, if it is NULL */
+
+#define PFX "n_diff_pfx"
+#define PFX_LEN 10
+
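+ /* For example (a sketch): a row with stat_name == "n_diff_pfx02" is
+ parsed below into n_pfx == 2 and its stat_value is stored in
+ index->stat_n_diff_key_vals[2], i.e. the number of distinct values
+ in the first 2 columns of the index. */
+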
+ if (stat_name_len == 4 /* strlen("size") */
+ && strncasecmp("size", stat_name, stat_name_len) == 0) {
+ index->stat_index_size = (ulint) stat_value;
+ arg->stats_were_modified = TRUE;
+ } else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
+ && strncasecmp("n_leaf_pages", stat_name, stat_name_len)
+ == 0) {
+ index->stat_n_leaf_pages = (ulint) stat_value;
+ arg->stats_were_modified = TRUE;
+ } else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
+ && strncasecmp(PFX, stat_name, PFX_LEN) == 0) {
+
+ const char* num_ptr;
+ unsigned long n_pfx;
+
+ /* point num_ptr at the "12" in "n_diff_pfx12..." */
+ num_ptr = stat_name + PFX_LEN;
+
+ /* stat_name should have exactly 2 chars appended to PFX
+ and they should be digits */
+ if (stat_name_len != PFX_LEN + 2
+ || num_ptr[0] < '0' || num_ptr[0] > '9'
+ || num_ptr[1] < '0' || num_ptr[1] > '9') {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Ignoring strange row from "
+ "%s WHERE "
+ "database_name = '%.*s' AND "
+ "table_name = '%s' AND "
+ "index_name = '%s' AND "
+ "stat_name = '%.*s'; because stat_name "
+ "is malformed\n",
+ INDEX_STATS_NAME_PRINT,
+ (int) dict_get_db_name_len(table->name),
+ table->name,
+ dict_remove_db_name(table->name),
+ index->name,
+ (int) stat_name_len,
+ stat_name);
+ return(TRUE);
+ }
+ /* else */
+
+ /* extract the 12 from "n_diff_pfx12..." into n_pfx;
+ note that stat_name is not '\0'-terminated */
+ n_pfx = (num_ptr[0] - '0') * 10 + (num_ptr[1] - '0');
+
+ if (n_pfx == 0 || n_pfx > dict_index_get_n_unique(index)) {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Ignoring strange row from "
+ "%s WHERE "
+ "database_name = '%.*s' AND "
+ "table_name = '%s' AND "
+ "index_name = '%s' AND "
+ "stat_name = '%.*s'; because stat_name is "
+ "out of range, the index has %lu unique "
+ "columns\n",
+ INDEX_STATS_NAME_PRINT,
+ (int) dict_get_db_name_len(table->name),
+ table->name,
+ dict_remove_db_name(table->name),
+ index->name,
+ (int) stat_name_len,
+ stat_name,
+ dict_index_get_n_unique(index));
+ return(TRUE);
+ }
+ /* else */
+
+ index->stat_n_diff_key_vals[n_pfx] = stat_value;
+
+ if (sample_size != UINT64_UNDEFINED) {
+ index->stat_n_sample_sizes[n_pfx] = sample_size;
+ } else {
+ /* hmm, strange... the user must have UPDATEd the
+ table manually and SET sample_size = NULL */
+ index->stat_n_sample_sizes[n_pfx] = 0;
+ }
+
+ arg->stats_were_modified = TRUE;
+ } else {
+ /* silently ignore rows with an unknown stat_name;
+ the user may have developed their own stats */
+ }
+
+ /* XXX this is not used but returning non-NULL is necessary */
+ return(TRUE);
+}
+/* @} */
+
+/*********************************************************************//**
+Read table's statistics from the persistent statistics storage.
+dict_stats_fetch_from_ps() @{
+@return DB_SUCCESS or error code */
+static
+enum db_err
+dict_stats_fetch_from_ps(
+/*=====================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool caller_has_dict_sys_mutex)/*!< in: TRUE if the caller
+ owns dict_sys->mutex */
+{
+ index_fetch_t index_fetch_arg;
+ trx_t* trx;
+ pars_info_t* pinfo;
+ enum db_err ret;
+
+ ut_ad(mutex_own(&dict_sys->mutex) == caller_has_dict_sys_mutex);
+
+ trx = trx_allocate_for_background();
+
+ /* Use 'read-uncommitted' so that the SELECTs we execute
+ do not get blocked in case some user has locked the rows we
+ are SELECTing */
+
+ trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
+
+ trx_start_if_not_started(trx);
+
+ pinfo = pars_info_create();
+
+ pars_info_add_literal(pinfo, "database_name", table->name,
+ dict_get_db_name_len(table->name),
+ DATA_VARCHAR, 0);
+
+ pars_info_add_str_literal(pinfo, "table_name",
+ dict_remove_db_name(table->name));
+
+ pars_info_bind_function(pinfo,
+ "fetch_table_stats_step",
+ dict_stats_fetch_table_stats_step,
+ table);
+
+ index_fetch_arg.table = table;
+ index_fetch_arg.stats_were_modified = FALSE;
+ pars_info_bind_function(pinfo,
+ "fetch_index_stats_step",
+ dict_stats_fetch_index_stats_step,
+ &index_fetch_arg);
+
+ ret = que_eval_sql(pinfo,
+ "PROCEDURE FETCH_STATS () IS\n"
+ "found INT;\n"
+ "DECLARE FUNCTION fetch_table_stats_step;\n"
+ "DECLARE FUNCTION fetch_index_stats_step;\n"
+ "DECLARE CURSOR table_stats_cur IS\n"
+ " SELECT\n"
+ /* if you change the selected fields, be
+ sure to adjust
+ dict_stats_fetch_table_stats_step() */
+ " n_rows,\n"
+ " clustered_index_size,\n"
+ " sum_of_other_index_sizes\n"
+ " FROM \"" TABLE_STATS_NAME "\"\n"
+ " WHERE\n"
+ " database_name = :database_name AND\n"
+ " table_name = :table_name;\n"
+ "DECLARE CURSOR index_stats_cur IS\n"
+ " SELECT\n"
+ /* if you change the selected fields, be
+ sure to adjust
+ dict_stats_fetch_index_stats_step() */
+ " index_name,\n"
+ " stat_name,\n"
+ " stat_value,\n"
+ " sample_size\n"
+ " FROM \"" INDEX_STATS_NAME "\"\n"
+ " WHERE\n"
+ " database_name = :database_name AND\n"
+ " table_name = :table_name;\n"
+
+ "BEGIN\n"
+
+ "OPEN table_stats_cur;\n"
+ "FETCH table_stats_cur INTO\n"
+ " fetch_table_stats_step();\n"
+ "IF (SQL % NOTFOUND) THEN\n"
+ " CLOSE table_stats_cur;\n"
+ " RETURN;\n"
+ "END IF;\n"
+ "CLOSE table_stats_cur;\n"
+
+ "OPEN index_stats_cur;\n"
+ "found := 1;\n"
+ "WHILE found = 1 LOOP\n"
+ " FETCH index_stats_cur INTO\n"
+ " fetch_index_stats_step();\n"
+ " IF (SQL % NOTFOUND) THEN\n"
+ " found := 0;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE index_stats_cur;\n"
+
+ "END;",
+ !caller_has_dict_sys_mutex, trx);
+
+ /* pinfo is freed by que_eval_sql() */
+
+ /* XXX If mysql.innodb_index_stats contained fewer rows than the number
+ of indexes in the table, then some of the indexes of the table
+ were left uninitialized. Currently this is ignored and those
+ indexes are left with uninitialized stats until ANALYZE TABLE is
+ run. This condition occurs when the user creates a new index
+ on a table. We could return DB_STATS_DO_NOT_EXIST from here,
+ forcing the usage of transient stats until mysql.innodb_index_stats
+ is complete. */
+
+ trx_commit_for_mysql(trx);
+
+ trx_free_for_background(trx);
+
+ if (!index_fetch_arg.stats_were_modified) {
+ return(DB_STATS_DO_NOT_EXIST);
+ }
+
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization.
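+A rough map from stats_upd_option to behavior follows (a summary only,
+the switch statement in the code below is authoritative):
+- DICT_STATS_RECALC_PERSISTENT[_SILENT]: recalculate the stats and save
+  them in the persistent storage, falling back to transient stats if
+  that storage is missing or corrupted;
+- DICT_STATS_RECALC_TRANSIENT: recalculate quick transient stats only;
+- DICT_STATS_FETCH[_ONLY_IF_NOT_IN_MEMORY]: read the stats from the
+  persistent storage, falling back to transient stats if nothing usable
+  is stored there.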
+dict_stats_update() @{
+@return DB_* error code or DB_SUCCESS */
+UNIV_INTERN
+enum db_err
+dict_stats_update(
+/*==============*/
+ dict_table_t* table, /*!< in/out: table */
+ dict_stats_upd_option_t stats_upd_option,
+ /*!< in: whether to (re) calc
+ the stats or to fetch them from
+ the persistent statistics
+ storage */
+ ibool caller_has_dict_sys_mutex)
+ /*!< in: TRUE if the caller
+ owns dict_sys->mutex */
+{
+ enum db_err ret = DB_ERROR;
+
+ /* check whether caller_has_dict_sys_mutex is set correctly;
+ note that mutex_own() is not implemented in non-debug code so
+ we cannot avoid having this extra param to the current function */
+ ut_ad(caller_has_dict_sys_mutex
+ ? mutex_own(&dict_sys->mutex)
+ : !mutex_own(&dict_sys->mutex));
+
+ if (table->ibd_file_missing) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: cannot calculate statistics for table %s "
+ "because the .ibd file is missing. For help, please "
+ "refer to " REFMAN "innodb-troubleshooting.html\n",
+ table->name);
+
+ return(DB_TABLESPACE_DELETED);
+ }
+
+ /* If we have set a high innodb_force_recovery level, do not calculate
+ statistics, as a badly corrupted index can make the calculation crash. */
+
+ if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
+
+ return(DB_SUCCESS);
+ }
+
+ switch (stats_upd_option) {
+ case DICT_STATS_RECALC_PERSISTENT:
+ case DICT_STATS_RECALC_PERSISTENT_SILENT:
+ /* Persistent recalculation requested, called from
+ ANALYZE TABLE or from TRUNCATE TABLE */
+
+ /* FTS auxiliary tables do not need persistent stats */
+ if ((ut_strcount(table->name, "FTS") > 0
+ && (ut_strcount(table->name, "CONFIG") > 0
+ || ut_strcount(table->name, "INDEX") > 0
+ || ut_strcount(table->name, "DELETED") > 0
+ || ut_strcount(table->name, "DOC_ID") > 0
+ || ut_strcount(table->name, "ADDED") > 0))) {
+ goto transient;
+ }
+
+ /* check if the persistent statistics storage exists
+ before calling the potentially slow function
+ dict_stats_update_persistent(); that is a
+ prerequisite for dict_stats_save() succeeding */
+ if (dict_stats_persistent_storage_check(
+ caller_has_dict_sys_mutex)) {
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ ret = dict_stats_update_persistent(table);
+
+ /* XXX Currently dict_stats_save() would read the
+ stats from the table without dict_table_stats_lock()
+ which means it could save inconsistent data on the
+ disk. This is because we must call
+ dict_table_stats_lock() after locking dict_sys->mutex.
+ A solution is to copy here the stats to a temporary
+ buffer while holding the _stats_lock(), release it,
+ and pass that buffer to dict_stats_save(). */
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+
+ if (ret == DB_SUCCESS) {
+ ret = dict_stats_save(
+ table,
+ caller_has_dict_sys_mutex);
+ }
+
+ return(ret);
+ }
+ /* else */
+
+ /* Fall back to transient stats since the persistent
+ storage is not present or is corrupted */
+
+ if (stats_upd_option == DICT_STATS_RECALC_PERSISTENT) {
+
+ ut_print_timestamp(stderr);
+ /* XXX add link to the doc about storage
+ creation */
+ fprintf(stderr,
+ " InnoDB: Recalculation of persistent "
+ "statistics requested but the required "
+ "persistent statistics storage is not "
+ "present or is corrupted. "
+ "Using quick transient stats "
+ "instead.\n");
+ }
+
+ goto transient;
+
+ case DICT_STATS_RECALC_TRANSIENT:
+
+ goto transient;
+
+ case DICT_STATS_FETCH:
+ case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
+ /* fetch requested, either fetch from persistent statistics
+ storage or use the old method */
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ if (stats_upd_option == DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY
+ && table->stat_initialized) {
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+ return(DB_SUCCESS);
+ }
+ /* else */
+
+ /* Must unlock because otherwise there is a lock order
+ violation with dict_sys->mutex below. Declare stats to be
+ initialized before unlocking. */
+ table->stat_initialized = TRUE;
+ dict_table_stats_unlock(table, RW_X_LATCH);
+
+ if (strchr(table->name, '/') == NULL
+ || strcmp(table->name, INDEX_STATS_NAME) == 0
+ || strcmp(table->name, TABLE_STATS_NAME) == 0
+ || (ut_strcount(table->name, "FTS") > 0
+ && (ut_strcount(table->name, "CONFIG") > 0
+ || ut_strcount(table->name, "INDEX") > 0
+ || ut_strcount(table->name, "DELETED") > 0
+ || ut_strcount(table->name, "DOC_ID") > 0
+ || ut_strcount(table->name, "ADDED") > 0))) {
+ /* Use the quick transient stats method for
+ InnoDB internal tables, because we know the
+ persistent stats storage does not contain data
+ for them */
+
+ goto transient;
+ }
+ /* else */
+
+ if (dict_stats_persistent_storage_check(
+ caller_has_dict_sys_mutex)) {
+
+ ret = dict_stats_fetch_from_ps(table,
+ caller_has_dict_sys_mutex);
+
+ if (ret == DB_STATS_DO_NOT_EXIST
+ || (ret != DB_SUCCESS && stats_upd_option
+ == DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY)) {
+ /* Stats for this particular table do not
+ exist, or we have been called from open
+ table, which needs to initialize the stats;
+ calculate the quick transient statistics */
+ goto transient;
+ }
+ /* else */
+
+ return(ret);
+ } else {
+ /* persistent statistics storage does not exist,
+ calculate the transient stats */
+ goto transient;
+ }
+
+ break;
+
+ /* no "default:" in order to produce a compilation warning
+ about unhandled enumeration value */
+ }
+
+transient:
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ dict_stats_update_transient(table);
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+
+ return(DB_SUCCESS);
+}
+/* @} */
+
+/*********************************************************************//**
+Close the stats tables. Should always be called after successful
+dict_stats_open(). It will free the dict_stats handle.
+dict_stats_close() @{ */
+UNIV_INLINE
+void
+dict_stats_close(
+/*=============*/
+ dict_stats_t* dict_stats) /*!< in/own: Handle to open
+ statistics tables */
+{
+ if (dict_stats->table_stats != NULL) {
+ dict_table_close(dict_stats->table_stats, FALSE);
+ dict_stats->table_stats = NULL;
+ }
+
+ if (dict_stats->index_stats != NULL) {
+ dict_table_close(dict_stats->index_stats, FALSE);
+ dict_stats->index_stats = NULL;
+ }
+
+ mem_free(dict_stats);
+}
+/* @} */
+
+/*********************************************************************//**
+Open stats tables to prevent these tables from being DROPped.
+Also check whether they have the correct structure. The caller
+must call dict_stats_close() when done issuing DML against the tables.
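+A minimal usage sketch (it mirrors dict_stats_delete_index_stats() below;
+the DML itself is elided):
+  dict_stats_t*  dict_stats = dict_stats_open();
+  if (dict_stats == NULL) {
+    return(DB_SUCCESS);  -- stats tables missing or with wrong structure
+  }
+  ... DELETE FROM the stats tables via que_eval_sql() ...
+  dict_stats_close(dict_stats);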
+dict_stats_open() @{
+@return pointer to open tables or NULL on failure */
+UNIV_INLINE
+dict_stats_t*
+dict_stats_open(void)
+/*=================*/
+{
+ dict_stats_t* dict_stats;
+
+ dict_stats = static_cast<dict_stats_t*>(
+ mem_zalloc(sizeof(*dict_stats)));
+
+ dict_stats->table_stats = dict_table_open_on_name_no_stats(
+ TABLE_STATS_NAME, FALSE, DICT_ERR_IGNORE_NONE);
+
+ dict_stats->index_stats = dict_table_open_on_name_no_stats(
+ INDEX_STATS_NAME, FALSE, DICT_ERR_IGNORE_NONE);
+
+ /* Check whether the tables have the correct structure. If so, then
+ after this function we can safely DELETE from them without worrying
+ that they may get DROPped or DDLed, because the open above has
+ increased their reference counts. */
+
+ if (dict_stats->table_stats == NULL
+ || dict_stats->index_stats == NULL
+ || !dict_stats_persistent_storage_check(FALSE)) {
+
+ /* There was an error, close the tables and free the handle. */
+ dict_stats_close(dict_stats);
+ dict_stats = NULL;
+ }
+
+ return(dict_stats);
+}
+/* @} */
+
+/*********************************************************************//**
+Removes the information for a particular index's stats from the persistent
+storage if it exists and if there is data stored for this index.
+The transaction is not committed here; it must not be committed in this
+function because it is the user trx that is running DROP INDEX, and it
+will be committed at the very end of dropping the index.
+A note from Marko on why we cannot edit user and sys_* tables in one trx:
+marko: The problem is that ibuf merges should be disabled while we are
+rolling back dict transactions.
+marko: If ibuf merges are not disabled, we need to scan the *.ibd files.
+But we shouldn't open *.ibd files before we have rolled back dict
+transactions and opened the SYS_* records for the *.ibd files.
+dict_stats_delete_index_stats() @{
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+enum db_err
+dict_stats_delete_index_stats(
+/*==========================*/
+ dict_index_t* index, /*!< in: index */
+ trx_t* trx, /*!< in: transaction to use */
+ char* errstr, /*!< out: error message if != DB_SUCCESS
+ is returned */
+ ulint errstr_sz)/*!< in: size of the errstr buffer */
+{
+ char database_name[MAX_DATABASE_NAME_LEN + 1];
+ const char* table_name;
+ pars_info_t* pinfo;
+ enum db_err ret;
+ dict_stats_t* dict_stats;
+ void* mysql_thd;
+
+ /* skip indexes whose table names do not contain a database name
+ e.g. if we are dropping an index from SYS_TABLES */
+ if (strchr(index->table_name, '/') == NULL) {
+
+ return(DB_SUCCESS);
+ }
+
+ /* Increment table reference count to prevent the tables from
+ being DROPped just before que_eval_sql(). */
+ dict_stats = dict_stats_open();
+
+ if (dict_stats == NULL) {
+ /* stats tables do not exist or have unexpected structure */
+ return(DB_SUCCESS);
+ }
+
+ /* the stats tables cannot be DROPped now */
+
+ ut_snprintf(database_name, sizeof(database_name), "%.*s",
+ (int) dict_get_db_name_len(index->table_name),
+ index->table_name);
+
+ table_name = dict_remove_db_name(index->table_name);
+
+ pinfo = pars_info_create();
+
+ pars_info_add_str_literal(pinfo, "database_name", database_name);
+
+ pars_info_add_str_literal(pinfo, "table_name", table_name);
+
+ pars_info_add_str_literal(pinfo, "index_name", index->name);
+
+ /* Force lock wait timeout to be instantaneous because the incoming
+ transaction was created via MySQL. */
+
+ mysql_thd = trx->mysql_thd;
+ trx->mysql_thd = NULL;
+
+ ret = que_eval_sql(pinfo,
+ "PROCEDURE DROP_INDEX_STATS () IS\n"
+ "BEGIN\n"
+ "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name AND\n"
+ "index_name = :index_name;\n"
+ "END;\n",
+ TRUE,
+ trx);
+
+ trx->mysql_thd = mysql_thd;
+
+ /* pinfo is freed by que_eval_sql() */
+
+ /* do not commit here, see this function's comment */
+
+ if (ret != DB_SUCCESS) {
+
+ ut_snprintf(errstr, errstr_sz,
+ "Unable to delete statistics for index %s "
+ "from %s%s. They can be deleted later using "
+ "DELETE FROM %s WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s' AND "
+ "index_name = '%s';",
+ index->name,
+ INDEX_STATS_NAME_PRINT,
+ (ret == DB_LOCK_WAIT_TIMEOUT
+ ? " because the rows are locked"
+ : ""),
+ INDEX_STATS_NAME_PRINT,
+ database_name,
+ table_name,
+ index->name);
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: %s\n", errstr);
+
+ trx->error_state = DB_SUCCESS;
+ }
+
+ dict_stats_close(dict_stats);
+
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Removes the statistics for a table and all of its indexes from the
+persistent statistics storage if it exists and if there is data stored for
+the table. This function creates its own transaction and commits it.
+dict_stats_delete_table_stats() @{
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+enum db_err
+dict_stats_delete_table_stats(
+/*==========================*/
+ const char* table_name, /*!< in: table name */
+ char* errstr, /*!< out: error message
+ if != DB_SUCCESS is returned */
+ ulint errstr_sz) /*!< in: size of errstr buffer */
+{
+ char database_name[MAX_DATABASE_NAME_LEN + 1];
+ const char* table_name_strip; /* without leading db name */
+ trx_t* trx;
+ pars_info_t* pinfo;
+ enum db_err ret = DB_ERROR;
+ dict_stats_t* dict_stats;
+
+ /* skip tables that do not contain a database name
+ e.g. if we are dropping SYS_TABLES */
+ if (strchr(table_name, '/') == NULL) {
+
+ return(DB_SUCCESS);
+ }
+
+ /* skip innodb_table_stats and innodb_index_stats themselves */
+ if (strcmp(table_name, TABLE_STATS_NAME) == 0
+ || strcmp(table_name, INDEX_STATS_NAME) == 0) {
+
+ return(DB_SUCCESS);
+ }
+
+ /* Create a new private trx */
+
+ trx = trx_allocate_for_background();
+
+ /* Use 'read-uncommitted' so that the SELECTs we execute
+ do not get blocked in case some user has locked the rows we
+ are SELECTing */
+
+ trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
+
+ trx_start_if_not_started(trx);
+
+ /* Increment table reference count to prevent the tables from
+ being DROPped just before que_eval_sql(). */
+ dict_stats = dict_stats_open();
+
+ if (dict_stats == NULL) {
+ /* stats tables do not exist or have unexpected structure */
+ ret = DB_SUCCESS;
+ goto commit_and_return;
+ }
+
+ ut_snprintf(database_name, sizeof(database_name), "%.*s",
+ (int) dict_get_db_name_len(table_name),
+ table_name);
+
+ table_name_strip = dict_remove_db_name(table_name);
+
+ pinfo = pars_info_create();
+
+ pars_info_add_str_literal(pinfo, "database_name", database_name);
+
+ pars_info_add_str_literal(pinfo, "table_name", table_name_strip);
+
+ ret = que_eval_sql(pinfo,
+ "PROCEDURE DROP_TABLE_STATS () IS\n"
+ "BEGIN\n"
+
+ "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name;\n"
+
+ "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name;\n"
+
+ "END;\n",
+ TRUE,
+ trx);
+
+ /* pinfo is freed by que_eval_sql() */
+
+ if (ret != DB_SUCCESS) {
+
+ ut_snprintf(errstr, errstr_sz,
+ "Unable to delete statistics for table %s.%s "
+ "from %s or %s%s. "
+ "They can be deleted later using "
+
+ "DELETE FROM %s WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s'; "
+
+ "DELETE FROM %s WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s';",
+
+ database_name, table_name_strip,
+ TABLE_STATS_NAME_PRINT, INDEX_STATS_NAME_PRINT,
+
+ (ret == DB_LOCK_WAIT_TIMEOUT
+ ? " because the rows are locked"
+ : ""),
+
+ INDEX_STATS_NAME_PRINT,
+ database_name, table_name_strip,
+
+ TABLE_STATS_NAME_PRINT,
+ database_name, table_name_strip);
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: %s\n", errstr);
+ }
+
+ dict_stats_close(dict_stats);
+
+commit_and_return:
+
+ trx_commit_for_mysql(trx);
+
+ trx_free_for_background(trx);
+
+ return(ret);
+}
+/* @} */
+
+/* tests @{ */
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+/* The following unit tests exercise some of the functions in this file
+individually; such testing cannot be performed via SQL by the mysql-test
+framework. */
+
+/* test_dict_table_schema_check() @{ */
+void
+test_dict_table_schema_check()
+{
+ /*
+ CREATE TABLE tcheck (
+ c01 VARCHAR(123),
+ c02 INT,
+ c03 INT NOT NULL,
+ c04 INT UNSIGNED,
+ c05 BIGINT,
+ c06 BIGINT UNSIGNED NOT NULL,
+ c07 TIMESTAMP
+ ) ENGINE=INNODB;
+ */
+ /* definition for the table 'test/tcheck' */
+ dict_col_meta_t columns[] = {
+ {"c01", DATA_VARCHAR, 0, 123},
+ {"c02", DATA_INT, 0, 4},
+ {"c03", DATA_INT, DATA_NOT_NULL, 4},
+ {"c04", DATA_INT, DATA_UNSIGNED, 4},
+ {"c05", DATA_INT, 0, 8},
+ {"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8},
+ {"c07", DATA_INT, 0, 4},
+ {"c_extra", DATA_INT, 0, 4}
+ };
+ dict_table_schema_t schema = {
+ "test/tcheck",
+ 0 /* will be set individually for each test below */,
+ columns
+ };
+ char errstr[512];
+
+ ut_snprintf(errstr, sizeof(errstr), "Table not found");
+
+ /* prevent any data dictionary modifications while we are checking
+ the tables' structure */
+
+ mutex_enter(&(dict_sys->mutex));
+
+ /* check that a valid table is reported as valid */
+ schema.n_cols = 7;
+ if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+ == DB_SUCCESS) {
+ printf("OK: test.tcheck ok\n");
+ } else {
+ printf("ERROR: %s\n", errstr);
+ printf("ERROR: test.tcheck not present or corrupted\n");
+ goto test_dict_table_schema_check_end;
+ }
+
+ /* check columns with wrong length */
+ schema.columns[1].len = 8;
+ if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+ != DB_SUCCESS) {
+ printf("OK: test.tcheck.c02 has different length and is "
+ "reported as corrupted\n");
+ } else {
+ printf("OK: test.tcheck.c02 has different length but is "
+ "reported as ok\n");
+ goto test_dict_table_schema_check_end;
+ }
+ schema.columns[1].len = 4;
+
+ /* request that c02 be NOT NULL although it actually does not
+ have this flag set */
+ schema.columns[1].prtype_mask |= DATA_NOT_NULL;
+ if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+ != DB_SUCCESS) {
+ printf("OK: test.tcheck.c02 does not have NOT NULL while "
+ "it should and is reported as corrupted\n");
+ } else {
+ printf("ERROR: test.tcheck.c02 does not have NOT NULL while "
+ "it should and is not reported as corrupted\n");
+ goto test_dict_table_schema_check_end;
+ }
+ schema.columns[1].prtype_mask &= ~DATA_NOT_NULL;
+
+ /* check a table that contains some extra columns */
+ schema.n_cols = 6;
+ if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+ == DB_SUCCESS) {
+ printf("ERROR: test.tcheck has more columns but is not "
+ "reported as corrupted\n");
+ goto test_dict_table_schema_check_end;
+ } else {
+ printf("OK: test.tcheck has more columns and is "
+ "reported as corrupted\n");
+ }
+
+ /* check a table that has some columns missing */
+ schema.n_cols = 8;
+ if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+ != DB_SUCCESS) {
+ printf("OK: test.tcheck has missing columns and is "
+ "reported as corrupted\n");
+ } else {
+ printf("ERROR: test.tcheck has missing columns but is "
+ "reported as ok\n");
+ goto test_dict_table_schema_check_end;
+ }
+
+ /* check non-existent table */
+ schema.table_name = "test/tcheck_nonexistent";
+ if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+ != DB_SUCCESS) {
+ printf("OK: test.tcheck_nonexistent is not present\n");
+ } else {
+ printf("ERROR: test.tcheck_nonexistent is present!?\n");
+ goto test_dict_table_schema_check_end;
+ }
+
+test_dict_table_schema_check_end:
+
+ mutex_exit(&(dict_sys->mutex));
+}
+/* @} */
+
+/* save/fetch aux macros @{ */
+#define TEST_DATABASE_NAME "foobardb"
+#define TEST_TABLE_NAME "test_dict_stats"
+
+#define TEST_N_ROWS 111
+#define TEST_CLUSTERED_INDEX_SIZE 222
+#define TEST_SUM_OF_OTHER_INDEX_SIZES 333
+
+#define TEST_IDX1_NAME "tidx1"
+#define TEST_IDX1_COL1_NAME "tidx1_col1"
+#define TEST_IDX1_INDEX_SIZE 123
+#define TEST_IDX1_N_LEAF_PAGES 234
+#define TEST_IDX1_N_DIFF1 50
+#define TEST_IDX1_N_DIFF1_SAMPLE_SIZE 500
+
+#define TEST_IDX2_NAME "tidx2"
+#define TEST_IDX2_COL1_NAME "tidx2_col1"
+#define TEST_IDX2_COL2_NAME "tidx2_col2"
+#define TEST_IDX2_COL3_NAME "tidx2_col3"
+#define TEST_IDX2_COL4_NAME "tidx2_col4"
+#define TEST_IDX2_INDEX_SIZE 321
+#define TEST_IDX2_N_LEAF_PAGES 432
+#define TEST_IDX2_N_DIFF1 60
+#define TEST_IDX2_N_DIFF1_SAMPLE_SIZE 600
+#define TEST_IDX2_N_DIFF2 61
+#define TEST_IDX2_N_DIFF2_SAMPLE_SIZE 610
+#define TEST_IDX2_N_DIFF3 62
+#define TEST_IDX2_N_DIFF3_SAMPLE_SIZE 620
+#define TEST_IDX2_N_DIFF4 63
+#define TEST_IDX2_N_DIFF4_SAMPLE_SIZE 630
+/* @} */
+
+/* test_dict_stats_save() @{ */
+void
+test_dict_stats_save()
+{
+ dict_table_t table;
+ dict_index_t index1;
+ dict_field_t index1_fields[1];
+ ib_uint64_t index1_stat_n_diff_key_vals[2];
+ ib_uint64_t index1_stat_n_sample_sizes[2];
+ dict_index_t index2;
+ dict_field_t index2_fields[4];
+ ib_uint64_t index2_stat_n_diff_key_vals[5];
+ ib_uint64_t index2_stat_n_sample_sizes[5];
+ enum db_err ret;
+
+ /* craft a dummy dict_table_t */
+ table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
+ table.stat_n_rows = TEST_N_ROWS;
+ table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
+ table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
+ UT_LIST_INIT(table.indexes);
+ UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
+ UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
+#ifdef UNIV_DEBUG
+ table.magic_n = DICT_TABLE_MAGIC_N;
+#endif /* UNIV_DEBUG */
+
+ index1.name = TEST_IDX1_NAME;
+ index1.table = &table;
+#ifdef UNIV_DEBUG
+ index1.magic_n = DICT_INDEX_MAGIC_N;
+#endif /* UNIV_DEBUG */
+ index1.to_be_dropped = 0;
+ index1.cached = 1;
+ index1.n_uniq = 1;
+ index1.fields = index1_fields;
+ index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
+ index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
+ index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
+ index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
+ index1_fields[0].name = TEST_IDX1_COL1_NAME;
+ index1_stat_n_diff_key_vals[0] = 1; /* dummy */
+ index1_stat_n_diff_key_vals[1] = TEST_IDX1_N_DIFF1;
+ index1_stat_n_sample_sizes[0] = 0; /* dummy */
+ index1_stat_n_sample_sizes[1] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;
+
+ index2.name = TEST_IDX2_NAME;
+ index2.table = &table;
+#ifdef UNIV_DEBUG
+ index2.magic_n = DICT_INDEX_MAGIC_N;
+#endif /* UNIV_DEBUG */
+ index2.to_be_dropped = 0;
+ index2.cached = 1;
+ index2.n_uniq = 4;
+ index2.fields = index2_fields;
+ index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
+ index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
+ index2.stat_index_size = TEST_IDX2_INDEX_SIZE;
+ index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES;
+ index2_fields[0].name = TEST_IDX2_COL1_NAME;
+ index2_fields[1].name = TEST_IDX2_COL2_NAME;
+ index2_fields[2].name = TEST_IDX2_COL3_NAME;
+ index2_fields[3].name = TEST_IDX2_COL4_NAME;
+ index2_stat_n_diff_key_vals[0] = 1; /* dummy */
+ index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF1;
+ index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF2;
+ index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF3;
+ index2_stat_n_diff_key_vals[4] = TEST_IDX2_N_DIFF4;
+ index2_stat_n_sample_sizes[0] = 0; /* dummy */
+ index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[4] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;
+
+ ret = dict_stats_save(&table, FALSE);
+
+ ut_a(ret == DB_SUCCESS);
+
+ printf("\nOK: stats saved successfully, now go ahead and read "
+ "what's inside %s and %s:\n\n",
+ TABLE_STATS_NAME_PRINT,
+ INDEX_STATS_NAME_PRINT);
+
+ printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n"
+ "FROM %s\n"
+ "WHERE\n"
+ "database_name = '%s' AND\n"
+ "table_name = '%s' AND\n"
+ "n_rows = %d AND\n"
+ "clustered_index_size = %d AND\n"
+ "sum_of_other_index_sizes = %d;\n"
+ "\n",
+ TABLE_STATS_NAME_PRINT,
+ TEST_DATABASE_NAME,
+ TEST_TABLE_NAME,
+ TEST_N_ROWS,
+ TEST_CLUSTERED_INDEX_SIZE,
+ TEST_SUM_OF_OTHER_INDEX_SIZES);
+
+ printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n"
+ "FROM %s\n"
+ "WHERE\n"
+ "database_name = '%s' AND\n"
+ "table_name = '%s' AND\n"
+ "index_name = '%s' AND\n"
+ "(\n"
+ " (stat_name = 'size' AND stat_value = %d AND"
+ " sample_size IS NULL) OR\n"
+ " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
+ " sample_size IS NULL) OR\n"
+ " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s')\n"
+ ");\n"
+ "\n",
+ INDEX_STATS_NAME_PRINT,
+ TEST_DATABASE_NAME,
+ TEST_TABLE_NAME,
+ TEST_IDX1_NAME,
+ TEST_IDX1_INDEX_SIZE,
+ TEST_IDX1_N_LEAF_PAGES,
+ TEST_IDX1_N_DIFF1,
+ TEST_IDX1_N_DIFF1_SAMPLE_SIZE,
+ TEST_IDX1_COL1_NAME);
+
+ printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n"
+ "FROM %s\n"
+ "WHERE\n"
+ "database_name = '%s' AND\n"
+ "table_name = '%s' AND\n"
+ "index_name = '%s' AND\n"
+ "(\n"
+ " (stat_name = 'size' AND stat_value = %d AND"
+ " sample_size IS NULL) OR\n"
+ " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
+ " sample_size IS NULL) OR\n"
+ " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s') OR\n"
+ " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s,%s') OR\n"
+ " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n"
+ " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n"
+ ");\n"
+ "\n",
+ INDEX_STATS_NAME_PRINT,
+ TEST_DATABASE_NAME,
+ TEST_TABLE_NAME,
+ TEST_IDX2_NAME,
+ TEST_IDX2_INDEX_SIZE,
+ TEST_IDX2_N_LEAF_PAGES,
+ TEST_IDX2_N_DIFF1,
+ TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME,
+ TEST_IDX2_N_DIFF2,
+ TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
+ TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME,
+ TEST_IDX2_N_DIFF3,
+ TEST_IDX2_N_DIFF3_SAMPLE_SIZE,
+ TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
+ TEST_IDX2_N_DIFF4,
+ TEST_IDX2_N_DIFF4_SAMPLE_SIZE,
+ TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
+ TEST_IDX2_COL4_NAME);
+}
+/* @} */
+
+/* test_dict_stats_fetch_from_ps() @{ */
+void
+test_dict_stats_fetch_from_ps()
+{
+ dict_table_t table;
+ dict_index_t index1;
+ ib_uint64_t index1_stat_n_diff_key_vals[2];
+ ib_uint64_t index1_stat_n_sample_sizes[2];
+ dict_index_t index2;
+ ib_uint64_t index2_stat_n_diff_key_vals[5];
+ ib_uint64_t index2_stat_n_sample_sizes[5];
+ enum db_err ret;
+
+ /* craft a dummy dict_table_t */
+ table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
+ UT_LIST_INIT(table.indexes);
+ UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
+ UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
+#ifdef UNIV_DEBUG
+ table.magic_n = DICT_TABLE_MAGIC_N;
+#endif /* UNIV_DEBUG */
+
+ index1.name = TEST_IDX1_NAME;
+#ifdef UNIV_DEBUG
+ index1.magic_n = DICT_INDEX_MAGIC_N;
+#endif /* UNIV_DEBUG */
+ index1.cached = 1;
+ index1.n_uniq = 1;
+ index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
+ index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
+
+ index2.name = TEST_IDX2_NAME;
+#ifdef UNIV_DEBUG
+ index2.magic_n = DICT_INDEX_MAGIC_N;
+#endif /* UNIV_DEBUG */
+ index2.cached = 1;
+ index2.n_uniq = 4;
+ index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
+ index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
+
+ ret = dict_stats_fetch_from_ps(&table, FALSE);
+
+ ut_a(ret == DB_SUCCESS);
+
+ ut_a(table.stat_n_rows == TEST_N_ROWS);
+ ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE);
+ ut_a(table.stat_sum_of_other_index_sizes
+ == TEST_SUM_OF_OTHER_INDEX_SIZES);
+
+ ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
+ ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
+ ut_a(index1_stat_n_diff_key_vals[1] == TEST_IDX1_N_DIFF1);
+ ut_a(index1_stat_n_sample_sizes[1] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
+
+ ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
+ ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
+ ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF1);
+ ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF2);
+ ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF3);
+ ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[4] == TEST_IDX2_N_DIFF4);
+ ut_a(index2_stat_n_sample_sizes[4] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);
+
+ printf("OK: fetch successful\n");
+}
+/* @} */
+
+/* test_dict_stats_all() @{ */
+void
+test_dict_stats_all()
+{
+ test_dict_table_schema_check();
+
+ test_dict_stats_save();
+
+ test_dict_stats_fetch_from_ps();
+}
+/* @} */
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
+/* @} */
+
+#endif /* UNIV_HOTBACKUP */
+
+/* vim: set foldmethod=marker foldmarker=@{,@}: */