author    Sergei Golubchik <vuvova@gmail.com>  2015-05-04 19:15:28 +0200
committer Sergei Golubchik <vuvova@gmail.com>  2015-05-04 19:15:28 +0200
commit    14a142fca67b9e1fb3f0250fda093f5b967f0138 (patch)
tree      dd49e0666c863d80b5c50642e36a9c945ea12b8a /storage/xtradb/page
parent    dfb001edcd4b16bd4370b08b0176df78c4c5523f (diff)
download  mariadb-git-14a142fca67b9e1fb3f0250fda093f5b967f0138.tar.gz
move to storage/xtradb
Diffstat (limited to 'storage/xtradb/page')
-rw-r--r--  storage/xtradb/page/page0cur.cc   2145
-rw-r--r--  storage/xtradb/page/page0page.cc  2813
-rw-r--r--  storage/xtradb/page/page0zip.cc   4952
3 files changed, 9910 insertions, 0 deletions
diff --git a/storage/xtradb/page/page0cur.cc b/storage/xtradb/page/page0cur.cc
new file mode 100644
index 00000000000..f5f7e1299ce
--- /dev/null
+++ b/storage/xtradb/page/page0cur.cc
@@ -0,0 +1,2145 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file page/page0cur.cc
+The page cursor
+
+Created 10/4/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "page0cur.h"
+#ifdef UNIV_NONINL
+#include "page0cur.ic"
+#endif
+
+#include "page0zip.h"
+#include "btr0btr.h"
+#include "mtr0log.h"
+#include "log0recv.h"
+#include "ut0ut.h"
+#ifndef UNIV_HOTBACKUP
+#include "rem0cmp.h"
+
+#ifdef PAGE_CUR_ADAPT
+# ifdef UNIV_SEARCH_PERF_STAT
+static ulint page_cur_short_succ = 0;
+# endif /* UNIV_SEARCH_PERF_STAT */
+
+/*******************************************************************//**
+This is a linear congruential pseudo-random number generator (PRNG).
+It returns a pseudo-random number between 0 and 2^64-1 inclusive. The
+formula and the constants
+being used are:
+X[n+1] = (a * X[n] + c) mod m
+where:
+X[0] = ut_time_us(NULL)
+a = 1103515245 (3^5 * 5 * 7 * 129749)
+c = 12345 (3 * 5 * 823)
+m = 18446744073709551616 (2^64)
+
+@return number between 0 and 2^64-1 */
+static
+ib_uint64_t
+page_cur_lcg_prng(void)
+/*===================*/
+{
+#define LCG_a 1103515245
+#define LCG_c 12345
+ static ib_uint64_t lcg_current = 0;
+ static ibool initialized = FALSE;
+
+ if (!initialized) {
+ lcg_current = (ib_uint64_t) ut_time_us(NULL);
+ initialized = TRUE;
+ }
+
+ /* no need to "% 2^64" explicitly because lcg_current is
+ 64 bit and this will be done anyway */
+ lcg_current = LCG_a * lcg_current + LCG_c;
+
+ return(lcg_current);
+}
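
The recurrence is easy to check in isolation. A minimal standalone sketch of
the same LCG, with a fixed seed instead of ut_time_us(NULL) and a hypothetical
lcg_next() helper; the 64-bit unsigned wrap-around supplies the mod 2^64:

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t lcg_next(uint64_t x)
	{
		/* X[n+1] = (a * X[n] + c) mod 2^64; wraps implicitly */
		return 1103515245ULL * x + 12345ULL;
	}

	int main(void)
	{
		uint64_t x = 42;	/* fixed seed for illustration */
		for (int i = 0; i < 3; i++) {
			x = lcg_next(x);
			printf("%llu\n", (unsigned long long) x);
		}
		return 0;
	}
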
+
+/****************************************************************//**
+Tries a search shortcut based on the last insert.
+@return TRUE on success */
+UNIV_INLINE
+ibool
+page_cur_try_search_shortcut(
+/*=========================*/
+ const buf_block_t* block, /*!< in: index page */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dtuple_t* tuple, /*!< in: data tuple */
+ ulint* iup_matched_fields,
+ /*!< in/out: already matched
+ fields in upper limit record */
+ ulint* iup_matched_bytes,
+ /*!< in/out: already matched
+ bytes in a field not yet
+ completely matched */
+ ulint* ilow_matched_fields,
+ /*!< in/out: already matched
+ fields in lower limit record */
+ ulint* ilow_matched_bytes,
+ /*!< in/out: already matched
+ bytes in a field not yet
+ completely matched */
+ page_cur_t* cursor) /*!< out: page cursor */
+{
+ const rec_t* rec;
+ const rec_t* next_rec;
+ ulint low_match;
+ ulint low_bytes;
+ ulint up_match;
+ ulint up_bytes;
+#ifdef UNIV_SEARCH_DEBUG
+ page_cur_t cursor2;
+#endif
+ ibool success = FALSE;
+ const page_t* page = buf_block_get_frame(block);
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_ad(dtuple_check_typed(tuple));
+
+ rec = page_header_get_ptr(page, PAGE_LAST_INSERT);
+ offsets = rec_get_offsets(rec, index, offsets,
+ dtuple_get_n_fields(tuple), &heap);
+
+ ut_ad(rec);
+ ut_ad(page_rec_is_user_rec(rec));
+
+ ut_pair_min(&low_match, &low_bytes,
+ *ilow_matched_fields, *ilow_matched_bytes,
+ *iup_matched_fields, *iup_matched_bytes);
+
+ up_match = low_match;
+ up_bytes = low_bytes;
+
+ if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets,
+ &low_match, &low_bytes) < 0) {
+ goto exit_func;
+ }
+
+ next_rec = page_rec_get_next_const(rec);
+ offsets = rec_get_offsets(next_rec, index, offsets,
+ dtuple_get_n_fields(tuple), &heap);
+
+ if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets,
+ &up_match, &up_bytes) >= 0) {
+ goto exit_func;
+ }
+
+ page_cur_position(rec, block, cursor);
+
+#ifdef UNIV_SEARCH_DEBUG
+ page_cur_search_with_match(block, index, tuple, PAGE_CUR_DBG,
+ iup_matched_fields,
+ iup_matched_bytes,
+ ilow_matched_fields,
+ ilow_matched_bytes,
+ &cursor2);
+ ut_a(cursor2.rec == cursor->rec);
+
+ if (!page_rec_is_supremum(next_rec)) {
+
+ ut_a(*iup_matched_fields == up_match);
+ ut_a(*iup_matched_bytes == up_bytes);
+ }
+
+ ut_a(*ilow_matched_fields == low_match);
+ ut_a(*ilow_matched_bytes == low_bytes);
+#endif
+ if (!page_rec_is_supremum(next_rec)) {
+
+ *iup_matched_fields = up_match;
+ *iup_matched_bytes = up_bytes;
+ }
+
+ *ilow_matched_fields = low_match;
+ *ilow_matched_bytes = low_bytes;
+
+#ifdef UNIV_SEARCH_PERF_STAT
+ page_cur_short_succ++;
+#endif
+ success = TRUE;
+exit_func:
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ return(success);
+}
+
+#endif
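
Stripped of offsets and heaps, the shortcut above reduces to one interval
test: the position of the last insert can be reused iff the search key sorts
at or after that record and strictly before its successor. A standalone
sketch under that simplification, with integer keys standing in for records
(all names illustrative, not InnoDB API):

	#include <stdio.h>

	static int shortcut_applies(int key, int last_insert, int next_key)
	{
		/* reuse the last insert position iff key lands in
		[last_insert, next_key) */
		return key >= last_insert && key < next_key;
	}

	int main(void)
	{
		/* keys on a "page": ... 10 (last insert), 20 (successor) ... */
		printf("%d\n", shortcut_applies(15, 10, 20));	/* 1: hit */
		printf("%d\n", shortcut_applies(25, 10, 20));	/* 0: full search */
		return 0;
	}
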
+
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+/****************************************************************//**
+Checks if the nth field in a record is a character type field which extends
+the nth field in tuple, i.e., the field is at least as long and begins
+with the same characters.
+@return TRUE if rec field extends tuple field */
+static
+ibool
+page_cur_rec_field_extends(
+/*=======================*/
+ const dtuple_t* tuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: record */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n) /*!< in: compare nth field */
+{
+ const dtype_t* type;
+ const dfield_t* dfield;
+ const byte* rec_f;
+ ulint rec_f_len;
+
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+ dfield = dtuple_get_nth_field(tuple, n);
+
+ type = dfield_get_type(dfield);
+
+ rec_f = rec_get_nth_field(rec, offsets, n, &rec_f_len);
+
+ if (type->mtype == DATA_VARCHAR
+ || type->mtype == DATA_CHAR
+ || type->mtype == DATA_FIXBINARY
+ || type->mtype == DATA_BINARY
+ || type->mtype == DATA_BLOB
+ || type->mtype == DATA_VARMYSQL
+ || type->mtype == DATA_MYSQL) {
+
+ if (dfield_get_len(dfield) != UNIV_SQL_NULL
+ && rec_f_len != UNIV_SQL_NULL
+ && rec_f_len >= dfield_get_len(dfield)
+ && !cmp_data_data_slow(type->mtype, type->prtype,
+ dfield_get_data(dfield),
+ dfield_get_len(dfield),
+ rec_f, dfield_get_len(dfield))) {
+
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
+}
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
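
For plain byte strings the "extends" test is just a length check plus a
prefix comparison; a minimal sketch, ignoring SQL NULLs and collations
(which cmp_data_data_slow() handles in the real code above):

	#include <string.h>

	/* rec_f extends tuple_f iff rec_f is at least as long and agrees
	with tuple_f on tuple_f's full length. */
	static int field_extends(const char* tuple_f, size_t tuple_len,
				 const char* rec_f, size_t rec_len)
	{
		return rec_len >= tuple_len
			&& memcmp(tuple_f, rec_f, tuple_len) == 0;
	}
	/* field_extends("abc", 3, "abcdef", 6) == 1 */
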
+
+/****************************************************************//**
+Searches the right position for a page cursor. */
+UNIV_INTERN
+void
+page_cur_search_with_match(
+/*=======================*/
+ const buf_block_t* block, /*!< in: buffer block */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dtuple_t* tuple, /*!< in: data tuple */
+ ulint mode, /*!< in: PAGE_CUR_L,
+ PAGE_CUR_LE, PAGE_CUR_G, or
+ PAGE_CUR_GE */
+ ulint* iup_matched_fields,
+ /*!< in/out: already matched
+ fields in upper limit record */
+ ulint* iup_matched_bytes,
+ /*!< in/out: already matched
+ bytes in a field not yet
+ completely matched */
+ ulint* ilow_matched_fields,
+ /*!< in/out: already matched
+ fields in lower limit record */
+ ulint* ilow_matched_bytes,
+ /*!< in/out: already matched
+ bytes in a field not yet
+ completely matched */
+ page_cur_t* cursor) /*!< out: page cursor */
+{
+ ulint up;
+ ulint low;
+ ulint mid;
+ const page_t* page;
+ const page_dir_slot_t* slot;
+ const rec_t* up_rec;
+ const rec_t* low_rec;
+ const rec_t* mid_rec;
+ ulint up_matched_fields;
+ ulint up_matched_bytes;
+ ulint low_matched_fields;
+ ulint low_matched_bytes;
+ ulint cur_matched_fields;
+ ulint cur_matched_bytes;
+ int cmp;
+#ifdef UNIV_SEARCH_DEBUG
+ int dbg_cmp;
+ ulint dbg_matched_fields;
+ ulint dbg_matched_bytes;
+#endif
+#ifdef UNIV_ZIP_DEBUG
+ const page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+#endif /* UNIV_ZIP_DEBUG */
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_ad(block && tuple && iup_matched_fields && iup_matched_bytes
+ && ilow_matched_fields && ilow_matched_bytes && cursor);
+ ut_ad(dtuple_validate(tuple));
+#ifdef UNIV_DEBUG
+# ifdef PAGE_CUR_DBG
+ if (mode != PAGE_CUR_DBG)
+# endif /* PAGE_CUR_DBG */
+# ifdef PAGE_CUR_LE_OR_EXTENDS
+ if (mode != PAGE_CUR_LE_OR_EXTENDS)
+# endif /* PAGE_CUR_LE_OR_EXTENDS */
+ ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
+ || mode == PAGE_CUR_G || mode == PAGE_CUR_GE);
+#endif /* UNIV_DEBUG */
+ page = buf_block_get_frame(block);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ page_check_dir(page);
+
+#ifdef PAGE_CUR_ADAPT
+ if (page_is_leaf(page)
+ && (mode == PAGE_CUR_LE)
+ && (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
+ && (page_header_get_ptr(page, PAGE_LAST_INSERT))
+ && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {
+
+ if (page_cur_try_search_shortcut(
+ block, index, tuple,
+ iup_matched_fields, iup_matched_bytes,
+ ilow_matched_fields, ilow_matched_bytes,
+ cursor)) {
+ return;
+ }
+ }
+# ifdef PAGE_CUR_DBG
+ if (mode == PAGE_CUR_DBG) {
+ mode = PAGE_CUR_LE;
+ }
+# endif
+#endif
+
+ /* The following flag does not work for non-latin1 char sets because
+ cmp_full_field does not tell how many bytes matched */
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ ut_a(mode != PAGE_CUR_LE_OR_EXTENDS);
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+
+ /* If mode PAGE_CUR_G is specified, we are trying to position the
+ cursor to answer a query of the form "tuple < X", where tuple is
+ the input parameter, and X denotes an arbitrary physical record on
+ the page. We want to position the cursor on the first X which
+ satisfies the condition. */
+
+ up_matched_fields = *iup_matched_fields;
+ up_matched_bytes = *iup_matched_bytes;
+ low_matched_fields = *ilow_matched_fields;
+ low_matched_bytes = *ilow_matched_bytes;
+
+ /* Perform binary search. First the search is done through the page
+ directory, after that as a linear search in the list of records
+ owned by the upper limit directory slot. */
+
+ low = 0;
+ up = page_dir_get_n_slots(page) - 1;
+
+	/* Perform binary search until the lower and upper limit directory
+	slots are within a distance of 1 of each other */
+
+ while (up - low > 1) {
+ mid = (low + up) / 2;
+ slot = page_dir_get_nth_slot(page, mid);
+ mid_rec = page_dir_slot_get_rec(slot);
+
+ ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
+ low_matched_fields, low_matched_bytes,
+ up_matched_fields, up_matched_bytes);
+
+ offsets = rec_get_offsets(mid_rec, index, offsets,
+ dtuple_get_n_fields_cmp(tuple),
+ &heap);
+
+ cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
+ &cur_matched_fields,
+ &cur_matched_bytes);
+ if (UNIV_LIKELY(cmp > 0)) {
+low_slot_match:
+ low = mid;
+ low_matched_fields = cur_matched_fields;
+ low_matched_bytes = cur_matched_bytes;
+
+ } else if (UNIV_EXPECT(cmp, -1)) {
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ if (mode == PAGE_CUR_LE_OR_EXTENDS
+ && page_cur_rec_field_extends(
+ tuple, mid_rec, offsets,
+ cur_matched_fields)) {
+
+ goto low_slot_match;
+ }
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+up_slot_match:
+ up = mid;
+ up_matched_fields = cur_matched_fields;
+ up_matched_bytes = cur_matched_bytes;
+
+ } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ || mode == PAGE_CUR_LE_OR_EXTENDS
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+ ) {
+
+ goto low_slot_match;
+ } else {
+
+ goto up_slot_match;
+ }
+ }
+
+ slot = page_dir_get_nth_slot(page, low);
+ low_rec = page_dir_slot_get_rec(slot);
+ slot = page_dir_get_nth_slot(page, up);
+ up_rec = page_dir_slot_get_rec(slot);
+
+	/* Perform linear search until the upper and lower records are
+	within a distance of 1 of each other. */
+
+ while (page_rec_get_next_const(low_rec) != up_rec) {
+
+ mid_rec = page_rec_get_next_const(low_rec);
+
+ ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
+ low_matched_fields, low_matched_bytes,
+ up_matched_fields, up_matched_bytes);
+
+ offsets = rec_get_offsets(mid_rec, index, offsets,
+ dtuple_get_n_fields_cmp(tuple),
+ &heap);
+
+ cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
+ &cur_matched_fields,
+ &cur_matched_bytes);
+ if (UNIV_LIKELY(cmp > 0)) {
+low_rec_match:
+ low_rec = mid_rec;
+ low_matched_fields = cur_matched_fields;
+ low_matched_bytes = cur_matched_bytes;
+
+ } else if (UNIV_EXPECT(cmp, -1)) {
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ if (mode == PAGE_CUR_LE_OR_EXTENDS
+ && page_cur_rec_field_extends(
+ tuple, mid_rec, offsets,
+ cur_matched_fields)) {
+
+ goto low_rec_match;
+ }
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+up_rec_match:
+ up_rec = mid_rec;
+ up_matched_fields = cur_matched_fields;
+ up_matched_bytes = cur_matched_bytes;
+ } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ || mode == PAGE_CUR_LE_OR_EXTENDS
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+ ) {
+
+ goto low_rec_match;
+ } else {
+
+ goto up_rec_match;
+ }
+ }
+
+#ifdef UNIV_SEARCH_DEBUG
+
+ /* Check that the lower and upper limit records have the
+ right alphabetical order compared to tuple. */
+ dbg_matched_fields = 0;
+ dbg_matched_bytes = 0;
+
+ offsets = rec_get_offsets(low_rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets,
+ &dbg_matched_fields,
+ &dbg_matched_bytes);
+ if (mode == PAGE_CUR_G) {
+ ut_a(dbg_cmp >= 0);
+ } else if (mode == PAGE_CUR_GE) {
+ ut_a(dbg_cmp == 1);
+ } else if (mode == PAGE_CUR_L) {
+ ut_a(dbg_cmp == 1);
+ } else if (mode == PAGE_CUR_LE) {
+ ut_a(dbg_cmp >= 0);
+ }
+
+ if (!page_rec_is_infimum(low_rec)) {
+
+ ut_a(low_matched_fields == dbg_matched_fields);
+ ut_a(low_matched_bytes == dbg_matched_bytes);
+ }
+
+ dbg_matched_fields = 0;
+ dbg_matched_bytes = 0;
+
+ offsets = rec_get_offsets(up_rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets,
+ &dbg_matched_fields,
+ &dbg_matched_bytes);
+ if (mode == PAGE_CUR_G) {
+ ut_a(dbg_cmp == -1);
+ } else if (mode == PAGE_CUR_GE) {
+ ut_a(dbg_cmp <= 0);
+ } else if (mode == PAGE_CUR_L) {
+ ut_a(dbg_cmp <= 0);
+ } else if (mode == PAGE_CUR_LE) {
+ ut_a(dbg_cmp == -1);
+ }
+
+ if (!page_rec_is_supremum(up_rec)) {
+
+ ut_a(up_matched_fields == dbg_matched_fields);
+ ut_a(up_matched_bytes == dbg_matched_bytes);
+ }
+#endif
+ if (mode <= PAGE_CUR_GE) {
+ page_cur_position(up_rec, block, cursor);
+ } else {
+ page_cur_position(low_rec, block, cursor);
+ }
+
+ *iup_matched_fields = up_matched_fields;
+ *iup_matched_bytes = up_matched_bytes;
+ *ilow_matched_fields = low_matched_fields;
+ *ilow_matched_bytes = low_matched_bytes;
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+}
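
The two-phase structure above (coarse binary search over the sparse page
directory, then a short linear scan through the records owned by one slot)
can be sketched standalone. Here every 4th key plays the role of a directory
slot, and the scan finds the greatest key <= the search key, as in
PAGE_CUR_LE; all names are illustrative:

	#include <stdio.h>

	int main(void)
	{
		int keys[] = {3, 7, 9, 12, 15, 18, 22, 25, 31, 40, 41, 47};
		int n = 12, step = 4, target = 22;

		int low = 0, up = n / step;	/* "slot" indexes */
		while (up - low > 1) {		/* binary search on slots */
			int mid = (low + up) / 2;
			if (keys[mid * step] <= target) low = mid; else up = mid;
		}
		int i = low * step;		/* linear scan inside the group */
		while (i + 1 < n && keys[i + 1] <= target) i++;
		printf("greatest key <= %d is %d\n", target, keys[i]);	/* 22 */
		return 0;
	}
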
+
+/***********************************************************//**
+Positions a page cursor on a randomly chosen user record on a page. If there
+are no user records, sets the cursor on the infimum record. */
+UNIV_INTERN
+void
+page_cur_open_on_rnd_user_rec(
+/*==========================*/
+ buf_block_t* block, /*!< in: page */
+ page_cur_t* cursor) /*!< out: page cursor */
+{
+ ulint rnd;
+ ulint n_recs = page_get_n_recs(buf_block_get_frame(block));
+
+ page_cur_set_before_first(block, cursor);
+
+ if (UNIV_UNLIKELY(n_recs == 0)) {
+
+ return;
+ }
+
+ rnd = (ulint) (page_cur_lcg_prng() % n_recs);
+
+ do {
+ page_cur_move_to_next(cursor);
+ } while (rnd--);
+}
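
The do-while above advances the cursor rnd + 1 times starting from
before-first, so it lands on the (rnd + 1)-th user record, with rnd uniform
in [0, n_recs). A standalone sketch of just that stepping:

	#include <stdio.h>

	int main(void)
	{
		int n_recs = 5, rnd = 3 % n_recs, pos = 0;	/* 0 = infimum */
		do {
			pos++;			/* move to the next record */
		} while (rnd--);
		printf("cursor on user record #%d of %d\n", pos, n_recs); /* 4 */
		return 0;
	}
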
+
+/***********************************************************//**
+Writes the log record of a record insert on a page. */
+static
+void
+page_cur_insert_rec_write_log(
+/*==========================*/
+ rec_t* insert_rec, /*!< in: inserted physical record */
+ ulint rec_size, /*!< in: insert_rec size */
+ rec_t* cursor_rec, /*!< in: record the
+ cursor is pointing to */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+{
+ ulint cur_rec_size;
+ ulint extra_size;
+ ulint cur_extra_size;
+ const byte* ins_ptr;
+ byte* log_ptr;
+ const byte* log_end;
+ ulint i;
+
+ ut_a(rec_size < UNIV_PAGE_SIZE);
+ ut_ad(page_align(insert_rec) == page_align(cursor_rec));
+ ut_ad(!page_rec_is_comp(insert_rec)
+ == !dict_table_is_comp(index->table));
+
+ {
+ mem_heap_t* heap = NULL;
+ ulint cur_offs_[REC_OFFS_NORMAL_SIZE];
+ ulint ins_offs_[REC_OFFS_NORMAL_SIZE];
+
+ ulint* cur_offs;
+ ulint* ins_offs;
+
+ rec_offs_init(cur_offs_);
+ rec_offs_init(ins_offs_);
+
+ cur_offs = rec_get_offsets(cursor_rec, index, cur_offs_,
+ ULINT_UNDEFINED, &heap);
+ ins_offs = rec_get_offsets(insert_rec, index, ins_offs_,
+ ULINT_UNDEFINED, &heap);
+
+ extra_size = rec_offs_extra_size(ins_offs);
+ cur_extra_size = rec_offs_extra_size(cur_offs);
+ ut_ad(rec_size == rec_offs_size(ins_offs));
+ cur_rec_size = rec_offs_size(cur_offs);
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ }
+
+ ins_ptr = insert_rec - extra_size;
+
+ i = 0;
+
+ if (cur_extra_size == extra_size) {
+ ulint min_rec_size = ut_min(cur_rec_size, rec_size);
+
+ const byte* cur_ptr = cursor_rec - cur_extra_size;
+
+ /* Find out the first byte in insert_rec which differs from
+ cursor_rec; skip the bytes in the record info */
+
+ do {
+ if (*ins_ptr == *cur_ptr) {
+ i++;
+ ins_ptr++;
+ cur_ptr++;
+ } else if ((i < extra_size)
+ && (i >= extra_size
+ - page_rec_get_base_extra_size
+ (insert_rec))) {
+ i = extra_size;
+ ins_ptr = insert_rec;
+ cur_ptr = cursor_rec;
+ } else {
+ break;
+ }
+ } while (i < min_rec_size);
+ }
+
+ if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) {
+
+ if (page_rec_is_comp(insert_rec)) {
+ log_ptr = mlog_open_and_write_index(
+ mtr, insert_rec, index, MLOG_COMP_REC_INSERT,
+ 2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
+ if (UNIV_UNLIKELY(!log_ptr)) {
+ /* Logging in mtr is switched off
+ during crash recovery: in that case
+ mlog_open returns NULL */
+ return;
+ }
+ } else {
+ log_ptr = mlog_open(mtr, 11
+ + 2 + 5 + 1 + 5 + 5
+ + MLOG_BUF_MARGIN);
+ if (UNIV_UNLIKELY(!log_ptr)) {
+ /* Logging in mtr is switched off
+ during crash recovery: in that case
+ mlog_open returns NULL */
+ return;
+ }
+
+ log_ptr = mlog_write_initial_log_record_fast(
+ insert_rec, MLOG_REC_INSERT, log_ptr, mtr);
+ }
+
+ log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
+ /* Write the cursor rec offset as a 2-byte ulint */
+ mach_write_to_2(log_ptr, page_offset(cursor_rec));
+ log_ptr += 2;
+ } else {
+ log_ptr = mlog_open(mtr, 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
+ if (!log_ptr) {
+ /* Logging in mtr is switched off during crash
+ recovery: in that case mlog_open returns NULL */
+ return;
+ }
+ log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
+ }
+
+ if (page_rec_is_comp(insert_rec)) {
+ if (UNIV_UNLIKELY
+ (rec_get_info_and_status_bits(insert_rec, TRUE)
+ != rec_get_info_and_status_bits(cursor_rec, TRUE))) {
+
+ goto need_extra_info;
+ }
+ } else {
+ if (UNIV_UNLIKELY
+ (rec_get_info_and_status_bits(insert_rec, FALSE)
+ != rec_get_info_and_status_bits(cursor_rec, FALSE))) {
+
+ goto need_extra_info;
+ }
+ }
+
+ if (extra_size != cur_extra_size || rec_size != cur_rec_size) {
+need_extra_info:
+ /* Write the record end segment length
+ and the extra info storage flag */
+ log_ptr += mach_write_compressed(log_ptr,
+ 2 * (rec_size - i) + 1);
+
+ /* Write the info bits */
+ mach_write_to_1(log_ptr,
+ rec_get_info_and_status_bits(
+ insert_rec,
+ page_rec_is_comp(insert_rec)));
+ log_ptr++;
+
+ /* Write the record origin offset */
+ log_ptr += mach_write_compressed(log_ptr, extra_size);
+
+ /* Write the mismatch index */
+ log_ptr += mach_write_compressed(log_ptr, i);
+
+ ut_a(i < UNIV_PAGE_SIZE);
+ ut_a(extra_size < UNIV_PAGE_SIZE);
+ } else {
+ /* Write the record end segment length
+ and the extra info storage flag */
+ log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i));
+ }
+
+ /* Write to the log the inserted index record end segment which
+ differs from the cursor record */
+
+ rec_size -= i;
+
+ if (log_ptr + rec_size <= log_end) {
+ memcpy(log_ptr, ins_ptr, rec_size);
+ mlog_close(mtr, log_ptr + rec_size);
+ } else {
+ mlog_close(mtr, log_ptr);
+ ut_a(rec_size < UNIV_PAGE_SIZE);
+ mlog_catenate_string(mtr, ins_ptr, rec_size);
+ }
+}
+#else /* !UNIV_HOTBACKUP */
+# define page_cur_insert_rec_write_log(ins_rec,size,cur,index,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
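
The prefix-matching loop above implements a simple delta encoding: only the
mismatch index i and the suffix of the inserted record that differs from the
cursor record reach the redo log. A standalone sketch of that idea on byte
strings (illustrative names and data, not the real redo format):

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		const char cursor_rec[] = "user#1001:alice";
		const char insert_rec[] = "user#1002:bob";
		size_t i = 0, len = strlen(insert_rec);

		while (i < len && i < strlen(cursor_rec)
		       && cursor_rec[i] == insert_rec[i]) {
			i++;	/* common prefix with the cursor record */
		}
		printf("mismatch index %zu, logged suffix \"%s\"\n",
		       i, insert_rec + i);	/* only len - i bytes logged */
		return 0;
	}
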
+
+/***********************************************************//**
+Parses a log record of a record insert on a page.
+@return end of log record or NULL */
+UNIV_INTERN
+byte*
+page_cur_parse_insert_rec(
+/*======================*/
+ ibool is_short,/*!< in: TRUE if short inserts */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in: page or NULL */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr or NULL */
+{
+ ulint origin_offset;
+ ulint end_seg_len;
+ ulint mismatch_index;
+ page_t* page;
+ rec_t* cursor_rec;
+ byte buf1[1024];
+ byte* buf;
+ byte* ptr2 = ptr;
+ ulint info_and_status_bits = 0; /* remove warning */
+ page_cur_t cursor;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ page = block ? buf_block_get_frame(block) : NULL;
+
+ if (is_short) {
+ cursor_rec = page_rec_get_prev(page_get_supremum_rec(page));
+ } else {
+ ulint offset;
+
+ /* Read the cursor rec offset as a 2-byte ulint */
+
+ if (UNIV_UNLIKELY(end_ptr < ptr + 2)) {
+
+ return(NULL);
+ }
+
+ offset = mach_read_from_2(ptr);
+ ptr += 2;
+
+ cursor_rec = page + offset;
+
+ if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)) {
+
+ recv_sys->found_corrupt_log = TRUE;
+
+ return(NULL);
+ }
+ }
+
+ ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len);
+
+ if (ptr == NULL) {
+
+ return(NULL);
+ }
+
+ if (UNIV_UNLIKELY(end_seg_len >= UNIV_PAGE_SIZE << 1)) {
+ recv_sys->found_corrupt_log = TRUE;
+
+ return(NULL);
+ }
+
+ if (end_seg_len & 0x1UL) {
+ /* Read the info bits */
+
+ if (end_ptr < ptr + 1) {
+
+ return(NULL);
+ }
+
+ info_and_status_bits = mach_read_from_1(ptr);
+ ptr++;
+
+ ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset);
+
+ if (ptr == NULL) {
+
+ return(NULL);
+ }
+
+ ut_a(origin_offset < UNIV_PAGE_SIZE);
+
+ ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index);
+
+ if (ptr == NULL) {
+
+ return(NULL);
+ }
+
+ ut_a(mismatch_index < UNIV_PAGE_SIZE);
+ }
+
+ if (UNIV_UNLIKELY(end_ptr < ptr + (end_seg_len >> 1))) {
+
+ return(NULL);
+ }
+
+ if (!block) {
+
+ return(ptr + (end_seg_len >> 1));
+ }
+
+ ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+ ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page));
+
+ /* Read from the log the inserted index record end segment which
+ differs from the cursor record */
+
+ offsets = rec_get_offsets(cursor_rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+
+ if (!(end_seg_len & 0x1UL)) {
+ info_and_status_bits = rec_get_info_and_status_bits(
+ cursor_rec, page_is_comp(page));
+ origin_offset = rec_offs_extra_size(offsets);
+ mismatch_index = rec_offs_size(offsets) - (end_seg_len >> 1);
+ }
+
+ end_seg_len >>= 1;
+
+ if (mismatch_index + end_seg_len < sizeof buf1) {
+ buf = buf1;
+ } else {
+ buf = static_cast<byte*>(
+ mem_alloc(mismatch_index + end_seg_len));
+ }
+
+ /* Build the inserted record to buf */
+
+ if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "Is short %lu, info_and_status_bits %lu, offset %lu, "
+ "o_offset %lu\n"
+ "mismatch index %lu, end_seg_len %lu\n"
+ "parsed len %lu\n",
+ (ulong) is_short, (ulong) info_and_status_bits,
+ (ulong) page_offset(cursor_rec),
+ (ulong) origin_offset,
+ (ulong) mismatch_index, (ulong) end_seg_len,
+ (ulong) (ptr - ptr2));
+
+ fputs("Dump of 300 bytes of log:\n", stderr);
+ ut_print_buf(stderr, ptr2, 300);
+ putc('\n', stderr);
+
+ buf_page_print(page, 0, 0);
+
+ ut_error;
+ }
+
+ ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index);
+ ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
+
+ if (page_is_comp(page)) {
+ rec_set_info_and_status_bits(buf + origin_offset,
+ info_and_status_bits);
+ } else {
+ rec_set_info_bits_old(buf + origin_offset,
+ info_and_status_bits);
+ }
+
+ page_cur_position(cursor_rec, block, &cursor);
+
+ offsets = rec_get_offsets(buf + origin_offset, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ if (UNIV_UNLIKELY(!page_cur_rec_insert(&cursor,
+ buf + origin_offset,
+ index, offsets, mtr))) {
+ /* The redo log record should only have been written
+ after the write was successful. */
+ ut_error;
+ }
+
+ if (buf != buf1) {
+
+ mem_free(buf);
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ return(ptr + end_seg_len);
+}
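
Recovery inverts that delta encoding: the two ut_memcpy() calls above rebuild
the record from the first mismatch_index bytes of the on-page cursor record
plus the end segment read from the log. A matching standalone sketch (same
illustrative strings as the logging sketch earlier):

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		const char cursor_rec[] = "user#1001:alice";
		const char log_seg[]    = "2:bob";	/* from the redo record */
		size_t mismatch_index = 8, seg_len = strlen(log_seg);
		char buf[64];

		memcpy(buf, cursor_rec, mismatch_index);	/* shared prefix */
		memcpy(buf + mismatch_index, log_seg, seg_len);	/* logged tail */
		buf[mismatch_index + seg_len] = '\0';
		printf("rebuilt record: %s\n", buf);	/* user#1002:bob */
		return 0;
	}
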
+
+/***********************************************************//**
+Inserts a record next to page cursor on an uncompressed page.
+Returns a pointer to the inserted record on success, i.e., if enough
+space is available; NULL otherwise. The cursor stays at the same position.
+@return pointer to record on success, NULL otherwise */
+UNIV_INTERN
+rec_t*
+page_cur_insert_rec_low(
+/*====================*/
+ rec_t* current_rec,/*!< in: pointer to current record after
+ which the new record is inserted */
+ dict_index_t* index, /*!< in: record descriptor */
+ const rec_t* rec, /*!< in: pointer to a physical record */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
+{
+ byte* insert_buf;
+ ulint rec_size;
+ page_t* page; /*!< the relevant page */
+ rec_t* last_insert; /*!< cursor position at previous
+ insert */
+ rec_t* free_rec; /*!< a free record that was reused,
+ or NULL */
+ rec_t* insert_rec; /*!< inserted record */
+ ulint heap_no; /*!< heap number of the inserted
+ record */
+
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
+ page = page_align(current_rec);
+ ut_ad(dict_table_is_comp(index->table)
+ == (ibool) !!page_is_comp(page));
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
+ == index->id || recv_recovery_is_on()
+ || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index)));
+
+ ut_ad(!page_rec_is_supremum(current_rec));
+
+ /* 1. Get the size of the physical record in the page */
+ rec_size = rec_offs_size(offsets);
+
+#ifdef UNIV_DEBUG_VALGRIND
+ {
+ const void* rec_start
+ = rec - rec_offs_extra_size(offsets);
+ ulint extra_size
+ = rec_offs_extra_size(offsets)
+ - (rec_offs_comp(offsets)
+ ? REC_N_NEW_EXTRA_BYTES
+ : REC_N_OLD_EXTRA_BYTES);
+
+ /* All data bytes of the record must be valid. */
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ /* The variable-length header must be valid. */
+ UNIV_MEM_ASSERT_RW(rec_start, extra_size);
+ }
+#endif /* UNIV_DEBUG_VALGRIND */
+
+ /* 2. Try to find suitable space from page memory management */
+
+ free_rec = page_header_get_ptr(page, PAGE_FREE);
+ if (UNIV_LIKELY_NULL(free_rec)) {
+ /* Try to allocate from the head of the free list. */
+ ulint foffsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* foffsets = foffsets_;
+ mem_heap_t* heap = NULL;
+
+ rec_offs_init(foffsets_);
+
+ foffsets = rec_get_offsets(
+ free_rec, index, foffsets, ULINT_UNDEFINED, &heap);
+ if (rec_offs_size(foffsets) < rec_size) {
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ goto use_heap;
+ }
+
+ insert_buf = free_rec - rec_offs_extra_size(foffsets);
+
+ if (page_is_comp(page)) {
+ heap_no = rec_get_heap_no_new(free_rec);
+ page_mem_alloc_free(page, NULL,
+ rec_get_next_ptr(free_rec, TRUE),
+ rec_size);
+ } else {
+ heap_no = rec_get_heap_no_old(free_rec);
+ page_mem_alloc_free(page, NULL,
+ rec_get_next_ptr(free_rec, FALSE),
+ rec_size);
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ } else {
+use_heap:
+ free_rec = NULL;
+ insert_buf = page_mem_alloc_heap(page, NULL,
+ rec_size, &heap_no);
+
+ if (UNIV_UNLIKELY(insert_buf == NULL)) {
+ return(NULL);
+ }
+ }
+
+ /* 3. Create the record */
+ insert_rec = rec_copy(insert_buf, rec, offsets);
+ rec_offs_make_valid(insert_rec, index, offsets);
+
+ /* 4. Insert the record in the linked list of records */
+ ut_ad(current_rec != insert_rec);
+
+ {
+ /* next record after current before the insertion */
+ rec_t* next_rec = page_rec_get_next(current_rec);
+#ifdef UNIV_DEBUG
+ if (page_is_comp(page)) {
+ ut_ad(rec_get_status(current_rec)
+ <= REC_STATUS_INFIMUM);
+ ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
+ ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
+ }
+#endif
+ page_rec_set_next(insert_rec, next_rec);
+ page_rec_set_next(current_rec, insert_rec);
+ }
+
+ page_header_set_field(page, NULL, PAGE_N_RECS,
+ 1 + page_get_n_recs(page));
+
+ /* 5. Set the n_owned field in the inserted record to zero,
+ and set the heap_no field */
+ if (page_is_comp(page)) {
+ rec_set_n_owned_new(insert_rec, NULL, 0);
+ rec_set_heap_no_new(insert_rec, heap_no);
+ } else {
+ rec_set_n_owned_old(insert_rec, 0);
+ rec_set_heap_no_old(insert_rec, heap_no);
+ }
+
+ UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
+ rec_offs_size(offsets));
+ /* 6. Update the last insertion info in page header */
+
+ last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
+ ut_ad(!last_insert || !page_is_comp(page)
+ || rec_get_node_ptr_flag(last_insert)
+ == rec_get_node_ptr_flag(insert_rec));
+
+ if (UNIV_UNLIKELY(last_insert == NULL)) {
+ page_header_set_field(page, NULL, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+
+ } else if ((last_insert == current_rec)
+ && (page_header_get_field(page, PAGE_DIRECTION)
+ != PAGE_LEFT)) {
+
+ page_header_set_field(page, NULL, PAGE_DIRECTION,
+ PAGE_RIGHT);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION,
+ page_header_get_field(
+ page, PAGE_N_DIRECTION) + 1);
+
+ } else if ((page_rec_get_next(insert_rec) == last_insert)
+ && (page_header_get_field(page, PAGE_DIRECTION)
+ != PAGE_RIGHT)) {
+
+ page_header_set_field(page, NULL, PAGE_DIRECTION,
+ PAGE_LEFT);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION,
+ page_header_get_field(
+ page, PAGE_N_DIRECTION) + 1);
+ } else {
+ page_header_set_field(page, NULL, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+ }
+
+ page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, insert_rec);
+
+ /* 7. It remains to update the owner record. */
+ {
+ rec_t* owner_rec = page_rec_find_owner_rec(insert_rec);
+ ulint n_owned;
+ if (page_is_comp(page)) {
+ n_owned = rec_get_n_owned_new(owner_rec);
+ rec_set_n_owned_new(owner_rec, NULL, n_owned + 1);
+ } else {
+ n_owned = rec_get_n_owned_old(owner_rec);
+ rec_set_n_owned_old(owner_rec, n_owned + 1);
+ }
+
+ /* 8. Now we have incremented the n_owned field of the owner
+ record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
+ we have to split the corresponding directory slot in two. */
+
+ if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
+ page_dir_split_slot(
+ page, NULL,
+ page_dir_find_owner_slot(owner_rec));
+ }
+ }
+
+ /* 9. Write log record of the insert */
+ if (UNIV_LIKELY(mtr != NULL)) {
+ page_cur_insert_rec_write_log(insert_rec, rec_size,
+ current_rec, index, mtr);
+ }
+
+ btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert");
+
+ return(insert_rec);
+}
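
Step 6 above maintains the PAGE_DIRECTION / PAGE_N_DIRECTION heuristic that
page_cur_search_with_match() consults before trying the search shortcut
(which requires PAGE_N_DIRECTION > 3). A standalone sketch of just that state
machine, reduced to two boolean inputs (names illustrative, not InnoDB API):

	#include <stdio.h>

	enum dir { NO_DIRECTION, RIGHT, LEFT };
	struct hdr { enum dir direction; int n_direction; };

	/* after_last: inserted right after the previous insert;
	before_last: the previous insert is now the successor */
	static void update_direction(struct hdr* h, int after_last,
				     int before_last)
	{
		if (after_last && h->direction != LEFT) {
			h->direction = RIGHT;  h->n_direction++;
		} else if (before_last && h->direction != RIGHT) {
			h->direction = LEFT;   h->n_direction++;
		} else {
			h->direction = NO_DIRECTION;  h->n_direction = 0;
		}
	}

	int main(void)
	{
		struct hdr h = { NO_DIRECTION, 0 };
		for (int i = 0; i < 4; i++) {
			update_direction(&h, 1, 0);	/* ascending inserts */
		}
		printf("direction=%d n_direction=%d\n",
		       h.direction, h.n_direction);	/* 1 (RIGHT), 4 */
		return 0;
	}
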
+
+/***********************************************************//**
+Inserts a record next to the page cursor on a compressed page, updating
+both the compressed page and its uncompressed copy. Returns a pointer to
+the inserted record on success, i.e., if enough space is available;
+NULL otherwise.
+The cursor stays at the same position.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return pointer to record on success, NULL otherwise */
+UNIV_INTERN
+rec_t*
+page_cur_insert_rec_zip(
+/*====================*/
+ page_cur_t* cursor, /*!< in/out: page cursor */
+ dict_index_t* index, /*!< in: record descriptor */
+ const rec_t* rec, /*!< in: pointer to a physical record */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
+{
+ byte* insert_buf;
+ ulint rec_size;
+ page_t* page; /*!< the relevant page */
+ rec_t* last_insert; /*!< cursor position at previous
+ insert */
+ rec_t* free_rec; /*!< a free record that was reused,
+ or NULL */
+ rec_t* insert_rec; /*!< inserted record */
+ ulint heap_no; /*!< heap number of the inserted
+ record */
+ page_zip_des_t* page_zip;
+
+ page_zip = page_cur_get_page_zip(cursor);
+ ut_ad(page_zip);
+
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
+ page = page_cur_get_page(cursor);
+ ut_ad(dict_table_is_comp(index->table));
+ ut_ad(page_is_comp(page));
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
+ == index->id || recv_recovery_is_on()
+ || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index)));
+
+ ut_ad(!page_cur_is_after_last(cursor));
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ /* 1. Get the size of the physical record in the page */
+ rec_size = rec_offs_size(offsets);
+
+#ifdef UNIV_DEBUG_VALGRIND
+ {
+ const void* rec_start
+ = rec - rec_offs_extra_size(offsets);
+ ulint extra_size
+ = rec_offs_extra_size(offsets)
+ - (rec_offs_comp(offsets)
+ ? REC_N_NEW_EXTRA_BYTES
+ : REC_N_OLD_EXTRA_BYTES);
+
+ /* All data bytes of the record must be valid. */
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ /* The variable-length header must be valid. */
+ UNIV_MEM_ASSERT_RW(rec_start, extra_size);
+ }
+#endif /* UNIV_DEBUG_VALGRIND */
+
+ const bool reorg_before_insert = page_has_garbage(page)
+ && rec_size > page_get_max_insert_size(page, 1)
+ && rec_size <= page_get_max_insert_size_after_reorganize(
+ page, 1);
+
+ /* 2. Try to find suitable space from page memory management */
+ if (!page_zip_available(page_zip, dict_index_is_clust(index),
+ rec_size, 1)
+ || reorg_before_insert) {
+ /* The values can change dynamically. */
+ bool log_compressed = page_zip_log_pages;
+ ulint level = page_zip_level;
+#ifdef UNIV_DEBUG
+ rec_t* cursor_rec = page_cur_get_rec(cursor);
+#endif /* UNIV_DEBUG */
+
+ /* If we are not writing compressed page images, we
+ must reorganize the page before attempting the
+ insert. */
+ if (recv_recovery_is_on()) {
+ /* Insert into the uncompressed page only.
+ The page reorganization or creation that we
+ would attempt outside crash recovery would
+ have been covered by a previous redo log record. */
+ } else if (page_is_empty(page)) {
+ ut_ad(page_cur_is_before_first(cursor));
+
+ /* This is an empty page. Recreate it to
+ get rid of the modification log. */
+ page_create_zip(page_cur_get_block(cursor), index,
+ page_header_get_field(page, PAGE_LEVEL),
+ 0, mtr);
+ ut_ad(!page_header_get_ptr(page, PAGE_FREE));
+
+ if (page_zip_available(
+ page_zip, dict_index_is_clust(index),
+ rec_size, 1)) {
+ goto use_heap;
+ }
+
+ /* The cursor should remain on the page infimum. */
+ return(NULL);
+ } else if (!page_zip->m_nonempty && !page_has_garbage(page)) {
+ /* The page has been freshly compressed, so
+ reorganizing it will not help. */
+ } else if (log_compressed && !reorg_before_insert) {
+ /* Insert into uncompressed page only, and
+ try page_zip_reorganize() afterwards. */
+ } else if (btr_page_reorganize_low(
+ recv_recovery_is_on(), level,
+ cursor, index, mtr)) {
+ ut_ad(!page_header_get_ptr(page, PAGE_FREE));
+
+ if (page_zip_available(
+ page_zip, dict_index_is_clust(index),
+ rec_size, 1)) {
+ /* After reorganizing, there is space
+ available. */
+ goto use_heap;
+ }
+ } else {
+ ut_ad(cursor->rec == cursor_rec);
+ return(NULL);
+ }
+
+ /* Try compressing the whole page afterwards. */
+ insert_rec = page_cur_insert_rec_low(
+ cursor->rec, index, rec, offsets, NULL);
+
+ /* If recovery is on, this implies that the compression
+ of the page was successful during runtime. Had that not
+ been the case or had the redo logging of compressed
+ pages been enabled during runtime then we'd have seen
+ a MLOG_ZIP_PAGE_COMPRESS redo record. Therefore, we
+ know that we don't need to reorganize the page. We,
+ however, do need to recompress the page. That will
+ happen when the next redo record is read which must
+ be of type MLOG_ZIP_PAGE_COMPRESS_NO_DATA and it must
+ contain a valid compression level value.
+ This implies that during recovery from this point till
+ the next redo is applied the uncompressed and
+ compressed versions are not identical and
+ page_zip_validate will fail but that is OK because
+ we call page_zip_validate only after processing
+ all changes to a page under a single mtr during
+ recovery. */
+ if (insert_rec == NULL) {
+ /* Out of space.
+ This should never occur during crash recovery,
+ because the MLOG_COMP_REC_INSERT should only
+ be logged after a successful operation. */
+ ut_ad(!recv_recovery_is_on());
+ } else if (recv_recovery_is_on()) {
+ /* This should be followed by
+ MLOG_ZIP_PAGE_COMPRESS_NO_DATA,
+ which should succeed. */
+ rec_offs_make_valid(insert_rec, index, offsets);
+ } else {
+ ulint pos = page_rec_get_n_recs_before(insert_rec);
+ ut_ad(pos > 0);
+
+ if (!log_compressed) {
+ if (page_zip_compress(
+ page_zip, page, index,
+ level, NULL)) {
+ page_cur_insert_rec_write_log(
+ insert_rec, rec_size,
+ cursor->rec, index, mtr);
+ page_zip_compress_write_log_no_data(
+ level, page, index, mtr);
+
+ rec_offs_make_valid(
+ insert_rec, index, offsets);
+ return(insert_rec);
+ }
+
+ ut_ad(cursor->rec
+ == (pos > 1
+ ? page_rec_get_nth(
+ page, pos - 1)
+ : page + PAGE_NEW_INFIMUM));
+ } else {
+ /* We are writing entire page images
+ to the log. Reduce the redo log volume
+ by reorganizing the page at the same time. */
+ if (page_zip_reorganize(
+ cursor->block, index, mtr)) {
+ /* The page was reorganized:
+ Seek to pos. */
+ if (pos > 1) {
+ cursor->rec = page_rec_get_nth(
+ page, pos - 1);
+ } else {
+ cursor->rec = page
+ + PAGE_NEW_INFIMUM;
+ }
+
+ insert_rec = page + rec_get_next_offs(
+ cursor->rec, TRUE);
+ rec_offs_make_valid(
+ insert_rec, index, offsets);
+ return(insert_rec);
+ }
+
+ /* Theoretically, we could try one
+ last resort of btr_page_reorganize_low()
+ followed by page_zip_available(), but
+ that would be very unlikely to
+			succeed. (If the fully reorganized page
+			failed to compress, why would compressing
+			the page plus logging the insert of this
+			record succeed?) */
+ }
+
+ /* Out of space: restore the page */
+ btr_blob_dbg_remove(page, index, "insert_zip_fail");
+ if (!page_zip_decompress(page_zip, page, FALSE)) {
+ ut_error; /* Memory corrupted? */
+ }
+ ut_ad(page_validate(page, index));
+ btr_blob_dbg_add(page, index, "insert_zip_fail");
+ insert_rec = NULL;
+ }
+
+ return(insert_rec);
+ }
+
+ free_rec = page_header_get_ptr(page, PAGE_FREE);
+ if (UNIV_LIKELY_NULL(free_rec)) {
+ /* Try to allocate from the head of the free list. */
+ lint extra_size_diff;
+ ulint foffsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* foffsets = foffsets_;
+ mem_heap_t* heap = NULL;
+
+ rec_offs_init(foffsets_);
+
+ foffsets = rec_get_offsets(free_rec, index, foffsets,
+ ULINT_UNDEFINED, &heap);
+ if (rec_offs_size(foffsets) < rec_size) {
+too_small:
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ goto use_heap;
+ }
+
+ insert_buf = free_rec - rec_offs_extra_size(foffsets);
+
+ /* On compressed pages, do not relocate records from
+ the free list. If extra_size would grow, use the heap. */
+ extra_size_diff
+ = rec_offs_extra_size(offsets)
+ - rec_offs_extra_size(foffsets);
+
+ if (UNIV_UNLIKELY(extra_size_diff < 0)) {
+ /* Add an offset to the extra_size. */
+ if (rec_offs_size(foffsets)
+ < rec_size - extra_size_diff) {
+
+ goto too_small;
+ }
+
+ insert_buf -= extra_size_diff;
+ } else if (UNIV_UNLIKELY(extra_size_diff)) {
+ /* Do not allow extra_size to grow */
+
+ goto too_small;
+ }
+
+ heap_no = rec_get_heap_no_new(free_rec);
+ page_mem_alloc_free(page, page_zip,
+ rec_get_next_ptr(free_rec, TRUE),
+ rec_size);
+
+ if (!page_is_leaf(page)) {
+ /* Zero out the node pointer of free_rec,
+ in case it will not be overwritten by
+ insert_rec. */
+
+ ut_ad(rec_size > REC_NODE_PTR_SIZE);
+
+ if (rec_offs_extra_size(foffsets)
+ + rec_offs_data_size(foffsets) > rec_size) {
+
+ memset(rec_get_end(free_rec, foffsets)
+ - REC_NODE_PTR_SIZE, 0,
+ REC_NODE_PTR_SIZE);
+ }
+ } else if (dict_index_is_clust(index)) {
+ /* Zero out the DB_TRX_ID and DB_ROLL_PTR
+ columns of free_rec, in case it will not be
+ overwritten by insert_rec. */
+
+ ulint trx_id_col;
+ ulint trx_id_offs;
+ ulint len;
+
+ trx_id_col = dict_index_get_sys_col_pos(index,
+ DATA_TRX_ID);
+ ut_ad(trx_id_col > 0);
+ ut_ad(trx_id_col != ULINT_UNDEFINED);
+
+ trx_id_offs = rec_get_nth_field_offs(foffsets,
+ trx_id_col, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+
+ if (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + trx_id_offs
+ + rec_offs_extra_size(foffsets) > rec_size) {
+ /* We will have to zero out the
+ DB_TRX_ID and DB_ROLL_PTR, because
+ they will not be fully overwritten by
+ insert_rec. */
+
+ memset(free_rec + trx_id_offs, 0,
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ }
+
+ ut_ad(free_rec + trx_id_offs + DATA_TRX_ID_LEN
+ == rec_get_nth_field(free_rec, foffsets,
+ trx_id_col + 1, &len));
+ ut_ad(len == DATA_ROLL_PTR_LEN);
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ } else {
+use_heap:
+ free_rec = NULL;
+ insert_buf = page_mem_alloc_heap(page, page_zip,
+ rec_size, &heap_no);
+
+ if (UNIV_UNLIKELY(insert_buf == NULL)) {
+ return(NULL);
+ }
+
+ page_zip_dir_add_slot(page_zip, dict_index_is_clust(index));
+ }
+
+ /* 3. Create the record */
+ insert_rec = rec_copy(insert_buf, rec, offsets);
+ rec_offs_make_valid(insert_rec, index, offsets);
+
+ /* 4. Insert the record in the linked list of records */
+ ut_ad(cursor->rec != insert_rec);
+
+ {
+ /* next record after current before the insertion */
+ const rec_t* next_rec = page_rec_get_next_low(
+ cursor->rec, TRUE);
+ ut_ad(rec_get_status(cursor->rec)
+ <= REC_STATUS_INFIMUM);
+ ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
+ ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
+
+ page_rec_set_next(insert_rec, next_rec);
+ page_rec_set_next(cursor->rec, insert_rec);
+ }
+
+ page_header_set_field(page, page_zip, PAGE_N_RECS,
+ 1 + page_get_n_recs(page));
+
+ /* 5. Set the n_owned field in the inserted record to zero,
+ and set the heap_no field */
+ rec_set_n_owned_new(insert_rec, NULL, 0);
+ rec_set_heap_no_new(insert_rec, heap_no);
+
+ UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
+ rec_offs_size(offsets));
+
+ page_zip_dir_insert(page_zip, cursor->rec, free_rec, insert_rec);
+
+ /* 6. Update the last insertion info in page header */
+
+ last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
+ ut_ad(!last_insert
+ || rec_get_node_ptr_flag(last_insert)
+ == rec_get_node_ptr_flag(insert_rec));
+
+ if (UNIV_UNLIKELY(last_insert == NULL)) {
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
+
+ } else if ((last_insert == cursor->rec)
+ && (page_header_get_field(page, PAGE_DIRECTION)
+ != PAGE_LEFT)) {
+
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_RIGHT);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
+ page_header_get_field(
+ page, PAGE_N_DIRECTION) + 1);
+
+ } else if ((page_rec_get_next(insert_rec) == last_insert)
+ && (page_header_get_field(page, PAGE_DIRECTION)
+ != PAGE_RIGHT)) {
+
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_LEFT);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
+ page_header_get_field(
+ page, PAGE_N_DIRECTION) + 1);
+ } else {
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
+ }
+
+ page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, insert_rec);
+
+ /* 7. It remains to update the owner record. */
+ {
+ rec_t* owner_rec = page_rec_find_owner_rec(insert_rec);
+ ulint n_owned;
+
+ n_owned = rec_get_n_owned_new(owner_rec);
+ rec_set_n_owned_new(owner_rec, page_zip, n_owned + 1);
+
+ /* 8. Now we have incremented the n_owned field of the owner
+ record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
+ we have to split the corresponding directory slot in two. */
+
+ if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
+ page_dir_split_slot(
+ page, page_zip,
+ page_dir_find_owner_slot(owner_rec));
+ }
+ }
+
+ page_zip_write_rec(page_zip, insert_rec, index, offsets, 1);
+
+ btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert_zip_ok");
+
+ /* 9. Write log record of the insert */
+ if (UNIV_LIKELY(mtr != NULL)) {
+ page_cur_insert_rec_write_log(insert_rec, rec_size,
+ cursor->rec, index, mtr);
+ }
+
+ return(insert_rec);
+}
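
The space-management branch above is a fallback ladder: use the free list or
heap if page_zip_available() says the record fits; otherwise reorganize and
retry; otherwise insert into the uncompressed copy and attempt recompression;
and on failure decompress to restore the page. A control-flow-only sketch,
with hypothetical boolean predicates standing in for the InnoDB calls:

	enum outcome { INSERTED, RETRY_AFTER_REORG, FAIL };

	/* fits_now, reorg_ok, recompress_ok stand in for
	page_zip_available(), btr_page_reorganize_low() and
	page_zip_compress(); FAIL implies page_zip_decompress()
	to restore the page. */
	static enum outcome insert_zip_sketch(int fits_now, int reorg_ok,
					      int recompress_ok)
	{
		if (fits_now) {
			return INSERTED;	/* free list or heap */
		}
		if (reorg_ok) {
			return RETRY_AFTER_REORG; /* space may now exist */
		}
		/* last resort: insert uncompressed, then recompress */
		return recompress_ok ? INSERTED : FAIL;
	}
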
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Writes a log record of copying a record list end to a newly created page.
+@return 4-byte field where to write the log data length, or NULL if
+logging is disabled */
+UNIV_INLINE
+byte*
+page_copy_rec_list_to_created_page_write_log(
+/*=========================================*/
+ page_t* page, /*!< in: index page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ byte* log_ptr;
+
+ ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+
+ log_ptr = mlog_open_and_write_index(mtr, page, index,
+ page_is_comp(page)
+ ? MLOG_COMP_LIST_END_COPY_CREATED
+ : MLOG_LIST_END_COPY_CREATED, 4);
+ if (UNIV_LIKELY(log_ptr != NULL)) {
+ mlog_close(mtr, log_ptr + 4);
+ }
+
+ return(log_ptr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************//**
+Parses a log record of copying a record list end to a newly created page.
+@return end of log record or NULL */
+UNIV_INTERN
+byte*
+page_parse_copy_rec_list_to_created_page(
+/*=====================================*/
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in: page or NULL */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr or NULL */
+{
+ byte* rec_end;
+ ulint log_data_len;
+ page_t* page;
+ page_zip_des_t* page_zip;
+
+ if (ptr + 4 > end_ptr) {
+
+ return(NULL);
+ }
+
+ log_data_len = mach_read_from_4(ptr);
+ ptr += 4;
+
+ rec_end = ptr + log_data_len;
+
+ if (rec_end > end_ptr) {
+
+ return(NULL);
+ }
+
+ if (!block) {
+
+ return(rec_end);
+ }
+
+ while (ptr < rec_end) {
+ ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
+ block, index, mtr);
+ }
+
+ ut_a(ptr == rec_end);
+
+ page = buf_block_get_frame(block);
+ page_zip = buf_block_get_page_zip(block);
+
+ page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
+
+ return(rec_end);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Copies records from page to a newly created page, from a given record onward,
+including that record. Infimum and supremum records are not copied.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit(). */
+UNIV_INTERN
+void
+page_copy_rec_list_end_to_created_page(
+/*===================================*/
+ page_t* new_page, /*!< in/out: index page to copy to */
+ rec_t* rec, /*!< in: first record to copy */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_dir_slot_t* slot = 0; /* remove warning */
+ byte* heap_top;
+ rec_t* insert_rec = 0; /* remove warning */
+ rec_t* prev_rec;
+ ulint count;
+ ulint n_recs;
+ ulint slot_index;
+ ulint rec_size;
+ ulint log_mode;
+ byte* log_ptr;
+ ulint log_data_len;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW);
+ ut_ad(page_align(rec) != new_page);
+ ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page));
+
+ if (page_rec_is_infimum(rec)) {
+
+ rec = page_rec_get_next(rec);
+ }
+
+ if (page_rec_is_supremum(rec)) {
+
+ return;
+ }
+
+#ifdef UNIV_DEBUG
+ /* To pass the debug tests we have to set these dummy values
+ in the debug version */
+ page_dir_set_n_slots(new_page, NULL, UNIV_PAGE_SIZE / 2);
+ page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP,
+ new_page + UNIV_PAGE_SIZE - 1);
+#endif
+
+ log_ptr = page_copy_rec_list_to_created_page_write_log(new_page,
+ index, mtr);
+
+ log_data_len = dyn_array_get_data_size(&(mtr->log));
+
+ /* Individual inserts are logged in a shorter form */
+
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
+
+ prev_rec = page_get_infimum_rec(new_page);
+ if (page_is_comp(new_page)) {
+ heap_top = new_page + PAGE_NEW_SUPREMUM_END;
+ } else {
+ heap_top = new_page + PAGE_OLD_SUPREMUM_END;
+ }
+ count = 0;
+ slot_index = 0;
+ n_recs = 0;
+
+ do {
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ insert_rec = rec_copy(heap_top, rec, offsets);
+
+ if (page_is_comp(new_page)) {
+ rec_set_next_offs_new(prev_rec,
+ page_offset(insert_rec));
+
+ rec_set_n_owned_new(insert_rec, NULL, 0);
+ rec_set_heap_no_new(insert_rec,
+ PAGE_HEAP_NO_USER_LOW + n_recs);
+ } else {
+ rec_set_next_offs_old(prev_rec,
+ page_offset(insert_rec));
+
+ rec_set_n_owned_old(insert_rec, 0);
+ rec_set_heap_no_old(insert_rec,
+ PAGE_HEAP_NO_USER_LOW + n_recs);
+ }
+
+ count++;
+ n_recs++;
+
+ if (UNIV_UNLIKELY
+ (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2)) {
+
+ slot_index++;
+
+ slot = page_dir_get_nth_slot(new_page, slot_index);
+
+ page_dir_slot_set_rec(slot, insert_rec);
+ page_dir_slot_set_n_owned(slot, NULL, count);
+
+ count = 0;
+ }
+
+ rec_size = rec_offs_size(offsets);
+
+ ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
+
+ heap_top += rec_size;
+
+ rec_offs_make_valid(insert_rec, index, offsets);
+ btr_blob_dbg_add_rec(insert_rec, index, offsets, "copy_end");
+
+ page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
+ index, mtr);
+ prev_rec = insert_rec;
+ rec = page_rec_get_next(rec);
+ } while (!page_rec_is_supremum(rec));
+
+ if ((slot_index > 0) && (count + 1
+ + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
+ <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
+		/* We can merge the two last dir slots. This operation is
+		here to make this function imitate exactly the equivalent
+		task performed using page_cur_insert_rec, which we use in
+		database recovery to reproduce the work done by this
+		function. To be able to check the correctness of recovery,
+		it is good that the two match exactly. */
+
+ count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
+
+ page_dir_slot_set_n_owned(slot, NULL, 0);
+
+ slot_index--;
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len;
+
+ ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);
+
+ if (UNIV_LIKELY(log_ptr != NULL)) {
+ mach_write_to_4(log_ptr, log_data_len);
+ }
+
+ if (page_is_comp(new_page)) {
+ rec_set_next_offs_new(insert_rec, PAGE_NEW_SUPREMUM);
+ } else {
+ rec_set_next_offs_old(insert_rec, PAGE_OLD_SUPREMUM);
+ }
+
+ slot = page_dir_get_nth_slot(new_page, 1 + slot_index);
+
+ page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
+ page_dir_slot_set_n_owned(slot, NULL, count + 1);
+
+ page_dir_set_n_slots(new_page, NULL, 2 + slot_index);
+ page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP, heap_top);
+ page_dir_set_n_heap(new_page, NULL, PAGE_HEAP_NO_USER_LOW + n_recs);
+ page_header_set_field(new_page, NULL, PAGE_N_RECS, n_recs);
+
+ page_header_set_ptr(new_page, NULL, PAGE_LAST_INSERT, NULL);
+ page_header_set_field(new_page, NULL, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(new_page, NULL, PAGE_N_DIRECTION, 0);
+
+ /* Restore the log mode */
+
+ mtr_set_log_mode(mtr, log_mode);
+}
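
The copy loop above emits a new directory slot after every
(PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2 records and leaves the remainder, plus
the supremum itself, owned by the supremum's slot. A standalone sketch of
that arithmetic (MAX_OWNED mirrors PAGE_DIR_SLOT_MAX_N_OWNED, i.e. 8):

	#include <stdio.h>

	int main(void)
	{
		const int MAX_OWNED = 8, group = (MAX_OWNED + 1) / 2; /* 4 */
		int n_recs = 11, count = 0, slots = 0;

		for (int i = 0; i < n_recs; i++) {
			if (++count == group) {
				slots++;	/* slot points at this record */
				count = 0;
			}
		}
		printf("%d full slots, supremum slot owns %d records\n",
		       slots, count + 1);	/* 2 full slots, 3 + 1 owned */
		return 0;
	}
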
+
+/***********************************************************//**
+Writes log record of a record delete on a page. */
+UNIV_INLINE
+void
+page_cur_delete_rec_write_log(
+/*==========================*/
+ rec_t* rec, /*!< in: record to be deleted */
+ const dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+{
+ byte* log_ptr;
+
+ ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+
+ log_ptr = mlog_open_and_write_index(mtr, rec, index,
+ page_rec_is_comp(rec)
+ ? MLOG_COMP_REC_DELETE
+ : MLOG_REC_DELETE, 2);
+
+ if (!log_ptr) {
+ /* Logging in mtr is switched off during crash recovery:
+ in that case mlog_open returns NULL */
+ return;
+ }
+
+ /* Write the cursor rec offset as a 2-byte ulint */
+ mach_write_to_2(log_ptr, page_offset(rec));
+
+ mlog_close(mtr, log_ptr + 2);
+}
+#else /* !UNIV_HOTBACKUP */
+# define page_cur_delete_rec_write_log(rec,index,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses log record of a record delete on a page.
+@return pointer to record end or NULL */
+UNIV_INTERN
+byte*
+page_cur_parse_delete_rec(
+/*======================*/
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in: page or NULL */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr or NULL */
+{
+ ulint offset;
+ page_cur_t cursor;
+
+ if (end_ptr < ptr + 2) {
+
+ return(NULL);
+ }
+
+ /* Read the cursor rec offset as a 2-byte ulint */
+ offset = mach_read_from_2(ptr);
+ ptr += 2;
+
+ ut_a(offset <= UNIV_PAGE_SIZE);
+
+ if (block) {
+ page_t* page = buf_block_get_frame(block);
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_t* rec = page + offset;
+ rec_offs_init(offsets_);
+
+ page_cur_position(rec, block, &cursor);
+ ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page));
+
+ page_cur_delete_rec(&cursor, index,
+ rec_get_offsets(rec, index, offsets_,
+ ULINT_UNDEFINED, &heap),
+ mtr);
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ }
+
+ return(ptr);
+}
+
+/***********************************************************//**
+Deletes a record at the page cursor. The cursor is moved to the next
+record after the deleted one. */
+UNIV_INTERN
+void
+page_cur_delete_rec(
+/*================*/
+ page_cur_t* cursor, /*!< in/out: a page cursor */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const ulint* offsets,/*!< in: rec_get_offsets(
+ cursor->rec, index) */
+ mtr_t* mtr) /*!< in: mini-transaction handle
+ or NULL */
+{
+ page_dir_slot_t* cur_dir_slot;
+ page_dir_slot_t* prev_slot;
+ page_t* page;
+ page_zip_des_t* page_zip;
+ rec_t* current_rec;
+ rec_t* prev_rec = NULL;
+ rec_t* next_rec;
+ ulint cur_slot_no;
+ ulint cur_n_owned;
+ rec_t* rec;
+
+ page = page_cur_get_page(cursor);
+ page_zip = page_cur_get_page_zip(cursor);
+
+ /* page_zip_validate() will fail here when
+ btr_cur_pessimistic_delete() invokes btr_set_min_rec_mark().
+ Then, both "page_zip" and "page" would have the min-rec-mark
+ set on the smallest user record, but "page" would additionally
+ have it set on the smallest-but-one record. Because sloppy
+ page_zip_validate_low() only ignores min-rec-flag differences
+ in the smallest user record, it cannot be used here either. */
+
+ current_rec = cursor->rec;
+ ut_ad(rec_offs_validate(current_rec, index, offsets));
+ ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
+ == index->id || recv_recovery_is_on()
+ || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index)));
+
+ /* The record must not be the supremum or infimum record. */
+ ut_ad(page_rec_is_user_rec(current_rec));
+
+ if (page_get_n_recs(page) == 1 && !recv_recovery_is_on()) {
+ /* Empty the page, unless we are applying the redo log
+ during crash recovery. During normal operation, the
+ page_create_empty() gets logged as one of MLOG_PAGE_CREATE,
+ MLOG_COMP_PAGE_CREATE, MLOG_ZIP_PAGE_COMPRESS. */
+ ut_ad(page_is_leaf(page));
+ /* Usually, this should be the root page,
+ and the whole index tree should become empty.
+ However, this could also be a call in
+ btr_cur_pessimistic_update() to delete the only
+ record in the page and to insert another one. */
+ page_cur_move_to_next(cursor);
+ ut_ad(page_cur_is_after_last(cursor));
+ page_create_empty(page_cur_get_block(cursor),
+ const_cast<dict_index_t*>(index), mtr);
+ return;
+ }
+
+ /* Save to local variables some data associated with current_rec */
+ cur_slot_no = page_dir_find_owner_slot(current_rec);
+ ut_ad(cur_slot_no > 0);
+ cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
+ cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot);
+
+ /* 0. Write the log record */
+ if (mtr != 0) {
+ page_cur_delete_rec_write_log(current_rec, index, mtr);
+ }
+
+ /* 1. Reset the last insert info in the page header and increment
+ the modify clock for the frame */
+
+ page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
+
+ /* The page becomes invalid for optimistic searches: increment the
+ frame modify clock only if there is a mini-transaction covering
+ the change. During IMPORT we allocate local blocks that are not
+ part of the buffer pool. */
+
+ if (mtr != 0) {
+ buf_block_modify_clock_inc(page_cur_get_block(cursor));
+ }
+
+ /* 2. Find the next and the previous record. Note that the cursor is
+ left at the next record. */
+
+ ut_ad(cur_slot_no > 0);
+ prev_slot = page_dir_get_nth_slot(page, cur_slot_no - 1);
+
+ rec = (rec_t*) page_dir_slot_get_rec(prev_slot);
+
+ /* rec now points to the record of the previous directory slot. Look
+ for the immediate predecessor of current_rec in a loop. */
+
+ while (current_rec != rec) {
+ prev_rec = rec;
+ rec = page_rec_get_next(rec);
+ }
+
+ page_cur_move_to_next(cursor);
+ next_rec = cursor->rec;
+
+ /* 3. Remove the record from the linked list of records */
+
+ page_rec_set_next(prev_rec, next_rec);
+
+ /* 4. If the deleted record is pointed to by a dir slot, update the
+ record pointer in slot. In the following if-clause we assume that
+ prev_rec is owned by the same slot, i.e., PAGE_DIR_SLOT_MIN_N_OWNED
+ >= 2. */
+
+#if PAGE_DIR_SLOT_MIN_N_OWNED < 2
+# error "PAGE_DIR_SLOT_MIN_N_OWNED < 2"
+#endif
+ ut_ad(cur_n_owned > 1);
+
+ if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) {
+ page_dir_slot_set_rec(cur_dir_slot, prev_rec);
+ }
+
+ /* 5. Update the number of owned records of the slot */
+
+ page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
+
+ /* 6. Free the memory occupied by the record */
+ btr_blob_dbg_remove_rec(current_rec, const_cast<dict_index_t*>(index),
+ offsets, "delete");
+ page_mem_free(page, page_zip, current_rec, index, offsets);
+
+ /* 7. Now we have decremented the number of owned records of the slot.
+ If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
+ slots. */
+
+ if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
+ page_dir_balance_slot(page, page_zip, cur_slot_no);
+ }
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+}
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+/*******************************************************************//**
+Print the first n numbers, generated by page_cur_lcg_prng() to make sure
+(visually) that it works properly. */
+void
+test_page_cur_lcg_prng(
+/*===================*/
+ int n) /*!< in: print first n numbers */
+{
+ int i;
+ unsigned long long rnd;
+
+ for (i = 0; i < n; i++) {
+ rnd = page_cur_lcg_prng();
+ printf("%llu\t%%2=%llu %%3=%llu %%5=%llu %%7=%llu %%11=%llu\n",
+ rnd,
+ rnd % 2,
+ rnd % 3,
+ rnd % 5,
+ rnd % 7,
+ rnd % 11);
+ }
+}
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/xtradb/page/page0page.cc b/storage/xtradb/page/page0page.cc
new file mode 100644
index 00000000000..bd5fb36af8f
--- /dev/null
+++ b/storage/xtradb/page/page0page.cc
@@ -0,0 +1,2813 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file page/page0page.cc
+Index page routines
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#define THIS_MODULE
+#include "page0page.h"
+#ifdef UNIV_NONINL
+#include "page0page.ic"
+#endif
+#undef THIS_MODULE
+
+#include "page0cur.h"
+#include "page0zip.h"
+#include "buf0buf.h"
+#include "btr0btr.h"
+#ifndef UNIV_HOTBACKUP
+# include "srv0srv.h"
+# include "lock0lock.h"
+# include "fut0lst.h"
+# include "btr0sea.h"
+#endif /* !UNIV_HOTBACKUP */
+
+/* THE INDEX PAGE
+ ==============
+
+The index page consists of a page header which contains the page's
+id and other information. On top of it are the index records
+in a heap, linked into a one-way linear list in alphabetical order.
+
+Just below the page end is an array of pointers, which we call the page
+directory, pointing to about every sixth record in the list. The
+pointers are placed in the directory in the alphabetical order of the
+records pointed to, enabling us to perform a binary search over the
+array. Each slot number I in the directory points to a record whose
+4-bit field contains a count of the records in the linear list between
+pointer I and pointer I - 1, including the record pointed to by
+pointer I and excluding the record pointed to by I - 1. We say that the
+record pointed to by slot I, or slot I itself, owns these records. The
+count is always kept in the range 4 to 8, with the exception that it is
+1 for the first slot and 1 to 8 for the last slot (see
+page_dir_slot_check() below).
+
+An essentially binary search can be performed in the list of index
+records, much as if we had a pointer to every record in the page
+directory. The data structure is, however, more efficient for inserts,
+because most inserts are just pushed onto a heap. Only every 8th insert
+requires a block move in the directory pointer table, which itself is
+quite small. A record is deleted from the page by just taking it off
+the linear list, updating the number-of-owned-records field of the
+record which owns it, and updating the page directory if necessary. A
+special case is the one where the record owns itself. Because the
+overhead of inserts is so small, we may also increase the page size
+from the projected default of 8 kB to 64 kB without too much loss of
+insert efficiency. A bigger page becomes practical when the disk
+transfer rate rises relative to seek and latency times. On the present
+system, the page size is set so that the page transfer time (3 ms) is
+20 % of the disk random access time (15 ms).
+
+When the page is split, merged, or becomes full but contains deleted
+records, we have to reorganize the page.
+
+Assuming a page size of 8 kB, a typical index page of a secondary
+index contains 300 index entries, and the size of the page directory
+is 50 x 4 bytes = 200 bytes. */
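+
+/* A minimal sketch of a lookup over this structure: a binary search
+over the directory slots, followed by a linear scan of the group owned
+by the matching slot. The real search lives in
+page_cur_search_with_match(); here cmp() is a hypothetical comparison
+callback that must report the infimum as smaller and the supremum as
+greater than any sought key. */
+#ifdef UNIV_COMPILE_TEST_FUNCS
+const rec_t*
+test_page_dir_lookup(
+ const page_t* page, /* in: index page */
+ int (*cmp)(const rec_t* rec)) /* in: returns >= 0 iff the
+ sought key is >= rec */
+{
+ ulint low = 0;
+ ulint up = page_dir_get_n_slots(page) - 1;
+ const rec_t* rec;
+
+ /* Binary search invariant: rec(low) <= key < rec(up) */
+ while (up - low > 1) {
+ ulint mid = (low + up) / 2;
+ const rec_t* r = page_dir_slot_get_rec(
+ page_dir_get_nth_slot(page, mid));
+
+ if (cmp(r) >= 0) {
+ low = mid;
+ } else {
+ up = mid;
+ }
+ }
+
+ /* Linear scan of the at most 8 records between the two slots */
+ rec = page_dir_slot_get_rec(page_dir_get_nth_slot(page, low));
+
+ while (cmp(page_rec_get_next_const(rec)) >= 0) {
+ rec = page_rec_get_next_const(rec);
+ }
+
+ return(rec);
+}
+#endif /* UNIV_COMPILE_TEST_FUNCS */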
+
+/***************************************************************//**
+Looks for the directory slot which owns the given record.
+@return the directory slot number */
+UNIV_INTERN
+ulint
+page_dir_find_owner_slot(
+/*=====================*/
+ const rec_t* rec) /*!< in: the physical record */
+{
+ const page_t* page;
+ register uint16 rec_offs_bytes;
+ register const page_dir_slot_t* slot;
+ register const page_dir_slot_t* first_slot;
+ register const rec_t* r = rec;
+
+ ut_ad(page_rec_check(rec));
+
+ page = page_align(rec);
+ first_slot = page_dir_get_nth_slot(page, 0);
+ slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1);
+
+ if (page_is_comp(page)) {
+ while (rec_get_n_owned_new(r) == 0) {
+ r = rec_get_next_ptr_const(r, TRUE);
+ ut_ad(r >= page + PAGE_NEW_SUPREMUM);
+ ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
+ }
+ } else {
+ while (rec_get_n_owned_old(r) == 0) {
+ r = rec_get_next_ptr_const(r, FALSE);
+ ut_ad(r >= page + PAGE_OLD_SUPREMUM);
+ ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
+ }
+ }
+
+ rec_offs_bytes = mach_encode_2(r - page);
+
+ while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) {
+
+ if (UNIV_UNLIKELY(slot == first_slot)) {
+ fprintf(stderr,
+ "InnoDB: Probable data corruption on"
+ " page %lu\n"
+ "InnoDB: Original record ",
+ (ulong) page_get_page_no(page));
+
+ if (page_is_comp(page)) {
+ fputs("(compact record)", stderr);
+ } else {
+ rec_print_old(stderr, rec);
+ }
+
+ fputs("\n"
+ "InnoDB: on that page.\n"
+ "InnoDB: Cannot find the dir slot for record ",
+ stderr);
+ if (page_is_comp(page)) {
+ fputs("(compact record)", stderr);
+ } else {
+ rec_print_old(stderr, page
+ + mach_decode_2(rec_offs_bytes));
+ }
+ fputs("\n"
+ "InnoDB: on that page!\n", stderr);
+
+ buf_page_print(page, 0, 0);
+
+ ut_error;
+ }
+
+ slot += PAGE_DIR_SLOT_SIZE;
+ }
+
+ return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE);
+}
+
+/**************************************************************//**
+Used to check the consistency of a directory slot.
+@return TRUE on success */
+static
+ibool
+page_dir_slot_check(
+/*================*/
+ const page_dir_slot_t* slot) /*!< in: slot */
+{
+ const page_t* page;
+ ulint n_slots;
+ ulint n_owned;
+
+ ut_a(slot);
+
+ page = page_align(slot);
+
+ n_slots = page_dir_get_n_slots(page);
+
+ ut_a(slot <= page_dir_get_nth_slot(page, 0));
+ ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1));
+
+ ut_a(page_rec_check(page_dir_slot_get_rec(slot)));
+
+ if (page_is_comp(page)) {
+ n_owned = rec_get_n_owned_new(page_dir_slot_get_rec(slot));
+ } else {
+ n_owned = rec_get_n_owned_old(page_dir_slot_get_rec(slot));
+ }
+
+ if (slot == page_dir_get_nth_slot(page, 0)) {
+ ut_a(n_owned == 1);
+ } else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) {
+ ut_a(n_owned >= 1);
+ ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
+ } else {
+ ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
+ ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************//**
+Sets the max trx id field value. */
+UNIV_INTERN
+void
+page_set_max_trx_id(
+/*================*/
+ buf_block_t* block, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in/out: mini-transaction, or NULL */
+{
+ page_t* page = buf_block_get_frame(block);
+#ifndef UNIV_HOTBACKUP
+ ut_ad(!mtr || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+#endif /* !UNIV_HOTBACKUP */
+
+ /* It is not necessary to write this change to the redo log, as
+ during a database recovery we assume that the max trx id of every
+ page is the maximum trx id assigned before the crash. */
+
+ if (page_zip) {
+ mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
+ page_zip_write_header(page_zip,
+ page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
+ 8, mtr);
+#ifndef UNIV_HOTBACKUP
+ } else if (mtr) {
+ mlog_write_ull(page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
+ trx_id, mtr);
+#endif /* !UNIV_HOTBACKUP */
+ } else {
+ mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
+ }
+}
+
+/************************************************************//**
+Allocates a block of memory from the heap of an index page.
+@return pointer to start of allocated buffer, or NULL if allocation fails */
+UNIV_INTERN
+byte*
+page_mem_alloc_heap(
+/*================*/
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
+ space available for inserting the record,
+ or NULL */
+ ulint need, /*!< in: total number of bytes needed */
+ ulint* heap_no)/*!< out: this contains the heap number
+ of the allocated record
+ if allocation succeeds */
+{
+ byte* block;
+ ulint avl_space;
+
+ ut_ad(page && heap_no);
+
+ avl_space = page_get_max_insert_size(page, 1);
+
+ if (avl_space >= need) {
+ block = page_header_get_ptr(page, PAGE_HEAP_TOP);
+
+ page_header_set_ptr(page, page_zip, PAGE_HEAP_TOP,
+ block + need);
+ *heap_no = page_dir_get_n_heap(page);
+
+ page_dir_set_n_heap(page, page_zip, 1 + *heap_no);
+
+ return(block);
+ }
+
+ return(NULL);
+}
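+
+/* A hypothetical caller of page_mem_alloc_heap(), reserving rec_size
+bytes from the page heap. The real callers are the record insertion
+routines; the fallback described in the comment below is only
+sketched. */
+#ifdef UNIV_COMPILE_TEST_FUNCS
+byte*
+test_page_mem_alloc(
+ page_t* page, /* in/out: index page */
+ page_zip_des_t* page_zip, /* in/out: compressed page, or NULL */
+ ulint rec_size) /* in: bytes needed for the record */
+{
+ ulint heap_no;
+ byte* buf = page_mem_alloc_heap(page, page_zip,
+ rec_size, &heap_no);
+
+ if (buf != NULL) {
+ /* The record body may now be written at buf, and
+ heap_no identifies it within the page heap. */
+ return(buf);
+ }
+
+ /* No room between PAGE_HEAP_TOP and the page directory: a
+ real caller would try the PAGE_FREE list, or reorganize or
+ split the page. */
+ return(NULL);
+}
+#endif /* UNIV_COMPILE_TEST_FUNCS */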
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Writes a log record of page creation. */
+UNIV_INLINE
+void
+page_create_write_log(
+/*==================*/
+ buf_frame_t* frame, /*!< in: a buffer frame where the page is
+ created */
+ mtr_t* mtr, /*!< in: mini-transaction handle */
+ ibool comp) /*!< in: TRUE=compact page format */
+{
+ mlog_write_initial_log_record(frame, comp
+ ? MLOG_COMP_PAGE_CREATE
+ : MLOG_PAGE_CREATE, mtr);
+}
+#else /* !UNIV_HOTBACKUP */
+# define page_create_write_log(frame,mtr,comp) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses a redo log record of creating a page.
+@return end of log record or NULL */
+UNIV_INTERN
+byte*
+page_parse_create(
+/*==============*/
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
+ ulint comp, /*!< in: nonzero=compact page format */
+ buf_block_t* block, /*!< in: block or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
+{
+ ut_ad(ptr && end_ptr);
+
+ /* The redo log record is empty, except for the initial log record part */
+
+ if (block) {
+ page_create(block, mtr, comp);
+ }
+
+ return(ptr);
+}
+
+/**********************************************************//**
+The index page creation function.
+@return pointer to the page */
+static
+page_t*
+page_create_low(
+/*============*/
+ buf_block_t* block, /*!< in: a buffer block where the
+ page is created */
+ ulint comp) /*!< in: nonzero=compact page format */
+{
+ page_dir_slot_t* slot;
+ mem_heap_t* heap;
+ dtuple_t* tuple;
+ dfield_t* field;
+ byte* heap_top;
+ rec_t* infimum_rec;
+ rec_t* supremum_rec;
+ page_t* page;
+ dict_index_t* index;
+ ulint* offsets;
+
+ ut_ad(block);
+#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
+# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
+#endif
+#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA
+# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
+#endif
+
+ /* The infimum and supremum records use a dummy index. */
+ if (UNIV_LIKELY(comp)) {
+ index = dict_ind_compact;
+ } else {
+ index = dict_ind_redundant;
+ }
+
+ /* 1. INCREMENT MODIFY CLOCK */
+ buf_block_modify_clock_inc(block);
+
+ page = buf_block_get_frame(block);
+
+ fil_page_set_type(page, FIL_PAGE_INDEX);
+
+ heap = mem_heap_create(200);
+
+ /* 2. CREATE THE INFIMUM AND SUPREMUM RECORDS */
+
+ /* Create first a data tuple for infimum record */
+ tuple = dtuple_create(heap, 1);
+ dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM);
+ field = dtuple_get_nth_field(tuple, 0);
+
+ dfield_set_data(field, "infimum", 8);
+ dtype_set(dfield_get_type(field),
+ DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8);
+ /* Set the corresponding physical record to its place in the page
+ record heap */
+
+ heap_top = page + PAGE_DATA;
+
+ infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
+
+ if (UNIV_LIKELY(comp)) {
+ ut_a(infimum_rec == page + PAGE_NEW_INFIMUM);
+
+ rec_set_n_owned_new(infimum_rec, NULL, 1);
+ rec_set_heap_no_new(infimum_rec, 0);
+ } else {
+ ut_a(infimum_rec == page + PAGE_OLD_INFIMUM);
+
+ rec_set_n_owned_old(infimum_rec, 1);
+ rec_set_heap_no_old(infimum_rec, 0);
+ }
+
+ offsets = rec_get_offsets(infimum_rec, index, NULL,
+ ULINT_UNDEFINED, &heap);
+
+ heap_top = rec_get_end(infimum_rec, offsets);
+
+ /* Create then a tuple for supremum */
+
+ tuple = dtuple_create(heap, 1);
+ dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM);
+ field = dtuple_get_nth_field(tuple, 0);
+
+ dfield_set_data(field, "supremum", comp ? 8 : 9);
+ dtype_set(dfield_get_type(field),
+ DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9);
+
+ supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
+
+ if (UNIV_LIKELY(comp)) {
+ ut_a(supremum_rec == page + PAGE_NEW_SUPREMUM);
+
+ rec_set_n_owned_new(supremum_rec, NULL, 1);
+ rec_set_heap_no_new(supremum_rec, 1);
+ } else {
+ ut_a(supremum_rec == page + PAGE_OLD_SUPREMUM);
+
+ rec_set_n_owned_old(supremum_rec, 1);
+ rec_set_heap_no_old(supremum_rec, 1);
+ }
+
+ offsets = rec_get_offsets(supremum_rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ heap_top = rec_get_end(supremum_rec, offsets);
+
+ ut_ad(heap_top == page
+ + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END));
+
+ mem_heap_free(heap);
+
+ /* 3. INITIALIZE THE PAGE */
+
+ page_header_set_field(page, NULL, PAGE_N_DIR_SLOTS, 2);
+ page_header_set_ptr(page, NULL, PAGE_HEAP_TOP, heap_top);
+ page_header_set_field(page, NULL, PAGE_N_HEAP, comp
+ ? 0x8000 | PAGE_HEAP_NO_USER_LOW
+ : PAGE_HEAP_NO_USER_LOW);
+ page_header_set_ptr(page, NULL, PAGE_FREE, NULL);
+ page_header_set_field(page, NULL, PAGE_GARBAGE, 0);
+ page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, NULL);
+ page_header_set_field(page, NULL, PAGE_DIRECTION, PAGE_NO_DIRECTION);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+ page_header_set_field(page, NULL, PAGE_N_RECS, 0);
+ page_set_max_trx_id(block, NULL, 0, NULL);
+ memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
+ - page_offset(heap_top));
+
+ /* 4. SET POINTERS IN RECORDS AND DIR SLOTS */
+
+ /* Set the slots to point to infimum and supremum. */
+
+ slot = page_dir_get_nth_slot(page, 0);
+ page_dir_slot_set_rec(slot, infimum_rec);
+
+ slot = page_dir_get_nth_slot(page, 1);
+ page_dir_slot_set_rec(slot, supremum_rec);
+
+ /* Set the next pointers in infimum and supremum */
+
+ if (UNIV_LIKELY(comp)) {
+ rec_set_next_offs_new(infimum_rec, PAGE_NEW_SUPREMUM);
+ rec_set_next_offs_new(supremum_rec, 0);
+ } else {
+ rec_set_next_offs_old(infimum_rec, PAGE_OLD_SUPREMUM);
+ rec_set_next_offs_old(supremum_rec, 0);
+ }
+
+ return(page);
+}
+
+/**********************************************************//**
+Create an uncompressed B-tree index page.
+@return pointer to the page */
+UNIV_INTERN
+page_t*
+page_create(
+/*========*/
+ buf_block_t* block, /*!< in: a buffer block where the
+ page is created */
+ mtr_t* mtr, /*!< in: mini-transaction handle */
+ ulint comp) /*!< in: nonzero=compact page format */
+{
+ page_create_write_log(buf_block_get_frame(block), mtr, comp);
+ return(page_create_low(block, comp));
+}
+
+/**********************************************************//**
+Create a compressed B-tree index page.
+@return pointer to the page */
+UNIV_INTERN
+page_t*
+page_create_zip(
+/*============*/
+ buf_block_t* block, /*!< in/out: a buffer frame where the
+ page is created */
+ dict_index_t* index, /*!< in: the index of the page */
+ ulint level, /*!< in: the B-tree level of the page */
+ trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ page_t* page;
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+
+ ut_ad(block);
+ ut_ad(page_zip);
+ ut_ad(index);
+ ut_ad(dict_table_is_comp(index->table));
+
+ page = page_create_low(block, TRUE);
+ mach_write_to_2(PAGE_HEADER + PAGE_LEVEL + page, level);
+ mach_write_to_8(PAGE_HEADER + PAGE_MAX_TRX_ID + page, max_trx_id);
+
+ if (!page_zip_compress(page_zip, page, index,
+ page_zip_level, mtr)) {
+ /* The compression of a newly created page
+ should always succeed. */
+ ut_error;
+ }
+
+ return(page);
+}
+
+/**********************************************************//**
+Empty a previously created B-tree index page. */
+UNIV_INTERN
+void
+page_create_empty(
+/*==============*/
+ buf_block_t* block, /*!< in/out: B-tree block */
+ dict_index_t* index, /*!< in: the index of the page */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ trx_id_t max_trx_id = 0;
+ const page_t* page = buf_block_get_frame(block);
+ page_zip_des_t* page_zip= buf_block_get_page_zip(block);
+
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+
+ if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
+ max_trx_id = page_get_max_trx_id(page);
+ ut_ad(max_trx_id);
+ }
+
+ if (page_zip) {
+ page_create_zip(block, index,
+ page_header_get_field(page, PAGE_LEVEL),
+ max_trx_id, mtr);
+ } else {
+ page_create(block, mtr, page_is_comp(page));
+
+ if (max_trx_id) {
+ page_update_max_trx_id(
+ block, page_zip, max_trx_id, mtr);
+ }
+ }
+}
+
+/*************************************************************//**
+Differs from page_copy_rec_list_end(), because this function does not
+touch the lock table or the max trx id on the page, nor compress the page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit(). */
+UNIV_INTERN
+void
+page_copy_rec_list_end_no_locks(
+/*============================*/
+ buf_block_t* new_block, /*!< in: index page to copy to */
+ buf_block_t* block, /*!< in: index page of rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_t* new_page = buf_block_get_frame(new_block);
+ page_cur_t cur1;
+ rec_t* cur2;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ page_cur_position(rec, block, &cur1);
+
+ if (page_cur_is_before_first(&cur1)) {
+
+ page_cur_move_to_next(&cur1);
+ }
+
+ btr_assert_not_corrupted(new_block, index);
+ ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
+ ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
+ (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
+
+ cur2 = page_get_infimum_rec(buf_block_get_frame(new_block));
+
+ /* Copy records from the original page to the new page */
+
+ while (!page_cur_is_after_last(&cur1)) {
+ rec_t* cur1_rec = page_cur_get_rec(&cur1);
+ rec_t* ins_rec;
+ offsets = rec_get_offsets(cur1_rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ ins_rec = page_cur_insert_rec_low(cur2, index,
+ cur1_rec, offsets, mtr);
+ if (UNIV_UNLIKELY(!ins_rec)) {
+ /* Track an assertion failure reported on the mailing
+ list on June 18th, 2003 */
+
+ buf_page_print(new_page, 0,
+ BUF_PAGE_PRINT_NO_CRASH);
+ buf_page_print(page_align(rec), 0,
+ BUF_PAGE_PRINT_NO_CRASH);
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+ "InnoDB: rec offset %lu, cur1 offset %lu,"
+ " cur2 offset %lu\n",
+ (ulong) page_offset(rec),
+ (ulong) page_offset(page_cur_get_rec(&cur1)),
+ (ulong) page_offset(cur2));
+ ut_error;
+ }
+
+ page_cur_move_to_next(&cur1);
+ cur2 = ins_rec;
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Copies records from page to new_page, from a given record onward,
+including that record. Infimum and supremum records are not copied.
+The records are copied to the start of the record list on new_page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return pointer to the original successor of the infimum record on
+new_page, or NULL on zip overflow (new_block will be decompressed) */
+UNIV_INTERN
+rec_t*
+page_copy_rec_list_end(
+/*===================*/
+ buf_block_t* new_block, /*!< in/out: index page to copy to */
+ buf_block_t* block, /*!< in: index page containing rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_t* new_page = buf_block_get_frame(new_block);
+ page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
+ page_t* page = page_align(rec);
+ rec_t* ret = page_rec_get_next(
+ page_get_infimum_rec(new_page));
+ ulint log_mode = 0; /* remove warning */
+
+#ifdef UNIV_ZIP_DEBUG
+ if (new_page_zip) {
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+ ut_a(page_zip);
+
+ /* Strict page_zip_validate() may fail here.
+ Furthermore, btr_compress() may set FIL_PAGE_PREV to
+ FIL_NULL on new_page while leaving it intact on
+ new_page_zip. So, we cannot validate new_page_zip. */
+ ut_a(page_zip_validate_low(page_zip, page, index, TRUE));
+ }
+#endif /* UNIV_ZIP_DEBUG */
+ ut_ad(buf_block_get_frame(block) == page);
+ ut_ad(page_is_leaf(page) == page_is_leaf(new_page));
+ ut_ad(page_is_comp(page) == page_is_comp(new_page));
+ /* Here, "ret" may be pointing to a user record or the
+ predefined supremum record. */
+
+ if (new_page_zip) {
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+ }
+
+ if (page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW) {
+ page_copy_rec_list_end_to_created_page(new_page, rec,
+ index, mtr);
+ } else {
+ page_copy_rec_list_end_no_locks(new_block, block, rec,
+ index, mtr);
+ }
+
+ /* Update PAGE_MAX_TRX_ID on the uncompressed page.
+ Modifications will be redo logged and copied to the compressed
+ page in page_zip_compress() or page_zip_reorganize() below. */
+ if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
+ page_update_max_trx_id(new_block, NULL,
+ page_get_max_trx_id(page), mtr);
+ }
+
+ if (new_page_zip) {
+ mtr_set_log_mode(mtr, log_mode);
+
+ if (!page_zip_compress(new_page_zip, new_page,
+ index, page_zip_level, mtr)) {
+ /* Before trying to reorganize the page,
+ store the number of preceding records on the page. */
+ ulint ret_pos
+ = page_rec_get_n_recs_before(ret);
+ /* Before copying, "ret" was the successor of
+ the predefined infimum record. It must still
+ have at least one predecessor (the predefined
+ infimum record, or a freshly copied record
+ that is smaller than "ret"). */
+ ut_a(ret_pos > 0);
+
+ if (!page_zip_reorganize(new_block, index, mtr)) {
+
+ btr_blob_dbg_remove(new_page, index,
+ "copy_end_reorg_fail");
+ if (!page_zip_decompress(new_page_zip,
+ new_page, FALSE)) {
+ ut_error;
+ }
+ ut_ad(page_validate(new_page, index));
+ btr_blob_dbg_add(new_page, index,
+ "copy_end_reorg_fail");
+ return(NULL);
+ } else {
+ /* The page was reorganized:
+ Seek to ret_pos. */
+ ret = new_page + PAGE_NEW_INFIMUM;
+
+ do {
+ ret = rec_get_next_ptr(ret, TRUE);
+ } while (--ret_pos);
+ }
+ }
+ }
+
+ /* Update the lock table and possible hash index */
+
+ lock_move_rec_list_end(new_block, block, rec);
+
+ btr_search_move_or_delete_hash_entries(new_block, block, index);
+
+ return(ret);
+}
+
+/*************************************************************//**
+Copies records from page to new_page, up to the given record,
+NOT including that record. Infimum and supremum records are not copied.
+The records are copied to the end of the record list on new_page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return pointer to the original predecessor of the supremum record on
+new_page, or NULL on zip overflow (new_block will be decompressed) */
+UNIV_INTERN
+rec_t*
+page_copy_rec_list_start(
+/*=====================*/
+ buf_block_t* new_block, /*!< in/out: index page to copy to */
+ buf_block_t* block, /*!< in: index page containing rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_t* new_page = buf_block_get_frame(new_block);
+ page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
+ page_cur_t cur1;
+ rec_t* cur2;
+ ulint log_mode = 0 /* remove warning */;
+ mem_heap_t* heap = NULL;
+ rec_t* ret
+ = page_rec_get_prev(page_get_supremum_rec(new_page));
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ /* Here, "ret" may be pointing to a user record or the
+ predefined infimum record. */
+
+ if (page_rec_is_infimum(rec)) {
+
+ return(ret);
+ }
+
+ if (new_page_zip) {
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+ }
+
+ page_cur_set_before_first(block, &cur1);
+ page_cur_move_to_next(&cur1);
+
+ cur2 = ret;
+
+ /* Copy records from the original page to the new page */
+
+ while (page_cur_get_rec(&cur1) != rec) {
+ rec_t* cur1_rec = page_cur_get_rec(&cur1);
+ offsets = rec_get_offsets(cur1_rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ cur2 = page_cur_insert_rec_low(cur2, index,
+ cur1_rec, offsets, mtr);
+ ut_a(cur2);
+
+ page_cur_move_to_next(&cur1);
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ /* Update PAGE_MAX_TRX_ID on the uncompressed page.
+ Modifications will be redo logged and copied to the compressed
+ page in page_zip_compress() or page_zip_reorganize() below. */
+ if (dict_index_is_sec_or_ibuf(index)
+ && page_is_leaf(page_align(rec))) {
+ page_update_max_trx_id(new_block, NULL,
+ page_get_max_trx_id(page_align(rec)),
+ mtr);
+ }
+
+ if (new_page_zip) {
+ mtr_set_log_mode(mtr, log_mode);
+
+ DBUG_EXECUTE_IF("page_copy_rec_list_start_compress_fail",
+ goto zip_reorganize;);
+
+ if (!page_zip_compress(new_page_zip, new_page, index,
+ page_zip_level, mtr)) {
+
+ ulint ret_pos;
+#ifndef DBUG_OFF
+zip_reorganize:
+#endif /* DBUG_OFF */
+ /* Before trying to reorganize the page,
+ store the number of preceding records on the page. */
+ ret_pos = page_rec_get_n_recs_before(ret);
+ /* Before copying, "ret" was the predecessor
+ of the predefined supremum record. If it was
+ the predefined infimum record, then it would
+ still be the infimum, and we would have
+ ret_pos == 0. */
+
+ if (UNIV_UNLIKELY
+ (!page_zip_reorganize(new_block, index, mtr))) {
+
+ btr_blob_dbg_remove(new_page, index,
+ "copy_start_reorg_fail");
+ if (UNIV_UNLIKELY
+ (!page_zip_decompress(new_page_zip,
+ new_page, FALSE))) {
+ ut_error;
+ }
+ ut_ad(page_validate(new_page, index));
+ btr_blob_dbg_add(new_page, index,
+ "copy_start_reorg_fail");
+ return(NULL);
+ }
+
+ /* The page was reorganized: Seek to ret_pos. */
+ ret = page_rec_get_nth(new_page, ret_pos);
+ }
+ }
+
+ /* Update the lock table and possible hash index */
+
+ lock_move_rec_list_start(new_block, block, rec, ret);
+
+ btr_search_move_or_delete_hash_entries(new_block, block, index);
+
+ return(ret);
+}
+
+/**********************************************************//**
+Writes a log record of a record list end or start deletion. */
+UNIV_INLINE
+void
+page_delete_rec_list_write_log(
+/*===========================*/
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ byte type, /*!< in: operation type:
+ MLOG_LIST_END_DELETE, ... */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ byte* log_ptr;
+ ut_ad(type == MLOG_LIST_END_DELETE
+ || type == MLOG_LIST_START_DELETE
+ || type == MLOG_COMP_LIST_END_DELETE
+ || type == MLOG_COMP_LIST_START_DELETE);
+
+ log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2);
+ if (log_ptr) {
+ /* Write the parameter as a 2-byte ulint */
+ mach_write_to_2(log_ptr, page_offset(rec));
+ mlog_close(mtr, log_ptr + 2);
+ }
+}
+#else /* !UNIV_HOTBACKUP */
+# define page_delete_rec_list_write_log(rec,index,type,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************//**
+Parses a log record of a record list end or start deletion.
+@return end of log record or NULL */
+UNIV_INTERN
+byte*
+page_parse_delete_rec_list(
+/*=======================*/
+ byte type, /*!< in: MLOG_LIST_END_DELETE,
+ MLOG_LIST_START_DELETE,
+ MLOG_COMP_LIST_END_DELETE or
+ MLOG_COMP_LIST_START_DELETE */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in/out: buffer block or NULL */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr or NULL */
+{
+ page_t* page;
+ ulint offset;
+
+ ut_ad(type == MLOG_LIST_END_DELETE
+ || type == MLOG_LIST_START_DELETE
+ || type == MLOG_COMP_LIST_END_DELETE
+ || type == MLOG_COMP_LIST_START_DELETE);
+
+ /* Read the record offset as a 2-byte ulint */
+
+ if (end_ptr < ptr + 2) {
+
+ return(NULL);
+ }
+
+ offset = mach_read_from_2(ptr);
+ ptr += 2;
+
+ if (!block) {
+
+ return(ptr);
+ }
+
+ page = buf_block_get_frame(block);
+
+ ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+
+ if (type == MLOG_LIST_END_DELETE
+ || type == MLOG_COMP_LIST_END_DELETE) {
+ page_delete_rec_list_end(page + offset, block, index,
+ ULINT_UNDEFINED, ULINT_UNDEFINED,
+ mtr);
+ } else {
+ page_delete_rec_list_start(page + offset, block, index, mtr);
+ }
+
+ return(ptr);
+}
+
+/*************************************************************//**
+Deletes records from a page from a given record onward, including that record.
+The infimum and supremum records are not deleted. */
+UNIV_INTERN
+void
+page_delete_rec_list_end(
+/*=====================*/
+ rec_t* rec, /*!< in: pointer to record on page */
+ buf_block_t* block, /*!< in: buffer block of the page */
+ dict_index_t* index, /*!< in: record descriptor */
+ ulint n_recs, /*!< in: number of records to delete,
+ or ULINT_UNDEFINED if not known */
+ ulint size, /*!< in: the sum of the sizes of the
+ records in the end of the chain to
+ delete, or ULINT_UNDEFINED if not known */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_dir_slot_t*slot;
+ ulint slot_index;
+ rec_t* last_rec;
+ rec_t* prev_rec;
+ ulint n_owned;
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+ page_t* page = page_align(rec);
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
+ ut_ad(!page_zip || page_rec_is_comp(rec));
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (page_rec_is_supremum(rec)) {
+ ut_ad(n_recs == 0 || n_recs == ULINT_UNDEFINED);
+ /* Nothing to do, there are no records bigger than the
+ page supremum. */
+ return;
+ }
+
+ if (recv_recovery_is_on()) {
+ /* If we are replaying a redo log record, we must
+ replay it exactly. Since MySQL 5.6.11, we should be
+ generating a redo log record for page creation if
+ the page would become empty. Thus, this branch should
+ only be executed when applying redo log that was
+ generated by an older version of MySQL. */
+ } else if (page_rec_is_infimum(rec)
+ || n_recs == page_get_n_recs(page)) {
+delete_all:
+ /* We are deleting all records. */
+ page_create_empty(block, index, mtr);
+ return;
+ } else if (page_is_comp(page)) {
+ if (page_rec_get_next_low(page + PAGE_NEW_INFIMUM, 1) == rec) {
+ /* We are deleting everything from the first
+ user record onwards. */
+ goto delete_all;
+ }
+ } else {
+ if (page_rec_get_next_low(page + PAGE_OLD_INFIMUM, 0) == rec) {
+ /* We are deleting everything from the first
+ user record onwards. */
+ goto delete_all;
+ }
+ }
+
+ /* Reset the last insert info in the page header and increment
+ the modify clock for the frame */
+
+ page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
+
+ /* The page gets invalid for optimistic searches: increment the
+ frame modify clock */
+
+ buf_block_modify_clock_inc(block);
+
+ page_delete_rec_list_write_log(rec, index, page_is_comp(page)
+ ? MLOG_COMP_LIST_END_DELETE
+ : MLOG_LIST_END_DELETE, mtr);
+
+ if (page_zip) {
+ ulint log_mode;
+
+ ut_a(page_is_comp(page));
+ /* Individual deletes are not logged */
+
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+
+ do {
+ page_cur_t cur;
+ page_cur_position(rec, block, &cur);
+
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ rec = rec_get_next_ptr(rec, TRUE);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+ page_cur_delete_rec(&cur, index, offsets, mtr);
+ } while (page_offset(rec) != PAGE_NEW_SUPREMUM);
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ /* Restore log mode */
+
+ mtr_set_log_mode(mtr, log_mode);
+ return;
+ }
+
+ prev_rec = page_rec_get_prev(rec);
+
+ last_rec = page_rec_get_prev(page_get_supremum_rec(page));
+
+ if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) {
+ rec_t* rec2 = rec;
+ /* Calculate the sum of sizes and the number of records */
+ size = 0;
+ n_recs = 0;
+
+ do {
+ ulint s;
+ offsets = rec_get_offsets(rec2, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ s = rec_offs_size(offsets);
+ ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
+ < UNIV_PAGE_SIZE);
+ ut_ad(size + s < UNIV_PAGE_SIZE);
+ size += s;
+ n_recs++;
+
+ rec2 = page_rec_get_next(rec2);
+ } while (!page_rec_is_supremum(rec2));
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ }
+
+ ut_ad(size < UNIV_PAGE_SIZE);
+
+ /* Update the page directory; there is no need to balance the number
+ of the records owned by the supremum record, as it is allowed to be
+ less than PAGE_DIR_SLOT_MIN_N_OWNED */
+
+ if (page_is_comp(page)) {
+ rec_t* rec2 = rec;
+ ulint count = 0;
+
+ while (rec_get_n_owned_new(rec2) == 0) {
+ count++;
+
+ rec2 = rec_get_next_ptr(rec2, TRUE);
+ }
+
+ ut_ad(rec_get_n_owned_new(rec2) > count);
+
+ n_owned = rec_get_n_owned_new(rec2) - count;
+ slot_index = page_dir_find_owner_slot(rec2);
+ ut_ad(slot_index > 0);
+ slot = page_dir_get_nth_slot(page, slot_index);
+ } else {
+ rec_t* rec2 = rec;
+ ulint count = 0;
+
+ while (rec_get_n_owned_old(rec2) == 0) {
+ count++;
+
+ rec2 = rec_get_next_ptr(rec2, FALSE);
+ }
+
+ ut_ad(rec_get_n_owned_old(rec2) > count);
+
+ n_owned = rec_get_n_owned_old(rec2) - count;
+ slot_index = page_dir_find_owner_slot(rec2);
+ ut_ad(slot_index > 0);
+ slot = page_dir_get_nth_slot(page, slot_index);
+ }
+
+ page_dir_slot_set_rec(slot, page_get_supremum_rec(page));
+ page_dir_slot_set_n_owned(slot, NULL, n_owned);
+
+ page_dir_set_n_slots(page, NULL, slot_index + 1);
+
+ /* Remove the record chain segment from the record chain */
+ page_rec_set_next(prev_rec, page_get_supremum_rec(page));
+
+ btr_blob_dbg_op(page, rec, index, "delete_end",
+ btr_blob_dbg_remove_rec);
+
+ /* Catenate the deleted chain segment to the page free list */
+
+ page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE));
+ page_header_set_ptr(page, NULL, PAGE_FREE, rec);
+
+ page_header_set_field(page, NULL, PAGE_GARBAGE, size
+ + page_header_get_field(page, PAGE_GARBAGE));
+
+ page_header_set_field(page, NULL, PAGE_N_RECS,
+ (ulint)(page_get_n_recs(page) - n_recs));
+}
+
+/*************************************************************//**
+Deletes records from page, up to the given record, NOT including
+that record. Infimum and supremum records are not deleted. */
+UNIV_INTERN
+void
+page_delete_rec_list_start(
+/*=======================*/
+ rec_t* rec, /*!< in: record on page */
+ buf_block_t* block, /*!< in: buffer block of the page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_cur_t cur1;
+ ulint log_mode;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ mem_heap_t* heap = NULL;
+ byte type;
+
+ rec_offs_init(offsets_);
+
+ ut_ad((ibool) !!page_rec_is_comp(rec)
+ == dict_table_is_comp(index->table));
+#ifdef UNIV_ZIP_DEBUG
+ {
+ page_zip_des_t* page_zip= buf_block_get_page_zip(block);
+ page_t* page = buf_block_get_frame(block);
+
+ /* page_zip_validate() would detect a min_rec_mark mismatch
+ in btr_page_split_and_insert()
+ between btr_attach_half_pages() and insert_page = ...
+ when btr_page_get_split_rec_to_left() holds
+ (direction == FSP_DOWN). */
+ ut_a(!page_zip
+ || page_zip_validate_low(page_zip, page, index, TRUE));
+ }
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (page_rec_is_infimum(rec)) {
+ return;
+ }
+
+ if (page_rec_is_supremum(rec)) {
+ /* We are deleting all records. */
+ page_create_empty(block, index, mtr);
+ return;
+ }
+
+ if (page_rec_is_comp(rec)) {
+ type = MLOG_COMP_LIST_START_DELETE;
+ } else {
+ type = MLOG_LIST_START_DELETE;
+ }
+
+ page_delete_rec_list_write_log(rec, index, type, mtr);
+
+ page_cur_set_before_first(block, &cur1);
+ page_cur_move_to_next(&cur1);
+
+ /* Individual deletes are not logged */
+
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+
+ while (page_cur_get_rec(&cur1) != rec) {
+ offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
+ offsets, ULINT_UNDEFINED, &heap);
+ page_cur_delete_rec(&cur1, index, offsets, mtr);
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ /* Restore log mode */
+
+ mtr_set_log_mode(mtr, log_mode);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Moves record list end to another page. Moved records include
+split_rec.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return TRUE on success; FALSE on compression failure (new_block will
+be decompressed) */
+UNIV_INTERN
+ibool
+page_move_rec_list_end(
+/*===================*/
+ buf_block_t* new_block, /*!< in/out: index page where to move */
+ buf_block_t* block, /*!< in: index page from where to move */
+ rec_t* split_rec, /*!< in: first record to move */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_t* new_page = buf_block_get_frame(new_block);
+ ulint old_data_size;
+ ulint new_data_size;
+ ulint old_n_recs;
+ ulint new_n_recs;
+
+ old_data_size = page_get_data_size(new_page);
+ old_n_recs = page_get_n_recs(new_page);
+#ifdef UNIV_ZIP_DEBUG
+ {
+ page_zip_des_t* new_page_zip
+ = buf_block_get_page_zip(new_block);
+ page_zip_des_t* page_zip
+ = buf_block_get_page_zip(block);
+ ut_a(!new_page_zip == !page_zip);
+ ut_a(!new_page_zip
+ || page_zip_validate(new_page_zip, new_page, index));
+ ut_a(!page_zip
+ || page_zip_validate(page_zip, page_align(split_rec),
+ index));
+ }
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (UNIV_UNLIKELY(!page_copy_rec_list_end(new_block, block,
+ split_rec, index, mtr))) {
+ return(FALSE);
+ }
+
+ new_data_size = page_get_data_size(new_page);
+ new_n_recs = page_get_n_recs(new_page);
+
+ ut_ad(new_data_size >= old_data_size);
+
+ page_delete_rec_list_end(split_rec, block, index,
+ new_n_recs - old_n_recs,
+ new_data_size - old_data_size, mtr);
+
+ return(TRUE);
+}
+
+/*************************************************************//**
+Moves record list start to another page. Moved records do not include
+split_rec.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return TRUE on success; FALSE on compression failure */
+UNIV_INTERN
+ibool
+page_move_rec_list_start(
+/*=====================*/
+ buf_block_t* new_block, /*!< in/out: index page where to move */
+ buf_block_t* block, /*!< in/out: page containing split_rec */
+ rec_t* split_rec, /*!< in: first record not to move */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ if (UNIV_UNLIKELY(!page_copy_rec_list_start(new_block, block,
+ split_rec, index, mtr))) {
+ return(FALSE);
+ }
+
+ page_delete_rec_list_start(split_rec, block, index, mtr);
+
+ return(TRUE);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/**************************************************************//**
+Used to delete one slot from the directory. This function also updates
+the n_owned fields in the records, so that the first slot after
+the deleted one inherits the records of the deleted slot. */
+UNIV_INLINE
+void
+page_dir_delete_slot(
+/*=================*/
+ page_t* page, /*!< in/out: the index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint slot_no)/*!< in: slot to be deleted */
+{
+ page_dir_slot_t* slot;
+ ulint n_owned;
+ ulint i;
+ ulint n_slots;
+
+ ut_ad(!page_zip || page_is_comp(page));
+ ut_ad(slot_no > 0);
+ ut_ad(slot_no + 1 < page_dir_get_n_slots(page));
+
+ n_slots = page_dir_get_n_slots(page);
+
+ /* 1. Reset the n_owned fields of the slots to be
+ deleted */
+ slot = page_dir_get_nth_slot(page, slot_no);
+ n_owned = page_dir_slot_get_n_owned(slot);
+ page_dir_slot_set_n_owned(slot, page_zip, 0);
+
+ /* 2. Update the n_owned value of the first non-deleted slot */
+
+ slot = page_dir_get_nth_slot(page, slot_no + 1);
+ page_dir_slot_set_n_owned(slot, page_zip,
+ n_owned + page_dir_slot_get_n_owned(slot));
+
+ /* 3. Destroy the slot by copying slots */
+ for (i = slot_no + 1; i < n_slots; i++) {
+ rec_t* rec = (rec_t*)
+ page_dir_slot_get_rec(page_dir_get_nth_slot(page, i));
+ page_dir_slot_set_rec(page_dir_get_nth_slot(page, i - 1), rec);
+ }
+
+ /* 4. Zero out the last slot, which will be removed */
+ mach_write_to_2(page_dir_get_nth_slot(page, n_slots - 1), 0);
+
+ /* 5. Update the page header */
+ page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots - 1);
+}
+
+/**************************************************************//**
+Used to add a slot to the directory. Does not set the record pointer
+in the added slot or update n_owned values: this is the responsibility
+of the caller. */
+UNIV_INLINE
+void
+page_dir_add_slot(
+/*==============*/
+ page_t* page, /*!< in/out: the index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint start) /*!< in: the slot above which the new slots
+ are added */
+{
+ page_dir_slot_t* slot;
+ ulint n_slots;
+
+ n_slots = page_dir_get_n_slots(page);
+
+ ut_ad(start < n_slots - 1);
+
+ /* Update the page header */
+ page_dir_set_n_slots(page, page_zip, n_slots + 1);
+
+ /* Move slots up */
+ slot = page_dir_get_nth_slot(page, n_slots);
+ memmove(slot, slot + PAGE_DIR_SLOT_SIZE,
+ (n_slots - 1 - start) * PAGE_DIR_SLOT_SIZE);
+}
+
+/****************************************************************//**
+Splits a directory slot which owns too many records. */
+UNIV_INTERN
+void
+page_dir_split_slot(
+/*================*/
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be written, or NULL */
+ ulint slot_no)/*!< in: the directory slot */
+{
+ rec_t* rec;
+ page_dir_slot_t* new_slot;
+ page_dir_slot_t* prev_slot;
+ page_dir_slot_t* slot;
+ ulint i;
+ ulint n_owned;
+
+ ut_ad(page);
+ ut_ad(!page_zip || page_is_comp(page));
+ ut_ad(slot_no > 0);
+
+ slot = page_dir_get_nth_slot(page, slot_no);
+
+ n_owned = page_dir_slot_get_n_owned(slot);
+ ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1);
+
+ /* 1. We loop to find a record approximately in the middle of the
+ records owned by the slot. */
+
+ prev_slot = page_dir_get_nth_slot(page, slot_no - 1);
+ rec = (rec_t*) page_dir_slot_get_rec(prev_slot);
+
+ for (i = 0; i < n_owned / 2; i++) {
+ rec = page_rec_get_next(rec);
+ }
+
+ ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED);
+
+ /* 2. We add one directory slot immediately below the slot to be
+ split. */
+
+ page_dir_add_slot(page, page_zip, slot_no - 1);
+
+ /* The added slot is now number slot_no, and the old slot is
+ now number slot_no + 1 */
+
+ new_slot = page_dir_get_nth_slot(page, slot_no);
+ slot = page_dir_get_nth_slot(page, slot_no + 1);
+
+ /* 3. We store the appropriate values to the new slot. */
+
+ page_dir_slot_set_rec(new_slot, rec);
+ page_dir_slot_set_n_owned(new_slot, page_zip, n_owned / 2);
+
+ /* 4. Finally, we update the number of records field of the
+ original slot */
+
+ page_dir_slot_set_n_owned(slot, page_zip, n_owned - (n_owned / 2));
+}
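+
+/* A worked example of the split above, using the owned-count bounds
+described at the top of this file (4 to 8): a slot arrives here owning
+n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1 == 9 records. The loop stops
+at the 9 / 2 = 4th record of the group, the new slot takes ownership
+of those 4 records, and the original slot keeps the remaining 5, so
+both halves satisfy PAGE_DIR_SLOT_MIN_N_OWNED == 4. */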
+
+/*************************************************************//**
+Tries to balance a directory slot which owns too few records against its
+upper neighbor, so that the slot owns at least the minimum number of
+records; this may result in merging the two slots. */
+UNIV_INTERN
+void
+page_dir_balance_slot(
+/*==================*/
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint slot_no)/*!< in: the directory slot */
+{
+ page_dir_slot_t* slot;
+ page_dir_slot_t* up_slot;
+ ulint n_owned;
+ ulint up_n_owned;
+ rec_t* old_rec;
+ rec_t* new_rec;
+
+ ut_ad(page);
+ ut_ad(!page_zip || page_is_comp(page));
+ ut_ad(slot_no > 0);
+
+ slot = page_dir_get_nth_slot(page, slot_no);
+
+ /* The last directory slot cannot be balanced with the upper
+ neighbor, as there is none. */
+
+ if (UNIV_UNLIKELY(slot_no == page_dir_get_n_slots(page) - 1)) {
+
+ return;
+ }
+
+ up_slot = page_dir_get_nth_slot(page, slot_no + 1);
+
+ n_owned = page_dir_slot_get_n_owned(slot);
+ up_n_owned = page_dir_slot_get_n_owned(up_slot);
+
+ ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1);
+
+ /* If the upper slot has the minimum value of n_owned, we will merge
+ the two slots, therefore we assert: */
+ ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED);
+
+ if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) {
+
+ /* In this case we can just transfer one record owned
+ by the upper slot to the ownership of the lower slot */
+ old_rec = (rec_t*) page_dir_slot_get_rec(slot);
+
+ if (page_is_comp(page)) {
+ new_rec = rec_get_next_ptr(old_rec, TRUE);
+
+ rec_set_n_owned_new(old_rec, page_zip, 0);
+ rec_set_n_owned_new(new_rec, page_zip, n_owned + 1);
+ } else {
+ new_rec = rec_get_next_ptr(old_rec, FALSE);
+
+ rec_set_n_owned_old(old_rec, 0);
+ rec_set_n_owned_old(new_rec, n_owned + 1);
+ }
+
+ page_dir_slot_set_rec(slot, new_rec);
+
+ page_dir_slot_set_n_owned(up_slot, page_zip, up_n_owned - 1);
+ } else {
+ /* In this case we may merge the two slots */
+ page_dir_delete_slot(page, page_zip, slot_no);
+ }
+}
+
+/************************************************************//**
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return nth record */
+UNIV_INTERN
+const rec_t*
+page_rec_get_nth_const(
+/*===================*/
+ const page_t* page, /*!< in: page */
+ ulint nth) /*!< in: nth record */
+{
+ const page_dir_slot_t* slot;
+ ulint i;
+ ulint n_owned;
+ const rec_t* rec;
+
+ if (nth == 0) {
+ return(page_get_infimum_rec(page));
+ }
+
+ ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
+
+ for (i = 0;; i++) {
+
+ slot = page_dir_get_nth_slot(page, i);
+ n_owned = page_dir_slot_get_n_owned(slot);
+
+ if (n_owned > nth) {
+ break;
+ } else {
+ nth -= n_owned;
+ }
+ }
+
+ ut_ad(i > 0);
+ slot = page_dir_get_nth_slot(page, i - 1);
+ rec = page_dir_slot_get_rec(slot);
+
+ if (page_is_comp(page)) {
+ do {
+ rec = page_rec_get_next_low(rec, TRUE);
+ ut_ad(rec);
+ } while (nth--);
+ } else {
+ do {
+ rec = page_rec_get_next_low(rec, FALSE);
+ ut_ad(rec);
+ } while (nth--);
+ }
+
+ return(rec);
+}
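+
+/* A hypothetical assertion expressing the inverse relationship claimed
+above, for any record count nth that is valid on the page. */
+#ifdef UNIV_COMPILE_TEST_FUNCS
+void
+test_page_rec_get_nth_inverse(
+ const page_t* page, /* in: index page */
+ ulint nth) /* in: number of records before the
+ sought record */
+{
+ ut_a(page_rec_get_n_recs_before(
+ page_rec_get_nth_const(page, nth)) == nth);
+}
+#endif /* UNIV_COMPILE_TEST_FUNCS */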
+
+/***************************************************************//**
+Returns the number of records before the given record in the chain.
+The number includes infimum and supremum records.
+@return number of records */
+UNIV_INTERN
+ulint
+page_rec_get_n_recs_before(
+/*=======================*/
+ const rec_t* rec) /*!< in: the physical record */
+{
+ const page_dir_slot_t* slot;
+ const rec_t* slot_rec;
+ const page_t* page;
+ ulint i;
+ lint n = 0;
+
+ ut_ad(page_rec_check(rec));
+
+ page = page_align(rec);
+ if (page_is_comp(page)) {
+ while (rec_get_n_owned_new(rec) == 0) {
+
+ rec = rec_get_next_ptr_const(rec, TRUE);
+ n--;
+ }
+
+ for (i = 0; ; i++) {
+ slot = page_dir_get_nth_slot(page, i);
+ slot_rec = page_dir_slot_get_rec(slot);
+
+ n += rec_get_n_owned_new(slot_rec);
+
+ if (rec == slot_rec) {
+
+ break;
+ }
+ }
+ } else {
+ while (rec_get_n_owned_old(rec) == 0) {
+
+ rec = rec_get_next_ptr_const(rec, FALSE);
+ n--;
+ }
+
+ for (i = 0; ; i++) {
+ slot = page_dir_get_nth_slot(page, i);
+ slot_rec = page_dir_slot_get_rec(slot);
+
+ n += rec_get_n_owned_old(slot_rec);
+
+ if (rec == slot_rec) {
+
+ break;
+ }
+ }
+ }
+
+ n--;
+
+ ut_ad(n >= 0);
+ ut_ad((ulong) n < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
+
+ return((ulint) n);
+}
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Prints record contents including the data relevant only in
+the index page context. */
+UNIV_INTERN
+void
+page_rec_print(
+/*===========*/
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets)/*!< in: record descriptor */
+{
+ ut_a(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
+ rec_print_new(stderr, rec, offsets);
+ if (page_rec_is_comp(rec)) {
+ fprintf(stderr,
+ " n_owned: %lu; heap_no: %lu; next rec: %lu\n",
+ (ulong) rec_get_n_owned_new(rec),
+ (ulong) rec_get_heap_no_new(rec),
+ (ulong) rec_get_next_offs(rec, TRUE));
+ } else {
+ fprintf(stderr,
+ " n_owned: %lu; heap_no: %lu; next rec: %lu\n",
+ (ulong) rec_get_n_owned_old(rec),
+ (ulong) rec_get_heap_no_old(rec),
+ (ulong) rec_get_next_offs(rec, FALSE));
+ }
+
+ page_rec_check(rec);
+ rec_validate(rec, offsets);
+}
+
+# ifdef UNIV_BTR_PRINT
+/***************************************************************//**
+This is used to print the contents of the directory for
+debugging purposes. */
+UNIV_INTERN
+void
+page_dir_print(
+/*===========*/
+ page_t* page, /*!< in: index page */
+ ulint pr_n) /*!< in: print n first and n last entries */
+{
+ ulint n;
+ ulint i;
+ page_dir_slot_t* slot;
+
+ n = page_dir_get_n_slots(page);
+
+ fprintf(stderr, "--------------------------------\n"
+ "PAGE DIRECTORY\n"
+ "Page address %p\n"
+ "Directory stack top at offs: %lu; number of slots: %lu\n",
+ page, (ulong) page_offset(page_dir_get_nth_slot(page, n - 1)),
+ (ulong) n);
+ for (i = 0; i < n; i++) {
+ slot = page_dir_get_nth_slot(page, i);
+ if ((i == pr_n) && (i < n - pr_n)) {
+ fputs(" ... \n", stderr);
+ }
+ if ((i < pr_n) || (i >= n - pr_n)) {
+ fprintf(stderr,
+ "Contents of slot: %lu: n_owned: %lu,"
+ " rec offs: %lu\n",
+ (ulong) i,
+ (ulong) page_dir_slot_get_n_owned(slot),
+ (ulong)
+ page_offset(page_dir_slot_get_rec(slot)));
+ }
+ }
+ fprintf(stderr, "Total of %lu records\n"
+ "--------------------------------\n",
+ (ulong) (PAGE_HEAP_NO_USER_LOW + page_get_n_recs(page)));
+}
+
+/***************************************************************//**
+This is used to print the contents of the page record list for
+debugging purposes. */
+UNIV_INTERN
+void
+page_print_list(
+/*============*/
+ buf_block_t* block, /*!< in: index page */
+ dict_index_t* index, /*!< in: dictionary index of the page */
+ ulint pr_n) /*!< in: print n first and n last entries */
+{
+ page_t* page = block->frame;
+ page_cur_t cur;
+ ulint count;
+ ulint n_recs;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
+
+ fprintf(stderr,
+ "--------------------------------\n"
+ "PAGE RECORD LIST\n"
+ "Page address %p\n", page);
+
+ n_recs = page_get_n_recs(page);
+
+ page_cur_set_before_first(block, &cur);
+ count = 0;
+ for (;;) {
+ offsets = rec_get_offsets(cur.rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ page_rec_print(cur.rec, offsets);
+
+ if (count == pr_n) {
+ break;
+ }
+ if (page_cur_is_after_last(&cur)) {
+ break;
+ }
+ page_cur_move_to_next(&cur);
+ count++;
+ }
+
+ if (n_recs > 2 * pr_n) {
+ fputs(" ... \n", stderr);
+ }
+
+ while (!page_cur_is_after_last(&cur)) {
+ page_cur_move_to_next(&cur);
+
+ if (count + pr_n >= n_recs) {
+ offsets = rec_get_offsets(cur.rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ page_rec_print(cur.rec, offsets);
+ }
+ count++;
+ }
+
+ fprintf(stderr,
+ "Total of %lu records \n"
+ "--------------------------------\n",
+ (ulong) (count + 1));
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+}
+
+/***************************************************************//**
+Prints the info in a page header. */
+UNIV_INTERN
+void
+page_header_print(
+/*==============*/
+ const page_t* page)
+{
+ fprintf(stderr,
+ "--------------------------------\n"
+ "PAGE HEADER INFO\n"
+ "Page address %p, n records %lu (%s)\n"
+ "n dir slots %lu, heap top %lu\n"
+ "Page n heap %lu, free %lu, garbage %lu\n"
+ "Page last insert %lu, direction %lu, n direction %lu\n",
+ page, (ulong) page_header_get_field(page, PAGE_N_RECS),
+ page_is_comp(page) ? "compact format" : "original format",
+ (ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS),
+ (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
+ (ulong) page_dir_get_n_heap(page),
+ (ulong) page_header_get_field(page, PAGE_FREE),
+ (ulong) page_header_get_field(page, PAGE_GARBAGE),
+ (ulong) page_header_get_field(page, PAGE_LAST_INSERT),
+ (ulong) page_header_get_field(page, PAGE_DIRECTION),
+ (ulong) page_header_get_field(page, PAGE_N_DIRECTION));
+}
+
+/***************************************************************//**
+This is used to print the contents of the page for
+debugging purposes. */
+UNIV_INTERN
+void
+page_print(
+/*=======*/
+ buf_block_t* block, /*!< in: index page */
+ dict_index_t* index, /*!< in: dictionary index of the page */
+ ulint dn, /*!< in: print dn first and last entries
+ in directory */
+	ulint		rn)	/*!< in: print rn first and last records
+				in the record list */
+{
+ page_t* page = block->frame;
+
+ page_header_print(page);
+ page_dir_print(page, dn);
+ page_print_list(block, index, rn);
+}
+# endif /* UNIV_BTR_PRINT */
+#endif /* !UNIV_HOTBACKUP */
+
+/***************************************************************//**
+The following is used to validate a record on a page. This function
+differs from rec_validate in that it can also check the n_owned and
+heap_no fields.
+@return TRUE if ok */
+UNIV_INTERN
+ibool
+page_rec_validate(
+/*==============*/
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+{
+ ulint n_owned;
+ ulint heap_no;
+ const page_t* page;
+
+ page = page_align(rec);
+ ut_a(!page_is_comp(page) == !rec_offs_comp(offsets));
+
+ page_rec_check(rec);
+ rec_validate(rec, offsets);
+
+ if (page_rec_is_comp(rec)) {
+ n_owned = rec_get_n_owned_new(rec);
+ heap_no = rec_get_heap_no_new(rec);
+ } else {
+ n_owned = rec_get_n_owned_old(rec);
+ heap_no = rec_get_heap_no_old(rec);
+ }
+
+ if (UNIV_UNLIKELY(!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED))) {
+ fprintf(stderr,
+ "InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
+ (ulong) page_offset(rec), (ulong) n_owned);
+ return(FALSE);
+ }
+
+ if (UNIV_UNLIKELY(!(heap_no < page_dir_get_n_heap(page)))) {
+ fprintf(stderr,
+ "InnoDB: Heap no of rec %lu too big %lu %lu\n",
+ (ulong) page_offset(rec), (ulong) heap_no,
+ (ulong) page_dir_get_n_heap(page));
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Checks that the first directory slot points to the infimum record and
+the last to the supremum. This function is intended to track if the
+bug fixed in 4.0.14 has caused corruption to users' databases. */
+UNIV_INTERN
+void
+page_check_dir(
+/*===========*/
+ const page_t* page) /*!< in: index page */
+{
+ ulint n_slots;
+ ulint infimum_offs;
+ ulint supremum_offs;
+
+ n_slots = page_dir_get_n_slots(page);
+ infimum_offs = mach_read_from_2(page_dir_get_nth_slot(page, 0));
+ supremum_offs = mach_read_from_2(page_dir_get_nth_slot(page,
+ n_slots - 1));
+
+ if (UNIV_UNLIKELY(!page_rec_is_infimum_low(infimum_offs))) {
+
+ fprintf(stderr,
+ "InnoDB: Page directory corruption:"
+ " infimum not pointed to\n");
+ buf_page_print(page, 0, 0);
+ }
+
+ if (UNIV_UNLIKELY(!page_rec_is_supremum_low(supremum_offs))) {
+
+ fprintf(stderr,
+ "InnoDB: Page directory corruption:"
+ " supremum not pointed to\n");
+ buf_page_print(page, 0, 0);
+ }
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/***************************************************************//**
+This function checks the consistency of an index page when we do not
+know the index. It is also resilient, so it should never crash
+even if the page is total garbage.
+@return TRUE if ok */
+UNIV_INTERN
+ibool
+page_simple_validate_old(
+/*=====================*/
+ const page_t* page) /*!< in: index page in ROW_FORMAT=REDUNDANT */
+{
+ const page_dir_slot_t* slot;
+ ulint slot_no;
+ ulint n_slots;
+ const rec_t* rec;
+ const byte* rec_heap_top;
+ ulint count;
+ ulint own_count;
+ ibool ret = FALSE;
+
+ ut_a(!page_is_comp(page));
+
+ /* Check first that the record heap and the directory do not
+ overlap. */
+
+ n_slots = page_dir_get_n_slots(page);
+
+ if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
+ fprintf(stderr,
+ "InnoDB: Nonsensical number %lu of page dir slots\n",
+ (ulong) n_slots);
+
+ goto func_exit;
+ }
+
+ rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
+
+ if (UNIV_UNLIKELY(rec_heap_top
+ > page_dir_get_nth_slot(page, n_slots - 1))) {
+
+ fprintf(stderr,
+ "InnoDB: Record heap and dir overlap on a page,"
+ " heap top %lu, dir %lu\n",
+ (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
+ (ulong)
+ page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
+
+ goto func_exit;
+ }
+
+ /* Validate the record list in a loop checking also that it is
+ consistent with the page record directory. */
+
+ count = 0;
+ own_count = 1;
+ slot_no = 0;
+ slot = page_dir_get_nth_slot(page, slot_no);
+
+ rec = page_get_infimum_rec(page);
+
+ for (;;) {
+ if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+ fprintf(stderr,
+ "InnoDB: Record %lu is above"
+ " rec heap top %lu\n",
+ (ulong)(rec - page),
+ (ulong)(rec_heap_top - page));
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_owned_old(rec))) {
+ /* This is a record pointed to by a dir slot */
+ if (UNIV_UNLIKELY(rec_get_n_owned_old(rec)
+ != own_count)) {
+
+ fprintf(stderr,
+ "InnoDB: Wrong owned count %lu, %lu,"
+ " rec %lu\n",
+ (ulong) rec_get_n_owned_old(rec),
+ (ulong) own_count,
+ (ulong)(rec - page));
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY
+ (page_dir_slot_get_rec(slot) != rec)) {
+ fprintf(stderr,
+ "InnoDB: Dir slot does not point"
+ " to right rec %lu\n",
+ (ulong)(rec - page));
+
+ goto func_exit;
+ }
+
+ own_count = 0;
+
+ if (!page_rec_is_supremum(rec)) {
+ slot_no++;
+ slot = page_dir_get_nth_slot(page, slot_no);
+ }
+ }
+
+ if (page_rec_is_supremum(rec)) {
+
+ break;
+ }
+
+ if (UNIV_UNLIKELY
+ (rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA
+ || rec_get_next_offs(rec, FALSE) >= UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Next record offset"
+ " nonsensical %lu for rec %lu\n",
+ (ulong) rec_get_next_offs(rec, FALSE),
+ (ulong) (rec - page));
+
+ goto func_exit;
+ }
+
+ count++;
+
+ if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Page record list appears"
+ " to be circular %lu\n",
+ (ulong) count);
+ goto func_exit;
+ }
+
+ rec = page_rec_get_next_const(rec);
+ own_count++;
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
+ fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n");
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
+ fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
+ (ulong) slot_no, (ulong) (n_slots - 1));
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW
+ != count + 1)) {
+ fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
+ (ulong) page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW,
+ (ulong) (count + 1));
+
+ goto func_exit;
+ }
+
+ /* Check then the free list */
+ rec = page_header_get_ptr(page, PAGE_FREE);
+
+ while (rec != NULL) {
+ if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
+ || rec >= page + UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Free list record has"
+ " a nonsensical offset %lu\n",
+ (ulong) (rec - page));
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+ fprintf(stderr,
+ "InnoDB: Free list record %lu"
+ " is above rec heap top %lu\n",
+ (ulong) (rec - page),
+ (ulong) (rec_heap_top - page));
+
+ goto func_exit;
+ }
+
+ count++;
+
+ if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Page free list appears"
+ " to be circular %lu\n",
+ (ulong) count);
+ goto func_exit;
+ }
+
+ rec = page_rec_get_next_const(rec);
+ }
+
+ if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
+
+ fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
+ (ulong) page_dir_get_n_heap(page),
+ (ulong) (count + 1));
+
+ goto func_exit;
+ }
+
+ ret = TRUE;
+
+func_exit:
+ return(ret);
+}
+
+/***************************************************************//**
+This function checks the consistency of an index page when we do not
+know the index. It is also resilient, so it should never crash
+even if the page is total garbage.
+@return TRUE if ok */
+UNIV_INTERN
+ibool
+page_simple_validate_new(
+/*=====================*/
+ const page_t* page) /*!< in: index page in ROW_FORMAT!=REDUNDANT */
+{
+ const page_dir_slot_t* slot;
+ ulint slot_no;
+ ulint n_slots;
+ const rec_t* rec;
+ const byte* rec_heap_top;
+ ulint count;
+ ulint own_count;
+ ibool ret = FALSE;
+
+ ut_a(page_is_comp(page));
+
+ /* Check first that the record heap and the directory do not
+ overlap. */
+
+ n_slots = page_dir_get_n_slots(page);
+
+ if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
+ fprintf(stderr,
+ "InnoDB: Nonsensical number %lu"
+ " of page dir slots\n", (ulong) n_slots);
+
+ goto func_exit;
+ }
+
+ rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
+
+ if (UNIV_UNLIKELY(rec_heap_top
+ > page_dir_get_nth_slot(page, n_slots - 1))) {
+
+ fprintf(stderr,
+ "InnoDB: Record heap and dir overlap on a page,"
+ " heap top %lu, dir %lu\n",
+ (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
+ (ulong)
+ page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
+
+ goto func_exit;
+ }
+
+ /* Validate the record list in a loop checking also that it is
+ consistent with the page record directory. */
+
+ count = 0;
+ own_count = 1;
+ slot_no = 0;
+ slot = page_dir_get_nth_slot(page, slot_no);
+
+ rec = page_get_infimum_rec(page);
+
+ for (;;) {
+ if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+ fprintf(stderr,
+ "InnoDB: Record %lu is above rec"
+ " heap top %lu\n",
+ (ulong) page_offset(rec),
+ (ulong) page_offset(rec_heap_top));
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
+ /* This is a record pointed to by a dir slot */
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec)
+ != own_count)) {
+
+ fprintf(stderr,
+ "InnoDB: Wrong owned count %lu, %lu,"
+ " rec %lu\n",
+ (ulong) rec_get_n_owned_new(rec),
+ (ulong) own_count,
+ (ulong) page_offset(rec));
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY
+ (page_dir_slot_get_rec(slot) != rec)) {
+ fprintf(stderr,
+ "InnoDB: Dir slot does not point"
+ " to right rec %lu\n",
+ (ulong) page_offset(rec));
+
+ goto func_exit;
+ }
+
+ own_count = 0;
+
+ if (!page_rec_is_supremum(rec)) {
+ slot_no++;
+ slot = page_dir_get_nth_slot(page, slot_no);
+ }
+ }
+
+ if (page_rec_is_supremum(rec)) {
+
+ break;
+ }
+
+ if (UNIV_UNLIKELY
+ (rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA
+ || rec_get_next_offs(rec, TRUE) >= UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Next record offset nonsensical %lu"
+ " for rec %lu\n",
+ (ulong) rec_get_next_offs(rec, TRUE),
+ (ulong) page_offset(rec));
+
+ goto func_exit;
+ }
+
+ count++;
+
+ if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Page record list appears"
+ " to be circular %lu\n",
+ (ulong) count);
+ goto func_exit;
+ }
+
+ rec = page_rec_get_next_const(rec);
+ own_count++;
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
+ fprintf(stderr, "InnoDB: n owned is zero"
+ " in a supremum rec\n");
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
+ fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
+ (ulong) slot_no, (ulong) (n_slots - 1));
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW
+ != count + 1)) {
+ fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
+ (ulong) page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW,
+ (ulong) (count + 1));
+
+ goto func_exit;
+ }
+
+ /* Check then the free list */
+ rec = page_header_get_ptr(page, PAGE_FREE);
+
+ while (rec != NULL) {
+ if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
+ || rec >= page + UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Free list record has"
+ " a nonsensical offset %lu\n",
+ (ulong) page_offset(rec));
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+ fprintf(stderr,
+ "InnoDB: Free list record %lu"
+ " is above rec heap top %lu\n",
+ (ulong) page_offset(rec),
+ (ulong) page_offset(rec_heap_top));
+
+ goto func_exit;
+ }
+
+ count++;
+
+ if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Page free list appears"
+ " to be circular %lu\n",
+ (ulong) count);
+ goto func_exit;
+ }
+
+ rec = page_rec_get_next_const(rec);
+ }
+
+ if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
+
+ fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
+ (ulong) page_dir_get_n_heap(page),
+ (ulong) (count + 1));
+
+ goto func_exit;
+ }
+
+ ret = TRUE;
+
+func_exit:
+ return(ret);
+}
+
+/***************************************************************//**
+This function checks the consistency of an index page.
+@return TRUE if ok */
+UNIV_INTERN
+ibool
+page_validate(
+/*==========*/
+ const page_t* page, /*!< in: index page */
+ dict_index_t* index) /*!< in: data dictionary index containing
+ the page record type definition */
+{
+ const page_dir_slot_t* slot;
+ mem_heap_t* heap;
+ byte* buf;
+ ulint count;
+ ulint own_count;
+ ulint rec_own_count;
+ ulint slot_no;
+ ulint data_size;
+ const rec_t* rec;
+ const rec_t* old_rec = NULL;
+ ulint offs;
+ ulint n_slots;
+ ibool ret = FALSE;
+ ulint i;
+ ulint* offsets = NULL;
+ ulint* old_offsets = NULL;
+
+ if (UNIV_UNLIKELY((ibool) !!page_is_comp(page)
+ != dict_table_is_comp(index->table))) {
+ fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
+ goto func_exit2;
+ }
+ if (page_is_comp(page)) {
+ if (UNIV_UNLIKELY(!page_simple_validate_new(page))) {
+ goto func_exit2;
+ }
+ } else {
+ if (UNIV_UNLIKELY(!page_simple_validate_old(page))) {
+ goto func_exit2;
+ }
+ }
+
+ if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)
+ && !page_is_empty(page)) {
+ trx_id_t max_trx_id = page_get_max_trx_id(page);
+ trx_id_t sys_max_trx_id = trx_sys_get_max_trx_id();
+
+ if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "PAGE_MAX_TRX_ID out of bounds: "
+ TRX_ID_FMT ", " TRX_ID_FMT,
+ max_trx_id, sys_max_trx_id);
+ goto func_exit2;
+ }
+ }
+
+ heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
+
+ /* The following buffer is used to check that the
+ records in the page record heap do not overlap */
+
+ buf = static_cast<byte*>(mem_heap_zalloc(heap, UNIV_PAGE_SIZE));
+
+ /* Check first that the record heap and the directory do not
+ overlap. */
+
+ n_slots = page_dir_get_n_slots(page);
+
+ if (UNIV_UNLIKELY(!(page_header_get_ptr(page, PAGE_HEAP_TOP)
+ <= page_dir_get_nth_slot(page, n_slots - 1)))) {
+
+ fprintf(stderr,
+ "InnoDB: Record heap and dir overlap"
+ " on space %lu page %lu index %s, %p, %p\n",
+ (ulong) page_get_space_id(page),
+ (ulong) page_get_page_no(page), index->name,
+ page_header_get_ptr(page, PAGE_HEAP_TOP),
+ page_dir_get_nth_slot(page, n_slots - 1));
+
+ goto func_exit;
+ }
+
+ /* Validate the record list in a loop checking also that
+ it is consistent with the directory. */
+ count = 0;
+ data_size = 0;
+ own_count = 1;
+ slot_no = 0;
+ slot = page_dir_get_nth_slot(page, slot_no);
+
+ rec = page_get_infimum_rec(page);
+
+ for (;;) {
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+
+ if (page_is_comp(page) && page_rec_is_user_rec(rec)
+ && UNIV_UNLIKELY(rec_get_node_ptr_flag(rec)
+ == page_is_leaf(page))) {
+ fputs("InnoDB: node_ptr flag mismatch\n", stderr);
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
+ goto func_exit;
+ }
+
+#ifndef UNIV_HOTBACKUP
+ /* Check that the records are in the ascending order */
+ if (UNIV_LIKELY(count >= PAGE_HEAP_NO_USER_LOW)
+ && !page_rec_is_supremum(rec)) {
+ if (UNIV_UNLIKELY
+ (1 != cmp_rec_rec(rec, old_rec,
+ offsets, old_offsets, index))) {
+ fprintf(stderr,
+ "InnoDB: Records in wrong order"
+ " on space %lu page %lu index %s\n",
+ (ulong) page_get_space_id(page),
+ (ulong) page_get_page_no(page),
+ index->name);
+ fputs("\nInnoDB: previous record ", stderr);
+ rec_print_new(stderr, old_rec, old_offsets);
+ fputs("\nInnoDB: record ", stderr);
+ rec_print_new(stderr, rec, offsets);
+ putc('\n', stderr);
+
+ goto func_exit;
+ }
+ }
+#endif /* !UNIV_HOTBACKUP */
+
+ if (page_rec_is_user_rec(rec)) {
+
+ data_size += rec_offs_size(offsets);
+ }
+
+ offs = page_offset(rec_get_start(rec, offsets));
+ i = rec_offs_size(offsets);
+ if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
+ fputs("InnoDB: record offset out of bounds\n", stderr);
+ goto func_exit;
+ }
+
+ while (i--) {
+ if (UNIV_UNLIKELY(buf[offs + i])) {
+ /* No other record may overlap this */
+
+ fputs("InnoDB: Record overlaps another\n",
+ stderr);
+ goto func_exit;
+ }
+
+ buf[offs + i] = 1;
+ }
+
+ if (page_is_comp(page)) {
+ rec_own_count = rec_get_n_owned_new(rec);
+ } else {
+ rec_own_count = rec_get_n_owned_old(rec);
+ }
+
+ if (UNIV_UNLIKELY(rec_own_count)) {
+ /* This is a record pointed to by a dir slot */
+ if (UNIV_UNLIKELY(rec_own_count != own_count)) {
+ fprintf(stderr,
+ "InnoDB: Wrong owned count %lu, %lu\n",
+ (ulong) rec_own_count,
+ (ulong) own_count);
+ goto func_exit;
+ }
+
+ if (page_dir_slot_get_rec(slot) != rec) {
+ fputs("InnoDB: Dir slot does not"
+ " point to right rec\n",
+ stderr);
+ goto func_exit;
+ }
+
+ page_dir_slot_check(slot);
+
+ own_count = 0;
+ if (!page_rec_is_supremum(rec)) {
+ slot_no++;
+ slot = page_dir_get_nth_slot(page, slot_no);
+ }
+ }
+
+ if (page_rec_is_supremum(rec)) {
+ break;
+ }
+
+ count++;
+ own_count++;
+ old_rec = rec;
+ rec = page_rec_get_next_const(rec);
+
+ /* set old_offsets to offsets; recycle offsets */
+ {
+ ulint* offs = old_offsets;
+ old_offsets = offsets;
+ offsets = offs;
+ }
+ }
+
+ if (page_is_comp(page)) {
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
+
+ goto n_owned_zero;
+ }
+ } else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
+n_owned_zero:
+ fputs("InnoDB: n owned is zero\n", stderr);
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
+ fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n",
+ (ulong) slot_no, (ulong) (n_slots - 1));
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW
+ != count + 1)) {
+ fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
+ (ulong) page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW,
+ (ulong) (count + 1));
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(data_size != page_get_data_size(page))) {
+ fprintf(stderr,
+ "InnoDB: Summed data size %lu, returned by func %lu\n",
+ (ulong) data_size, (ulong) page_get_data_size(page));
+ goto func_exit;
+ }
+
+ /* Check then the free list */
+ rec = page_header_get_ptr(page, PAGE_FREE);
+
+ while (rec != NULL) {
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
+
+ goto func_exit;
+ }
+
+ count++;
+ offs = page_offset(rec_get_start(rec, offsets));
+ i = rec_offs_size(offsets);
+ if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
+ fputs("InnoDB: record offset out of bounds\n", stderr);
+ goto func_exit;
+ }
+
+ while (i--) {
+
+ if (UNIV_UNLIKELY(buf[offs + i])) {
+ fputs("InnoDB: Record overlaps another"
+ " in free list\n", stderr);
+ goto func_exit;
+ }
+
+ buf[offs + i] = 1;
+ }
+
+ rec = page_rec_get_next_const(rec);
+ }
+
+ if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
+ fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n",
+ (ulong) page_dir_get_n_heap(page),
+ (ulong) count + 1);
+ goto func_exit;
+ }
+
+ ret = TRUE;
+
+func_exit:
+ mem_heap_free(heap);
+
+ if (UNIV_UNLIKELY(ret == FALSE)) {
+func_exit2:
+ fprintf(stderr,
+ "InnoDB: Apparent corruption"
+ " in space %lu page %lu index %s\n",
+ (ulong) page_get_space_id(page),
+ (ulong) page_get_page_no(page),
+ index->name);
+ buf_page_print(page, 0, 0);
+ }
+
+ return(ret);
+}
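+
+/* page_validate() detects overlapping records with a byte map: every
+byte covered by a visited record is marked in a zero-filled shadow
+buffer of UNIV_PAGE_SIZE bytes, so a second record touching the same
+offset is caught at once. A minimal standalone sketch of the same
+technique (illustrative only, not part of this file's interface):
+*/
+#if 0
+static ibool
+region_claim(byte* map, ulint offs, ulint len)
+{
+	while (len--) {
+		if (map[offs + len]) {
+			return(FALSE);	/* byte already claimed */
+		}
+		map[offs + len] = 1;	/* claim this byte */
+	}
+	return(TRUE);	/* region is disjoint from all earlier ones */
+}
+#endif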
+
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Looks in the page record list for a record with the given heap number.
+@return record, NULL if not found */
+UNIV_INTERN
+const rec_t*
+page_find_rec_with_heap_no(
+/*=======================*/
+ const page_t* page, /*!< in: index page */
+ ulint heap_no)/*!< in: heap number */
+{
+ const rec_t* rec;
+
+ if (page_is_comp(page)) {
+ rec = page + PAGE_NEW_INFIMUM;
+
+		for (;;) {
+ ulint rec_heap_no = rec_get_heap_no_new(rec);
+
+ if (rec_heap_no == heap_no) {
+
+ return(rec);
+ } else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
+
+ return(NULL);
+ }
+
+ rec = page + rec_get_next_offs(rec, TRUE);
+ }
+ } else {
+ rec = page + PAGE_OLD_INFIMUM;
+
+ for (;;) {
+ ulint rec_heap_no = rec_get_heap_no_old(rec);
+
+ if (rec_heap_no == heap_no) {
+
+ return(rec);
+ } else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
+
+ return(NULL);
+ }
+
+ rec = page + rec_get_next_offs(rec, FALSE);
+ }
+ }
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*******************************************************//**
+Removes the record from a leaf page. This function does not log
+any changes. It is used by the IMPORT tablespace functions.
+The cursor is moved to the next record after the deleted one.
+@return true if success, i.e., the page did not become too empty */
+UNIV_INTERN
+bool
+page_delete_rec(
+/*============*/
+ const dict_index_t* index, /*!< in: The index that the record
+ belongs to */
+ page_cur_t* pcur, /*!< in/out: page cursor on record
+ to delete */
+ page_zip_des_t* page_zip,/*!< in: compressed page descriptor */
+ const ulint* offsets)/*!< in: offsets for record */
+{
+ bool no_compress_needed;
+ buf_block_t* block = pcur->block;
+ page_t* page = buf_block_get_frame(block);
+
+ ut_ad(page_is_leaf(page));
+
+ if (!rec_offs_any_extern(offsets)
+ && ((page_get_data_size(page) - rec_offs_size(offsets)
+ < BTR_CUR_PAGE_COMPRESS_LIMIT)
+ || (mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL
+ && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)
+ || (page_get_n_recs(page) < 2))) {
+
+ ulint root_page_no = dict_index_get_page(index);
+
+		/* The page fill factor would drop below the
+		predefined minimum, OR this level of the B-tree
+		contains just one page, OR the page would become
+		empty: recommend compression unless this is the
+		root page. */
+
+ no_compress_needed = page_get_page_no(page) == root_page_no;
+ } else {
+ no_compress_needed = true;
+ }
+
+ if (no_compress_needed) {
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ page_cur_delete_rec(pcur, index, offsets, 0);
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+ }
+
+ return(no_compress_needed);
+}
+
+/** Get the last non-delete-marked record on a page.
+@param[in] page index tree leaf page
+@return the last record, not delete-marked
+@retval infimum record if all records are delete-marked */
+
+const rec_t*
+page_find_rec_max_not_deleted(
+ const page_t* page)
+{
+ const rec_t* rec = page_get_infimum_rec(page);
+ const rec_t* prev_rec = NULL; // remove warning
+
+ /* Because the page infimum is never delete-marked,
+ prev_rec will always be assigned to it first. */
+ ut_ad(!rec_get_deleted_flag(rec, page_rec_is_comp(rec)));
+ if (page_is_comp(page)) {
+ do {
+ if (!rec_get_deleted_flag(rec, true)) {
+ prev_rec = rec;
+ }
+ rec = page_rec_get_next_low(rec, true);
+ } while (rec != page + PAGE_NEW_SUPREMUM);
+ } else {
+ do {
+ if (!rec_get_deleted_flag(rec, false)) {
+ prev_rec = rec;
+ }
+ rec = page_rec_get_next_low(rec, false);
+ } while (rec != page + PAGE_OLD_SUPREMUM);
+ }
+ return(prev_rec);
+}
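+
+/* Illustrative call pattern (hypothetical caller, not from this
+file): */
+#if 0
+	const rec_t*	last = page_find_rec_max_not_deleted(page);
+
+	if (page_rec_is_infimum(last)) {
+		/* every user record on the page is delete-marked */
+	}
+#endif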
diff --git a/storage/xtradb/page/page0zip.cc b/storage/xtradb/page/page0zip.cc
new file mode 100644
index 00000000000..67dca183c6d
--- /dev/null
+++ b/storage/xtradb/page/page0zip.cc
@@ -0,0 +1,4952 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file page/page0zip.cc
+Compressed page interface
+
+Created June 2005 by Marko Makela
+*******************************************************/
+
+// First include (the generated) my_config.h, to get correct platform defines.
+#include "my_config.h"
+
+#include <map>
+using namespace std;
+
+#define THIS_MODULE
+#include "page0zip.h"
+#ifdef UNIV_NONINL
+# include "page0zip.ic"
+#endif
+#undef THIS_MODULE
+#include "page0page.h"
+#include "mtr0log.h"
+#include "ut0sort.h"
+#include "dict0dict.h"
+#include "btr0cur.h"
+#include "page0types.h"
+#include "log0recv.h"
+#include "zlib.h"
+#ifndef UNIV_HOTBACKUP
+# include "buf0buf.h"
+# include "buf0lru.h"
+# include "btr0sea.h"
+# include "dict0boot.h"
+# include "lock0lock.h"
+# include "srv0mon.h"
+# include "srv0srv.h"
+# include "ut0crc32.h"
+#else /* !UNIV_HOTBACKUP */
+# include "buf0checksum.h"
+# define lock_move_reorganize_page(block, temp_block) ((void) 0)
+# define buf_LRU_stat_inc_unzip() ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_HOTBACKUP
+/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
+UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+/** Statistics on compression, indexed by index->id */
+UNIV_INTERN page_zip_stat_per_index_t page_zip_stat_per_index;
+/** Mutex protecting page_zip_stat_per_index */
+UNIV_INTERN ib_mutex_t page_zip_stat_per_index_mutex;
+#ifdef HAVE_PSI_INTERFACE
+UNIV_INTERN mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
+#endif /* HAVE_PSI_INTERFACE */
+#endif /* !UNIV_HOTBACKUP */
+
+/* Compression level to be used by zlib. Settable by user. */
+UNIV_INTERN uint page_zip_level = DEFAULT_COMPRESSION_LEVEL;
+
+/* Whether to log entire compressed page images, to guard against
+possible compression algorithm changes in zlib. */
+UNIV_INTERN my_bool page_zip_log_pages = true;
+
+/* Please refer to ../include/page0zip.ic for a description of the
+compressed page format. */
+
+/* The infimum and supremum records are omitted from the compressed page.
+On compress, we compare that the records are there, and on uncompress we
+restore the records. */
+/** Extra bytes of an infimum record */
+static const byte infimum_extra[] = {
+ 0x01, /* info_bits=0, n_owned=1 */
+ 0x00, 0x02 /* heap_no=0, status=2 */
+ /* ?, ? */ /* next=(first user rec, or supremum) */
+};
+/** Data bytes of an infimum record */
+static const byte infimum_data[] = {
+ 0x69, 0x6e, 0x66, 0x69,
+ 0x6d, 0x75, 0x6d, 0x00 /* "infimum\0" */
+};
+/** Extra bytes and data bytes of a supremum record */
+static const byte supremum_extra_data[] = {
+ /* 0x0?, */ /* info_bits=0, n_owned=1..8 */
+ 0x00, 0x0b, /* heap_no=1, status=3 */
+ 0x00, 0x00, /* next=0 */
+ 0x73, 0x75, 0x70, 0x72,
+ 0x65, 0x6d, 0x75, 0x6d /* "supremum" */
+};
+
+/** Assert that a block of memory is filled with zero bytes.
+Compare at most sizeof(field_ref_zero) bytes.
+@param b in: memory block
+@param s in: size of the memory block, in bytes */
+#define ASSERT_ZERO(b, s) \
+ ut_ad(!memcmp(b, field_ref_zero, ut_min(s, sizeof field_ref_zero)))
+/** Assert that a BLOB pointer is filled with zero bytes.
+@param b in: BLOB pointer */
+#define ASSERT_ZERO_BLOB(b) \
+ ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))
+
+/* Enable some extra debugging output. This code can be enabled
+independently of any UNIV_ debugging conditions. */
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+# include <stdarg.h>
+__attribute__((format (printf, 1, 2)))
+/**********************************************************************//**
+Report a failure to decompress or compress.
+@return number of characters printed */
+static
+int
+page_zip_fail_func(
+/*===============*/
+ const char* fmt, /*!< in: printf(3) format string */
+ ...) /*!< in: arguments corresponding to fmt */
+{
+ int res;
+ va_list ap;
+
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: ", stderr);
+ va_start(ap, fmt);
+ res = vfprintf(stderr, fmt, ap);
+ va_end(ap);
+
+ return(res);
+}
+/** Wrapper for page_zip_fail_func()
+@param fmt_args in: printf(3) format string and arguments */
+# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
+#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+/** Dummy wrapper for page_zip_fail_func()
+@param fmt_args ignored: printf(3) format string and arguments */
+# define page_zip_fail(fmt_args) /* empty */
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Determine the guaranteed free space on an empty page.
+@return minimum payload size on the page */
+UNIV_INTERN
+ulint
+page_zip_empty_size(
+/*================*/
+ ulint n_fields, /*!< in: number of columns in the index */
+ ulint zip_size) /*!< in: compressed page size in bytes */
+{
+ lint size = zip_size
+ /* subtract the page header and the longest
+ uncompressed data needed for one record */
+ - (PAGE_DATA
+ + PAGE_ZIP_DIR_SLOT_SIZE
+ + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
+ + 1/* encoded heap_no==2 in page_zip_write_rec() */
+ + 1/* end of modification log */
+ - REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
+ /* subtract the space for page_zip_fields_encode() */
+ - compressBound(static_cast<uLong>(2 * (n_fields + 1)));
+ return(size > 0 ? (ulint) size : 0);
+}
+#endif /* !UNIV_HOTBACKUP */
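+
+/* Worked example (figures are illustrative): for an 8-field index on
+a 1024-byte compressed page, the fixed per-page overhead is PAGE_DATA
+(94) plus 2 + 6 + 7 + 1 + 1 - 5 = 12 bytes, and with a current zlib
+compressBound(2 * 9) = 18 + 13 = 31 bytes, leaving
+1024 - 94 - 12 - 31 = 887 bytes of guaranteed payload. */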
+
+/*************************************************************//**
+Gets the number of elements in the dense page directory,
+including deleted records (the free list).
+@return number of elements in the dense page directory */
+UNIV_INLINE
+ulint
+page_zip_dir_elems(
+/*===============*/
+ const page_zip_des_t* page_zip) /*!< in: compressed page */
+{
+ /* Exclude the page infimum and supremum from the record count. */
+ return(page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW);
+}
+
+/*************************************************************//**
+Gets the size of the compressed page trailer (the dense page directory),
+including deleted records (the free list).
+@return length of dense page directory, in bytes */
+UNIV_INLINE
+ulint
+page_zip_dir_size(
+/*==============*/
+ const page_zip_des_t* page_zip) /*!< in: compressed page */
+{
+ return(PAGE_ZIP_DIR_SLOT_SIZE * page_zip_dir_elems(page_zip));
+}
+
+/*************************************************************//**
+Gets an offset to the compressed page trailer (the dense page directory),
+including deleted records (the free list).
+@return offset of the dense page directory */
+UNIV_INLINE
+ulint
+page_zip_dir_start_offs(
+/*====================*/
+ const page_zip_des_t* page_zip, /*!< in: compressed page */
+ ulint n_dense) /*!< in: directory size */
+{
+ ut_ad(n_dense * PAGE_ZIP_DIR_SLOT_SIZE < page_zip_get_size(page_zip));
+
+ return(page_zip_get_size(page_zip) - n_dense * PAGE_ZIP_DIR_SLOT_SIZE);
+}
+
+/*************************************************************//**
+Gets a pointer to the compressed page trailer (the dense page directory),
+including deleted records (the free list).
+@param[in] page_zip compressed page
+@param[in] n_dense number of entries in the directory
+@return pointer to the dense page directory */
+#define page_zip_dir_start_low(page_zip, n_dense) \
+ ((page_zip)->data + page_zip_dir_start_offs(page_zip, n_dense))
+/*************************************************************//**
+Gets a pointer to the compressed page trailer (the dense page directory),
+including deleted records (the free list).
+@param[in] page_zip compressed page
+@return pointer to the dense page directory */
+#define page_zip_dir_start(page_zip) \
+ page_zip_dir_start_low(page_zip, page_zip_dir_elems(page_zip))
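+
+/* Rough layout of page_zip->data (a sketch; offsets grow rightwards,
+and the exact contents of the middle region depend on the page type):
+
+	0         PAGE_DATA              m_end                  size
+	| header  | stream | mod. log    | free space | trailer |
+
+The trailer grows downwards from the end of the block: the dense
+directory occupies the last n_dense * PAGE_ZIP_DIR_SLOT_SIZE bytes,
+preceded by the uncompressed columns (node pointers, or trx_id and
+roll_ptr) and, on clustered index leaf pages, the BLOB pointers. */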
+
+/*************************************************************//**
+Gets the size of the compressed page trailer (the dense page directory),
+only including user records (excluding the free list).
+@return length of dense page directory comprising existing records, in bytes */
+UNIV_INLINE
+ulint
+page_zip_dir_user_size(
+/*===================*/
+ const page_zip_des_t* page_zip) /*!< in: compressed page */
+{
+ ulint size = PAGE_ZIP_DIR_SLOT_SIZE
+ * page_get_n_recs(page_zip->data);
+ ut_ad(size <= page_zip_dir_size(page_zip));
+ return(size);
+}
+
+/*************************************************************//**
+Find the slot of the given record in the dense page directory.
+@return dense directory slot, or NULL if record not found */
+UNIV_INLINE
+byte*
+page_zip_dir_find_low(
+/*==================*/
+ byte* slot, /*!< in: start of records */
+ byte* end, /*!< in: end of records */
+ ulint offset) /*!< in: offset of user record */
+{
+ ut_ad(slot <= end);
+
+ for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
+ if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
+ == offset) {
+ return(slot);
+ }
+ }
+
+ return(NULL);
+}
+
+/*************************************************************//**
+Find the slot of the given non-free record in the dense page directory.
+@return dense directory slot, or NULL if record not found */
+UNIV_INLINE
+byte*
+page_zip_dir_find(
+/*==============*/
+ page_zip_des_t* page_zip, /*!< in: compressed page */
+ ulint offset) /*!< in: offset of user record */
+{
+ byte* end = page_zip->data + page_zip_get_size(page_zip);
+
+ ut_ad(page_zip_simple_validate(page_zip));
+
+ return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
+ end,
+ offset));
+}
+
+/*************************************************************//**
+Find the slot of the given free record in the dense page directory.
+@return dense directory slot, or NULL if record not found */
+UNIV_INLINE
+byte*
+page_zip_dir_find_free(
+/*===================*/
+ page_zip_des_t* page_zip, /*!< in: compressed page */
+ ulint offset) /*!< in: offset of user record */
+{
+ byte* end = page_zip->data + page_zip_get_size(page_zip);
+
+ ut_ad(page_zip_simple_validate(page_zip));
+
+ return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
+ end - page_zip_dir_user_size(page_zip),
+ offset));
+}
+
+/*************************************************************//**
+Read a given slot in the dense page directory.
+@return record offset on the uncompressed page, possibly ORed with
+PAGE_ZIP_DIR_SLOT_DEL or PAGE_ZIP_DIR_SLOT_OWNED */
+UNIV_INLINE
+ulint
+page_zip_dir_get(
+/*=============*/
+ const page_zip_des_t* page_zip, /*!< in: compressed page */
+ ulint slot) /*!< in: slot
+ (0=first user record) */
+{
+ ut_ad(page_zip_simple_validate(page_zip));
+ ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
+ return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
+ - PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
+}
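+
+/* Decoding a dense directory entry (a sketch; the flag bits are
+those of PAGE_ZIP_DIR_SLOT_DEL and PAGE_ZIP_DIR_SLOT_OWNED declared
+in page0zip.h): */
+#if 0
+	ulint	entry	= page_zip_dir_get(page_zip, slot);
+	ulint	offs	= entry & PAGE_ZIP_DIR_SLOT_MASK;
+	ibool	deleted	= !!(entry & PAGE_ZIP_DIR_SLOT_DEL);
+				/* record is on the free list */
+	ibool	owned	= !!(entry & PAGE_ZIP_DIR_SLOT_OWNED);
+				/* record owns a sparse dir slot */
+#endif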
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Write a log record of compressing an index page. */
+static
+void
+page_zip_compress_write_log(
+/*========================*/
+ const page_zip_des_t* page_zip,/*!< in: compressed page */
+ const page_t* page, /*!< in: uncompressed page */
+ dict_index_t* index, /*!< in: index of the B-tree node */
+ mtr_t* mtr) /*!< in: mini-transaction */
+{
+ byte* log_ptr;
+ ulint trailer_size;
+
+ ut_ad(!dict_index_is_ibuf(index));
+
+ log_ptr = mlog_open(mtr, 11 + 2 + 2);
+
+ if (!log_ptr) {
+
+ return;
+ }
+
+ /* Read the number of user records. */
+ trailer_size = page_dir_get_n_heap(page_zip->data)
+ - PAGE_HEAP_NO_USER_LOW;
+	/* Multiply by the uncompressed size stored per record */
+ if (!page_is_leaf(page)) {
+ trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
+ } else if (dict_index_is_clust(index)) {
+ trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
+ + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+ } else {
+ trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
+ }
+ /* Add the space occupied by BLOB pointers. */
+ trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
+ ut_a(page_zip->m_end > PAGE_DATA);
+#if FIL_PAGE_DATA > PAGE_DATA
+# error "FIL_PAGE_DATA > PAGE_DATA"
+#endif
+ ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
+
+ log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
+ MLOG_ZIP_PAGE_COMPRESS,
+ log_ptr, mtr);
+ mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
+ log_ptr += 2;
+ mach_write_to_2(log_ptr, trailer_size);
+ log_ptr += 2;
+ mlog_close(mtr, log_ptr);
+
+ /* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
+ mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
+ mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
+ /* Write most of the page header, the compressed stream and
+ the modification log. */
+ mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
+ page_zip->m_end - FIL_PAGE_TYPE);
+ /* Write the uncompressed trailer of the compressed page. */
+ mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
+ - trailer_size, trailer_size);
+}
+#endif /* !UNIV_HOTBACKUP */
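+
+/* The MLOG_ZIP_PAGE_COMPRESS record emitted above thus consists of
+(reconstructed from the writes in page_zip_compress_write_log(); a
+sketch, not a formal specification):
+
+	initial log record		type, space id, page number
+	2 bytes				m_end - FIL_PAGE_TYPE
+	2 bytes				trailer_size
+	4 + 4 bytes			FIL_PAGE_PREV, FIL_PAGE_NEXT
+	m_end - FIL_PAGE_TYPE bytes	header, stream, modification log
+	trailer_size bytes		uncompressed trailer */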
+
+/******************************************************//**
+Determine how many externally stored columns are contained
+in existing records with smaller heap_no than rec. */
+static
+ulint
+page_zip_get_n_prev_extern(
+/*=======================*/
+ const page_zip_des_t* page_zip,/*!< in: dense page directory on
+ compressed page */
+ const rec_t* rec, /*!< in: compact physical record
+ on a B-tree leaf page */
+ const dict_index_t* index) /*!< in: record descriptor */
+{
+ const page_t* page = page_align(rec);
+ ulint n_ext = 0;
+ ulint i;
+ ulint left;
+ ulint heap_no;
+ ulint n_recs = page_get_n_recs(page_zip->data);
+
+ ut_ad(page_is_leaf(page));
+ ut_ad(page_is_comp(page));
+ ut_ad(dict_table_is_comp(index->table));
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(!dict_index_is_ibuf(index));
+
+ heap_no = rec_get_heap_no_new(rec);
+ ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
+ left = heap_no - PAGE_HEAP_NO_USER_LOW;
+ if (UNIV_UNLIKELY(!left)) {
+ return(0);
+ }
+
+ for (i = 0; i < n_recs; i++) {
+ const rec_t* r = page + (page_zip_dir_get(page_zip, i)
+ & PAGE_ZIP_DIR_SLOT_MASK);
+
+ if (rec_get_heap_no_new(r) < heap_no) {
+ n_ext += rec_get_n_extern_new(r, index,
+ ULINT_UNDEFINED);
+ if (!--left) {
+ break;
+ }
+ }
+ }
+
+ return(n_ext);
+}
+
+/**********************************************************************//**
+Encode the length of a fixed-length column.
+@return buf + length of encoded val */
+static
+byte*
+page_zip_fixed_field_encode(
+/*========================*/
+ byte* buf, /*!< in: pointer to buffer where to write */
+ ulint val) /*!< in: value to write */
+{
+ ut_ad(val >= 2);
+
+ if (UNIV_LIKELY(val < 126)) {
+ /*
+ 0 = nullable variable field of at most 255 bytes length;
+ 1 = not null variable field of at most 255 bytes length;
+ 126 = nullable variable field with maximum length >255;
+ 127 = not null variable field with maximum length >255
+ */
+ *buf++ = (byte) val;
+ } else {
+ *buf++ = (byte) (0x80 | val >> 8);
+ *buf++ = (byte) val;
+ }
+
+ return(buf);
+}
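+
+/* Worked example (values only, illustrative): a run of NOT NULL
+fixed-length columns totalling 4 bytes is encoded as
+val = 4 << 1 | 1 = 9, i.e. the single byte 0x09. A 300-byte run gives
+val = 300 << 1 | 1 = 601 = 0x259, which is not below 126, so it is
+written as the two bytes 0x82 0x59 (0x80 | (601 >> 8), then
+601 & 0xff). */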
+
+/**********************************************************************//**
+Write the index information for the compressed page.
+@return used size of buf */
+static
+ulint
+page_zip_fields_encode(
+/*===================*/
+ ulint n, /*!< in: number of fields to compress */
+ dict_index_t* index, /*!< in: index comprising at least n fields */
+ ulint trx_id_pos,/*!< in: position of the trx_id column
+ in the index, or ULINT_UNDEFINED if
+ this is a non-leaf page */
+ byte* buf) /*!< out: buffer of (n + 1) * 2 bytes */
+{
+ const byte* buf_start = buf;
+ ulint i;
+ ulint col;
+ ulint trx_id_col = 0;
+ /* sum of lengths of preceding non-nullable fixed fields, or 0 */
+ ulint fixed_sum = 0;
+
+ ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);
+
+ for (i = col = 0; i < n; i++) {
+ dict_field_t* field = dict_index_get_nth_field(index, i);
+ ulint val;
+
+ if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
+ val = 1; /* set the "not nullable" flag */
+ } else {
+ val = 0; /* nullable field */
+ }
+
+ if (!field->fixed_len) {
+ /* variable-length field */
+ const dict_col_t* column
+ = dict_field_get_col(field);
+
+ if (UNIV_UNLIKELY(column->len > 255)
+ || UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
+ val |= 0x7e; /* max > 255 bytes */
+ }
+
+ if (fixed_sum) {
+ /* write out the length of any
+ preceding non-nullable fields */
+ buf = page_zip_fixed_field_encode(
+ buf, fixed_sum << 1 | 1);
+ fixed_sum = 0;
+ col++;
+ }
+
+ *buf++ = (byte) val;
+ col++;
+ } else if (val) {
+ /* fixed-length non-nullable field */
+
+ if (fixed_sum && UNIV_UNLIKELY
+ (fixed_sum + field->fixed_len
+ > DICT_MAX_FIXED_COL_LEN)) {
+ /* Write out the length of the
+ preceding non-nullable fields,
+ to avoid exceeding the maximum
+ length of a fixed-length column. */
+ buf = page_zip_fixed_field_encode(
+ buf, fixed_sum << 1 | 1);
+ fixed_sum = 0;
+ col++;
+ }
+
+ if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
+ if (fixed_sum) {
+ /* Write out the length of any
+ preceding non-nullable fields,
+ and start a new trx_id column. */
+ buf = page_zip_fixed_field_encode(
+ buf, fixed_sum << 1 | 1);
+ col++;
+ }
+
+ trx_id_col = col;
+ fixed_sum = field->fixed_len;
+ } else {
+ /* add to the sum */
+ fixed_sum += field->fixed_len;
+ }
+ } else {
+ /* fixed-length nullable field */
+
+ if (fixed_sum) {
+ /* write out the length of any
+ preceding non-nullable fields */
+ buf = page_zip_fixed_field_encode(
+ buf, fixed_sum << 1 | 1);
+ fixed_sum = 0;
+ col++;
+ }
+
+ buf = page_zip_fixed_field_encode(
+ buf, field->fixed_len << 1);
+ col++;
+ }
+ }
+
+ if (fixed_sum) {
+		/* Write out the length of the last run of
+		fixed-length columns. */
+ buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
+ }
+
+ if (trx_id_pos != ULINT_UNDEFINED) {
+ /* Write out the position of the trx_id column */
+ i = trx_id_col;
+ } else {
+ /* Write out the number of nullable fields */
+ i = index->n_nullable;
+ }
+
+ if (i < 128) {
+ *buf++ = (byte) i;
+ } else {
+ *buf++ = (byte) (0x80 | i >> 8);
+ *buf++ = (byte) i;
+ }
+
+ ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
+ return((ulint) (buf - buf_start));
+}
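+
+/* Worked example (hypothetical index, for illustration): a clustered
+index on (c1 INT NOT NULL, DB_TRX_ID, DB_ROLL_PTR, c2 VARCHAR(100)
+NOT NULL) with trx_id_pos = 1 encodes as
+
+	0x09	the 4-byte fixed run before DB_TRX_ID: 4 << 1 | 1
+	0x1b	the 13-byte run DB_TRX_ID + DB_ROLL_PTR: (6 + 7) << 1 | 1
+	0x01	c2: not-null variable-length field of at most 255 bytes
+	0x01	trx_id_col, the encoded position of DB_TRX_ID
+
+for a returned length of 4 bytes. */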
+
+/**********************************************************************//**
+Populate the dense page directory from the sparse directory. */
+static
+void
+page_zip_dir_encode(
+/*================*/
+ const page_t* page, /*!< in: compact page */
+ byte* buf, /*!< in: pointer to dense page directory[-1];
+ out: dense directory on compressed page */
+ const rec_t** recs) /*!< in: pointer to an array of 0, or NULL;
+ out: dense page directory sorted by ascending
+ address (and heap_no) */
+{
+ const byte* rec;
+ ulint status;
+ ulint min_mark;
+ ulint heap_no;
+ ulint i;
+ ulint n_heap;
+ ulint offs;
+
+ min_mark = 0;
+
+ if (page_is_leaf(page)) {
+ status = REC_STATUS_ORDINARY;
+ } else {
+ status = REC_STATUS_NODE_PTR;
+ if (UNIV_UNLIKELY
+ (mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
+ min_mark = REC_INFO_MIN_REC_FLAG;
+ }
+ }
+
+ n_heap = page_dir_get_n_heap(page);
+
+ /* Traverse the list of stored records in the collation order,
+ starting from the first user record. */
+
+ rec = page + PAGE_NEW_INFIMUM;
+
+ i = 0;
+
+ for (;;) {
+ ulint info_bits;
+ offs = rec_get_next_offs(rec, TRUE);
+ if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
+ break;
+ }
+ rec = page + offs;
+ heap_no = rec_get_heap_no_new(rec);
+ ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
+ ut_a(heap_no < n_heap);
+ ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
+ ut_a(offs >= PAGE_ZIP_START);
+#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
+# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
+#endif
+#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE_MAX - 1
+# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE_MAX - 1"
+#endif
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
+ offs |= PAGE_ZIP_DIR_SLOT_OWNED;
+ }
+
+ info_bits = rec_get_info_bits(rec, TRUE);
+ if (info_bits & REC_INFO_DELETED_FLAG) {
+ info_bits &= ~REC_INFO_DELETED_FLAG;
+ offs |= PAGE_ZIP_DIR_SLOT_DEL;
+ }
+ ut_a(info_bits == min_mark);
+ /* Only the smallest user record can have
+ REC_INFO_MIN_REC_FLAG set. */
+ min_mark = 0;
+
+ mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
+
+ if (UNIV_LIKELY_NULL(recs)) {
+ /* Ensure that each heap_no occurs at most once. */
+ ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
+ /* exclude infimum and supremum */
+ recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
+ }
+
+ ut_a(rec_get_status(rec) == status);
+ }
+
+ offs = page_header_get_field(page, PAGE_FREE);
+
+ /* Traverse the free list (of deleted records). */
+ while (offs) {
+ ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
+ rec = page + offs;
+
+ heap_no = rec_get_heap_no_new(rec);
+ ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
+ ut_a(heap_no < n_heap);
+
+ ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
+ ut_a(rec_get_status(rec) == status);
+
+ mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
+
+ if (UNIV_LIKELY_NULL(recs)) {
+ /* Ensure that each heap_no occurs at most once. */
+ ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
+ /* exclude infimum and supremum */
+ recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
+ }
+
+ offs = rec_get_next_offs(rec, TRUE);
+ }
+
+	/* Ensure that each heap_no occurs at least once. */
+ ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
+}
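+
+/* After encoding, slot 0 of the dense directory (as read back by
+page_zip_dir_get()) is the first user record in collation order, and
+free-list records follow the page_get_n_recs() live ones; this
+ordering is what page_zip_dir_find() and page_zip_dir_find_free()
+above rely on when they split the directory at
+page_zip_dir_user_size(). */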
+
+extern "C" {
+
+/**********************************************************************//**
+Allocate memory for zlib. */
+static
+void*
+page_zip_zalloc(
+/*============*/
+ void* opaque, /*!< in/out: memory heap */
+ uInt items, /*!< in: number of items to allocate */
+ uInt size) /*!< in: size of an item in bytes */
+{
+ return(mem_heap_zalloc(static_cast<mem_heap_t*>(opaque), items * size));
+}
+
+/**********************************************************************//**
+Deallocate memory for zlib. */
+static
+void
+page_zip_free(
+/*==========*/
+ void* opaque __attribute__((unused)), /*!< in: memory heap */
+ void* address __attribute__((unused)))/*!< in: object to free */
+{
+}
+
+} /* extern "C" */
+
+/**********************************************************************//**
+Configure the zlib allocator to use the given memory heap. */
+UNIV_INTERN
+void
+page_zip_set_alloc(
+/*===============*/
+ void* stream, /*!< in/out: zlib stream */
+ mem_heap_t* heap) /*!< in: memory heap to use */
+{
+ z_stream* strm = static_cast<z_stream*>(stream);
+
+ strm->zalloc = page_zip_zalloc;
+ strm->zfree = page_zip_free;
+ strm->opaque = heap;
+}
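+
+/* Typical call pattern (a sketch; error handling omitted): route all
+zlib allocations for one compression into a private heap, so that a
+single mem_heap_free() releases everything deflate() allocated. */
+#if 0
+	mem_heap_t*	heap = mem_heap_create(UNIV_PAGE_SIZE);
+	z_stream	strm;
+
+	page_zip_set_alloc(&strm, heap);
+
+	if (deflateInit(&strm, (int) page_zip_level) == Z_OK) {
+		/* ... feed the page through deflate() ... */
+		deflateEnd(&strm);
+	}
+
+	mem_heap_free(heap);
+#endif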
+
+#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+/** Symbol for enabling compression and decompression diagnostics */
+# define PAGE_ZIP_COMPRESS_DBG
+#endif
+
+#ifdef PAGE_ZIP_COMPRESS_DBG
+/** Set this variable in a debugger to enable
+excessive logging in page_zip_compress(). */
+UNIV_INTERN ibool page_zip_compress_dbg;
+/** Set this variable in a debugger to enable
+binary logging of the data passed to deflate().
+When this variable is nonzero, it will act
+as a log file name generator. */
+UNIV_INTERN unsigned page_zip_compress_log;
+
+/**********************************************************************//**
+Wrapper for deflate(). Log the operation if page_zip_compress_dbg is set.
+@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
+static
+int
+page_zip_compress_deflate(
+/*======================*/
+ FILE* logfile,/*!< in: log file, or NULL */
+ z_streamp strm, /*!< in/out: compressed stream for deflate() */
+ int flush) /*!< in: deflate() flushing method */
+{
+ int status;
+ if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
+ ut_print_buf(stderr, strm->next_in, strm->avail_in);
+ }
+ if (UNIV_LIKELY_NULL(logfile)) {
+ fwrite(strm->next_in, 1, strm->avail_in, logfile);
+ }
+ status = deflate(strm, flush);
+ if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
+ fprintf(stderr, " -> %d\n", status);
+ }
+ return(status);
+}
+
+/* Redefine deflate(). */
+# undef deflate
+/** Debug wrapper for the zlib compression routine deflate().
+Log the operation if page_zip_compress_dbg is set.
+@param strm in/out: compressed stream
+@param flush in: flushing method
+@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
+# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
+/** Declaration of the logfile parameter */
+# define FILE_LOGFILE FILE* logfile,
+/** The logfile parameter */
+# define LOGFILE logfile,
+#else /* PAGE_ZIP_COMPRESS_DBG */
+/** Empty declaration of the logfile parameter */
+# define FILE_LOGFILE
+/** Missing logfile parameter */
+# define LOGFILE
+#endif /* PAGE_ZIP_COMPRESS_DBG */
+
+/**********************************************************************//**
+Compress the records of a node pointer page.
+@return Z_OK, or a zlib error code */
+static
+int
+page_zip_compress_node_ptrs(
+/*========================*/
+ FILE_LOGFILE
+ z_stream* c_stream, /*!< in/out: compressed page stream */
+ const rec_t** recs, /*!< in: dense page directory
+ sorted by address */
+ ulint n_dense, /*!< in: size of recs[] */
+ dict_index_t* index, /*!< in: the index of the page */
+ byte* storage, /*!< in: end of dense page directory */
+ mem_heap_t* heap) /*!< in: temporary memory heap */
+{
+ int err = Z_OK;
+ ulint* offsets = NULL;
+
+ do {
+ const rec_t* rec = *recs++;
+
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ /* Only leaf nodes may contain externally stored columns. */
+ ut_ad(!rec_offs_any_extern(offsets));
+
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+ rec_offs_extra_size(offsets));
+
+ /* Compress the extra bytes. */
+ c_stream->avail_in = static_cast<uInt>(
+ rec - REC_N_NEW_EXTRA_BYTES - c_stream->next_in);
+
+ if (c_stream->avail_in) {
+ err = deflate(c_stream, Z_NO_FLUSH);
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+ break;
+ }
+ }
+ ut_ad(!c_stream->avail_in);
+
+ /* Compress the data bytes, except node_ptr. */
+ c_stream->next_in = (byte*) rec;
+ c_stream->avail_in = static_cast<uInt>(
+ rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE);
+
+ if (c_stream->avail_in) {
+ err = deflate(c_stream, Z_NO_FLUSH);
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+ break;
+ }
+ }
+
+ ut_ad(!c_stream->avail_in);
+
+ memcpy(storage - REC_NODE_PTR_SIZE
+ * (rec_get_heap_no_new(rec) - 1),
+ c_stream->next_in, REC_NODE_PTR_SIZE);
+ c_stream->next_in += REC_NODE_PTR_SIZE;
+ } while (--n_dense);
+
+ return(err);
+}
+
+/**********************************************************************//**
+Compress the records of a leaf node of a secondary index.
+@return Z_OK, or a zlib error code */
+static
+int
+page_zip_compress_sec(
+/*==================*/
+ FILE_LOGFILE
+ z_stream* c_stream, /*!< in/out: compressed page stream */
+ const rec_t** recs, /*!< in: dense page directory
+ sorted by address */
+ ulint n_dense) /*!< in: size of recs[] */
+{
+ int err = Z_OK;
+
+ ut_ad(n_dense > 0);
+
+ do {
+ const rec_t* rec = *recs++;
+
+ /* Compress everything up to this record. */
+ c_stream->avail_in = static_cast<uInt>(
+ rec - REC_N_NEW_EXTRA_BYTES
+ - c_stream->next_in);
+
+ if (UNIV_LIKELY(c_stream->avail_in)) {
+ UNIV_MEM_ASSERT_RW(c_stream->next_in,
+ c_stream->avail_in);
+ err = deflate(c_stream, Z_NO_FLUSH);
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+ break;
+ }
+ }
+
+ ut_ad(!c_stream->avail_in);
+ ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
+
+ /* Skip the REC_N_NEW_EXTRA_BYTES. */
+
+ c_stream->next_in = (byte*) rec;
+ } while (--n_dense);
+
+ return(err);
+}
+
+/**********************************************************************//**
+Compress a record of a leaf node of a clustered index that contains
+externally stored columns.
+@return Z_OK, or a zlib error code */
+static
+int
+page_zip_compress_clust_ext(
+/*========================*/
+ FILE_LOGFILE
+ z_stream* c_stream, /*!< in/out: compressed page stream */
+ const rec_t* rec, /*!< in: record */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+	ulint		trx_id_col,	/*!< in: position of DB_TRX_ID */
+ byte* deleted, /*!< in: dense directory entry pointing
+ to the head of the free list */
+ byte* storage, /*!< in: end of dense page directory */
+ byte** externs, /*!< in/out: pointer to the next
+ available BLOB pointer */
+ ulint* n_blobs) /*!< in/out: number of
+ externally stored columns */
+{
+ int err;
+ ulint i;
+
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+ rec_offs_extra_size(offsets));
+
+ for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+ ulint len;
+ const byte* src;
+
+ if (UNIV_UNLIKELY(i == trx_id_col)) {
+ ut_ad(!rec_offs_nth_extern(offsets, i));
+ /* Store trx_id and roll_ptr
+ in uncompressed form. */
+ src = rec_get_nth_field(rec, offsets, i, &len);
+ ut_ad(src + DATA_TRX_ID_LEN
+ == rec_get_nth_field(rec, offsets,
+ i + 1, &len));
+ ut_ad(len == DATA_ROLL_PTR_LEN);
+
+ /* Compress any preceding bytes. */
+ c_stream->avail_in = static_cast<uInt>(
+ src - c_stream->next_in);
+
+ if (c_stream->avail_in) {
+ err = deflate(c_stream, Z_NO_FLUSH);
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+
+ return(err);
+ }
+ }
+
+ ut_ad(!c_stream->avail_in);
+ ut_ad(c_stream->next_in == src);
+
+ memcpy(storage
+ - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+ * (rec_get_heap_no_new(rec) - 1),
+ c_stream->next_in,
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ c_stream->next_in
+ += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+
+ /* Skip also roll_ptr */
+ i++;
+ } else if (rec_offs_nth_extern(offsets, i)) {
+ src = rec_get_nth_field(rec, offsets, i, &len);
+ ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
+ src += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+ c_stream->avail_in = static_cast<uInt>(
+ src - c_stream->next_in);
+ if (UNIV_LIKELY(c_stream->avail_in)) {
+ err = deflate(c_stream, Z_NO_FLUSH);
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+
+ return(err);
+ }
+ }
+
+ ut_ad(!c_stream->avail_in);
+ ut_ad(c_stream->next_in == src);
+
+ /* Reserve space for the data at
+ the end of the space reserved for
+ the compressed data and the page
+ modification log. */
+
+ if (UNIV_UNLIKELY
+ (c_stream->avail_out
+ <= BTR_EXTERN_FIELD_REF_SIZE)) {
+ /* out of space */
+ return(Z_BUF_ERROR);
+ }
+
+ ut_ad(*externs == c_stream->next_out
+ + c_stream->avail_out
+ + 1/* end of modif. log */);
+
+ c_stream->next_in
+ += BTR_EXTERN_FIELD_REF_SIZE;
+
+ /* Skip deleted records. */
+ if (UNIV_LIKELY_NULL
+ (page_zip_dir_find_low(
+ storage, deleted,
+ page_offset(rec)))) {
+ continue;
+ }
+
+ (*n_blobs)++;
+ c_stream->avail_out
+ -= BTR_EXTERN_FIELD_REF_SIZE;
+ *externs -= BTR_EXTERN_FIELD_REF_SIZE;
+
+ /* Copy the BLOB pointer */
+ memcpy(*externs, c_stream->next_in
+ - BTR_EXTERN_FIELD_REF_SIZE,
+ BTR_EXTERN_FIELD_REF_SIZE);
+ }
+ }
+
+ return(Z_OK);
+}
+
+/**********************************************************************//**
+Compress the records of a leaf node of a clustered index.
+@return Z_OK, or a zlib error code */
+static
+int
+page_zip_compress_clust(
+/*====================*/
+ FILE_LOGFILE
+ z_stream* c_stream, /*!< in/out: compressed page stream */
+ const rec_t** recs, /*!< in: dense page directory
+ sorted by address */
+ ulint n_dense, /*!< in: size of recs[] */
+ dict_index_t* index, /*!< in: the index of the page */
+ ulint* n_blobs, /*!< in: 0; out: number of
+ externally stored columns */
+	ulint		trx_id_col,	/*!< in: position of the trx_id
+					column */
+ byte* deleted, /*!< in: dense directory entry pointing
+ to the head of the free list */
+ byte* storage, /*!< in: end of dense page directory */
+ mem_heap_t* heap) /*!< in: temporary memory heap */
+{
+ int err = Z_OK;
+ ulint* offsets = NULL;
+ /* BTR_EXTERN_FIELD_REF storage */
+ byte* externs = storage - n_dense
+ * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ ut_ad(*n_blobs == 0);
+
+ do {
+ const rec_t* rec = *recs++;
+
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ ut_ad(rec_offs_n_fields(offsets)
+ == dict_index_get_n_fields(index));
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+ rec_offs_extra_size(offsets));
+
+ /* Compress the extra bytes. */
+ c_stream->avail_in = static_cast<uInt>(
+ rec - REC_N_NEW_EXTRA_BYTES
+ - c_stream->next_in);
+
+ if (c_stream->avail_in) {
+ err = deflate(c_stream, Z_NO_FLUSH);
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+
+ goto func_exit;
+ }
+ }
+ ut_ad(!c_stream->avail_in);
+ ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
+
+ /* Compress the data bytes. */
+
+ c_stream->next_in = (byte*) rec;
+
+ /* Check if there are any externally stored columns.
+ For each externally stored column, store the
+ BTR_EXTERN_FIELD_REF separately. */
+ if (rec_offs_any_extern(offsets)) {
+ ut_ad(dict_index_is_clust(index));
+
+ err = page_zip_compress_clust_ext(
+ LOGFILE
+ c_stream, rec, offsets, trx_id_col,
+ deleted, storage, &externs, n_blobs);
+
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+
+ goto func_exit;
+ }
+ } else {
+ ulint len;
+ const byte* src;
+
+ /* Store trx_id and roll_ptr in uncompressed form. */
+ src = rec_get_nth_field(rec, offsets,
+ trx_id_col, &len);
+ ut_ad(src + DATA_TRX_ID_LEN
+ == rec_get_nth_field(rec, offsets,
+ trx_id_col + 1, &len));
+ ut_ad(len == DATA_ROLL_PTR_LEN);
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+ rec_offs_extra_size(offsets));
+
+ /* Compress any preceding bytes. */
+ c_stream->avail_in = static_cast<uInt>(
+ src - c_stream->next_in);
+
+ if (c_stream->avail_in) {
+ err = deflate(c_stream, Z_NO_FLUSH);
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+
+					goto func_exit;
+ }
+ }
+
+ ut_ad(!c_stream->avail_in);
+ ut_ad(c_stream->next_in == src);
+
+ memcpy(storage
+ - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+ * (rec_get_heap_no_new(rec) - 1),
+ c_stream->next_in,
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ c_stream->next_in
+ += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+
+ /* Skip also roll_ptr */
+ ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
+ }
+
+ /* Compress the last bytes of the record. */
+ c_stream->avail_in = static_cast<uInt>(
+ rec + rec_offs_data_size(offsets) - c_stream->next_in);
+
+ if (c_stream->avail_in) {
+ err = deflate(c_stream, Z_NO_FLUSH);
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+
+ goto func_exit;
+ }
+ }
+ ut_ad(!c_stream->avail_in);
+ } while (--n_dense);
+
+func_exit:
+ return(err);
+}
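+
+/* Note that DB_TRX_ID and DB_ROLL_PTR are kept out of the compressed
+stream and stored uncompressed in the trailer, so that subsequent
+updates can overwrite them in place without recompressing the page. */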
+
+/**********************************************************************//**
+Compress a page.
+@return TRUE on success, FALSE on failure; page_zip will be left
+intact on failure. */
+UNIV_INTERN
+ibool
+page_zip_compress(
+/*==============*/
+ page_zip_des_t* page_zip,/*!< in: size; out: data, n_blobs,
+ m_start, m_end, m_nonempty */
+ const page_t* page, /*!< in: uncompressed page */
+ dict_index_t* index, /*!< in: index of the B-tree node */
+ ulint level, /*!< in: compression level */
+ mtr_t* mtr) /*!< in: mini-transaction, or NULL */
+{
+ z_stream c_stream;
+ int err;
+ ulint n_fields;/* number of index fields needed */
+ byte* fields; /*!< index field information */
+ byte* buf; /*!< compressed payload of the page */
+ byte* buf_end;/* end of buf */
+ ulint n_dense;
+ ulint slot_size;/* amount of uncompressed bytes per record */
+ const rec_t** recs; /*!< dense page directory, sorted by address */
+ mem_heap_t* heap;
+ ulint trx_id_col;
+ ulint n_blobs = 0;
+ byte* storage;/* storage of uncompressed columns */
+#ifndef UNIV_HOTBACKUP
+ ullint usec = ut_time_us(NULL);
+#endif /* !UNIV_HOTBACKUP */
+#ifdef PAGE_ZIP_COMPRESS_DBG
+ FILE* logfile = NULL;
+#endif
+ /* A local copy of srv_cmp_per_index_enabled to avoid reading that
+	variable multiple times in this function, since it can be changed
+	at any time. */
+ my_bool cmp_per_index_enabled = srv_cmp_per_index_enabled;
+
+ if (!page) {
+ return(FALSE);
+ }
+
+ ut_a(page_is_comp(page));
+ ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(page_simple_validate_new((page_t*) page));
+ ut_ad(page_zip_simple_validate(page_zip));
+ ut_ad(dict_table_is_comp(index->table));
+ ut_ad(!dict_index_is_ibuf(index));
+
+ UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+
+ /* Check the data that will be omitted. */
+ ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
+ infimum_extra, sizeof infimum_extra));
+ ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
+ infimum_data, sizeof infimum_data));
+ ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
+ /* info_bits == 0, n_owned <= max */
+ <= PAGE_DIR_SLOT_MAX_N_OWNED);
+ ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
+ supremum_extra_data, sizeof supremum_extra_data));
+
+ if (page_is_empty(page)) {
+ ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
+ == PAGE_NEW_SUPREMUM);
+ }
+
+ if (page_is_leaf(page)) {
+ n_fields = dict_index_get_n_fields(index);
+ } else {
+ n_fields = dict_index_get_n_unique_in_tree(index);
+ }
+
+ /* The dense directory excludes the infimum and supremum records. */
+ n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
+#ifdef PAGE_ZIP_COMPRESS_DBG
+ if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
+ fprintf(stderr, "compress %p %p %lu %lu %lu\n",
+ (void*) page_zip, (void*) page,
+ (ibool) page_is_leaf(page),
+ n_fields, n_dense);
+ }
+ if (UNIV_UNLIKELY(page_zip_compress_log)) {
+ /* Create a log file for every compression attempt. */
+ char logfilename[9];
+ ut_snprintf(logfilename, sizeof logfilename,
+ "%08x", page_zip_compress_log++);
+ logfile = fopen(logfilename, "wb");
+
+ if (logfile) {
+ /* Write the uncompressed page to the log. */
+ fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
+ /* Record the compressed size as zero.
+ This will be overwritten at successful exit. */
+ putc(0, logfile);
+ putc(0, logfile);
+ putc(0, logfile);
+ putc(0, logfile);
+ }
+ }
+#endif /* PAGE_ZIP_COMPRESS_DBG */
+#ifndef UNIV_HOTBACKUP
+ page_zip_stat[page_zip->ssize - 1].compressed++;
+ if (cmp_per_index_enabled) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index[index->id].compressed++;
+ mutex_exit(&page_zip_stat_per_index_mutex);
+ }
+#endif /* !UNIV_HOTBACKUP */
+
+ if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
+ >= page_zip_get_size(page_zip))) {
+
+ goto err_exit;
+ }
+
+ MONITOR_INC(MONITOR_PAGE_COMPRESS);
+
+ heap = mem_heap_create(page_zip_get_size(page_zip)
+ + n_fields * (2 + sizeof(ulint))
+ + REC_OFFS_HEADER_SIZE
+ + n_dense * ((sizeof *recs)
+ - PAGE_ZIP_DIR_SLOT_SIZE)
+ + UNIV_PAGE_SIZE * 4
+ + (512 << MAX_MEM_LEVEL));
+
+ recs = static_cast<const rec_t**>(
+ mem_heap_zalloc(heap, n_dense * sizeof *recs));
+
+ fields = static_cast<byte*>(mem_heap_alloc(heap, (n_fields + 1) * 2));
+
+ buf = static_cast<byte*>(
+ mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA));
+
+ buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
+
+ /* Compress the data payload. */
+ page_zip_set_alloc(&c_stream, heap);
+
+ err = deflateInit2(&c_stream, static_cast<int>(level),
+ Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
+ MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+ ut_a(err == Z_OK);
+
+ c_stream.next_out = buf;
+ /* Subtract the space reserved for uncompressed data. */
+ /* Page header and the end marker of the modification log */
+ c_stream.avail_out = static_cast<uInt>(buf_end - buf - 1);
+
+ /* Dense page directory and uncompressed columns, if any */
+ if (page_is_leaf(page)) {
+ if (dict_index_is_clust(index)) {
+ trx_id_col = dict_index_get_sys_col_pos(
+ index, DATA_TRX_ID);
+ ut_ad(trx_id_col > 0);
+ ut_ad(trx_id_col != ULINT_UNDEFINED);
+
+ slot_size = PAGE_ZIP_DIR_SLOT_SIZE
+ + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+ } else {
+ /* Signal the absence of trx_id
+ in page_zip_fields_encode() */
+ ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
+ == ULINT_UNDEFINED);
+ trx_id_col = 0;
+ slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
+ }
+ } else {
+ slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
+ trx_id_col = ULINT_UNDEFINED;
+ }
+
+ if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
+ + 6/* sizeof(zlib header and footer) */)) {
+ goto zlib_error;
+ }
+
+ c_stream.avail_out -= static_cast<uInt>(n_dense * slot_size);
+ c_stream.avail_in = static_cast<uInt>(
+ page_zip_fields_encode(n_fields, index, trx_id_col, fields));
+ c_stream.next_in = fields;
+ if (UNIV_LIKELY(!trx_id_col)) {
+ trx_id_col = ULINT_UNDEFINED;
+ }
+
+ UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
+ err = deflate(&c_stream, Z_FULL_FLUSH);
+ if (err != Z_OK) {
+ goto zlib_error;
+ }
+
+ ut_ad(!c_stream.avail_in);
+
+ page_zip_dir_encode(page, buf_end, recs);
+
+ c_stream.next_in = (byte*) page + PAGE_ZIP_START;
+
+ storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
+
+ /* Compress the records in heap_no order. */
+ if (UNIV_UNLIKELY(!n_dense)) {
+ } else if (!page_is_leaf(page)) {
+ /* This is a node pointer page. */
+ err = page_zip_compress_node_ptrs(LOGFILE
+ &c_stream, recs, n_dense,
+ index, storage, heap);
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+ goto zlib_error;
+ }
+ } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
+ /* This is a leaf page in a secondary index. */
+ err = page_zip_compress_sec(LOGFILE
+ &c_stream, recs, n_dense);
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+ goto zlib_error;
+ }
+ } else {
+ /* This is a leaf page in a clustered index. */
+ err = page_zip_compress_clust(LOGFILE
+ &c_stream, recs, n_dense,
+ index, &n_blobs, trx_id_col,
+ buf_end - PAGE_ZIP_DIR_SLOT_SIZE
+ * page_get_n_recs(page),
+ storage, heap);
+ if (UNIV_UNLIKELY(err != Z_OK)) {
+ goto zlib_error;
+ }
+ }
+
+ /* Finish the compression. */
+ ut_ad(!c_stream.avail_in);
+ /* Compress any trailing garbage, in case the last record was
+ allocated from an originally longer space on the free list,
+ or the data of the last record from page_zip_compress_sec(). */
+ c_stream.avail_in = static_cast<uInt>(
+ page_header_get_field(page, PAGE_HEAP_TOP)
+ - (c_stream.next_in - page));
+ ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
+
+ UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
+ err = deflate(&c_stream, Z_FINISH);
+
+ if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
+zlib_error:
+ deflateEnd(&c_stream);
+ mem_heap_free(heap);
+err_exit:
+#ifdef PAGE_ZIP_COMPRESS_DBG
+ if (logfile) {
+ fclose(logfile);
+ }
+#endif /* PAGE_ZIP_COMPRESS_DBG */
+#ifndef UNIV_HOTBACKUP
+ if (page_is_leaf(page)) {
+ dict_index_zip_failure(index);
+ }
+
+ ullint time_diff = ut_time_us(NULL) - usec;
+ page_zip_stat[page_zip->ssize - 1].compressed_usec
+ += time_diff;
+ if (cmp_per_index_enabled) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index[index->id].compressed_usec
+ += time_diff;
+ mutex_exit(&page_zip_stat_per_index_mutex);
+ }
+#endif /* !UNIV_HOTBACKUP */
+ return(FALSE);
+ }
+
+ err = deflateEnd(&c_stream);
+ ut_a(err == Z_OK);
+
+ ut_ad(buf + c_stream.total_out == c_stream.next_out);
+ ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
+
+ /* Valgrind believes that zlib does not initialize some bits
+ in the last 7 or 8 bytes of the stream. Make Valgrind happy. */
+ UNIV_MEM_VALID(buf, c_stream.total_out);
+
+ /* Zero out the area reserved for the modification log.
+ Space for the end marker of the modification log is not
+ included in avail_out. */
+ memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
+
+#ifdef UNIV_DEBUG
+ page_zip->m_start =
+#endif /* UNIV_DEBUG */
+ page_zip->m_end = PAGE_DATA + c_stream.total_out;
+ page_zip->m_nonempty = FALSE;
+ page_zip->n_blobs = n_blobs;
+ /* Copy those header fields that will not be written
+ in buf_flush_init_for_writing() */
+ memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
+ FIL_PAGE_LSN - FIL_PAGE_PREV);
+ memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
+ memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
+ PAGE_DATA - FIL_PAGE_DATA);
+ /* Copy the rest of the compressed page */
+ memcpy(page_zip->data + PAGE_DATA, buf,
+ page_zip_get_size(page_zip) - PAGE_DATA);
+ mem_heap_free(heap);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (mtr) {
+#ifndef UNIV_HOTBACKUP
+ page_zip_compress_write_log(page_zip, page, index, mtr);
+#endif /* !UNIV_HOTBACKUP */
+ }
+
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+#ifdef PAGE_ZIP_COMPRESS_DBG
+ if (logfile) {
+ /* Record the compressed size of the block. */
+ byte sz[4];
+ mach_write_to_4(sz, c_stream.total_out);
+ fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
+ fwrite(sz, 1, sizeof sz, logfile);
+ fclose(logfile);
+ }
+#endif /* PAGE_ZIP_COMPRESS_DBG */
+#ifndef UNIV_HOTBACKUP
+ ullint time_diff = ut_time_us(NULL) - usec;
+ page_zip_stat[page_zip->ssize - 1].compressed_ok++;
+ page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff;
+ if (cmp_per_index_enabled) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index[index->id].compressed_ok++;
+ page_zip_stat_per_index[index->id].compressed_usec += time_diff;
+ mutex_exit(&page_zip_stat_per_index_mutex);
+ }
+
+ if (page_is_leaf(page)) {
+ dict_index_zip_success(index);
+ }
+#endif /* !UNIV_HOTBACKUP */
+
+ return(TRUE);
+}
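+
+/* For orientation, a successful page_zip_compress() leaves
+page_zip->data looking as follows (a sketch derived from the
+copies above):
+
+	0 .. PAGE_DATA - 1	page header, copied from the page
+	PAGE_DATA .. m_end - 1	compressed index information and records
+	m_end .. trailer	zero-filled modification log, including
+				its end marker byte
+	trailer			BLOB pointers, uncompressed columns and
+				the dense directory, ending at the last
+				byte of the block */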
+
+/**********************************************************************//**
+Compare two page directory entries.
+@return positive if rec1 > rec2 */
+UNIV_INLINE
+ibool
+page_zip_dir_cmp(
+/*=============*/
+ const rec_t* rec1, /*!< in: rec1 */
+ const rec_t* rec2) /*!< in: rec2 */
+{
+ return(rec1 > rec2);
+}
+
+/**********************************************************************//**
+Sort the dense page directory by address (heap_no). */
+static
+void
+page_zip_dir_sort(
+/*==============*/
+ rec_t** arr, /*!< in/out: dense page directory */
+ rec_t** aux_arr,/*!< in/out: work area */
+ ulint low, /*!< in: lower bound of the sorting area, inclusive */
+ ulint high) /*!< in: upper bound of the sorting area, exclusive */
+{
+ UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
+ page_zip_dir_cmp);
+}
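+
+/* UT_SORT_FUNCTION_BODY (ut0sort.h) expands into a recursive merge
+sort of arr[low..high-1], using aux_arr as scratch space.  Because a
+heap_no is tied to a fixed byte position within the page heap, sorting
+the directory by address also sorts it by heap_no. */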
+
+/**********************************************************************//**
+Deallocate the index information initialized by page_zip_fields_decode(). */
+static
+void
+page_zip_fields_free(
+/*=================*/
+ dict_index_t* index) /*!< in: dummy index to be freed */
+{
+ if (index) {
+ dict_table_t* table = index->table;
+ os_fast_mutex_free(&index->zip_pad.mutex);
+ mem_heap_free(index->heap);
+
+ dict_mem_table_free(table);
+ }
+}
+
+/**********************************************************************//**
+Read the index information for the compressed page.
+@return own: dummy index describing the page, or NULL on error */
+static
+dict_index_t*
+page_zip_fields_decode(
+/*===================*/
+ const byte* buf, /*!< in: index information */
+ const byte* end, /*!< in: end of buf */
+ ulint* trx_id_col)/*!< in: NULL for non-leaf pages;
+ for leaf pages, pointer to where to store
+ the position of the trx_id column */
+{
+ const byte* b;
+ ulint n;
+ ulint i;
+ ulint val;
+ dict_table_t* table;
+ dict_index_t* index;
+
+ /* Determine the number of fields. */
+ for (b = buf, n = 0; b < end; n++) {
+ if (*b++ & 0x80) {
+ b++; /* skip the second byte */
+ }
+ }
+
+ n--; /* n_nullable or trx_id */
+
+ if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
+
+ page_zip_fail(("page_zip_fields_decode: n = %lu\n",
+ (ulong) n));
+ return(NULL);
+ }
+
+ if (UNIV_UNLIKELY(b > end)) {
+
+ page_zip_fail(("page_zip_fields_decode: %p > %p\n",
+ (const void*) b, (const void*) end));
+ return(NULL);
+ }
+
+ table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
+ DICT_TF_COMPACT, 0, true);
+ index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
+ DICT_HDR_SPACE, 0, n);
+ index->table = table;
+ index->n_uniq = n;
+ /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
+ index->cached = TRUE;
+
+ /* Initialize the fields. */
+ for (b = buf, i = 0; i < n; i++) {
+ ulint mtype;
+ ulint len;
+
+ val = *b++;
+
+ if (UNIV_UNLIKELY(val & 0x80)) {
+ /* fixed length > 62 bytes */
+ val = (val & 0x7f) << 8 | *b++;
+ len = val >> 1;
+ mtype = DATA_FIXBINARY;
+ } else if (UNIV_UNLIKELY(val >= 126)) {
+ /* variable length with max > 255 bytes */
+ len = 0x7fff;
+ mtype = DATA_BINARY;
+ } else if (val <= 1) {
+ /* variable length with max <= 255 bytes */
+ len = 0;
+ mtype = DATA_BINARY;
+ } else {
+			/* fixed length <= 62 bytes */
+ len = val >> 1;
+ mtype = DATA_FIXBINARY;
+ }
+
+ dict_mem_table_add_col(table, NULL, NULL, mtype,
+ val & 1 ? DATA_NOT_NULL : 0, len);
+ dict_index_add_col(index, table,
+ dict_table_get_nth_col(table, i), 0);
+ }
+
+ val = *b++;
+ if (UNIV_UNLIKELY(val & 0x80)) {
+ val = (val & 0x7f) << 8 | *b++;
+ }
+
+ /* Decode the position of the trx_id column. */
+ if (trx_id_col) {
+ if (!val) {
+ val = ULINT_UNDEFINED;
+ } else if (UNIV_UNLIKELY(val >= n)) {
+ page_zip_fields_free(index);
+ index = NULL;
+ } else {
+ index->type = DICT_CLUSTERED;
+ }
+
+ *trx_id_col = val;
+ } else {
+ /* Decode the number of nullable fields. */
+ if (UNIV_UNLIKELY(index->n_nullable > val)) {
+ page_zip_fields_free(index);
+ index = NULL;
+ } else {
+ index->n_nullable = val;
+ }
+ }
+
+ ut_ad(b == end);
+
+ return(index);
+}
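+
+/* A worked example of the column encoding decoded above (the encoder
+is page_zip_fields_encode()):
+
+	0x11		fixed-length 8-byte NOT NULL column
+			((8 << 1) | 1 = 0x11)
+	0x00		nullable variable-length column, max <= 255 bytes
+	0x7f		NOT NULL variable-length column, max > 255 bytes
+	0x80 0xc9	fixed-length 100-byte NOT NULL column
+			((100 << 1) | 1 = 201 = 0x00c9, high bit set in
+			the first byte)
+
+The list is terminated by one or two bytes carrying n_nullable on
+node-pointer pages, or the position of DB_TRX_ID (0 if absent) on
+leaf pages. */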
+
+/**********************************************************************//**
+Populate the sparse page directory from the dense directory.
+@return TRUE on success, FALSE on failure */
+static
+ibool
+page_zip_dir_decode(
+/*================*/
+ const page_zip_des_t* page_zip,/*!< in: dense page directory on
+ compressed page */
+ page_t* page, /*!< in: compact page with valid header;
+ out: trailer and sparse page directory
+ filled in */
+ rec_t** recs, /*!< out: dense page directory sorted by
+ ascending address (and heap_no) */
+ rec_t** recs_aux,/*!< in/out: scratch area */
+ ulint n_dense)/*!< in: number of user records, and
+ size of recs[] and recs_aux[] */
+{
+ ulint i;
+ ulint n_recs;
+ byte* slot;
+
+ n_recs = page_get_n_recs(page);
+
+ if (UNIV_UNLIKELY(n_recs > n_dense)) {
+ page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
+ (ulong) n_recs, (ulong) n_dense));
+ return(FALSE);
+ }
+
+ /* Traverse the list of stored records in the sorting order,
+ starting from the first user record. */
+
+ slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
+ UNIV_PREFETCH_RW(slot);
+
+ /* Zero out the page trailer. */
+ memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);
+
+ mach_write_to_2(slot, PAGE_NEW_INFIMUM);
+ slot -= PAGE_DIR_SLOT_SIZE;
+ UNIV_PREFETCH_RW(slot);
+
+ /* Initialize the sparse directory and copy the dense directory. */
+ for (i = 0; i < n_recs; i++) {
+ ulint offs = page_zip_dir_get(page_zip, i);
+
+ if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
+ mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
+ slot -= PAGE_DIR_SLOT_SIZE;
+ UNIV_PREFETCH_RW(slot);
+ }
+
+ if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
+ < PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
+ page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
+ (unsigned) i, (unsigned) n_recs,
+ (ulong) offs));
+ return(FALSE);
+ }
+
+ recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
+ }
+
+ mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
+ {
+ const page_dir_slot_t* last_slot = page_dir_get_nth_slot(
+ page, page_dir_get_n_slots(page) - 1);
+
+ if (UNIV_UNLIKELY(slot != last_slot)) {
+ page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
+ (const void*) slot,
+ (const void*) last_slot));
+ return(FALSE);
+ }
+ }
+
+ /* Copy the rest of the dense directory. */
+ for (; i < n_dense; i++) {
+ ulint offs = page_zip_dir_get(page_zip, i);
+
+ if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
+ page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
+ (unsigned) i, (unsigned) n_dense,
+ (ulong) offs));
+ return(FALSE);
+ }
+
+ recs[i] = page + offs;
+ }
+
+ if (UNIV_LIKELY(n_dense > 1)) {
+ page_zip_dir_sort(recs, recs_aux, 0, n_dense);
+ }
+ return(TRUE);
+}
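+
+/* Note the two directories being reconciled above: the sparse
+directory at the end of the uncompressed page (one slot per owning
+record, rebuilt here), and the dense directory in the trailer of the
+compressed page (one slot per user record, storing the record offset
+together with the "owned" and "deleted" flag bits, which are masked
+off with PAGE_ZIP_DIR_SLOT_MASK). */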
+
+/**********************************************************************//**
+Initialize the REC_N_NEW_EXTRA_BYTES of each record.
+@return TRUE on success, FALSE on failure */
+static
+ibool
+page_zip_set_extra_bytes(
+/*=====================*/
+ const page_zip_des_t* page_zip,/*!< in: compressed page */
+ page_t* page, /*!< in/out: uncompressed page */
+ ulint info_bits)/*!< in: REC_INFO_MIN_REC_FLAG or 0 */
+{
+ ulint n;
+ ulint i;
+ ulint n_owned = 1;
+ ulint offs;
+ rec_t* rec;
+
+ n = page_get_n_recs(page);
+ rec = page + PAGE_NEW_INFIMUM;
+
+ for (i = 0; i < n; i++) {
+ offs = page_zip_dir_get(page_zip, i);
+
+ if (offs & PAGE_ZIP_DIR_SLOT_DEL) {
+ info_bits |= REC_INFO_DELETED_FLAG;
+ }
+ if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
+ info_bits |= n_owned;
+ n_owned = 1;
+ } else {
+ n_owned++;
+ }
+ offs &= PAGE_ZIP_DIR_SLOT_MASK;
+ if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
+ + REC_N_NEW_EXTRA_BYTES)) {
+ page_zip_fail(("page_zip_set_extra_bytes 1:"
+ " %u %u %lx\n",
+ (unsigned) i, (unsigned) n,
+ (ulong) offs));
+ return(FALSE);
+ }
+
+ rec_set_next_offs_new(rec, offs);
+ rec = page + offs;
+ rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
+ info_bits = 0;
+ }
+
+ /* Set the next pointer of the last user record. */
+ rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
+
+ /* Set n_owned of the supremum record. */
+ page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
+
+ /* The dense directory excludes the infimum and supremum records. */
+ n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
+
+ if (i >= n) {
+ if (UNIV_LIKELY(i == n)) {
+ return(TRUE);
+ }
+
+ page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
+ (unsigned) i, (unsigned) n));
+ return(FALSE);
+ }
+
+ offs = page_zip_dir_get(page_zip, i);
+
+ /* Set the extra bytes of deleted records on the free list. */
+ for (;;) {
+ if (UNIV_UNLIKELY(!offs)
+ || UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
+
+ page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
+ (ulong) offs));
+ return(FALSE);
+ }
+
+ rec = page + offs;
+ rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
+
+ if (++i == n) {
+ break;
+ }
+
+ offs = page_zip_dir_get(page_zip, i);
+ rec_set_next_offs_new(rec, offs);
+ }
+
+ /* Terminate the free list. */
+ rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
+ rec_set_next_offs_new(rec, 0);
+
+ return(TRUE);
+}
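+
+/* In the compact record format, rec[-REC_N_NEW_EXTRA_BYTES] packs the
+info bits into the high nibble and n_owned into the low nibble, as
+written above; the remaining extra bytes hold heap_no, the status bits
+and the next-record offset set via rec_set_next_offs_new(). */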
+
+/**********************************************************************//**
+Apply the modification log to a record containing externally stored
+columns. Do not copy the fields that are stored separately.
+@return pointer to modification log, or NULL on failure */
+static
+const byte*
+page_zip_apply_log_ext(
+/*===================*/
+ rec_t* rec, /*!< in/out: record */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+	ulint		trx_id_col,	/*!< in: position of DB_TRX_ID */
+ const byte* data, /*!< in: modification log */
+ const byte* end) /*!< in: end of modification log */
+{
+ ulint i;
+ ulint len;
+ byte* next_out = rec;
+
+ /* Check if there are any externally stored columns.
+ For each externally stored column, skip the
+ BTR_EXTERN_FIELD_REF. */
+
+ for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+ byte* dst;
+
+ if (UNIV_UNLIKELY(i == trx_id_col)) {
+ /* Skip trx_id and roll_ptr */
+ dst = rec_get_nth_field(rec, offsets,
+ i, &len);
+ if (UNIV_UNLIKELY(dst - next_out >= end - data)
+ || UNIV_UNLIKELY
+ (len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
+ || rec_offs_nth_extern(offsets, i)) {
+ page_zip_fail(("page_zip_apply_log_ext:"
+ " trx_id len %lu,"
+ " %p - %p >= %p - %p\n",
+ (ulong) len,
+ (const void*) dst,
+ (const void*) next_out,
+ (const void*) end,
+ (const void*) data));
+ return(NULL);
+ }
+
+ memcpy(next_out, data, dst - next_out);
+ data += dst - next_out;
+ next_out = dst + (DATA_TRX_ID_LEN
+ + DATA_ROLL_PTR_LEN);
+ } else if (rec_offs_nth_extern(offsets, i)) {
+ dst = rec_get_nth_field(rec, offsets,
+ i, &len);
+ ut_ad(len
+ >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ len += dst - next_out
+ - BTR_EXTERN_FIELD_REF_SIZE;
+
+ if (UNIV_UNLIKELY(data + len >= end)) {
+ page_zip_fail(("page_zip_apply_log_ext: "
+ "ext %p+%lu >= %p\n",
+ (const void*) data,
+ (ulong) len,
+ (const void*) end));
+ return(NULL);
+ }
+
+ memcpy(next_out, data, len);
+ data += len;
+ next_out += len
+ + BTR_EXTERN_FIELD_REF_SIZE;
+ }
+ }
+
+ /* Copy the last bytes of the record. */
+ len = rec_get_end(rec, offsets) - next_out;
+ if (UNIV_UNLIKELY(data + len >= end)) {
+ page_zip_fail(("page_zip_apply_log_ext: "
+ "last %p+%lu >= %p\n",
+ (const void*) data,
+ (ulong) len,
+ (const void*) end));
+ return(NULL);
+ }
+ memcpy(next_out, data, len);
+ data += len;
+
+ return(data);
+}
+
+/**********************************************************************//**
+Apply the modification log to an uncompressed page.
+Do not copy the fields that are stored separately.
+@return pointer to end of modification log, or NULL on failure */
+static
+const byte*
+page_zip_apply_log(
+/*===============*/
+ const byte* data, /*!< in: modification log */
+ ulint size, /*!< in: maximum length of the log, in bytes */
+ rec_t** recs, /*!< in: dense page directory,
+ sorted by address (indexed by
+ heap_no - PAGE_HEAP_NO_USER_LOW) */
+ ulint n_dense,/*!< in: size of recs[] */
+ ulint trx_id_col,/*!< in: column number of trx_id in the index,
+ or ULINT_UNDEFINED if none */
+ ulint heap_status,
+ /*!< in: heap_no and status bits for
+ the next record to uncompress */
+ dict_index_t* index, /*!< in: index of the page */
+ ulint* offsets)/*!< in/out: work area for
+ rec_get_offsets_reverse() */
+{
+ const byte* const end = data + size;
+
+ for (;;) {
+ ulint val;
+ rec_t* rec;
+ ulint len;
+ ulint hs;
+
+ val = *data++;
+ if (UNIV_UNLIKELY(!val)) {
+ return(data - 1);
+ }
+ if (val & 0x80) {
+ val = (val & 0x7f) << 8 | *data++;
+ if (UNIV_UNLIKELY(!val)) {
+ page_zip_fail(("page_zip_apply_log:"
+ " invalid val %x%x\n",
+ data[-2], data[-1]));
+ return(NULL);
+ }
+ }
+ if (UNIV_UNLIKELY(data >= end)) {
+ page_zip_fail(("page_zip_apply_log: %p >= %p\n",
+ (const void*) data,
+ (const void*) end));
+ return(NULL);
+ }
+ if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
+ page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
+ (ulong) val, (ulong) n_dense));
+ return(NULL);
+ }
+
+ /* Determine the heap number and status bits of the record. */
+ rec = recs[(val >> 1) - 1];
+
+ hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
+ hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);
+
+ /* This may either be an old record that is being
+ overwritten (updated in place, or allocated from
+ the free list), or a new record, with the next
+	available heap_no. */
+ if (UNIV_UNLIKELY(hs > heap_status)) {
+ page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
+ (ulong) hs, (ulong) heap_status));
+ return(NULL);
+ } else if (hs == heap_status) {
+ /* A new record was allocated from the heap. */
+ if (UNIV_UNLIKELY(val & 1)) {
+ /* Only existing records may be cleared. */
+ page_zip_fail(("page_zip_apply_log:"
+ " attempting to create"
+ " deleted rec %lu\n",
+ (ulong) hs));
+ return(NULL);
+ }
+ heap_status += 1 << REC_HEAP_NO_SHIFT;
+ }
+
+ mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);
+
+ if (val & 1) {
+ /* Clear the data bytes of the record. */
+ mem_heap_t* heap = NULL;
+ ulint* offs;
+ offs = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ memset(rec, 0, rec_offs_data_size(offs));
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ continue;
+ }
+
+#if REC_STATUS_NODE_PTR != TRUE
+# error "REC_STATUS_NODE_PTR != TRUE"
+#endif
+ rec_get_offsets_reverse(data, index,
+ hs & REC_STATUS_NODE_PTR,
+ offsets);
+ rec_offs_make_valid(rec, index, offsets);
+
+ /* Copy the extra bytes (backwards). */
+ {
+ byte* start = rec_get_start(rec, offsets);
+ byte* b = rec - REC_N_NEW_EXTRA_BYTES;
+ while (b != start) {
+ *--b = *data++;
+ }
+ }
+
+ /* Copy the data bytes. */
+ if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
+ /* Non-leaf nodes should not contain any
+ externally stored columns. */
+ if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
+ page_zip_fail(("page_zip_apply_log: "
+ "%lu&REC_STATUS_NODE_PTR\n",
+ (ulong) hs));
+ return(NULL);
+ }
+
+ data = page_zip_apply_log_ext(
+ rec, offsets, trx_id_col, data, end);
+
+ if (UNIV_UNLIKELY(!data)) {
+ return(NULL);
+ }
+ } else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
+ len = rec_offs_data_size(offsets)
+ - REC_NODE_PTR_SIZE;
+ /* Copy the data bytes, except node_ptr. */
+ if (UNIV_UNLIKELY(data + len >= end)) {
+ page_zip_fail(("page_zip_apply_log: "
+ "node_ptr %p+%lu >= %p\n",
+ (const void*) data,
+ (ulong) len,
+ (const void*) end));
+ return(NULL);
+ }
+ memcpy(rec, data, len);
+ data += len;
+ } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
+ len = rec_offs_data_size(offsets);
+
+ /* Copy all data bytes of
+ a record in a secondary index. */
+ if (UNIV_UNLIKELY(data + len >= end)) {
+ page_zip_fail(("page_zip_apply_log: "
+ "sec %p+%lu >= %p\n",
+ (const void*) data,
+ (ulong) len,
+ (const void*) end));
+ return(NULL);
+ }
+
+ memcpy(rec, data, len);
+ data += len;
+ } else {
+ /* Skip DB_TRX_ID and DB_ROLL_PTR. */
+ ulint l = rec_get_nth_field_offs(offsets,
+ trx_id_col, &len);
+ byte* b;
+
+ if (UNIV_UNLIKELY(data + l >= end)
+ || UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
+ + DATA_ROLL_PTR_LEN))) {
+ page_zip_fail(("page_zip_apply_log: "
+ "trx_id %p+%lu >= %p\n",
+ (const void*) data,
+ (ulong) l,
+ (const void*) end));
+ return(NULL);
+ }
+
+ /* Copy any preceding data bytes. */
+ memcpy(rec, data, l);
+ data += l;
+
+ /* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
+ b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ len = rec_get_end(rec, offsets) - b;
+ if (UNIV_UNLIKELY(data + len >= end)) {
+ page_zip_fail(("page_zip_apply_log: "
+ "clust %p+%lu >= %p\n",
+ (const void*) data,
+ (ulong) len,
+ (const void*) end));
+ return(NULL);
+ }
+ memcpy(b, data, len);
+ data += len;
+ }
+ }
+}
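+
+/* A sketch of the modification log entries parsed above:
+
+	0x00			end of log
+	0x04 <extra bytes, reversed> <data bytes>
+				rewrite the record with heap_no 3
+				(val >> 1 == heap_no - 1 == 2)
+	0x05			clear (delete) the record with heap_no 3
+				(low bit set; no record image follows)
+	0x80 0x..		two-byte form of val, for val >= 0x80
+*/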
+
+/**********************************************************************//**
+Set the heap_no in a record, and skip the fixed-size record header
+that is not included in the d_stream.
+@return TRUE on success, FALSE if d_stream does not end at rec */
+static
+ibool
+page_zip_decompress_heap_no(
+/*========================*/
+ z_stream* d_stream, /*!< in/out: compressed page stream */
+ rec_t* rec, /*!< in/out: record */
+ ulint& heap_status) /*!< in/out: heap_no and status bits */
+{
+ if (d_stream->next_out != rec - REC_N_NEW_EXTRA_BYTES) {
+ /* n_dense has grown since the page was last compressed. */
+ return(FALSE);
+ }
+
+ /* Skip the REC_N_NEW_EXTRA_BYTES. */
+ d_stream->next_out = rec;
+
+ /* Set heap_no and the status bits. */
+ mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
+ heap_status += 1 << REC_HEAP_NO_SHIFT;
+ return(TRUE);
+}
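+
+/* heap_status, as updated above, packs the heap_no of the next record
+with its status bits: (heap_no << REC_HEAP_NO_SHIFT) combined with
+REC_STATUS_ORDINARY on leaf pages or REC_STATUS_NODE_PTR on
+node-pointer pages.  It advances by 1 << REC_HEAP_NO_SHIFT per
+decompressed record. */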
+
+/**********************************************************************//**
+Decompress the records of a node pointer page.
+@return TRUE on success, FALSE on failure */
+static
+ibool
+page_zip_decompress_node_ptrs(
+/*==========================*/
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ z_stream* d_stream, /*!< in/out: compressed page stream */
+ rec_t** recs, /*!< in: dense page directory
+ sorted by address */
+ ulint n_dense, /*!< in: size of recs[] */
+ dict_index_t* index, /*!< in: the index of the page */
+ ulint* offsets, /*!< in/out: temporary offsets */
+ mem_heap_t* heap) /*!< in: temporary memory heap */
+{
+ ulint heap_status = REC_STATUS_NODE_PTR
+ | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
+ ulint slot;
+ const byte* storage;
+
+ /* Subtract the space reserved for uncompressed data. */
+ d_stream->avail_in -= static_cast<uInt>(
+ n_dense * (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE));
+
+ /* Decompress the records in heap_no order. */
+ for (slot = 0; slot < n_dense; slot++) {
+ rec_t* rec = recs[slot];
+
+ d_stream->avail_out = static_cast<uInt>(
+ rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);
+
+ ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
+ - PAGE_ZIP_START - PAGE_DIR);
+ switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+ case Z_STREAM_END:
+ page_zip_decompress_heap_no(
+ d_stream, rec, heap_status);
+ goto zlib_done;
+ case Z_OK:
+ case Z_BUF_ERROR:
+ if (!d_stream->avail_out) {
+ break;
+ }
+ /* fall through */
+ default:
+ page_zip_fail(("page_zip_decompress_node_ptrs:"
+ " 1 inflate(Z_SYNC_FLUSH)=%s\n",
+ d_stream->msg));
+ goto zlib_error;
+ }
+
+ if (!page_zip_decompress_heap_no(
+ d_stream, rec, heap_status)) {
+ ut_ad(0);
+ }
+
+ /* Read the offsets. The status bits are needed here. */
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+
+ /* Non-leaf nodes should not have any externally
+ stored columns. */
+ ut_ad(!rec_offs_any_extern(offsets));
+
+ /* Decompress the data bytes, except node_ptr. */
+		d_stream->avail_out = static_cast<uInt>(
+ rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE);
+
+ switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+ case Z_STREAM_END:
+ goto zlib_done;
+ case Z_OK:
+ case Z_BUF_ERROR:
+ if (!d_stream->avail_out) {
+ break;
+ }
+ /* fall through */
+ default:
+ page_zip_fail(("page_zip_decompress_node_ptrs:"
+ " 2 inflate(Z_SYNC_FLUSH)=%s\n",
+ d_stream->msg));
+ goto zlib_error;
+ }
+
+ /* Clear the node pointer in case the record
+ will be deleted and the space will be reallocated
+ to a smaller record. */
+ memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
+ d_stream->next_out += REC_NODE_PTR_SIZE;
+
+ ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
+ }
+
+ /* Decompress any trailing garbage, in case the last record was
+ allocated from an originally longer space on the free list. */
+ d_stream->avail_out = static_cast<uInt>(
+ page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
+ - page_offset(d_stream->next_out));
+ if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
+ - PAGE_ZIP_START - PAGE_DIR)) {
+
+ page_zip_fail(("page_zip_decompress_node_ptrs:"
+ " avail_out = %u\n",
+ d_stream->avail_out));
+ goto zlib_error;
+ }
+
+ if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
+ page_zip_fail(("page_zip_decompress_node_ptrs:"
+ " inflate(Z_FINISH)=%s\n",
+ d_stream->msg));
+zlib_error:
+ inflateEnd(d_stream);
+ return(FALSE);
+ }
+
+ /* Note that d_stream->avail_out > 0 may hold here
+ if the modification log is nonempty. */
+
+zlib_done:
+ if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
+ ut_error;
+ }
+
+ {
+ page_t* page = page_align(d_stream->next_out);
+
+ /* Clear the unused heap space on the uncompressed page. */
+ memset(d_stream->next_out, 0,
+ page_dir_get_nth_slot(page,
+ page_dir_get_n_slots(page) - 1)
+ - d_stream->next_out);
+ }
+
+#ifdef UNIV_DEBUG
+ page_zip->m_start = PAGE_DATA + d_stream->total_in;
+#endif /* UNIV_DEBUG */
+
+ /* Apply the modification log. */
+ {
+ const byte* mod_log_ptr;
+ mod_log_ptr = page_zip_apply_log(d_stream->next_in,
+ d_stream->avail_in + 1,
+ recs, n_dense,
+ ULINT_UNDEFINED, heap_status,
+ index, offsets);
+
+ if (UNIV_UNLIKELY(!mod_log_ptr)) {
+ return(FALSE);
+ }
+ page_zip->m_end = mod_log_ptr - page_zip->data;
+ page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
+ }
+
+ if (UNIV_UNLIKELY
+ (page_zip_get_trailer_len(page_zip,
+ dict_index_is_clust(index))
+ + page_zip->m_end >= page_zip_get_size(page_zip))) {
+ page_zip_fail(("page_zip_decompress_node_ptrs:"
+ " %lu + %lu >= %lu, %lu\n",
+ (ulong) page_zip_get_trailer_len(
+ page_zip, dict_index_is_clust(index)),
+ (ulong) page_zip->m_end,
+ (ulong) page_zip_get_size(page_zip),
+ (ulong) dict_index_is_clust(index)));
+ return(FALSE);
+ }
+
+ /* Restore the uncompressed columns in heap_no order. */
+ storage = page_zip_dir_start_low(page_zip, n_dense);
+
+ for (slot = 0; slot < n_dense; slot++) {
+ rec_t* rec = recs[slot];
+
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ /* Non-leaf nodes should not have any externally
+ stored columns. */
+ ut_ad(!rec_offs_any_extern(offsets));
+ storage -= REC_NODE_PTR_SIZE;
+
+ memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
+ storage, REC_NODE_PTR_SIZE);
+ }
+
+ return(TRUE);
+}
+
+/**********************************************************************//**
+Decompress the records of a leaf node of a secondary index.
+@return TRUE on success, FALSE on failure */
+static
+ibool
+page_zip_decompress_sec(
+/*====================*/
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ z_stream* d_stream, /*!< in/out: compressed page stream */
+ rec_t** recs, /*!< in: dense page directory
+ sorted by address */
+ ulint n_dense, /*!< in: size of recs[] */
+ dict_index_t* index, /*!< in: the index of the page */
+ ulint* offsets) /*!< in/out: temporary offsets */
+{
+ ulint heap_status = REC_STATUS_ORDINARY
+ | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
+ ulint slot;
+
+ ut_a(!dict_index_is_clust(index));
+
+ /* Subtract the space reserved for uncompressed data. */
+	d_stream->avail_in -= static_cast<uInt>(
+ n_dense * PAGE_ZIP_DIR_SLOT_SIZE);
+
+ for (slot = 0; slot < n_dense; slot++) {
+ rec_t* rec = recs[slot];
+
+ /* Decompress everything up to this record. */
+		d_stream->avail_out = static_cast<uInt>(
+ rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);
+
+ if (UNIV_LIKELY(d_stream->avail_out)) {
+ switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+ case Z_STREAM_END:
+ page_zip_decompress_heap_no(
+ d_stream, rec, heap_status);
+ goto zlib_done;
+ case Z_OK:
+ case Z_BUF_ERROR:
+ if (!d_stream->avail_out) {
+ break;
+ }
+ /* fall through */
+ default:
+ page_zip_fail(("page_zip_decompress_sec:"
+ " inflate(Z_SYNC_FLUSH)=%s\n",
+ d_stream->msg));
+ goto zlib_error;
+ }
+ }
+
+ if (!page_zip_decompress_heap_no(
+ d_stream, rec, heap_status)) {
+ ut_ad(0);
+ }
+ }
+
+ /* Decompress the data of the last record and any trailing garbage,
+ in case the last record was allocated from an originally longer space
+ on the free list. */
+ d_stream->avail_out = static_cast<uInt>(
+ page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
+ - page_offset(d_stream->next_out));
+ if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
+ - PAGE_ZIP_START - PAGE_DIR)) {
+
+ page_zip_fail(("page_zip_decompress_sec:"
+ " avail_out = %u\n",
+ d_stream->avail_out));
+ goto zlib_error;
+ }
+
+ if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
+ page_zip_fail(("page_zip_decompress_sec:"
+ " inflate(Z_FINISH)=%s\n",
+ d_stream->msg));
+zlib_error:
+ inflateEnd(d_stream);
+ return(FALSE);
+ }
+
+ /* Note that d_stream->avail_out > 0 may hold here
+ if the modification log is nonempty. */
+
+zlib_done:
+ if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
+ ut_error;
+ }
+
+ {
+ page_t* page = page_align(d_stream->next_out);
+
+ /* Clear the unused heap space on the uncompressed page. */
+ memset(d_stream->next_out, 0,
+ page_dir_get_nth_slot(page,
+ page_dir_get_n_slots(page) - 1)
+ - d_stream->next_out);
+ }
+
+#ifdef UNIV_DEBUG
+ page_zip->m_start = PAGE_DATA + d_stream->total_in;
+#endif /* UNIV_DEBUG */
+
+ /* Apply the modification log. */
+ {
+ const byte* mod_log_ptr;
+ mod_log_ptr = page_zip_apply_log(d_stream->next_in,
+ d_stream->avail_in + 1,
+ recs, n_dense,
+ ULINT_UNDEFINED, heap_status,
+ index, offsets);
+
+ if (UNIV_UNLIKELY(!mod_log_ptr)) {
+ return(FALSE);
+ }
+ page_zip->m_end = mod_log_ptr - page_zip->data;
+ page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
+ }
+
+ if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE)
+ + page_zip->m_end >= page_zip_get_size(page_zip))) {
+
+ page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
+ (ulong) page_zip_get_trailer_len(
+ page_zip, FALSE),
+ (ulong) page_zip->m_end,
+ (ulong) page_zip_get_size(page_zip)));
+ return(FALSE);
+ }
+
+ /* There are no uncompressed columns on leaf pages of
+ secondary indexes. */
+
+ return(TRUE);
+}
+
+/**********************************************************************//**
+Decompress a record of a leaf node of a clustered index that contains
+externally stored columns.
+@return TRUE on success */
+static
+ibool
+page_zip_decompress_clust_ext(
+/*==========================*/
+ z_stream* d_stream, /*!< in/out: compressed page stream */
+ rec_t* rec, /*!< in/out: record */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+	ulint		trx_id_col)	/*!< in: position of DB_TRX_ID */
+{
+ ulint i;
+
+ for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+ ulint len;
+ byte* dst;
+
+ if (UNIV_UNLIKELY(i == trx_id_col)) {
+ /* Skip trx_id and roll_ptr */
+ dst = rec_get_nth_field(rec, offsets, i, &len);
+ if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
+ + DATA_ROLL_PTR_LEN)) {
+
+ page_zip_fail(("page_zip_decompress_clust_ext:"
+ " len[%lu] = %lu\n",
+ (ulong) i, (ulong) len));
+ return(FALSE);
+ }
+
+ if (rec_offs_nth_extern(offsets, i)) {
+
+ page_zip_fail(("page_zip_decompress_clust_ext:"
+ " DB_TRX_ID at %lu is ext\n",
+ (ulong) i));
+ return(FALSE);
+ }
+
+ d_stream->avail_out = static_cast<uInt>(
+ dst - d_stream->next_out);
+
+ switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+ case Z_STREAM_END:
+ case Z_OK:
+ case Z_BUF_ERROR:
+ if (!d_stream->avail_out) {
+ break;
+ }
+ /* fall through */
+ default:
+ page_zip_fail(("page_zip_decompress_clust_ext:"
+ " 1 inflate(Z_SYNC_FLUSH)=%s\n",
+ d_stream->msg));
+ return(FALSE);
+ }
+
+ ut_ad(d_stream->next_out == dst);
+
+ /* Clear DB_TRX_ID and DB_ROLL_PTR in order to
+ avoid uninitialized bytes in case the record
+ is affected by page_zip_apply_log(). */
+ memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ d_stream->next_out += DATA_TRX_ID_LEN
+ + DATA_ROLL_PTR_LEN;
+ } else if (rec_offs_nth_extern(offsets, i)) {
+ dst = rec_get_nth_field(rec, offsets, i, &len);
+ ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
+ dst += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+ d_stream->avail_out = static_cast<uInt>(
+ dst - d_stream->next_out);
+ switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+ case Z_STREAM_END:
+ case Z_OK:
+ case Z_BUF_ERROR:
+ if (!d_stream->avail_out) {
+ break;
+ }
+ /* fall through */
+ default:
+ page_zip_fail(("page_zip_decompress_clust_ext:"
+ " 2 inflate(Z_SYNC_FLUSH)=%s\n",
+ d_stream->msg));
+ return(FALSE);
+ }
+
+ ut_ad(d_stream->next_out == dst);
+
+ /* Clear the BLOB pointer in case
+ the record will be deleted and the
+ space will not be reused. Note that
+ the final initialization of the BLOB
+ pointers (copying from "externs"
+ or clearing) will have to take place
+ only after the page modification log
+ has been applied. Otherwise, we
+ could end up with an uninitialized
+ BLOB pointer when a record is deleted,
+ reallocated and deleted. */
+ memset(d_stream->next_out, 0,
+ BTR_EXTERN_FIELD_REF_SIZE);
+ d_stream->next_out
+ += BTR_EXTERN_FIELD_REF_SIZE;
+ }
+ }
+
+ return(TRUE);
+}
+
+/**********************************************************************//**
+Decompress the records of a leaf node of a clustered index.
+@return TRUE on success, FALSE on failure */
+static
+ibool
+page_zip_decompress_clust(
+/*======================*/
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ z_stream* d_stream, /*!< in/out: compressed page stream */
+ rec_t** recs, /*!< in: dense page directory
+ sorted by address */
+ ulint n_dense, /*!< in: size of recs[] */
+ dict_index_t* index, /*!< in: the index of the page */
+	ulint		trx_id_col,	/*!< in: position of the trx_id column */
+ ulint* offsets, /*!< in/out: temporary offsets */
+ mem_heap_t* heap) /*!< in: temporary memory heap */
+{
+ int err;
+ ulint slot;
+ ulint heap_status = REC_STATUS_ORDINARY
+ | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
+ const byte* storage;
+ const byte* externs;
+
+ ut_a(dict_index_is_clust(index));
+
+ /* Subtract the space reserved for uncompressed data. */
+ d_stream->avail_in -= static_cast<uInt>(n_dense)
+ * (PAGE_ZIP_DIR_SLOT_SIZE
+ + DATA_TRX_ID_LEN
+ + DATA_ROLL_PTR_LEN);
+
+ /* Decompress the records in heap_no order. */
+ for (slot = 0; slot < n_dense; slot++) {
+ rec_t* rec = recs[slot];
+
+		d_stream->avail_out = static_cast<uInt>(
+ rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);
+
+ ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
+ - PAGE_ZIP_START - PAGE_DIR);
+ err = inflate(d_stream, Z_SYNC_FLUSH);
+ switch (err) {
+ case Z_STREAM_END:
+ page_zip_decompress_heap_no(
+ d_stream, rec, heap_status);
+ goto zlib_done;
+ case Z_OK:
+ case Z_BUF_ERROR:
+ if (UNIV_LIKELY(!d_stream->avail_out)) {
+ break;
+ }
+ /* fall through */
+ default:
+ page_zip_fail(("page_zip_decompress_clust:"
+ " 1 inflate(Z_SYNC_FLUSH)=%s\n",
+ d_stream->msg));
+ goto zlib_error;
+ }
+
+ if (!page_zip_decompress_heap_no(
+ d_stream, rec, heap_status)) {
+ ut_ad(0);
+ }
+
+ /* Read the offsets. The status bits are needed here. */
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+
+ /* This is a leaf page in a clustered index. */
+
+ /* Check if there are any externally stored columns.
+ For each externally stored column, restore the
+ BTR_EXTERN_FIELD_REF separately. */
+
+ if (rec_offs_any_extern(offsets)) {
+ if (UNIV_UNLIKELY
+ (!page_zip_decompress_clust_ext(
+ d_stream, rec, offsets, trx_id_col))) {
+
+ goto zlib_error;
+ }
+ } else {
+ /* Skip trx_id and roll_ptr */
+ ulint len;
+ byte* dst = rec_get_nth_field(rec, offsets,
+ trx_id_col, &len);
+ if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
+ + DATA_ROLL_PTR_LEN)) {
+
+ page_zip_fail(("page_zip_decompress_clust:"
+ " len = %lu\n", (ulong) len));
+ goto zlib_error;
+ }
+
+ d_stream->avail_out = static_cast<uInt>(
+ dst - d_stream->next_out);
+
+ switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+ case Z_STREAM_END:
+ case Z_OK:
+ case Z_BUF_ERROR:
+ if (!d_stream->avail_out) {
+ break;
+ }
+ /* fall through */
+ default:
+ page_zip_fail(("page_zip_decompress_clust:"
+ " 2 inflate(Z_SYNC_FLUSH)=%s\n",
+ d_stream->msg));
+ goto zlib_error;
+ }
+
+ ut_ad(d_stream->next_out == dst);
+
+ /* Clear DB_TRX_ID and DB_ROLL_PTR in order to
+ avoid uninitialized bytes in case the record
+ is affected by page_zip_apply_log(). */
+ memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ d_stream->next_out += DATA_TRX_ID_LEN
+ + DATA_ROLL_PTR_LEN;
+ }
+
+ /* Decompress the last bytes of the record. */
+ d_stream->avail_out = static_cast<uInt>(
+ rec_get_end(rec, offsets) - d_stream->next_out);
+
+ switch (inflate(d_stream, Z_SYNC_FLUSH)) {
+ case Z_STREAM_END:
+ case Z_OK:
+ case Z_BUF_ERROR:
+ if (!d_stream->avail_out) {
+ break;
+ }
+ /* fall through */
+ default:
+ page_zip_fail(("page_zip_decompress_clust:"
+ " 3 inflate(Z_SYNC_FLUSH)=%s\n",
+ d_stream->msg));
+ goto zlib_error;
+ }
+ }
+
+ /* Decompress any trailing garbage, in case the last record was
+ allocated from an originally longer space on the free list. */
+ d_stream->avail_out = static_cast<uInt>(
+ page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
+ - page_offset(d_stream->next_out));
+ if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
+ - PAGE_ZIP_START - PAGE_DIR)) {
+
+ page_zip_fail(("page_zip_decompress_clust:"
+ " avail_out = %u\n",
+ d_stream->avail_out));
+ goto zlib_error;
+ }
+
+ if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
+ page_zip_fail(("page_zip_decompress_clust:"
+ " inflate(Z_FINISH)=%s\n",
+ d_stream->msg));
+zlib_error:
+ inflateEnd(d_stream);
+ return(FALSE);
+ }
+
+ /* Note that d_stream->avail_out > 0 may hold here
+ if the modification log is nonempty. */
+
+zlib_done:
+ if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
+ ut_error;
+ }
+
+ {
+ page_t* page = page_align(d_stream->next_out);
+
+ /* Clear the unused heap space on the uncompressed page. */
+ memset(d_stream->next_out, 0,
+ page_dir_get_nth_slot(page,
+ page_dir_get_n_slots(page) - 1)
+ - d_stream->next_out);
+ }
+
+#ifdef UNIV_DEBUG
+ page_zip->m_start = PAGE_DATA + d_stream->total_in;
+#endif /* UNIV_DEBUG */
+
+ /* Apply the modification log. */
+ {
+ const byte* mod_log_ptr;
+ mod_log_ptr = page_zip_apply_log(d_stream->next_in,
+ d_stream->avail_in + 1,
+ recs, n_dense,
+ trx_id_col, heap_status,
+ index, offsets);
+
+ if (UNIV_UNLIKELY(!mod_log_ptr)) {
+ return(FALSE);
+ }
+ page_zip->m_end = mod_log_ptr - page_zip->data;
+ page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
+ }
+
+ if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE)
+ + page_zip->m_end >= page_zip_get_size(page_zip))) {
+
+ page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
+ (ulong) page_zip_get_trailer_len(
+ page_zip, TRUE),
+ (ulong) page_zip->m_end,
+ (ulong) page_zip_get_size(page_zip)));
+ return(FALSE);
+ }
+
+ storage = page_zip_dir_start_low(page_zip, n_dense);
+
+ externs = storage - n_dense
+ * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ /* Restore the uncompressed columns in heap_no order. */
+
+ for (slot = 0; slot < n_dense; slot++) {
+ ulint i;
+ ulint len;
+ byte* dst;
+ rec_t* rec = recs[slot];
+ ibool exists = !page_zip_dir_find_free(
+ page_zip, page_offset(rec));
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+
+ dst = rec_get_nth_field(rec, offsets,
+ trx_id_col, &len);
+ ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+ memcpy(dst, storage,
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ /* Check if there are any externally stored
+ columns in this record. For each externally
+ stored column, restore or clear the
+ BTR_EXTERN_FIELD_REF. */
+ if (!rec_offs_any_extern(offsets)) {
+ continue;
+ }
+
+ for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+ if (!rec_offs_nth_extern(offsets, i)) {
+ continue;
+ }
+ dst = rec_get_nth_field(rec, offsets, i, &len);
+
+ if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
+ page_zip_fail(("page_zip_decompress_clust:"
+ " %lu < 20\n",
+ (ulong) len));
+ return(FALSE);
+ }
+
+ dst += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+ if (UNIV_LIKELY(exists)) {
+ /* Existing record:
+ restore the BLOB pointer */
+ externs -= BTR_EXTERN_FIELD_REF_SIZE;
+
+ if (UNIV_UNLIKELY
+ (externs < page_zip->data
+ + page_zip->m_end)) {
+ page_zip_fail(("page_zip_"
+ "decompress_clust: "
+ "%p < %p + %lu\n",
+ (const void*) externs,
+ (const void*)
+ page_zip->data,
+ (ulong)
+ page_zip->m_end));
+ return(FALSE);
+ }
+
+ memcpy(dst, externs,
+ BTR_EXTERN_FIELD_REF_SIZE);
+
+ page_zip->n_blobs++;
+ } else {
+ /* Deleted record:
+ clear the BLOB pointer */
+ memset(dst, 0,
+ BTR_EXTERN_FIELD_REF_SIZE);
+ }
+ }
+ }
+
+ return(TRUE);
+}
+
+/**********************************************************************//**
+Decompress a page. This function should tolerate errors on the compressed
+page. Instead of letting assertions fail, it will return FALSE if an
+inconsistency is detected.
+@return TRUE on success, FALSE on failure */
+UNIV_INTERN
+ibool
+page_zip_decompress(
+/*================*/
+ page_zip_des_t* page_zip,/*!< in: data, ssize;
+ out: m_start, m_end, m_nonempty, n_blobs */
+ page_t* page, /*!< out: uncompressed page, may be trashed */
+ ibool all) /*!< in: TRUE=decompress the whole page;
+ FALSE=verify but do not copy some
+ page header fields that should not change
+ after page creation */
+{
+ z_stream d_stream;
+ dict_index_t* index = NULL;
+ rec_t** recs; /*!< dense page directory, sorted by address */
+ ulint n_dense;/* number of user records on the page */
+ ulint trx_id_col = ULINT_UNDEFINED;
+ mem_heap_t* heap;
+ ulint* offsets;
+#ifndef UNIV_HOTBACKUP
+ ullint usec = ut_time_us(NULL);
+#endif /* !UNIV_HOTBACKUP */
+
+ ut_ad(page_zip_simple_validate(page_zip));
+ UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+ /* The dense directory excludes the infimum and supremum records. */
+ n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
+ if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
+ >= page_zip_get_size(page_zip))) {
+ page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
+ (ulong) n_dense,
+ (ulong) page_zip_get_size(page_zip)));
+ return(FALSE);
+ }
+
+ heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
+
+ recs = static_cast<rec_t**>(
+ mem_heap_alloc(heap, n_dense * (2 * sizeof *recs)));
+
+ if (all) {
+ /* Copy the page header. */
+ memcpy(page, page_zip->data, PAGE_DATA);
+ } else {
+ /* Check that the bytes that we skip are identical. */
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+ ut_a(!memcmp(FIL_PAGE_TYPE + page,
+ FIL_PAGE_TYPE + page_zip->data,
+ PAGE_HEADER - FIL_PAGE_TYPE));
+ ut_a(!memcmp(PAGE_HEADER + PAGE_LEVEL + page,
+ PAGE_HEADER + PAGE_LEVEL + page_zip->data,
+ PAGE_DATA - (PAGE_HEADER + PAGE_LEVEL)));
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+
+ /* Copy the mutable parts of the page header. */
+ memcpy(page, page_zip->data, FIL_PAGE_TYPE);
+ memcpy(PAGE_HEADER + page, PAGE_HEADER + page_zip->data,
+ PAGE_LEVEL - PAGE_N_DIR_SLOTS);
+
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+ /* Check that the page headers match after copying. */
+ ut_a(!memcmp(page, page_zip->data, PAGE_DATA));
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+ }
+
+#ifdef UNIV_ZIP_DEBUG
+ /* Clear the uncompressed page, except the header. */
+ memset(PAGE_DATA + page, 0x55, UNIV_PAGE_SIZE - PAGE_DATA);
+#endif /* UNIV_ZIP_DEBUG */
+ UNIV_MEM_INVALID(PAGE_DATA + page, UNIV_PAGE_SIZE - PAGE_DATA);
+
+ /* Copy the page directory. */
+ if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
+ recs + n_dense, n_dense))) {
+zlib_error:
+ mem_heap_free(heap);
+ return(FALSE);
+ }
+
+ /* Copy the infimum and supremum records. */
+ memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
+ infimum_extra, sizeof infimum_extra);
+ if (page_is_empty(page)) {
+ rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
+ PAGE_NEW_SUPREMUM);
+ } else {
+ rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
+ page_zip_dir_get(page_zip, 0)
+ & PAGE_ZIP_DIR_SLOT_MASK);
+ }
+ memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
+ memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
+ supremum_extra_data, sizeof supremum_extra_data);
+
+ page_zip_set_alloc(&d_stream, heap);
+
+ d_stream.next_in = page_zip->data + PAGE_DATA;
+ /* Subtract the space reserved for
+ the page header and the end marker of the modification log. */
+ d_stream.avail_in = static_cast<uInt>(
+ page_zip_get_size(page_zip) - (PAGE_DATA + 1));
+ d_stream.next_out = page + PAGE_ZIP_START;
+ d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
+
+ if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
+ != Z_OK)) {
+ ut_error;
+ }
+
+ /* Decode the zlib header and the index information. */
+ if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
+
+ page_zip_fail(("page_zip_decompress:"
+ " 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
+ goto zlib_error;
+ }
+
+ if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
+
+ page_zip_fail(("page_zip_decompress:"
+ " 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
+ goto zlib_error;
+ }
+
+ index = page_zip_fields_decode(
+ page + PAGE_ZIP_START, d_stream.next_out,
+ page_is_leaf(page) ? &trx_id_col : NULL);
+
+ if (UNIV_UNLIKELY(!index)) {
+
+ goto zlib_error;
+ }
+
+ /* Decompress the user records. */
+ page_zip->n_blobs = 0;
+ d_stream.next_out = page + PAGE_ZIP_START;
+
+ {
+ /* Pre-allocate the offsets for rec_get_offsets_reverse(). */
+ ulint n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
+ + dict_index_get_n_fields(index);
+
+ offsets = static_cast<ulint*>(
+ mem_heap_alloc(heap, n * sizeof(ulint)));
+
+ *offsets = n;
+ }
+
+ /* Decompress the records in heap_no order. */
+ if (!page_is_leaf(page)) {
+ /* This is a node pointer page. */
+ ulint info_bits;
+
+ if (UNIV_UNLIKELY
+ (!page_zip_decompress_node_ptrs(page_zip, &d_stream,
+ recs, n_dense, index,
+ offsets, heap))) {
+ goto err_exit;
+ }
+
+ info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
+ ? REC_INFO_MIN_REC_FLAG : 0;
+
+ if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
+ info_bits))) {
+ goto err_exit;
+ }
+ } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
+ /* This is a leaf page in a secondary index. */
+ if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
+ recs, n_dense,
+ index, offsets))) {
+ goto err_exit;
+ }
+
+ if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
+ page, 0))) {
+err_exit:
+ page_zip_fields_free(index);
+ mem_heap_free(heap);
+ return(FALSE);
+ }
+ } else {
+ /* This is a leaf page in a clustered index. */
+ if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
+ &d_stream, recs,
+ n_dense, index,
+ trx_id_col,
+ offsets, heap))) {
+ goto err_exit;
+ }
+
+ if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
+ page, 0))) {
+ goto err_exit;
+ }
+ }
+
+ ut_a(page_is_comp(page));
+ UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+
+ page_zip_fields_free(index);
+ mem_heap_free(heap);
+#ifndef UNIV_HOTBACKUP
+ ullint time_diff = ut_time_us(NULL) - usec;
+ page_zip_stat[page_zip->ssize - 1].decompressed++;
+ page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff;
+
+ index_id_t index_id = btr_page_get_index_id(page);
+
+ if (srv_cmp_per_index_enabled) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index[index_id].decompressed++;
+ page_zip_stat_per_index[index_id].decompressed_usec += time_diff;
+ mutex_exit(&page_zip_stat_per_index_mutex);
+ }
+#endif /* !UNIV_HOTBACKUP */
+
+ /* Update the stat counter for LRU policy. */
+ buf_LRU_stat_inc_unzip();
+
+ MONITOR_INC(MONITOR_PAGE_DECOMPRESS);
+
+ return(TRUE);
+}
+
+#ifdef UNIV_ZIP_DEBUG
+/**********************************************************************//**
+Dump a block of memory on the standard error stream. */
+static
+void
+page_zip_hexdump_func(
+/*==================*/
+ const char* name, /*!< in: name of the data structure */
+ const void* buf, /*!< in: data */
+ ulint size) /*!< in: length of the data, in bytes */
+{
+ const byte* s = static_cast<const byte*>(buf);
+ ulint addr;
+ const ulint width = 32; /* bytes per line */
+
+ fprintf(stderr, "%s:\n", name);
+
+ for (addr = 0; addr < size; addr += width) {
+ ulint i;
+
+ fprintf(stderr, "%04lx ", (ulong) addr);
+
+ i = ut_min(width, size - addr);
+
+ while (i--) {
+ fprintf(stderr, "%02x", *s++);
+ }
+
+ putc('\n', stderr);
+ }
+}
+
+/** Dump a block of memory on the standard error stream.
+@param buf in: data
+@param size in: length of the data, in bytes */
+#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
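+
+/* For example, page_zip_hexdump(page, UNIV_PAGE_SIZE) expands to
+page_zip_hexdump_func("page", page, UNIV_PAGE_SIZE), so every dump
+is labelled with the name of the expression being dumped. */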
+
+/** Flag: make page_zip_validate() compare page headers only */
+UNIV_INTERN ibool page_zip_validate_header_only = FALSE;
+
+/**********************************************************************//**
+Check that the compressed and decompressed pages match.
+@return TRUE if valid, FALSE if not */
+UNIV_INTERN
+ibool
+page_zip_validate_low(
+/*==================*/
+ const page_zip_des_t* page_zip,/*!< in: compressed page */
+ const page_t* page, /*!< in: uncompressed page */
+ const dict_index_t* index, /*!< in: index of the page, if known */
+ ibool sloppy) /*!< in: FALSE=strict,
+ TRUE=ignore the MIN_REC_FLAG */
+{
+ page_zip_des_t temp_page_zip;
+ byte* temp_page_buf;
+ page_t* temp_page;
+ ibool valid;
+
+ if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
+ FIL_PAGE_LSN - FIL_PAGE_PREV)
+ || memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
+ || memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
+ PAGE_DATA - FIL_PAGE_DATA)) {
+ page_zip_fail(("page_zip_validate: page header\n"));
+ page_zip_hexdump(page_zip, sizeof *page_zip);
+ page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
+ page_zip_hexdump(page, UNIV_PAGE_SIZE);
+ return(FALSE);
+ }
+
+ ut_a(page_is_comp(page));
+
+ if (page_zip_validate_header_only) {
+ return(TRUE);
+ }
+
+ /* page_zip_decompress() expects the uncompressed page to be
+ UNIV_PAGE_SIZE aligned. */
+ temp_page_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
+ temp_page = static_cast<byte*>(ut_align(temp_page_buf, UNIV_PAGE_SIZE));
+
+ UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+ temp_page_zip = *page_zip;
+ valid = page_zip_decompress(&temp_page_zip, temp_page, TRUE);
+ if (!valid) {
+ fputs("page_zip_validate(): failed to decompress\n", stderr);
+ goto func_exit;
+ }
+ if (page_zip->n_blobs != temp_page_zip.n_blobs) {
+ page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
+ page_zip->n_blobs, temp_page_zip.n_blobs));
+ valid = FALSE;
+ }
+#ifdef UNIV_DEBUG
+ if (page_zip->m_start != temp_page_zip.m_start) {
+ page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
+ page_zip->m_start, temp_page_zip.m_start));
+ valid = FALSE;
+ }
+#endif /* UNIV_DEBUG */
+ if (page_zip->m_end != temp_page_zip.m_end) {
+ page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
+ page_zip->m_end, temp_page_zip.m_end));
+ valid = FALSE;
+ }
+ if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
+ page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
+ page_zip->m_nonempty,
+ temp_page_zip.m_nonempty));
+ valid = FALSE;
+ }
+ if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
+ UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) {
+
+ /* In crash recovery, the "minimum record" flag may be
+ set incorrectly until the mini-transaction is
+ committed. Let us tolerate that difference when we
+ are performing a sloppy validation. */
+
+ ulint* offsets;
+ mem_heap_t* heap;
+ const rec_t* rec;
+ const rec_t* trec;
+ byte info_bits_diff;
+ ulint offset
+ = rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE);
+ ut_a(offset >= PAGE_NEW_SUPREMUM);
+ offset -= 5/*REC_NEW_INFO_BITS*/;
+
+ info_bits_diff = page[offset] ^ temp_page[offset];
+
+ if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
+ temp_page[offset] = page[offset];
+
+ if (!memcmp(page + PAGE_HEADER,
+ temp_page + PAGE_HEADER,
+ UNIV_PAGE_SIZE - PAGE_HEADER
+ - FIL_PAGE_DATA_END)) {
+
+ /* Only the minimum record flag
+ differed. Let us ignore it. */
+ page_zip_fail(("page_zip_validate: "
+ "min_rec_flag "
+ "(%s"
+ "%lu,%lu,0x%02lx)\n",
+ sloppy ? "ignored, " : "",
+ page_get_space_id(page),
+ page_get_page_no(page),
+ (ulong) page[offset]));
+ valid = sloppy;
+ goto func_exit;
+ }
+ }
+
+ /* Compare the pointers in the PAGE_FREE list. */
+ rec = page_header_get_ptr(page, PAGE_FREE);
+ trec = page_header_get_ptr(temp_page, PAGE_FREE);
+
+ while (rec || trec) {
+ if (page_offset(rec) != page_offset(trec)) {
+ page_zip_fail(("page_zip_validate: "
+ "PAGE_FREE list: %u!=%u\n",
+ (unsigned) page_offset(rec),
+ (unsigned) page_offset(trec)));
+ valid = FALSE;
+ goto func_exit;
+ }
+
+ rec = page_rec_get_next_low(rec, TRUE);
+ trec = page_rec_get_next_low(trec, TRUE);
+ }
+
+ /* Compare the records. */
+ heap = NULL;
+ offsets = NULL;
+ rec = page_rec_get_next_low(
+ page + PAGE_NEW_INFIMUM, TRUE);
+ trec = page_rec_get_next_low(
+ temp_page + PAGE_NEW_INFIMUM, TRUE);
+
+ do {
+ if (page_offset(rec) != page_offset(trec)) {
+ page_zip_fail(("page_zip_validate: "
+ "record list: 0x%02x!=0x%02x\n",
+ (unsigned) page_offset(rec),
+ (unsigned) page_offset(trec)));
+ valid = FALSE;
+ break;
+ }
+
+ if (index) {
+ /* Compare the data. */
+ offsets = rec_get_offsets(
+ rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+
+ if (memcmp(rec - rec_offs_extra_size(offsets),
+ trec - rec_offs_extra_size(offsets),
+ rec_offs_size(offsets))) {
+ page_zip_fail(
+ ("page_zip_validate: "
+ "record content: 0x%02x",
+ (unsigned) page_offset(rec)));
+ valid = FALSE;
+ break;
+ }
+ }
+
+ rec = page_rec_get_next_low(rec, TRUE);
+ trec = page_rec_get_next_low(trec, TRUE);
+ } while (rec || trec);
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
+ }
+
+func_exit:
+ if (!valid) {
+ page_zip_hexdump(page_zip, sizeof *page_zip);
+ page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
+ page_zip_hexdump(page, UNIV_PAGE_SIZE);
+ page_zip_hexdump(temp_page, UNIV_PAGE_SIZE);
+ }
+ ut_free(temp_page_buf);
+ return(valid);
+}
+
+/**********************************************************************//**
+Check that the compressed and decompressed pages match.
+@return TRUE if valid, FALSE if not */
+UNIV_INTERN
+ibool
+page_zip_validate(
+/*==============*/
+ const page_zip_des_t* page_zip,/*!< in: compressed page */
+ const page_t* page, /*!< in: uncompressed page */
+ const dict_index_t* index) /*!< in: index of the page, if known */
+{
+ return(page_zip_validate_low(page_zip, page, index,
+ recv_recovery_is_on()));
+}
+#endif /* UNIV_ZIP_DEBUG */
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Assert that the compressed and decompressed page headers match.
+@return TRUE */
+static
+ibool
+page_zip_header_cmp(
+/*================*/
+ const page_zip_des_t* page_zip,/*!< in: compressed page */
+ const byte* page) /*!< in: uncompressed page */
+{
+ ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
+ FIL_PAGE_LSN - FIL_PAGE_PREV));
+ ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
+ 2));
+ ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
+ PAGE_DATA - FIL_PAGE_DATA));
+
+ return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
+Write a record that contains externally stored columns on the compressed
+page. The data must already have been written to the uncompressed page.
+@return end of modification log */
+static
+byte*
+page_zip_write_rec_ext(
+/*===================*/
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ const page_t* page, /*!< in: page containing rec */
+ const byte* rec, /*!< in: record being written */
+ dict_index_t* index, /*!< in: record descriptor */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
+ ulint create, /*!< in: nonzero=insert, zero=update */
+ ulint trx_id_col, /*!< in: position of DB_TRX_ID */
+ ulint heap_no, /*!< in: heap number of rec */
+ byte* storage, /*!< in: end of dense page directory */
+ byte* data) /*!< in: end of modification log */
+{
+ const byte* start = rec;
+ ulint i;
+ ulint len;
+ byte* externs = storage;
+ ulint n_ext = rec_offs_n_extern(offsets);
+
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+ rec_offs_extra_size(offsets));
+
+ externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+ * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);
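+
+ /* Informal sketch of the trailer layout assumed here, going
+ downwards from the end of page_zip->data:
+
+ [dense page directory] PAGE_ZIP_DIR_SLOT_SIZE (2) bytes/slot
+ [trx_id + roll_ptr] DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
+ (6 + 7) bytes per user heap slot
+ [BLOB pointers] BTR_EXTERN_FIELD_REF_SIZE (20) bytes each
+
+ "storage" comes in pointing at the low end of the dense
+ directory, so the subtraction above moves "externs" past the
+ trx_id/roll_ptr area to the upper boundary of the BLOB
+ pointer array. */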
+
+ /* Note that this will not take into account
+ the BLOB columns of rec if create==TRUE. */
+ ut_ad(data + rec_offs_data_size(offsets)
+ - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+ - n_ext * BTR_EXTERN_FIELD_REF_SIZE
+ < externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);
+
+ {
+ ulint blob_no = page_zip_get_n_prev_extern(
+ page_zip, rec, index);
+ byte* ext_end = externs - page_zip->n_blobs
+ * BTR_EXTERN_FIELD_REF_SIZE;
+ ut_ad(blob_no <= page_zip->n_blobs);
+ externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
+
+ if (create) {
+ page_zip->n_blobs += static_cast<unsigned>(n_ext);
+ ASSERT_ZERO_BLOB(ext_end - n_ext
+ * BTR_EXTERN_FIELD_REF_SIZE);
+ memmove(ext_end - n_ext
+ * BTR_EXTERN_FIELD_REF_SIZE,
+ ext_end,
+ externs - ext_end);
+ }
+
+ ut_a(blob_no + n_ext <= page_zip->n_blobs);
+ }
+
+ for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+ const byte* src;
+
+ if (UNIV_UNLIKELY(i == trx_id_col)) {
+ ut_ad(!rec_offs_nth_extern(offsets, i));
+ ut_ad(!rec_offs_nth_extern(offsets, i + 1));
+ /* Locate trx_id and roll_ptr. */
+ src = rec_get_nth_field(rec, offsets, i, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+ ut_ad(src + DATA_TRX_ID_LEN
+ == rec_get_nth_field(rec, offsets, i + 1, &len));
+ ut_ad(len == DATA_ROLL_PTR_LEN);
+
+ /* Log the preceding fields. */
+ ASSERT_ZERO(data, src - start);
+ memcpy(data, start, src - start);
+ data += src - start;
+ start = src + (DATA_TRX_ID_LEN
+ + DATA_ROLL_PTR_LEN);
+
+ /* Store trx_id and roll_ptr. */
+ memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+ * (heap_no - 1),
+ src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ i++; /* skip also roll_ptr */
+ } else if (rec_offs_nth_extern(offsets, i)) {
+ src = rec_get_nth_field(rec, offsets, i, &len);
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
+ src += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+ ASSERT_ZERO(data, src - start);
+ memcpy(data, start, src - start);
+ data += src - start;
+ start = src + BTR_EXTERN_FIELD_REF_SIZE;
+
+ /* Store the BLOB pointer. */
+ externs -= BTR_EXTERN_FIELD_REF_SIZE;
+ ut_ad(data < externs);
+ memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
+ }
+ }
+
+ /* Log the last bytes of the record. */
+ len = rec_offs_data_size(offsets) - (start - rec);
+
+ ASSERT_ZERO(data, len);
+ memcpy(data, start, len);
+ data += len;
+
+ return(data);
+}
+
+/**********************************************************************//**
+Write an entire record on the compressed page. The data must already
+have been written to the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_write_rec(
+/*===============*/
+ page_zip_des_t* page_zip,/*!< in/out: compressed page */
+ const byte* rec, /*!< in: record being written */
+ dict_index_t* index, /*!< in: the index the record belongs to */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint create) /*!< in: nonzero=insert, zero=update */
+{
+ const page_t* page;
+ byte* data;
+ byte* storage;
+ ulint heap_no;
+ byte* slot;
+
+ ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
+ ut_ad(page_zip_simple_validate(page_zip));
+ ut_ad(page_zip_get_size(page_zip)
+ > PAGE_DATA + page_zip_dir_size(page_zip));
+ ut_ad(rec_offs_comp(offsets));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
+ ut_ad(page_zip->m_start >= PAGE_DATA);
+
+ page = page_align(rec);
+
+ ut_ad(page_zip_header_cmp(page_zip, page));
+ ut_ad(page_simple_validate_new((page_t*) page));
+
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+ rec_offs_extra_size(offsets));
+
+ slot = page_zip_dir_find(page_zip, page_offset(rec));
+ ut_a(slot);
+ /* Copy the delete mark. */
+ if (rec_get_deleted_flag(rec, TRUE)) {
+ *slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
+ } else {
+ *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
+ }
+
+ ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
+ ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
+ - PAGE_DIR - PAGE_DIR_SLOT_SIZE
+ * page_dir_get_n_slots(page));
+
+ heap_no = rec_get_heap_no_new(rec);
+ ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
+ ut_ad(heap_no < page_dir_get_n_heap(page));
+
+ /* Append to the modification log. */
+ data = page_zip->data + page_zip->m_end;
+ ut_ad(!*data);
+
+ /* Identify the record by writing its heap number - 1.
+ 0 is reserved to indicate the end of the modification log. */
+
+ if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
+ *data++ = (byte) (0x80 | (heap_no - 1) >> 7);
+ ut_ad(!*data);
+ }
+ *data++ = (byte) ((heap_no - 1) << 1);
+ ut_ad(!*data);
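+
+ /* Worked example of this encoding: heap_no - 1 = 5 fits in six
+ bits and is written as the single byte (5 << 1) = 0x0a, with
+ bit 0 left clear and bit 7 clear marking the one-byte form.
+ heap_no - 1 = 100 does not fit, so two bytes are written:
+ 0x80 | (100 >> 7) = 0x80, then (100 << 1) & 0xff = 0xc8; a
+ reader reassembles ((0x80 & 0x7f) << 8 | 0xc8) >> 1 = 100. */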
+
+ {
+ const byte* start = rec - rec_offs_extra_size(offsets);
+ const byte* b = rec - REC_N_NEW_EXTRA_BYTES;
+
+ /* Write the extra bytes backwards, so that
+ rec_offs_extra_size() can be easily computed in
+ page_zip_apply_log() by invoking
+ rec_get_offsets_reverse(). */
+
+ while (b != start) {
+ *data++ = *--b;
+ ut_ad(!*data);
+ }
+ }
+
+ /* Write the data bytes. Store the uncompressed bytes separately. */
+ storage = page_zip_dir_start(page_zip);
+
+ if (page_is_leaf(page)) {
+ ulint len;
+
+ if (dict_index_is_clust(index)) {
+ ulint trx_id_col;
+
+ trx_id_col = dict_index_get_sys_col_pos(index,
+ DATA_TRX_ID);
+ ut_ad(trx_id_col != ULINT_UNDEFINED);
+
+ /* Store separately trx_id, roll_ptr and
+ the BTR_EXTERN_FIELD_REF of each BLOB column. */
+ if (rec_offs_any_extern(offsets)) {
+ data = page_zip_write_rec_ext(
+ page_zip, page,
+ rec, index, offsets, create,
+ trx_id_col, heap_no, storage, data);
+ } else {
+ /* Locate trx_id and roll_ptr. */
+ const byte* src
+ = rec_get_nth_field(rec, offsets,
+ trx_id_col, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+ ut_ad(src + DATA_TRX_ID_LEN
+ == rec_get_nth_field(
+ rec, offsets,
+ trx_id_col + 1, &len));
+ ut_ad(len == DATA_ROLL_PTR_LEN);
+
+ /* Log the preceding fields. */
+ ASSERT_ZERO(data, src - rec);
+ memcpy(data, rec, src - rec);
+ data += src - rec;
+
+ /* Store trx_id and roll_ptr. */
+ memcpy(storage
+ - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+ * (heap_no - 1),
+ src,
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+
+ /* Log the last bytes of the record. */
+ len = rec_offs_data_size(offsets)
+ - (src - rec);
+
+ ASSERT_ZERO(data, len);
+ memcpy(data, src, len);
+ data += len;
+ }
+ } else {
+ /* Leaf page of a secondary index:
+ no externally stored columns */
+ ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
+ == ULINT_UNDEFINED);
+ ut_ad(!rec_offs_any_extern(offsets));
+
+ /* Log the entire record. */
+ len = rec_offs_data_size(offsets);
+
+ ASSERT_ZERO(data, len);
+ memcpy(data, rec, len);
+ data += len;
+ }
+ } else {
+ /* This is a node pointer page. */
+ ulint len;
+
+ /* Non-leaf nodes should not have any externally
+ stored columns. */
+ ut_ad(!rec_offs_any_extern(offsets));
+
+ /* Copy the data bytes, except node_ptr. */
+ len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
+ ut_ad(data + len < storage - REC_NODE_PTR_SIZE
+ * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
+ ASSERT_ZERO(data, len);
+ memcpy(data, rec, len);
+ data += len;
+
+ /* Copy the node pointer to the uncompressed area. */
+ memcpy(storage - REC_NODE_PTR_SIZE
+ * (heap_no - 1),
+ rec + len,
+ REC_NODE_PTR_SIZE);
+ }
+
+ ut_a(!*data);
+ ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
+ page_zip->m_end = data - page_zip->data;
+ page_zip->m_nonempty = TRUE;
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page_align(rec), index));
+#endif /* UNIV_ZIP_DEBUG */
+}
+
+/***********************************************************//**
+Parses a log record of writing a BLOB pointer of a record.
+@return end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_write_blob_ptr(
+/*==========================*/
+ byte* ptr, /*!< in: redo log buffer */
+ byte* end_ptr,/*!< in: redo log buffer end */
+ page_t* page, /*!< in/out: uncompressed page */
+ page_zip_des_t* page_zip)/*!< in/out: compressed page */
+{
+ ulint offset;
+ ulint z_offset;
+
+ ut_ad(!page == !page_zip);
+
+ if (UNIV_UNLIKELY
+ (end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
+
+ return(NULL);
+ }
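+
+ /* The redo record body parsed below consists of a 2-byte offset
+ of the BLOB pointer on the uncompressed page, a 2-byte offset
+ within the compressed page, and the BTR_EXTERN_FIELD_REF_SIZE
+ bytes of the pointer itself. */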
+
+ offset = mach_read_from_2(ptr);
+ z_offset = mach_read_from_2(ptr + 2);
+
+ if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
+ || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
+ || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
+corrupt:
+ recv_sys->found_corrupt_log = TRUE;
+
+ return(NULL);
+ }
+
+ if (page) {
+ if (UNIV_UNLIKELY(!page_zip)
+ || UNIV_UNLIKELY(!page_is_leaf(page))) {
+
+ goto corrupt;
+ }
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, NULL));
+#endif /* UNIV_ZIP_DEBUG */
+
+ memcpy(page + offset,
+ ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
+ memcpy(page_zip->data + z_offset,
+ ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, NULL));
+#endif /* UNIV_ZIP_DEBUG */
+ }
+
+ return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
+}
+
+/**********************************************************************//**
+Write a BLOB pointer of a record on the leaf page of a clustered index.
+The information must already have been updated on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_write_blob_ptr(
+/*====================*/
+ page_zip_des_t* page_zip,/*!< in/out: compressed page */
+ const byte* rec, /*!< in/out: record whose data is being
+ written */
+ dict_index_t* index, /*!< in: index of the page */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint n, /*!< in: column index */
+ mtr_t* mtr) /*!< in: mini-transaction handle,
+ or NULL if no logging is needed */
+{
+ const byte* field;
+ byte* externs;
+ const page_t* page = page_align(rec);
+ ulint blob_no;
+ ulint len;
+
+ ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
+ ut_ad(page_simple_validate_new((page_t*) page));
+ ut_ad(page_zip_simple_validate(page_zip));
+ ut_ad(page_zip_get_size(page_zip)
+ > PAGE_DATA + page_zip_dir_size(page_zip));
+ ut_ad(rec_offs_comp(offsets));
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(rec_offs_any_extern(offsets));
+ ut_ad(rec_offs_nth_extern(offsets, n));
+
+ ut_ad(page_zip->m_start >= PAGE_DATA);
+ ut_ad(page_zip_header_cmp(page_zip, page));
+
+ ut_ad(page_is_leaf(page));
+ ut_ad(dict_index_is_clust(index));
+
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+ rec_offs_extra_size(offsets));
+
+ blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
+ + rec_get_n_extern_new(rec, index, n);
+ ut_a(blob_no < page_zip->n_blobs);
+
+ externs = page_zip->data + page_zip_get_size(page_zip)
+ - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
+ * (PAGE_ZIP_DIR_SLOT_SIZE
+ + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ field = rec_get_nth_field(rec, offsets, n, &len);
+
+ externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
+ field += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+ memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (mtr) {
+#ifndef UNIV_HOTBACKUP
+ byte* log_ptr = mlog_open(
+ mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
+ if (UNIV_UNLIKELY(!log_ptr)) {
+ return;
+ }
+
+ log_ptr = mlog_write_initial_log_record_fast(
+ (byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
+ mach_write_to_2(log_ptr, page_offset(field));
+ log_ptr += 2;
+ mach_write_to_2(log_ptr, externs - page_zip->data);
+ log_ptr += 2;
+ memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
+ log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
+ mlog_close(mtr, log_ptr);
+#endif /* !UNIV_HOTBACKUP */
+ }
+}
+
+/***********************************************************//**
+Parses a log record of writing the node pointer of a record.
+@return end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_write_node_ptr(
+/*==========================*/
+ byte* ptr, /*!< in: redo log buffer */
+ byte* end_ptr,/*!< in: redo log buffer end */
+ page_t* page, /*!< in/out: uncompressed page */
+ page_zip_des_t* page_zip)/*!< in/out: compressed page */
+{
+ ulint offset;
+ ulint z_offset;
+
+ ut_ad(!page == !page_zip);
+
+ if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
+
+ return(NULL);
+ }
+
+ offset = mach_read_from_2(ptr);
+ z_offset = mach_read_from_2(ptr + 2);
+
+ if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
+ || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
+ || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
+corrupt:
+ recv_sys->found_corrupt_log = TRUE;
+
+ return(NULL);
+ }
+
+ if (page) {
+ byte* storage_end;
+ byte* field;
+ byte* storage;
+ ulint heap_no;
+
+ if (UNIV_UNLIKELY(!page_zip)
+ || UNIV_UNLIKELY(page_is_leaf(page))) {
+
+ goto corrupt;
+ }
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, NULL));
+#endif /* UNIV_ZIP_DEBUG */
+
+ field = page + offset;
+ storage = page_zip->data + z_offset;
+
+ storage_end = page_zip_dir_start(page_zip);
+
+ heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
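+
+ /* The node-pointer array grows downwards from the start of the
+ dense directory, one REC_NODE_PTR_SIZE entry per user heap slot
+ (see page_zip_write_node_ptr() below), so the distance between
+ z_offset and the directory start recovers the heap number. */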
+
+ if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
+ || UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
+ || UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
+
+ goto corrupt;
+ }
+
+ memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
+ memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, NULL));
+#endif /* UNIV_ZIP_DEBUG */
+ }
+
+ return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
+}
+
+/**********************************************************************//**
+Write the node pointer of a record on a non-leaf compressed page. */
+UNIV_INTERN
+void
+page_zip_write_node_ptr(
+/*====================*/
+ page_zip_des_t* page_zip,/*!< in/out: compressed page */
+ byte* rec, /*!< in/out: record */
+ ulint size, /*!< in: data size of rec */
+ ulint ptr, /*!< in: node pointer */
+ mtr_t* mtr) /*!< in: mini-transaction, or NULL */
+{
+ byte* field;
+ byte* storage;
+#ifdef UNIV_DEBUG
+ page_t* page = page_align(rec);
+#endif /* UNIV_DEBUG */
+
+ ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
+ ut_ad(page_simple_validate_new(page));
+ ut_ad(page_zip_simple_validate(page_zip));
+ ut_ad(page_zip_get_size(page_zip)
+ > PAGE_DATA + page_zip_dir_size(page_zip));
+ ut_ad(page_rec_is_comp(rec));
+
+ ut_ad(page_zip->m_start >= PAGE_DATA);
+ ut_ad(page_zip_header_cmp(page_zip, page));
+
+ ut_ad(!page_is_leaf(page));
+
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+ UNIV_MEM_ASSERT_RW(rec, size);
+
+ storage = page_zip_dir_start(page_zip)
+ - (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
+ field = rec + size - REC_NODE_PTR_SIZE;
+
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+ ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+#if REC_NODE_PTR_SIZE != 4
+# error "REC_NODE_PTR_SIZE != 4"
+#endif
+ mach_write_to_4(field, ptr);
+ memcpy(storage, field, REC_NODE_PTR_SIZE);
+
+ if (mtr) {
+#ifndef UNIV_HOTBACKUP
+ byte* log_ptr = mlog_open(mtr,
+ 11 + 2 + 2 + REC_NODE_PTR_SIZE);
+ if (UNIV_UNLIKELY(!log_ptr)) {
+ return;
+ }
+
+ log_ptr = mlog_write_initial_log_record_fast(
+ field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
+ mach_write_to_2(log_ptr, page_offset(field));
+ log_ptr += 2;
+ mach_write_to_2(log_ptr, storage - page_zip->data);
+ log_ptr += 2;
+ memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
+ log_ptr += REC_NODE_PTR_SIZE;
+ mlog_close(mtr, log_ptr);
+#endif /* !UNIV_HOTBACKUP */
+ }
+}
+
+/**********************************************************************//**
+Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
+UNIV_INTERN
+void
+page_zip_write_trx_id_and_roll_ptr(
+/*===============================*/
+ page_zip_des_t* page_zip,/*!< in/out: compressed page */
+ byte* rec, /*!< in/out: record */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint trx_id_col,/*!< in: column number of TRX_ID in rec */
+ trx_id_t trx_id, /*!< in: transaction identifier */
+ roll_ptr_t roll_ptr)/*!< in: roll_ptr */
+{
+ byte* field;
+ byte* storage;
+#ifdef UNIV_DEBUG
+ page_t* page = page_align(rec);
+#endif /* UNIV_DEBUG */
+ ulint len;
+
+ ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
+
+ ut_ad(page_simple_validate_new(page));
+ ut_ad(page_zip_simple_validate(page_zip));
+ ut_ad(page_zip_get_size(page_zip)
+ > PAGE_DATA + page_zip_dir_size(page_zip));
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(rec_offs_comp(offsets));
+
+ ut_ad(page_zip->m_start >= PAGE_DATA);
+ ut_ad(page_zip_header_cmp(page_zip, page));
+
+ ut_ad(page_is_leaf(page));
+
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+ storage = page_zip_dir_start(page_zip)
+ - (rec_get_heap_no_new(rec) - 1)
+ * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
+# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
+#endif
+ field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+ ut_ad(field + DATA_TRX_ID_LEN
+ == rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
+ ut_ad(len == DATA_ROLL_PTR_LEN);
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+ ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+#if DATA_TRX_ID_LEN != 6
+# error "DATA_TRX_ID_LEN != 6"
+#endif
+ mach_write_to_6(field, trx_id);
+#if DATA_ROLL_PTR_LEN != 7
+# error "DATA_ROLL_PTR_LEN != 7"
+#endif
+ mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
+ memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
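+
+ /* "storage" addresses the 13-byte trx_id/roll_ptr slot of this
+ record: the slots are laid out one per user heap number, growing
+ downwards from the start of the dense directory. */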
+
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+ rec_offs_extra_size(offsets));
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+}
+
+/**********************************************************************//**
+Clear an area on the uncompressed and compressed page.
+Do not clear the data payload, as that would grow the modification log. */
+static
+void
+page_zip_clear_rec(
+/*===============*/
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ byte* rec, /*!< in: record to clear */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
+{
+ ulint heap_no;
+ page_t* page = page_align(rec);
+ byte* storage;
+ byte* field;
+ ulint len;
+ /* page_zip_validate() would fail here if a record
+ containing externally stored columns is being deleted. */
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
+ ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
+ ut_ad(page_zip_header_cmp(page_zip, page));
+
+ heap_no = rec_get_heap_no_new(rec);
+ ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
+
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+ rec_offs_extra_size(offsets));
+
+ if (!page_is_leaf(page)) {
+ /* Clear node_ptr. On the compressed page,
+ there is an array of node_ptr immediately before the
+ dense page directory, at the very end of the page. */
+ storage = page_zip_dir_start(page_zip);
+ ut_ad(dict_index_get_n_unique_in_tree(index) ==
+ rec_offs_n_fields(offsets) - 1);
+ field = rec_get_nth_field(rec, offsets,
+ rec_offs_n_fields(offsets) - 1,
+ &len);
+ ut_ad(len == REC_NODE_PTR_SIZE);
+
+ ut_ad(!rec_offs_any_extern(offsets));
+ memset(field, 0, REC_NODE_PTR_SIZE);
+ memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
+ 0, REC_NODE_PTR_SIZE);
+ } else if (dict_index_is_clust(index)) {
+ /* Clear trx_id and roll_ptr. On the compressed page,
+ there is an array of these fields immediately before the
+ dense page directory, at the very end of the page. */
+ const ulint trx_id_pos
+ = dict_col_get_clust_pos(
+ dict_table_get_sys_col(
+ index->table, DATA_TRX_ID), index);
+ storage = page_zip_dir_start(page_zip);
+ field = rec_get_nth_field(rec, offsets, trx_id_pos, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+
+ memset(field, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ memset(storage - (heap_no - 1)
+ * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
+ 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ if (rec_offs_any_extern(offsets)) {
+ ulint i;
+
+ for (i = rec_offs_n_fields(offsets); i--; ) {
+ /* Clear all BLOB pointers in order to make
+ page_zip_validate() pass. */
+ if (rec_offs_nth_extern(offsets, i)) {
+ field = rec_get_nth_field(
+ rec, offsets, i, &len);
+ ut_ad(len
+ == BTR_EXTERN_FIELD_REF_SIZE);
+ memset(field + len
+ - BTR_EXTERN_FIELD_REF_SIZE,
+ 0, BTR_EXTERN_FIELD_REF_SIZE);
+ }
+ }
+ }
+ } else {
+ ut_ad(!rec_offs_any_extern(offsets));
+ }
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+}
+
+/**********************************************************************//**
+Write the "deleted" flag of a record on a compressed page. The flag must
+already have been written on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_rec_set_deleted(
+/*=====================*/
+ page_zip_des_t* page_zip,/*!< in/out: compressed page */
+ const byte* rec, /*!< in: record on the uncompressed page */
+ ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */
+{
+ byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
+ ut_a(slot);
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+ if (flag) {
+ *slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
+ } else {
+ *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
+ }
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page_align(rec), NULL));
+#endif /* UNIV_ZIP_DEBUG */
+}
+
+/**********************************************************************//**
+Write the "owned" flag of a record on a compressed page. The n_owned field
+must already have been written on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_rec_set_owned(
+/*===================*/
+ page_zip_des_t* page_zip,/*!< in/out: compressed page */
+ const byte* rec, /*!< in: record on the uncompressed page */
+ ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
+{
+ byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
+ ut_a(slot);
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+ if (flag) {
+ *slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
+ } else {
+ *slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
+ }
+}
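+
+/* In the two flag writers above, each dense directory slot is a
+2-byte big-endian value: the low 14 bits (PAGE_ZIP_DIR_SLOT_MASK)
+hold the record offset, while the two top bits carry the "deleted"
+and "owned" flags, so masking *slot with the flag shifted right by
+8 updates the most significant byte in place. */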
+
+/**********************************************************************//**
+Insert a record into the dense page directory. */
+UNIV_INTERN
+void
+page_zip_dir_insert(
+/*================*/
+ page_zip_des_t* page_zip,/*!< in/out: compressed page */
+ const byte* prev_rec,/*!< in: record after which to insert */
+ const byte* free_rec,/*!< in: record from which rec was
+ allocated, or NULL */
+ byte* rec) /*!< in: record to insert */
+{
+ ulint n_dense;
+ byte* slot_rec;
+ byte* slot_free;
+
+ ut_ad(prev_rec != rec);
+ ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
+ ut_ad(page_zip_simple_validate(page_zip));
+
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+ if (page_rec_is_infimum(prev_rec)) {
+ /* Use the first slot. */
+ slot_rec = page_zip->data + page_zip_get_size(page_zip);
+ } else {
+ byte* end = page_zip->data + page_zip_get_size(page_zip);
+ byte* start = end - page_zip_dir_user_size(page_zip);
+
+ if (UNIV_LIKELY(!free_rec)) {
+ /* PAGE_N_RECS was already incremented
+ in page_cur_insert_rec_zip(), but the
+ dense directory slot at that position
+ contains garbage. Skip it. */
+ start += PAGE_ZIP_DIR_SLOT_SIZE;
+ }
+
+ slot_rec = page_zip_dir_find_low(start, end,
+ page_offset(prev_rec));
+ ut_a(slot_rec);
+ }
+
+ /* Read the old n_dense (n_heap may have been incremented). */
+ n_dense = page_dir_get_n_heap(page_zip->data)
+ - (PAGE_HEAP_NO_USER_LOW + 1);
+
+ if (UNIV_LIKELY_NULL(free_rec)) {
+ /* The record was allocated from the free list.
+ Shift the dense directory only up to that slot.
+ Note that in this case, n_dense is actually
+ off by one, because page_cur_insert_rec_zip()
+ did not increment n_heap. */
+ ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
+ + PAGE_HEAP_NO_USER_LOW);
+ ut_ad(rec >= free_rec);
+ slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
+ ut_ad(slot_free);
+ slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
+ } else {
+ /* The record was allocated from the heap.
+ Shift the entire dense directory. */
+ ut_ad(rec_get_heap_no_new(rec) == n_dense
+ + PAGE_HEAP_NO_USER_LOW);
+
+ /* Shift to the end of the dense page directory. */
+ slot_free = page_zip->data + page_zip_get_size(page_zip)
+ - PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
+ }
+
+ /* Shift the dense directory to make room for rec. */
+ memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
+ slot_rec - slot_free);
+
+ /* Write the entry for the inserted record.
+ The "owned" and "deleted" flags must be zero. */
+ mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
+}
+
+/**********************************************************************//**
+Shift the dense page directory and the array of BLOB pointers
+when a record is deleted. */
+UNIV_INTERN
+void
+page_zip_dir_delete(
+/*================*/
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ byte* rec, /*!< in: deleted record */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ const byte* free) /*!< in: previous start of
+ the free list */
+{
+ byte* slot_rec;
+ byte* slot_free;
+ ulint n_ext;
+ page_t* page = page_align(rec);
+
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(rec_offs_comp(offsets));
+
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
+ rec_offs_extra_size(offsets));
+
+ slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
+
+ ut_a(slot_rec);
+
+ /* This could not be done before page_zip_dir_find() above,
+ whose search area depends on PAGE_N_RECS. */
+ page_header_set_field(page, page_zip, PAGE_N_RECS,
+ (ulint)(page_get_n_recs(page) - 1));
+
+ if (UNIV_UNLIKELY(!free)) {
+ /* Make the last slot the start of the free list. */
+ slot_free = page_zip->data + page_zip_get_size(page_zip)
+ - PAGE_ZIP_DIR_SLOT_SIZE
+ * (page_dir_get_n_heap(page_zip->data)
+ - PAGE_HEAP_NO_USER_LOW);
+ } else {
+ slot_free = page_zip_dir_find_free(page_zip,
+ page_offset(free));
+ ut_a(slot_free < slot_rec);
+ /* Grow the free list by one slot by moving the start. */
+ slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
+ }
+
+ if (UNIV_LIKELY(slot_rec > slot_free)) {
+ memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
+ slot_free,
+ slot_rec - slot_free);
+ }
+
+ /* Write the entry for the deleted record.
+ The "owned" and "deleted" flags will be cleared. */
+ mach_write_to_2(slot_free, page_offset(rec));
+
+ if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
+ ut_ad(!rec_offs_any_extern(offsets));
+ goto skip_blobs;
+ }
+
+ n_ext = rec_offs_n_extern(offsets);
+ if (UNIV_UNLIKELY(n_ext)) {
+ /* Shift and zero fill the array of BLOB pointers. */
+ ulint blob_no;
+ byte* externs;
+ byte* ext_end;
+
+ blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
+ ut_a(blob_no + n_ext <= page_zip->n_blobs);
+
+ externs = page_zip->data + page_zip_get_size(page_zip)
+ - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
+ * (PAGE_ZIP_DIR_SLOT_SIZE
+ + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ ext_end = externs - page_zip->n_blobs
+ * BTR_EXTERN_FIELD_REF_SIZE;
+ externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
+
+ page_zip->n_blobs -= static_cast<unsigned>(n_ext);
+ /* Shift and zero fill the array. */
+ memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
+ (page_zip->n_blobs - blob_no)
+ * BTR_EXTERN_FIELD_REF_SIZE);
+ memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
+ }
+
+skip_blobs:
+ /* The compression algorithm expects info_bits and n_owned
+ to be 0 for deleted records. */
+ rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
+
+ page_zip_clear_rec(page_zip, rec, index, offsets);
+}
+
+/**********************************************************************//**
+Add a slot to the dense page directory. */
+UNIV_INTERN
+void
+page_zip_dir_add_slot(
+/*==================*/
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ ulint is_clustered) /*!< in: nonzero for clustered index,
+ zero for others */
+{
+ ulint n_dense;
+ byte* dir;
+ byte* stored;
+
+ ut_ad(page_is_comp(page_zip->data));
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+ /* Read the old n_dense (n_heap has already been incremented). */
+ n_dense = page_dir_get_n_heap(page_zip->data)
+ - (PAGE_HEAP_NO_USER_LOW + 1);
+
+ dir = page_zip->data + page_zip_get_size(page_zip)
+ - PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
+
+ if (!page_is_leaf(page_zip->data)) {
+ ut_ad(!page_zip->n_blobs);
+ stored = dir - n_dense * REC_NODE_PTR_SIZE;
+ } else if (is_clustered) {
+ /* Move the BLOB pointer array backwards to make space for the
+ roll_ptr and trx_id columns and the dense directory slot. */
+ byte* externs;
+
+ stored = dir - n_dense
+ * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ externs = stored
+ - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
+ ASSERT_ZERO(externs
+ - (PAGE_ZIP_DIR_SLOT_SIZE
+ + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
+ PAGE_ZIP_DIR_SLOT_SIZE
+ + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
+ + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
+ externs, stored - externs);
+ } else {
+ stored = dir
+ - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
+ ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
+ PAGE_ZIP_DIR_SLOT_SIZE);
+ }
+
+ /* Move the uncompressed area backwards to make space
+ for one directory slot. */
+ memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
+}
+
+/***********************************************************//**
+Parses a log record of writing to the header of a page.
+@return end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_write_header(
+/*========================*/
+ byte* ptr, /*!< in: redo log buffer */
+ byte* end_ptr,/*!< in: redo log buffer end */
+ page_t* page, /*!< in/out: uncompressed page */
+ page_zip_des_t* page_zip)/*!< in/out: compressed page */
+{
+ ulint offset;
+ ulint len;
+
+ ut_ad(ptr && end_ptr);
+ ut_ad(!page == !page_zip);
+
+ if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
+
+ return(NULL);
+ }
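+
+ /* The MLOG_ZIP_WRITE_HEADER payload is one byte of page offset
+ (less than PAGE_DATA), one byte of length, and then that many
+ bytes of header data, to be applied below to both the
+ uncompressed and the compressed copy of the header. */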
+
+ offset = (ulint) *ptr++;
+ len = (ulint) *ptr++;
+
+ if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
+corrupt:
+ recv_sys->found_corrupt_log = TRUE;
+
+ return(NULL);
+ }
+
+ if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
+
+ return(NULL);
+ }
+
+ if (page) {
+ if (UNIV_UNLIKELY(!page_zip)) {
+
+ goto corrupt;
+ }
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, NULL));
+#endif /* UNIV_ZIP_DEBUG */
+
+ memcpy(page + offset, ptr, len);
+ memcpy(page_zip->data + offset, ptr, len);
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, NULL));
+#endif /* UNIV_ZIP_DEBUG */
+ }
+
+ return(ptr + len);
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Write a log record of writing to the uncompressed header portion of a page. */
+UNIV_INTERN
+void
+page_zip_write_header_log(
+/*======================*/
+ const byte* data, /*!< in: data on the uncompressed page */
+ ulint length, /*!< in: length of the data */
+ mtr_t* mtr) /*!< in: mini-transaction */
+{
+ byte* log_ptr = mlog_open(mtr, 11 + 1 + 1);
+ ulint offset = page_offset(data);
+
+ ut_ad(offset < PAGE_DATA);
+ ut_ad(offset + length < PAGE_DATA);
+#if PAGE_DATA > 255
+# error "PAGE_DATA > 255"
+#endif
+ ut_ad(length < 256);
+
+ /* If no logging is requested, we may return now */
+ if (UNIV_UNLIKELY(!log_ptr)) {
+
+ return;
+ }
+
+ log_ptr = mlog_write_initial_log_record_fast(
+ (byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
+ *log_ptr++ = (byte) offset;
+ *log_ptr++ = (byte) length;
+ mlog_close(mtr, log_ptr);
+
+ mlog_catenate_string(mtr, data, length);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Reorganize and compress a page. This is a low-level operation for
+compressed pages, to be used when page_zip_compress() fails.
+On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
+The function btr_page_reorganize() should be preferred whenever possible.
+IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
+non-clustered index, the caller must update the insert buffer free
+bits in the same mini-transaction in such a way that the modification
+will be redo-logged.
+@return TRUE on success, FALSE on failure; page_zip will be left
+intact on failure, but page will be overwritten. */
+UNIV_INTERN
+ibool
+page_zip_reorganize(
+/*================*/
+ buf_block_t* block, /*!< in/out: page with compressed page;
+ on the compressed page, in: size;
+ out: data, n_blobs,
+ m_start, m_end, m_nonempty */
+ dict_index_t* index, /*!< in: index of the B-tree node */
+ mtr_t* mtr) /*!< in: mini-transaction */
+{
+#ifndef UNIV_HOTBACKUP
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
+#endif /* !UNIV_HOTBACKUP */
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+ page_t* page = buf_block_get_frame(block);
+ buf_block_t* temp_block;
+ page_t* temp_page;
+ ulint log_mode;
+
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(page_is_comp(page));
+ ut_ad(!dict_index_is_ibuf(index));
+ /* Note that page_zip_validate(page_zip, page, index) may fail here. */
+ UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+ /* Disable logging */
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+
+#ifndef UNIV_HOTBACKUP
+ temp_block = buf_block_alloc(buf_pool);
+ btr_search_drop_page_hash_index(block);
+ block->check_index_page_at_flush = TRUE;
+#else /* !UNIV_HOTBACKUP */
+ ut_ad(block == back_block1);
+ temp_block = back_block2;
+#endif /* !UNIV_HOTBACKUP */
+ temp_page = temp_block->frame;
+
+ /* Copy the old page to temporary space */
+ buf_frame_copy(temp_page, page);
+
+ btr_blob_dbg_remove(page, index, "zip_reorg");
+
+ /* Recreate the page: note that global data on page (possible
+ segment headers, next page-field, etc.) is preserved intact */
+
+ page_create(block, mtr, TRUE);
+
+ /* Copy the records from the temporary space to the recreated page;
+ do not copy the lock bits yet */
+
+ page_copy_rec_list_end_no_locks(block, temp_block,
+ page_get_infimum_rec(temp_page),
+ index, mtr);
+
+ if (!dict_index_is_clust(index) && page_is_leaf(temp_page)) {
+ /* Copy max trx id to recreated page */
+ trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
+ page_set_max_trx_id(block, NULL, max_trx_id, NULL);
+ ut_ad(max_trx_id != 0);
+ }
+
+ /* Restore logging. */
+ mtr_set_log_mode(mtr, log_mode);
+
+ if (!page_zip_compress(page_zip, page, index, page_zip_level, mtr)) {
+
+#ifndef UNIV_HOTBACKUP
+ buf_block_free(temp_block);
+#endif /* !UNIV_HOTBACKUP */
+ return(FALSE);
+ }
+
+ lock_move_reorganize_page(block, temp_block);
+
+#ifndef UNIV_HOTBACKUP
+ buf_block_free(temp_block);
+#endif /* !UNIV_HOTBACKUP */
+ return(TRUE);
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Copy the records of a page byte for byte. Do not copy the page header
+or trailer, except those B-tree header fields that are directly
+related to the storage of records. Also copy PAGE_MAX_TRX_ID.
+NOTE: The caller must update the lock table and the adaptive hash index. */
+UNIV_INTERN
+void
+page_zip_copy_recs(
+/*===============*/
+ page_zip_des_t* page_zip, /*!< out: copy of src_zip
+ (n_blobs, m_start, m_end,
+ m_nonempty, data[0..size-1]) */
+ page_t* page, /*!< out: copy of src */
+ const page_zip_des_t* src_zip, /*!< in: compressed page */
+ const page_t* src, /*!< in: page */
+ dict_index_t* index, /*!< in: index of the B-tree */
+ mtr_t* mtr) /*!< in: mini-transaction */
+{
+ ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, src, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(!dict_index_is_ibuf(index));
+#ifdef UNIV_ZIP_DEBUG
+ /* The B-tree operations that call this function may set
+ FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
+ mismatch. A strict page_zip_validate() will be executed later
+ during the B-tree operations. */
+ ut_a(page_zip_validate_low(src_zip, src, index, TRUE));
+#endif /* UNIV_ZIP_DEBUG */
+ ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
+ if (UNIV_UNLIKELY(src_zip->n_blobs)) {
+ ut_a(page_is_leaf(src));
+ ut_a(dict_index_is_clust(index));
+ }
+
+ /* The PAGE_MAX_TRX_ID must be set on leaf pages of secondary
+ indexes. It does not matter on other pages. */
+ ut_a(dict_index_is_clust(index) || !page_is_leaf(src)
+ || page_get_max_trx_id(src));
+
+ UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
+ UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
+
+ /* Copy those B-tree page header fields that are related to
+ the records stored in the page. Also copy the field
+ PAGE_MAX_TRX_ID. Skip the rest of the page header and
+ trailer. On the compressed page, there is no trailer. */
+#if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END
+# error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END"
+#endif
+ memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
+ PAGE_HEADER_PRIV_END);
+ memcpy(PAGE_DATA + page, PAGE_DATA + src,
+ UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
+ memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
+ PAGE_HEADER_PRIV_END);
+ memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
+ page_zip_get_size(page_zip) - PAGE_DATA);
+
+ /* Copy all fields of src_zip to page_zip, except the pointer
+ to the compressed data page. */
+ {
+ page_zip_t* data = page_zip->data;
+ memcpy(page_zip, src_zip, sizeof *page_zip);
+ page_zip->data = data;
+ }
+ ut_ad(page_zip_get_trailer_len(page_zip, dict_index_is_clust(index))
+ + page_zip->m_end < page_zip_get_size(page_zip));
+
+ if (!page_is_leaf(src)
+ && UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
+ && UNIV_LIKELY(mach_read_from_4(page
+ + FIL_PAGE_PREV) != FIL_NULL)) {
+ /* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
+ ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
+ TRUE);
+ if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
+ rec_t* rec = page + offs;
+ ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
+ & REC_INFO_MIN_REC_FLAG);
+ rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
+ }
+ }
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+ btr_blob_dbg_add(page, index, "page_zip_copy_recs");
+
+ page_zip_compress_write_log(page_zip, page, index, mtr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Parses a log record of compressing an index page.
+@return end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_compress(
+/*====================*/
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< out: uncompressed page */
+ page_zip_des_t* page_zip)/*!< out: compressed page */
+{
+ ulint size;
+ ulint trailer_size;
+
+ ut_ad(ptr && end_ptr);
+ ut_ad(!page == !page_zip);
+
+ if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
+
+ return(NULL);
+ }
+
+ size = mach_read_from_2(ptr);
+ ptr += 2;
+ trailer_size = mach_read_from_2(ptr);
+ ptr += 2;
+
+ if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
+
+ return(NULL);
+ }
+
+ if (page) {
+ if (UNIV_UNLIKELY(!page_zip)
+ || UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
+corrupt:
+ recv_sys->found_corrupt_log = TRUE;
+
+ return(NULL);
+ }
+
+ memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
+ memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
+ memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
+ memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
+ page_zip_get_size(page_zip) - trailer_size
+ - (FIL_PAGE_TYPE + size));
+ memcpy(page_zip->data + page_zip_get_size(page_zip)
+ - trailer_size, ptr + 8 + size, trailer_size);
+
+ if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page,
+ TRUE))) {
+
+ goto corrupt;
+ }
+ }
+
+ return(ptr + 8 + size + trailer_size);
+}
+
+/**********************************************************************//**
+Calculate the compressed page checksum.
+@return page checksum */
+UNIV_INTERN
+ulint
+page_zip_calc_checksum(
+/*===================*/
+ const void* data, /*!< in: compressed page */
+ ulint size, /*!< in: size of compressed page */
+ srv_checksum_algorithm_t algo) /*!< in: algorithm to use */
+{
+ uLong adler;
+ ib_uint32_t crc32;
+ const Bytef* s = static_cast<const byte*>(data);
+
+ /* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
+ and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
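+
+ /* With the byte offsets from fil0fil.h (FIL_PAGE_OFFSET = 4,
+ FIL_PAGE_LSN = 16, FIL_PAGE_TYPE = 24,
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID = 34), the summed ranges are
+ bytes 4..15, 24..25 and 34..size - 1 of the compressed page. */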
+
+ switch (algo) {
+ case SRV_CHECKSUM_ALGORITHM_CRC32:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
+
+ ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+
+ crc32 = ut_crc32(s + FIL_PAGE_OFFSET,
+ FIL_PAGE_LSN - FIL_PAGE_OFFSET)
+ ^ ut_crc32(s + FIL_PAGE_TYPE, 2)
+ ^ ut_crc32(s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+ size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+
+ return((ulint) crc32);
+ case SRV_CHECKSUM_ALGORITHM_INNODB:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
+ ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+
+ adler = adler32(0L, s + FIL_PAGE_OFFSET,
+ FIL_PAGE_LSN - FIL_PAGE_OFFSET);
+ adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
+ adler = adler32(
+ adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+ static_cast<uInt>(size)
+ - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+
+ return((ulint) adler);
+ case SRV_CHECKSUM_ALGORITHM_NONE:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
+ return(BUF_NO_CHECKSUM_MAGIC);
+ /* no default so the compiler will emit a warning if new enum
+ is added and not handled here */
+ }
+
+ ut_error;
+ return(0);
+}
+
+/**********************************************************************//**
+Verify a compressed page's checksum.
+@return TRUE if the stored checksum is valid according to the value of
+innodb_checksum_algorithm */
+UNIV_INTERN
+ibool
+page_zip_verify_checksum(
+/*=====================*/
+ const void* data, /*!< in: compressed page */
+ ulint size) /*!< in: size of compressed page */
+{
+ ib_uint32_t stored;
+ ib_uint32_t calc;
+ ib_uint32_t crc32 = 0 /* silence bogus warning */;
+ ib_uint32_t innodb = 0 /* silence bogus warning */;
+
+ stored = static_cast<ib_uint32_t>(mach_read_from_4(
+ static_cast<const unsigned char*>(data) + FIL_PAGE_SPACE_OR_CHKSUM));
+
+#if FIL_PAGE_LSN % 8
+#error "FIL_PAGE_LSN must be 64 bit aligned"
+#endif
+
+ /* Check if page is empty */
+ if (stored == 0
+ && *reinterpret_cast<const ib_uint64_t*>(static_cast<const char*>(
+ data)
+ + FIL_PAGE_LSN) == 0) {
+ /* make sure that the page is really empty */
+ ulint i;
+ for (i = 0; i < size; i++) {
+ if (*((const char*) data + i) != 0) {
+ return(FALSE);
+ }
+ }
+ /* Empty page */
+ return(TRUE);
+ }
+
+ calc = static_cast<ib_uint32_t>(page_zip_calc_checksum(
+ data, size, static_cast<srv_checksum_algorithm_t>(
+ srv_checksum_algorithm)));
+
+ if (stored == calc) {
+ return(TRUE);
+ }
+
+ switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) {
+ case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
+ return(stored == calc);
+ case SRV_CHECKSUM_ALGORITHM_CRC32:
+ if (stored == BUF_NO_CHECKSUM_MAGIC) {
+ return(TRUE);
+ }
+ crc32 = calc;
+ innodb = static_cast<ib_uint32_t>(page_zip_calc_checksum(
+ data, size, SRV_CHECKSUM_ALGORITHM_INNODB));
+ break;
+ case SRV_CHECKSUM_ALGORITHM_INNODB:
+ if (stored == BUF_NO_CHECKSUM_MAGIC) {
+ return(TRUE);
+ }
+ crc32 = static_cast<ib_uint32_t>(page_zip_calc_checksum(
+ data, size, SRV_CHECKSUM_ALGORITHM_CRC32));
+ innodb = calc;
+ break;
+ case SRV_CHECKSUM_ALGORITHM_NONE:
+ return(TRUE);
+ /* no default so the compiler will emit a warning if new enum
+ is added and not handled here */
+ }
+
+ return(stored == crc32 || stored == innodb);
+}
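+
+/* Usage sketch (illustrative only; "frame" and "zip_size" are
+hypothetical names): a reader of a compressed block would typically
+guard further processing with
+
+ if (!page_zip_verify_checksum(frame, zip_size)) {
+ return(DB_CORRUPTION);
+ }
+
+accepting the page only if the stored checksum matches one of the
+values tolerated by srv_checksum_algorithm. */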