summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2023-01-24 14:59:42 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2023-01-24 14:59:42 +0200
commit4d9fe4032b768e04e33482d83199882c7bb0d9d8 (patch)
tree0340142c1c99721b67311591b1f553d1042b628d
parent12b4133f59c4f3c8e89bc593e5c391a3cdb3d02f (diff)
parentfa543a0f621fcf19e31c7d044f2b6c4f0836cd5a (diff)
downloadmariadb-git-4d9fe4032b768e04e33482d83199882c7bb0d9d8.tar.gz
Merge 10.8 into 10.9
-rw-r--r--cmake/cpack_rpm.cmake1
-rw-r--r--include/mysql/service_wsrep.h12
-rw-r--r--mysql-test/suite/galera/disabled.def1
-rw-r--r--mysql-test/suite/galera/r/galera_insert_bulk.result30
-rw-r--r--mysql-test/suite/galera/t/galera_insert_bulk.test88
-rw-r--r--mysql-test/suite/galera_3nodes/disabled.def2
-rw-r--r--mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result57
-rw-r--r--mysql-test/suite/innodb/r/innodb_wl6326.result405
-rw-r--r--mysql-test/suite/innodb/r/insert_into_empty.result4
-rw-r--r--mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt1
-rw-r--r--mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test82
-rw-r--r--mysql-test/suite/innodb/t/innodb_wl6326.opt1
-rw-r--r--mysql-test/suite/innodb/t/innodb_wl6326.test519
-rw-r--r--mysql-test/suite/innodb/t/insert_into_empty.test4
-rw-r--r--mysql-test/suite/innodb/t/undo_truncate.opt1
-rw-r--r--mysql-test/suite/innodb_gis/r/rtree_split.result12
-rw-r--r--mysql-test/suite/innodb_gis/t/rtree_split.test15
-rw-r--r--sql/service_wsrep.cc22
-rw-r--r--sql/sp_head.cc2
-rw-r--r--sql/sql_plugin_services.inl2
-rw-r--r--sql/wsrep_dummy.cc6
-rw-r--r--storage/innobase/CMakeLists.txt2
-rw-r--r--storage/innobase/btr/btr0btr.cc500
-rw-r--r--storage/innobase/btr/btr0cur.cc2342
-rw-r--r--storage/innobase/btr/btr0defragment.cc73
-rw-r--r--storage/innobase/btr/btr0pcur.cc162
-rw-r--r--storage/innobase/btr/btr0sea.cc22
-rw-r--r--storage/innobase/buf/buf0buf.cc27
-rw-r--r--storage/innobase/dict/dict0crea.cc29
-rw-r--r--storage/innobase/dict/dict0dict.cc8
-rw-r--r--storage/innobase/dict/dict0load.cc23
-rw-r--r--storage/innobase/dict/dict0stats.cc4
-rw-r--r--storage/innobase/fil/fil0fil.cc2
-rw-r--r--storage/innobase/fsp/fsp0fsp.cc25
-rw-r--r--storage/innobase/gis/gis0sea.cc700
-rw-r--r--storage/innobase/handler/ha_innodb.cc6
-rw-r--r--storage/innobase/handler/handler0alter.cc24
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.cc28
-rw-r--r--storage/innobase/include/btr0btr.h11
-rw-r--r--storage/innobase/include/btr0cur.h100
-rw-r--r--storage/innobase/include/btr0pcur.h55
-rw-r--r--storage/innobase/include/btr0pcur.inl40
-rw-r--r--storage/innobase/include/btr0types.h32
-rw-r--r--storage/innobase/include/buf0buf.h2
-rw-r--r--storage/innobase/include/gis0rtree.h46
-rw-r--r--storage/innobase/include/gis0type.h8
-rw-r--r--storage/innobase/include/ibuf0ibuf.inl4
-rw-r--r--storage/innobase/include/mtr0log.h32
-rw-r--r--storage/innobase/include/mtr0mtr.h228
-rw-r--r--storage/innobase/include/small_vector.h100
-rw-r--r--storage/innobase/mtr/mtr0mtr.cc424
-rw-r--r--storage/innobase/row/row0import.cc5
-rw-r--r--storage/innobase/row/row0ins.cc154
-rw-r--r--storage/innobase/row/row0log.cc33
-rw-r--r--storage/innobase/row/row0merge.cc18
-rw-r--r--storage/innobase/row/row0purge.cc46
-rw-r--r--storage/innobase/row/row0row.cc16
-rw-r--r--storage/innobase/row/row0sel.cc16
-rw-r--r--storage/innobase/row/row0uins.cc44
-rw-r--r--storage/innobase/row/row0umod.cc53
-rw-r--r--storage/innobase/row/row0upd.cc53
-rw-r--r--storage/innobase/trx/trx0purge.cc1
-rw-r--r--storage/spider/ha_spider.cc6
-rw-r--r--storage/spider/mysql-test/spider/bugfix/r/mdev_30191.result44
-rw-r--r--storage/spider/mysql-test/spider/bugfix/t/mdev_30191.cnf3
-rw-r--r--storage/spider/mysql-test/spider/bugfix/t/mdev_30191.test51
66 files changed, 3171 insertions, 3698 deletions
diff --git a/cmake/cpack_rpm.cmake b/cmake/cpack_rpm.cmake
index dc2ca544d28..4811eec8f8c 100644
--- a/cmake/cpack_rpm.cmake
+++ b/cmake/cpack_rpm.cmake
@@ -155,6 +155,7 @@ SET(ignored
"%ignore ${CMAKE_INSTALL_PREFIX}/share/doc"
"%ignore ${CMAKE_INSTALL_PREFIX}/share/man"
"%ignore ${CMAKE_INSTALL_PREFIX}/share/man/man1"
+ "%ignore ${CMAKE_INSTALL_PREFIX}/share/man/man3"
"%ignore ${CMAKE_INSTALL_PREFIX}/share/man/man8"
"%ignore ${CMAKE_INSTALL_PREFIX}/share/pkgconfig"
)
diff --git a/include/mysql/service_wsrep.h b/include/mysql/service_wsrep.h
index 42b758c03f3..8541b348ae4 100644
--- a/include/mysql/service_wsrep.h
+++ b/include/mysql/service_wsrep.h
@@ -69,6 +69,9 @@ extern struct wsrep_service_st {
void (*wsrep_thd_self_abort_func)(MYSQL_THD thd);
int (*wsrep_thd_append_key_func)(MYSQL_THD thd, const struct wsrep_key* key,
int n_keys, enum Wsrep_service_key_type);
+ int (*wsrep_thd_append_table_key_func)(MYSQL_THD thd, const char* db,
+ const char* table, enum Wsrep_service_key_type);
+ my_bool (*wsrep_thd_is_local_transaction)(const MYSQL_THD thd);
const char* (*wsrep_thd_client_state_str_func)(const MYSQL_THD thd);
const char* (*wsrep_thd_client_mode_str_func)(const MYSQL_THD thd);
const char* (*wsrep_thd_transaction_state_str_func)(const MYSQL_THD thd);
@@ -121,6 +124,8 @@ extern struct wsrep_service_st {
#define wsrep_thd_is_local(T) wsrep_service->wsrep_thd_is_local_func(T)
#define wsrep_thd_self_abort(T) wsrep_service->wsrep_thd_self_abort_func(T)
#define wsrep_thd_append_key(T,W,N,K) wsrep_service->wsrep_thd_append_key_func(T,W,N,K)
+#define wsrep_thd_append_table_key(T,D,B,K) wsrep_service->wsrep_thd_append_table_key_func(T,D,B,K)
+#define wsrep_thd_is_local_transaction(T) wsrep_service->wsrep_thd_is_local_transaction(T) /* struct member is declared without the _func suffix */
#define wsrep_thd_client_state_str(T) wsrep_service->wsrep_thd_client_state_str_func(T)
#define wsrep_thd_client_mode_str(T) wsrep_service->wsrep_thd_client_mode_str_func(T)
#define wsrep_thd_transaction_state_str(T) wsrep_service->wsrep_thd_transaction_state_str_func(T)
@@ -226,6 +231,13 @@ extern "C" int wsrep_thd_append_key(MYSQL_THD thd,
int n_keys,
enum Wsrep_service_key_type);
+extern "C" int wsrep_thd_append_table_key(MYSQL_THD thd,
+ const char* db,
+ const char* table,
+ enum Wsrep_service_key_type);
+
+extern "C" my_bool wsrep_thd_is_local_transaction(const MYSQL_THD thd);
+
extern const char* wsrep_sr_table_name_full;
extern "C" const char* wsrep_get_sr_table_name();
diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def
index e189b47f7c8..849dffc7018 100644
--- a/mysql-test/suite/galera/disabled.def
+++ b/mysql-test/suite/galera/disabled.def
@@ -11,7 +11,6 @@
##############################################################################
galera_as_slave_ctas : MDEV-28378 timeout
-galera_bf_abort_at_after_statement : Timeout in wait_condition.inc for SELECT COUNT(*) = 1 FROM t1 where id = 1 and val = 3
galera_pc_recovery : MDEV-25199 cluster fails to start up
galera_sst_encrypted : MDEV-29876 Galera test failure on galera_sst_encrypted
MW-284 : MDEV-29861 Galera test case hangs
diff --git a/mysql-test/suite/galera/r/galera_insert_bulk.result b/mysql-test/suite/galera/r/galera_insert_bulk.result
new file mode 100644
index 00000000000..f4d4adf64e1
--- /dev/null
+++ b/mysql-test/suite/galera/r/galera_insert_bulk.result
@@ -0,0 +1,30 @@
+connection node_2;
+connection node_1;
+connection node_1;
+CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
+SET foreign_key_checks = 0;
+SET unique_checks = 0;
+START TRANSACTION;
+connection node_2;
+SET foreign_key_checks = 1;
+SET unique_checks = 1;
+INSERT INTO t1 VALUES (1001);
+connection node_1;
+COMMIT;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+DROP TABLE t1;
+connection node_1;
+CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
+START TRANSACTION;
+connection node_2;
+SET foreign_key_checks = 1;
+SET unique_checks = 1;
+START TRANSACTION;
+INSERT INTO t1 VALUES (1001);
+connection node_1;
+COMMIT;
+2
+connection node_2;
+COMMIT;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+DROP TABLE t1;
diff --git a/mysql-test/suite/galera/t/galera_insert_bulk.test b/mysql-test/suite/galera/t/galera_insert_bulk.test
new file mode 100644
index 00000000000..f58870d5f74
--- /dev/null
+++ b/mysql-test/suite/galera/t/galera_insert_bulk.test
@@ -0,0 +1,88 @@
+#
+# Test that bulk insert replicates as table-level exclusive key and
+# rolls back properly if needed.
+#
+
+--source include/galera_cluster.inc
+--source include/have_innodb.inc
+
+#
+# Make bulk insert BF-abort, but regular insert succeed.
+#
+
+--connection node_1
+CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
+
+# Disable foreign and unique key checks to allow bulk insert.
+SET foreign_key_checks = 0;
+SET unique_checks = 0;
+
+START TRANSACTION;
+
+--let $count=0
+--disable_query_log
+while ($count < 1000)
+{
+ --eval INSERT INTO t1 VALUES ($count)
+ --inc $count
+}
+--enable_query_log
+
+--connection node_2
+
+# Disable bulk insert.
+SET foreign_key_checks = 1;
+SET unique_checks = 1;
+
+# Insert a value out of the bulk insert range.
+INSERT INTO t1 VALUES (1001);
+
+--connection node_1
+--error ER_LOCK_DEADLOCK
+COMMIT;
+
+DROP TABLE t1;
+
+#
+# Make bulk insert succeed, but regular insert BF-abort.
+#
+
+--connection node_1
+CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
+
+--let $before_bulk_keys = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_repl_keys'`
+
+START TRANSACTION;
+
+--let $count=0
+--disable_query_log
+while ($count < 1000)
+{
+ --eval INSERT INTO t1 VALUES ($count)
+ --inc $count
+}
+--enable_query_log
+
+--connection node_2
+
+# Disable bulk insert.
+SET foreign_key_checks = 1;
+SET unique_checks = 1;
+
+START TRANSACTION;
+
+# Insert a value out of the bulk insert range.
+INSERT INTO t1 VALUES (1001);
+
+--connection node_1
+COMMIT;
+
+# Expect two keys to be added for bulk insert: DB-level shared key and table-level exclusive key.
+--let $bulk_keys_count = `SELECT VARIABLE_VALUE - $before_bulk_keys FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_repl_keys'`
+--echo $bulk_keys_count
+
+--connection node_2
+--error ER_LOCK_DEADLOCK
+COMMIT;
+
+DROP TABLE t1;
diff --git a/mysql-test/suite/galera_3nodes/disabled.def b/mysql-test/suite/galera_3nodes/disabled.def
index 5b58ce4bbe5..b51d221e72f 100644
--- a/mysql-test/suite/galera_3nodes/disabled.def
+++ b/mysql-test/suite/galera_3nodes/disabled.def
@@ -12,10 +12,8 @@
galera_2_cluster : MDEV-29877 Galera test failure on galera_2_cluster
galera_gtid_2_cluster : MDEV-29877 Galera test failure on galera_2_cluster
-galera_parallel_apply_3nodes : MDEV-29368 DEBUG_SYNC timeout
galera_ipv6_mariabackup : MDEV-24097
galera_ipv6_mariabackup_section : MDEV-24097, MDEV-22195
-galera_parallel_apply_3nodes : MDEV-29774 Galera test galera_parallel_apply_3nodes is unstable
galera_vote_rejoin_mysqldump : MDEV-24481: galera_3nodes.galera_vote_rejoin_mysqldump MTR failed: mysql_shutdown failed
galera_ssl_reload : MDEV-30172 At line 50: mysql_shutdown failed
GCF-354 : mysqltest: At line 39: query 'DROP TABLE test.t1' failed: 1047: WSREP has not yet prepared node for application use
diff --git a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result
deleted file mode 100644
index 13e45425872..00000000000
--- a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result
+++ /dev/null
@@ -1,57 +0,0 @@
-#
-# Bug#69122 - INNODB DOESN'T REDO-LOG INSERT BUFFER MERGE
-# OPERATION IF IT IS DONE IN-PLACE
-#
-call mtr.add_suppression("InnoDB: innodb_read_only prevents crash recovery");
-call mtr.add_suppression("Plugin initialization aborted at srv0start\\.cc");
-call mtr.add_suppression("Plugin 'InnoDB'");
-FLUSH TABLES;
-CREATE TABLE t1(
-a INT AUTO_INCREMENT PRIMARY KEY,
-b CHAR(1),
-c INT,
-INDEX(b))
-ENGINE=InnoDB STATS_PERSISTENT=0;
-SET GLOBAL innodb_change_buffering_debug = 1;
-SET GLOBAL innodb_change_buffering = all;
-Warnings:
-Warning 1287 '@@innodb_change_buffering' is deprecated and will be removed in a future release
-INSERT INTO t1 SELECT 0,'x',1 FROM seq_1_to_8192;
-BEGIN;
-SELECT b FROM t1 LIMIT 3;
-b
-x
-x
-x
-connect con1,localhost,root,,;
-BEGIN;
-DELETE FROM t1 WHERE a=1;
-INSERT INTO t1 VALUES(1,'X',1);
-SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace';
-SELECT b FROM t1 LIMIT 3;
-ERROR HY000: Lost connection to server during query
-disconnect con1;
-connection default;
-FOUND 1 /Wrote log record for ibuf update in place operation/ in mysqld.1.err
-# restart: --innodb-read-only
-CHECK TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 check Error Unknown storage engine 'InnoDB'
-test.t1 check error Corrupt
-FOUND 1 /innodb_read_only prevents crash recovery/ in mysqld.1.err
-# restart: --innodb-force-recovery=5
-SELECT * FROM t1 LIMIT 1;
-a b c
-1 X 1
-SHOW ENGINE INNODB STATUS;
-Type Name Status
-InnoDB insert 0, delete mark 0
-SET GLOBAL innodb_fast_shutdown=0;
-# restart
-CHECK TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 check status OK
-SHOW ENGINE INNODB STATUS;
-Type Name Status
-InnoDB
-DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/r/innodb_wl6326.result b/mysql-test/suite/innodb/r/innodb_wl6326.result
deleted file mode 100644
index fcd58aedafe..00000000000
--- a/mysql-test/suite/innodb/r/innodb_wl6326.result
+++ /dev/null
@@ -1,405 +0,0 @@
-SET GLOBAL innodb_adaptive_hash_index = false;
-SET GLOBAL innodb_stats_persistent = false;
-connect con1,localhost,root,,;
-connect con2,localhost,root,,;
-connect con3,localhost,root,,;
-CREATE TABLE t1 (
-a00 CHAR(255) NOT NULL DEFAULT 'a',
-a01 CHAR(255) NOT NULL DEFAULT 'a',
-a02 CHAR(255) NOT NULL DEFAULT 'a',
-a03 CHAR(255) NOT NULL DEFAULT 'a',
-a04 CHAR(255) NOT NULL DEFAULT 'a',
-a05 CHAR(255) NOT NULL DEFAULT 'a',
-a06 CHAR(255) NOT NULL DEFAULT 'a',
-b INT NOT NULL DEFAULT 0
-) ENGINE = InnoDB;
-ALTER TABLE t1 ADD PRIMARY KEY(
-a00,
-a01,
-a02,
-a03,
-a04,
-a05,
-a06
-);
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-1
-SET GLOBAL innodb_limit_optimistic_insert_debug = 7;
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('aa');
-INSERT INTO t1 (a00) VALUES ('ab');
-INSERT INTO t1 (a00) VALUES ('ac');
-INSERT INTO t1 (a00) VALUES ('ad');
-INSERT INTO t1 (a00) VALUES ('ae');
-INSERT INTO t1 (a00) VALUES ('af');
-INSERT INTO t1 (a00) VALUES ('ag');
-INSERT INTO t1 (a00) VALUES ('ah');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-3
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('ai');
-INSERT INTO t1 (a00) VALUES ('aj');
-INSERT INTO t1 (a00) VALUES ('ak');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-4
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('al');
-INSERT INTO t1 (a00) VALUES ('am');
-INSERT INTO t1 (a00) VALUES ('an');
-INSERT INTO t1 (a00) VALUES ('ao');
-INSERT INTO t1 (a00) VALUES ('ap');
-INSERT INTO t1 (a00) VALUES ('aq');
-INSERT INTO t1 (a00) VALUES ('ar');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-5
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('as');
-INSERT INTO t1 (a00) VALUES ('at');
-INSERT INTO t1 (a00) VALUES ('au');
-INSERT INTO t1 (a00) VALUES ('av');
-INSERT INTO t1 (a00) VALUES ('aw');
-INSERT INTO t1 (a00) VALUES ('ax');
-INSERT INTO t1 (a00) VALUES ('ay');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-6
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('az');
-INSERT INTO t1 (a00) VALUES ('ba');
-INSERT INTO t1 (a00) VALUES ('bb');
-INSERT INTO t1 (a00) VALUES ('bc');
-INSERT INTO t1 (a00) VALUES ('bd');
-INSERT INTO t1 (a00) VALUES ('be');
-INSERT INTO t1 (a00) VALUES ('bf');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-7
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('bg');
-INSERT INTO t1 (a00) VALUES ('bh');
-INSERT INTO t1 (a00) VALUES ('bi');
-INSERT INTO t1 (a00) VALUES ('bj');
-INSERT INTO t1 (a00) VALUES ('bk');
-INSERT INTO t1 (a00) VALUES ('bl');
-INSERT INTO t1 (a00) VALUES ('bm');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-8
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('bn');
-INSERT INTO t1 (a00) VALUES ('bo');
-INSERT INTO t1 (a00) VALUES ('bp');
-INSERT INTO t1 (a00) VALUES ('bq');
-INSERT INTO t1 (a00) VALUES ('br');
-INSERT INTO t1 (a00) VALUES ('bs');
-INSERT INTO t1 (a00) VALUES ('bt');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-11
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('bu');
-INSERT INTO t1 (a00) VALUES ('bv');
-INSERT INTO t1 (a00) VALUES ('bw');
-INSERT INTO t1 (a00) VALUES ('bx');
-INSERT INTO t1 (a00) VALUES ('by');
-INSERT INTO t1 (a00) VALUES ('bz');
-INSERT INTO t1 (a00) VALUES ('ca');
-INSERT INTO t1 (a00) VALUES ('cb');
-INSERT INTO t1 (a00) VALUES ('cc');
-INSERT INTO t1 (a00) VALUES ('cd');
-INSERT INTO t1 (a00) VALUES ('ce');
-INSERT INTO t1 (a00) VALUES ('cf');
-INSERT INTO t1 (a00) VALUES ('cg');
-INSERT INTO t1 (a00) VALUES ('ch');
-INSERT INTO t1 (a00) VALUES ('ci');
-INSERT INTO t1 (a00) VALUES ('cj');
-INSERT INTO t1 (a00) VALUES ('ck');
-INSERT INTO t1 (a00) VALUES ('cl');
-INSERT INTO t1 (a00) VALUES ('cm');
-INSERT INTO t1 (a00) VALUES ('cn');
-INSERT INTO t1 (a00) VALUES ('co');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-15
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('cp');
-INSERT INTO t1 (a00) VALUES ('cq');
-INSERT INTO t1 (a00) VALUES ('cr');
-INSERT INTO t1 (a00) VALUES ('cs');
-INSERT INTO t1 (a00) VALUES ('ct');
-INSERT INTO t1 (a00) VALUES ('cu');
-INSERT INTO t1 (a00) VALUES ('cv');
-INSERT INTO t1 (a00) VALUES ('cw');
-INSERT INTO t1 (a00) VALUES ('cx');
-INSERT INTO t1 (a00) VALUES ('cy');
-INSERT INTO t1 (a00) VALUES ('cz');
-INSERT INTO t1 (a00) VALUES ('da');
-INSERT INTO t1 (a00) VALUES ('db');
-INSERT INTO t1 (a00) VALUES ('dc');
-INSERT INTO t1 (a00) VALUES ('dd');
-INSERT INTO t1 (a00) VALUES ('de');
-INSERT INTO t1 (a00) VALUES ('df');
-INSERT INTO t1 (a00) VALUES ('dg');
-INSERT INTO t1 (a00) VALUES ('dh');
-INSERT INTO t1 (a00) VALUES ('di');
-INSERT INTO t1 (a00) VALUES ('dj');
-INSERT INTO t1 (a00) VALUES ('dk');
-INSERT INTO t1 (a00) VALUES ('dl');
-INSERT INTO t1 (a00) VALUES ('dm');
-INSERT INTO t1 (a00) VALUES ('dn');
-INSERT INTO t1 (a00) VALUES ('do');
-INSERT INTO t1 (a00) VALUES ('dp');
-INSERT INTO t1 (a00) VALUES ('dq');
-INSERT INTO t1 (a00) VALUES ('dr');
-INSERT INTO t1 (a00) VALUES ('ds');
-INSERT INTO t1 (a00) VALUES ('dt');
-INSERT INTO t1 (a00) VALUES ('du');
-INSERT INTO t1 (a00) VALUES ('dv');
-INSERT INTO t1 (a00) VALUES ('dw');
-INSERT INTO t1 (a00) VALUES ('dx');
-INSERT INTO t1 (a00) VALUES ('dy');
-INSERT INTO t1 (a00) VALUES ('dz');
-INSERT INTO t1 (a00) VALUES ('ea');
-INSERT INTO t1 (a00) VALUES ('eb');
-INSERT INTO t1 (a00) VALUES ('ec');
-INSERT INTO t1 (a00) VALUES ('ed');
-INSERT INTO t1 (a00) VALUES ('ee');
-INSERT INTO t1 (a00) VALUES ('ef');
-INSERT INTO t1 (a00) VALUES ('eg');
-INSERT INTO t1 (a00) VALUES ('eh');
-INSERT INTO t1 (a00) VALUES ('ei');
-INSERT INTO t1 (a00) VALUES ('ej');
-INSERT INTO t1 (a00) VALUES ('ek');
-INSERT INTO t1 (a00) VALUES ('el');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-23
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('em');
-INSERT INTO t1 (a00) VALUES ('en');
-INSERT INTO t1 (a00) VALUES ('eo');
-INSERT INTO t1 (a00) VALUES ('ep');
-INSERT INTO t1 (a00) VALUES ('eq');
-INSERT INTO t1 (a00) VALUES ('er');
-INSERT INTO t1 (a00) VALUES ('es');
-INSERT INTO t1 (a00) VALUES ('et');
-INSERT INTO t1 (a00) VALUES ('eu');
-INSERT INTO t1 (a00) VALUES ('ev');
-INSERT INTO t1 (a00) VALUES ('ew');
-INSERT INTO t1 (a00) VALUES ('ex');
-INSERT INTO t1 (a00) VALUES ('ey');
-INSERT INTO t1 (a00) VALUES ('ez');
-INSERT INTO t1 (a00) VALUES ('fa');
-INSERT INTO t1 (a00) VALUES ('fb');
-INSERT INTO t1 (a00) VALUES ('fc');
-INSERT INTO t1 (a00) VALUES ('fd');
-INSERT INTO t1 (a00) VALUES ('fe');
-INSERT INTO t1 (a00) VALUES ('ff');
-INSERT INTO t1 (a00) VALUES ('fg');
-INSERT INTO t1 (a00) VALUES ('fh');
-INSERT INTO t1 (a00) VALUES ('fi');
-INSERT INTO t1 (a00) VALUES ('fj');
-INSERT INTO t1 (a00) VALUES ('fk');
-INSERT INTO t1 (a00) VALUES ('fl');
-INSERT INTO t1 (a00) VALUES ('fm');
-INSERT INTO t1 (a00) VALUES ('fn');
-INSERT INTO t1 (a00) VALUES ('fo');
-INSERT INTO t1 (a00) VALUES ('fp');
-INSERT INTO t1 (a00) VALUES ('fq');
-INSERT INTO t1 (a00) VALUES ('fr');
-INSERT INTO t1 (a00) VALUES ('fs');
-INSERT INTO t1 (a00) VALUES ('ft');
-INSERT INTO t1 (a00) VALUES ('fu');
-INSERT INTO t1 (a00) VALUES ('fv');
-INSERT INTO t1 (a00) VALUES ('fw');
-INSERT INTO t1 (a00) VALUES ('fx');
-INSERT INTO t1 (a00) VALUES ('fy');
-INSERT INTO t1 (a00) VALUES ('fz');
-INSERT INTO t1 (a00) VALUES ('ga');
-INSERT INTO t1 (a00) VALUES ('gb');
-INSERT INTO t1 (a00) VALUES ('gc');
-INSERT INTO t1 (a00) VALUES ('gd');
-INSERT INTO t1 (a00) VALUES ('ge');
-INSERT INTO t1 (a00) VALUES ('gf');
-INSERT INTO t1 (a00) VALUES ('gg');
-INSERT INTO t1 (a00) VALUES ('gh');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-29
-SET GLOBAL innodb_limit_optimistic_insert_debug = 0;
-# Test start
-SET DEBUG_SYNC = 'RESET';
-INSERT INTO t1 (a00) VALUES ('bfa');
-connection con1;
-SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue';
-INSERT INTO t1 (a00) VALUES ('bfb');
-connection con2;
-SET DEBUG_SYNC = 'now WAIT_FOR reached';
-SELECT a00,a01 FROM t1 WHERE a00 = 'aa';
-a00 a01
-aa a
-SELECT a00,a01 FROM t1 WHERE a00 = 'aq';
-a00 a01
-aq a
-SELECT a00,a01 FROM t1 WHERE a00 = 'cp';
-a00 a01
-cp a
-SELECT a00,a01 FROM t1 WHERE a00 = 'el';
-a00 a01
-el a
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1';
-SELECT a00,a01 FROM t1 WHERE a00 = 'ar';
-connection con3;
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2';
-SELECT a00,a01 FROM t1 WHERE a00 = 'cn';
-connection default;
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1';
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1';
-SET DEBUG_SYNC = 'now SIGNAL continue';
-connection con1;
-connection con2;
-a00 a01
-ar a
-connection con3;
-a00 a01
-cn a
-connection default;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-30
-SET DEBUG_SYNC = 'RESET';
-INSERT INTO t1 (a00) VALUES ('coa');
-connection con1;
-SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue';
-INSERT INTO t1 (a00) VALUES ('cob');
-connection con2;
-SET DEBUG_SYNC = 'now WAIT_FOR reached';
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1';
-SELECT a00,a01 FROM t1 WHERE a00 = 'aa';
-connection con3;
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2';
-SELECT a00,a01 FROM t1 WHERE a00 = 'el';
-connection default;
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1';
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1';
-SET DEBUG_SYNC = 'now SIGNAL continue';
-connection con1;
-connection con2;
-a00 a01
-aa a
-connection con3;
-a00 a01
-el a
-connection default;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-31
-SET DEBUG_SYNC = 'RESET';
-INSERT INTO t1 (a00) VALUES ('gba');
-connection con1;
-SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue';
-INSERT INTO t1 (a00) VALUES ('gbb');
-connection con2;
-SET DEBUG_SYNC = 'now WAIT_FOR reached';
-SELECT a00,a01 FROM t1 WHERE a00 = 'aa';
-a00 a01
-aa a
-SELECT a00,a01 FROM t1 WHERE a00 = 'ek';
-a00 a01
-ek a
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1';
-SELECT a00,a01 FROM t1 WHERE a00 = 'el';
-connection con3;
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2';
-SELECT a00,a01 FROM t1 WHERE a00 = 'gb';
-connection default;
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1';
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1';
-SET DEBUG_SYNC = 'now SIGNAL continue';
-connection con1;
-connection con2;
-a00 a01
-el a
-connection con3;
-a00 a01
-gb a
-connection default;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-32
-SET DEBUG_SYNC = 'RESET';
-connection default;
-disconnect con1;
-disconnect con2;
-disconnect con3;
-DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/r/insert_into_empty.result b/mysql-test/suite/innodb/r/insert_into_empty.result
index d1ba0878033..429e0174f9d 100644
--- a/mysql-test/suite/innodb/r/insert_into_empty.result
+++ b/mysql-test/suite/innodb/r/insert_into_empty.result
@@ -45,6 +45,10 @@ SET tx_read_only=1;
BEGIN;
INSERT INTO t2 VALUES(0);
INSERT INTO t VALUES(0);
+ROLLBACK;
+BEGIN;
+INSERT INTO t2 VALUES(0);
+INSERT INTO t VALUES(0);
COMMIT;
INSERT INTO t VALUES(0);
DROP TEMPORARY TABLE t,t2;
diff --git a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt b/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt
deleted file mode 100644
index e5d7090c883..00000000000
--- a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt
+++ /dev/null
@@ -1 +0,0 @@
---innodb_buffer_pool_size=24M
diff --git a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test b/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test
deleted file mode 100644
index 129037e783b..00000000000
--- a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test
+++ /dev/null
@@ -1,82 +0,0 @@
---echo #
---echo # Bug#69122 - INNODB DOESN'T REDO-LOG INSERT BUFFER MERGE
---echo # OPERATION IF IT IS DONE IN-PLACE
---echo #
---source include/have_innodb.inc
-# innodb_change_buffering_debug option is debug only
---source include/have_debug.inc
-# Embedded server does not support crashing
---source include/not_embedded.inc
-# DBUG_SUICIDE() hangs under valgrind
---source include/not_valgrind.inc
-# This test is slow on buildbot.
---source include/big_test.inc
---source include/have_sequence.inc
-
-call mtr.add_suppression("InnoDB: innodb_read_only prevents crash recovery");
-call mtr.add_suppression("Plugin initialization aborted at srv0start\\.cc");
-call mtr.add_suppression("Plugin 'InnoDB'");
-FLUSH TABLES;
-
-CREATE TABLE t1(
- a INT AUTO_INCREMENT PRIMARY KEY,
- b CHAR(1),
- c INT,
- INDEX(b))
-ENGINE=InnoDB STATS_PERSISTENT=0;
-
---let $_server_id= `SELECT @@server_id`
---let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
-
-# The flag innodb_change_buffering_debug is only available in debug builds.
-# It instructs InnoDB to try to evict pages from the buffer pool when
-# change buffering is possible, so that the change buffer will be used
-# whenever possible.
-SET GLOBAL innodb_change_buffering_debug = 1;
-SET GLOBAL innodb_change_buffering = all;
-let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err;
-
-# Create enough rows for the table, so that the change buffer will be
-# used for modifying the secondary index page. There must be multiple
-# index pages, because changes to the root page are never buffered.
-INSERT INTO t1 SELECT 0,'x',1 FROM seq_1_to_8192;
-
-BEGIN;
-SELECT b FROM t1 LIMIT 3;
-
-connect (con1,localhost,root,,);
-BEGIN;
-DELETE FROM t1 WHERE a=1;
-# This should be buffered, if innodb_change_buffering_debug = 1 is in effect.
-INSERT INTO t1 VALUES(1,'X',1);
-
-SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace';
---exec echo "wait" > $_expect_file_name
---error 2013
-# This should force a change buffer merge
-SELECT b FROM t1 LIMIT 3;
-disconnect con1;
-connection default;
-let SEARCH_PATTERN=Wrote log record for ibuf update in place operation;
---source include/search_pattern_in_file.inc
-
---let $restart_parameters= --innodb-read-only
---source include/start_mysqld.inc
-CHECK TABLE t1;
---source include/shutdown_mysqld.inc
-let SEARCH_PATTERN=innodb_read_only prevents crash recovery;
---source include/search_pattern_in_file.inc
-
---let $restart_parameters= --innodb-force-recovery=5
---source include/start_mysqld.inc
-SELECT * FROM t1 LIMIT 1;
-replace_regex /.*operations:.* (insert.*), delete \d.*discarded .*/\1/;
-SHOW ENGINE INNODB STATUS;
-# Slow shutdown will not merge the changes due to innodb_force_recovery=5.
-SET GLOBAL innodb_fast_shutdown=0;
---let $restart_parameters=
---source include/restart_mysqld.inc
-CHECK TABLE t1;
-replace_regex /.*operations:.* insert [1-9][0-9]*, delete mark [1-9][0-9]*, delete \d.*discarded .*//;
-SHOW ENGINE INNODB STATUS;
-DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/t/innodb_wl6326.opt b/mysql-test/suite/innodb/t/innodb_wl6326.opt
deleted file mode 100644
index 99bf0e5a28b..00000000000
--- a/mysql-test/suite/innodb/t/innodb_wl6326.opt
+++ /dev/null
@@ -1 +0,0 @@
---innodb-sys-tablestats
diff --git a/mysql-test/suite/innodb/t/innodb_wl6326.test b/mysql-test/suite/innodb/t/innodb_wl6326.test
deleted file mode 100644
index 1cf98cd1c7b..00000000000
--- a/mysql-test/suite/innodb/t/innodb_wl6326.test
+++ /dev/null
@@ -1,519 +0,0 @@
-#
-# WL#6326: InnoDB: fix index->lock contention
-#
-
---source include/have_innodb.inc
---source include/have_debug.inc
---source include/have_debug_sync.inc
---source include/have_innodb_16k.inc
-
---disable_query_log
-SET @old_innodb_limit_optimistic_insert_debug = @@innodb_limit_optimistic_insert_debug;
-SET @old_innodb_adaptive_hash_index = @@innodb_adaptive_hash_index;
-SET @old_innodb_stats_persistent = @@innodb_stats_persistent;
---enable_query_log
-
-# Save the initial number of concurrent sessions
---source include/count_sessions.inc
-
-SET GLOBAL innodb_adaptive_hash_index = false;
-SET GLOBAL innodb_stats_persistent = false;
-
---connect (con1,localhost,root,,)
---connect (con2,localhost,root,,)
---connect (con3,localhost,root,,)
-
-CREATE TABLE t1 (
- a00 CHAR(255) NOT NULL DEFAULT 'a',
- a01 CHAR(255) NOT NULL DEFAULT 'a',
- a02 CHAR(255) NOT NULL DEFAULT 'a',
- a03 CHAR(255) NOT NULL DEFAULT 'a',
- a04 CHAR(255) NOT NULL DEFAULT 'a',
- a05 CHAR(255) NOT NULL DEFAULT 'a',
- a06 CHAR(255) NOT NULL DEFAULT 'a',
- b INT NOT NULL DEFAULT 0
-) ENGINE = InnoDB;
-
-ALTER TABLE t1 ADD PRIMARY KEY(
- a00,
- a01,
- a02,
- a03,
- a04,
- a05,
- a06
-);
-
-#
-# Prepare primary key index tree to be used for this test.
-#
-
-# Only root (1)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-# Make the first records sparse artificially,
-# not to cause modify_tree by single node_ptr insert operation.
-# * (7 - 2) records should be larger than a half of the page size
-# * (7 + 2) records should be fit to the page
-# (above t1 definition is already adjusted)
-SET GLOBAL innodb_limit_optimistic_insert_debug = 7;
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('aa');
-INSERT INTO t1 (a00) VALUES ('ab');
-INSERT INTO t1 (a00) VALUES ('ac');
-INSERT INTO t1 (a00) VALUES ('ad');
-INSERT INTO t1 (a00) VALUES ('ae');
-INSERT INTO t1 (a00) VALUES ('af');
-INSERT INTO t1 (a00) VALUES ('ag');
-INSERT INTO t1 (a00) VALUES ('ah');
-COMMIT;
-# Raise root (1-2)
-# (aa,ad)
-# (aa,ab,ac)(ad,ae,af,ag,ah)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('ai');
-INSERT INTO t1 (a00) VALUES ('aj');
-INSERT INTO t1 (a00) VALUES ('ak');
-COMMIT;
-# Split leaf (1-3)
-# (aa,ad,ak)
-# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('al');
-INSERT INTO t1 (a00) VALUES ('am');
-INSERT INTO t1 (a00) VALUES ('an');
-INSERT INTO t1 (a00) VALUES ('ao');
-INSERT INTO t1 (a00) VALUES ('ap');
-INSERT INTO t1 (a00) VALUES ('aq');
-INSERT INTO t1 (a00) VALUES ('ar');
-COMMIT;
-# Split leaf (1-4)
-# (aa,ad,ak,ar)
-# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak,al,am,an,ao,ap,aq)(ar)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('as');
-INSERT INTO t1 (a00) VALUES ('at');
-INSERT INTO t1 (a00) VALUES ('au');
-INSERT INTO t1 (a00) VALUES ('av');
-INSERT INTO t1 (a00) VALUES ('aw');
-INSERT INTO t1 (a00) VALUES ('ax');
-INSERT INTO t1 (a00) VALUES ('ay');
-COMMIT;
-# Split leaf (1-5)
-# (aa,ad,ak,ar,ay)
-# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak,al,am,an,ao,ap,aq)(ar,as,at,au,av,aw,ax)(ay)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('az');
-INSERT INTO t1 (a00) VALUES ('ba');
-INSERT INTO t1 (a00) VALUES ('bb');
-INSERT INTO t1 (a00) VALUES ('bc');
-INSERT INTO t1 (a00) VALUES ('bd');
-INSERT INTO t1 (a00) VALUES ('be');
-INSERT INTO t1 (a00) VALUES ('bf');
-COMMIT;
-# Split leaf (1-6)
-# (aa,ad,ak,ar,ay,bf)
-# (aa,ab,ac)(ad..)(ak..)(ar,as,at,au,av,aw,ax)(ay,az,ba,bb,bc,bd,be)(bf)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('bg');
-INSERT INTO t1 (a00) VALUES ('bh');
-INSERT INTO t1 (a00) VALUES ('bi');
-INSERT INTO t1 (a00) VALUES ('bj');
-INSERT INTO t1 (a00) VALUES ('bk');
-INSERT INTO t1 (a00) VALUES ('bl');
-INSERT INTO t1 (a00) VALUES ('bm');
-COMMIT;
-# Split leaf (1-7)
-# (aa,ad,ak,ar,ay,bf,bm)
-# (aa,ab,ac)(ad..)(ak..)(ar..)(ay,az,ba,bb,bc,bd,be)(bf,bg,bh,bi,bj,bk,bl)(bm)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('bn');
-INSERT INTO t1 (a00) VALUES ('bo');
-INSERT INTO t1 (a00) VALUES ('bp');
-INSERT INTO t1 (a00) VALUES ('bq');
-INSERT INTO t1 (a00) VALUES ('br');
-INSERT INTO t1 (a00) VALUES ('bs');
-INSERT INTO t1 (a00) VALUES ('bt');
-COMMIT;
-# Raise root (1-2-8)
-# (aa,ar)
-# (aa,ad,ak) (ar,ay,bf,bm,bt)
-# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('bu');
-INSERT INTO t1 (a00) VALUES ('bv');
-INSERT INTO t1 (a00) VALUES ('bw');
-INSERT INTO t1 (a00) VALUES ('bx');
-INSERT INTO t1 (a00) VALUES ('by');
-INSERT INTO t1 (a00) VALUES ('bz');
-INSERT INTO t1 (a00) VALUES ('ca');
-
-INSERT INTO t1 (a00) VALUES ('cb');
-INSERT INTO t1 (a00) VALUES ('cc');
-INSERT INTO t1 (a00) VALUES ('cd');
-INSERT INTO t1 (a00) VALUES ('ce');
-INSERT INTO t1 (a00) VALUES ('cf');
-INSERT INTO t1 (a00) VALUES ('cg');
-INSERT INTO t1 (a00) VALUES ('ch');
-
-INSERT INTO t1 (a00) VALUES ('ci');
-INSERT INTO t1 (a00) VALUES ('cj');
-INSERT INTO t1 (a00) VALUES ('ck');
-INSERT INTO t1 (a00) VALUES ('cl');
-INSERT INTO t1 (a00) VALUES ('cm');
-INSERT INTO t1 (a00) VALUES ('cn');
-INSERT INTO t1 (a00) VALUES ('co');
-COMMIT;
-# Split also at level 1 (1-3-11)
-# (aa,ar,co)
-# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co)
-# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('cp');
-INSERT INTO t1 (a00) VALUES ('cq');
-INSERT INTO t1 (a00) VALUES ('cr');
-INSERT INTO t1 (a00) VALUES ('cs');
-INSERT INTO t1 (a00) VALUES ('ct');
-INSERT INTO t1 (a00) VALUES ('cu');
-INSERT INTO t1 (a00) VALUES ('cv');
-
-INSERT INTO t1 (a00) VALUES ('cw');
-INSERT INTO t1 (a00) VALUES ('cx');
-INSERT INTO t1 (a00) VALUES ('cy');
-INSERT INTO t1 (a00) VALUES ('cz');
-INSERT INTO t1 (a00) VALUES ('da');
-INSERT INTO t1 (a00) VALUES ('db');
-INSERT INTO t1 (a00) VALUES ('dc');
-
-INSERT INTO t1 (a00) VALUES ('dd');
-INSERT INTO t1 (a00) VALUES ('de');
-INSERT INTO t1 (a00) VALUES ('df');
-INSERT INTO t1 (a00) VALUES ('dg');
-INSERT INTO t1 (a00) VALUES ('dh');
-INSERT INTO t1 (a00) VALUES ('di');
-INSERT INTO t1 (a00) VALUES ('dj');
-
-INSERT INTO t1 (a00) VALUES ('dk');
-INSERT INTO t1 (a00) VALUES ('dl');
-INSERT INTO t1 (a00) VALUES ('dm');
-INSERT INTO t1 (a00) VALUES ('dn');
-INSERT INTO t1 (a00) VALUES ('do');
-INSERT INTO t1 (a00) VALUES ('dp');
-INSERT INTO t1 (a00) VALUES ('dq');
-
-INSERT INTO t1 (a00) VALUES ('dr');
-INSERT INTO t1 (a00) VALUES ('ds');
-INSERT INTO t1 (a00) VALUES ('dt');
-INSERT INTO t1 (a00) VALUES ('du');
-INSERT INTO t1 (a00) VALUES ('dv');
-INSERT INTO t1 (a00) VALUES ('dw');
-INSERT INTO t1 (a00) VALUES ('dx');
-
-INSERT INTO t1 (a00) VALUES ('dy');
-INSERT INTO t1 (a00) VALUES ('dz');
-INSERT INTO t1 (a00) VALUES ('ea');
-INSERT INTO t1 (a00) VALUES ('eb');
-INSERT INTO t1 (a00) VALUES ('ec');
-INSERT INTO t1 (a00) VALUES ('ed');
-INSERT INTO t1 (a00) VALUES ('ee');
-
-INSERT INTO t1 (a00) VALUES ('ef');
-INSERT INTO t1 (a00) VALUES ('eg');
-INSERT INTO t1 (a00) VALUES ('eh');
-INSERT INTO t1 (a00) VALUES ('ei');
-INSERT INTO t1 (a00) VALUES ('ej');
-INSERT INTO t1 (a00) VALUES ('ek');
-INSERT INTO t1 (a00) VALUES ('el');
-COMMIT;
-# Split also at level 1 (1-4-18)
-# (aa,ar,co,el)
-# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co,cv,dc,dj,dq,dx,ee) (el)
-# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co..)(cv..)(dc..)(dj..)(dq..)(dx..)(ee..)(el)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('em');
-INSERT INTO t1 (a00) VALUES ('en');
-INSERT INTO t1 (a00) VALUES ('eo');
-INSERT INTO t1 (a00) VALUES ('ep');
-INSERT INTO t1 (a00) VALUES ('eq');
-INSERT INTO t1 (a00) VALUES ('er');
-INSERT INTO t1 (a00) VALUES ('es');
-
-INSERT INTO t1 (a00) VALUES ('et');
-INSERT INTO t1 (a00) VALUES ('eu');
-INSERT INTO t1 (a00) VALUES ('ev');
-INSERT INTO t1 (a00) VALUES ('ew');
-INSERT INTO t1 (a00) VALUES ('ex');
-INSERT INTO t1 (a00) VALUES ('ey');
-INSERT INTO t1 (a00) VALUES ('ez');
-
-INSERT INTO t1 (a00) VALUES ('fa');
-INSERT INTO t1 (a00) VALUES ('fb');
-INSERT INTO t1 (a00) VALUES ('fc');
-INSERT INTO t1 (a00) VALUES ('fd');
-INSERT INTO t1 (a00) VALUES ('fe');
-INSERT INTO t1 (a00) VALUES ('ff');
-INSERT INTO t1 (a00) VALUES ('fg');
-
-INSERT INTO t1 (a00) VALUES ('fh');
-INSERT INTO t1 (a00) VALUES ('fi');
-INSERT INTO t1 (a00) VALUES ('fj');
-INSERT INTO t1 (a00) VALUES ('fk');
-INSERT INTO t1 (a00) VALUES ('fl');
-INSERT INTO t1 (a00) VALUES ('fm');
-INSERT INTO t1 (a00) VALUES ('fn');
-
-INSERT INTO t1 (a00) VALUES ('fo');
-INSERT INTO t1 (a00) VALUES ('fp');
-INSERT INTO t1 (a00) VALUES ('fq');
-INSERT INTO t1 (a00) VALUES ('fr');
-INSERT INTO t1 (a00) VALUES ('fs');
-INSERT INTO t1 (a00) VALUES ('ft');
-INSERT INTO t1 (a00) VALUES ('fu');
-
-INSERT INTO t1 (a00) VALUES ('fv');
-INSERT INTO t1 (a00) VALUES ('fw');
-INSERT INTO t1 (a00) VALUES ('fx');
-INSERT INTO t1 (a00) VALUES ('fy');
-INSERT INTO t1 (a00) VALUES ('fz');
-INSERT INTO t1 (a00) VALUES ('ga');
-INSERT INTO t1 (a00) VALUES ('gb');
-
-INSERT INTO t1 (a00) VALUES ('gc');
-INSERT INTO t1 (a00) VALUES ('gd');
-INSERT INTO t1 (a00) VALUES ('ge');
-INSERT INTO t1 (a00) VALUES ('gf');
-INSERT INTO t1 (a00) VALUES ('gg');
-INSERT INTO t1 (a00) VALUES ('gh');
-COMMIT;
-
-# Current tree form (1-4-24)
-# (aa,ar,co,el)
-# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co,cv,dc,dj,dq,dx,ee) (el..,gb)
-# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co..)(cv..)(dc..)(dj..)(dq..)(dx..)(ee..)(el..)..(gb..)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-# Insert the rest of records normally
-SET GLOBAL innodb_limit_optimistic_insert_debug = 0;
-
---echo # Test start
-
-# (1) Insert records to leaf page (bf..) and cause modify_page.
-# - root page is not X latched
-# - latched from level 1 page (ar,ay,bf,bm,bt,ca,ch)
-
-SET DEBUG_SYNC = 'RESET';
-
-# Filling leaf page (bf..)
-INSERT INTO t1 (a00) VALUES ('bfa');
-
---connection con1
-SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue';
-# Cause modify_tree
---send
-INSERT INTO t1 (a00) VALUES ('bfb');
-
---connection con2
-SET DEBUG_SYNC = 'now WAIT_FOR reached';
-# Not blocked searches
-SELECT a00,a01 FROM t1 WHERE a00 = 'aa';
-SELECT a00,a01 FROM t1 WHERE a00 = 'aq';
-# "where a00 = 'co'" is blocked because searching from smaller ('co','a','a',..).
-SELECT a00,a01 FROM t1 WHERE a00 = 'cp';
-SELECT a00,a01 FROM t1 WHERE a00 = 'el';
-
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1';
-# Blocked
---send
-SELECT a00,a01 FROM t1 WHERE a00 = 'ar';
-
---connection con3
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2';
-# Blocked
---send
-SELECT a00,a01 FROM t1 WHERE a00 = 'cn';
-
---connection default
-# FIXME: These occasionally time out!
---disable_warnings
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1';
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1';
---enable_warnings
-SET DEBUG_SYNC = 'now SIGNAL continue';
-
---connection con1
---reap
-
---connection con2
---reap
-
---connection con3
---reap
-
---connection default
-
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-
-
-# (2) Insert records to leaf page (co..) and cause modify_page
-# - root page is X latched, because node_ptr for 'co'
-# is 1st record for (co,cv,dc,dj,dq,dx,ee)
-#
-# * ordinary pessimitic insert might be done by pessistic update
-# and we should consider possibility node_ptr to be deleted.
-
-SET DEBUG_SYNC = 'RESET';
-
-# Filling leaf page (co..)
-INSERT INTO t1 (a00) VALUES ('coa');
-
---connection con1
-SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue';
-# Cause modify_tree
---send
-INSERT INTO t1 (a00) VALUES ('cob');
-
---connection con2
-SET DEBUG_SYNC = 'now WAIT_FOR reached';
-# All searches are blocked because root page is X latched
-
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1';
-# Blocked
---send
-SELECT a00,a01 FROM t1 WHERE a00 = 'aa';
-
---connection con3
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2';
-# Blocked
---send
-SELECT a00,a01 FROM t1 WHERE a00 = 'el';
-
---connection default
-# FIXME: These occasionally time out!
---disable_warnings
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1';
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1';
---enable_warnings
-SET DEBUG_SYNC = 'now SIGNAL continue';
-
---connection con1
---reap
-
---connection con2
---reap
-
---connection con3
---reap
-
---connection default
-
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-
-
-# (3) Insert records to rightmost leaf page (gb..) and cause modify_page
-# - root page is not X latched, because node_ptr for 'gb' is the last record
-# of the level 1 though it is last record in the page.
-# - lathed from level 1 page (el..,gb)
-
-SET DEBUG_SYNC = 'RESET';
-
-# Filling leaf page (gb..)
-INSERT INTO t1 (a00) VALUES ('gba');
-
---connection con1
-SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue';
-# Cause modify_tree
---send
-INSERT INTO t1 (a00) VALUES ('gbb');
-
---connection con2
-SET DEBUG_SYNC = 'now WAIT_FOR reached';
-# Not blocked searches
-SELECT a00,a01 FROM t1 WHERE a00 = 'aa';
-SELECT a00,a01 FROM t1 WHERE a00 = 'ek';
-
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1';
-# Blocked
---send
-SELECT a00,a01 FROM t1 WHERE a00 = 'el';
-
---connection con3
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2';
-# Blocked
---send
-SELECT a00,a01 FROM t1 WHERE a00 = 'gb';
-
---connection default
-# FIXME: These occasionally time out!
---disable_warnings
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1';
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1';
---enable_warnings
-SET DEBUG_SYNC = 'now SIGNAL continue';
-
---connection con1
---reap
-
---connection con2
---reap
-
---connection con3
---reap
-
---connection default
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-
-
-# Cleanup
-SET DEBUG_SYNC = 'RESET';
-
---connection default
---disconnect con1
---disconnect con2
---disconnect con3
-
-DROP TABLE t1;
-
---disable_query_log
-SET GLOBAL innodb_limit_optimistic_insert_debug = @old_innodb_limit_optimistic_insert_debug;
-SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index;
-SET GLOBAL innodb_stats_persistent = @old_innodb_stats_persistent;
---enable_query_log
-
-# Wait till all disconnects are completed.
---source include/wait_until_count_sessions.inc
diff --git a/mysql-test/suite/innodb/t/insert_into_empty.test b/mysql-test/suite/innodb/t/insert_into_empty.test
index 68ae83626cb..64043e383dc 100644
--- a/mysql-test/suite/innodb/t/insert_into_empty.test
+++ b/mysql-test/suite/innodb/t/insert_into_empty.test
@@ -51,6 +51,10 @@ SET tx_read_only=1;
BEGIN;
INSERT INTO t2 VALUES(0);
INSERT INTO t VALUES(0);
+ROLLBACK;
+BEGIN;
+INSERT INTO t2 VALUES(0);
+INSERT INTO t VALUES(0);
COMMIT;
INSERT INTO t VALUES(0);
diff --git a/mysql-test/suite/innodb/t/undo_truncate.opt b/mysql-test/suite/innodb/t/undo_truncate.opt
index f4d78725c6e..1459ec5db74 100644
--- a/mysql-test/suite/innodb/t/undo_truncate.opt
+++ b/mysql-test/suite/innodb/t/undo_truncate.opt
@@ -1 +1,2 @@
--innodb-buffer-pool-size=24M
+--innodb-immediate-scrub-data-uncompressed=ON
diff --git a/mysql-test/suite/innodb_gis/r/rtree_split.result b/mysql-test/suite/innodb_gis/r/rtree_split.result
index 8e475776ce0..97027bde865 100644
--- a/mysql-test/suite/innodb_gis/r/rtree_split.result
+++ b/mysql-test/suite/innodb_gis/r/rtree_split.result
@@ -61,3 +61,15 @@ select count(*) from t1 where MBRWithin(t1.c2, @g1);
count(*)
57344
drop table t1;
+#
+# MDEV-30400 Assertion height == btr_page_get_level ... on INSERT
+#
+CREATE TABLE t1 (c POINT NOT NULL,SPATIAL (c)) ENGINE=InnoDB;
+SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug;
+SET GLOBAL innodb_limit_optimistic_insert_debug=2;
+BEGIN;
+INSERT INTO t1 SELECT POINTFROMTEXT ('POINT(0 0)') FROM seq_1_to_6;
+ROLLBACK;
+SET GLOBAL innodb_limit_optimistic_insert_debug=@save_limit;
+DROP TABLE t1;
+# End of 10.6 tests
diff --git a/mysql-test/suite/innodb_gis/t/rtree_split.test b/mysql-test/suite/innodb_gis/t/rtree_split.test
index 6f285187508..a23315dc3f3 100644
--- a/mysql-test/suite/innodb_gis/t/rtree_split.test
+++ b/mysql-test/suite/innodb_gis/t/rtree_split.test
@@ -73,3 +73,18 @@ select count(*) from t1 where MBRWithin(t1.c2, @g1);
# Clean up.
drop table t1;
+
+--echo #
+--echo # MDEV-30400 Assertion height == btr_page_get_level ... on INSERT
+--echo #
+
+CREATE TABLE t1 (c POINT NOT NULL,SPATIAL (c)) ENGINE=InnoDB;
+SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug;
+SET GLOBAL innodb_limit_optimistic_insert_debug=2;
+BEGIN;
+INSERT INTO t1 SELECT POINTFROMTEXT ('POINT(0 0)') FROM seq_1_to_6;
+ROLLBACK;
+SET GLOBAL innodb_limit_optimistic_insert_debug=@save_limit;
+DROP TABLE t1;
+
+--echo # End of 10.6 tests
diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc
index 943db803242..dd12149ff48 100644
--- a/sql/service_wsrep.cc
+++ b/sql/service_wsrep.cc
@@ -238,7 +238,7 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
}
victim_thd->wsrep_aborter= bf_thd->thread_id;
- victim_thd->awake_no_mutex(KILL_QUERY);
+ victim_thd->awake_no_mutex(KILL_QUERY_HARD);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
} else {
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake, signal %d", signal);
@@ -417,3 +417,23 @@ extern "C" void wsrep_thd_set_PA_unsafe(THD *thd)
WSREP_DEBUG("session does not have active transaction, can not mark as PA unsafe");
}
}
+
+extern "C" int wsrep_thd_append_table_key(MYSQL_THD thd,
+ const char* db,
+ const char* table,
+ enum Wsrep_service_key_type key_type)
+{
+ wsrep_key_arr_t key_arr = {0, 0};
+ int ret = wsrep_prepare_keys_for_isolation(thd, db, table, NULL, &key_arr);
+ ret = ret || wsrep_thd_append_key(thd, key_arr.keys,
+ (int)key_arr.keys_len, key_type);
+ wsrep_keys_free(&key_arr);
+ return ret;
+}
+
+extern "C" my_bool wsrep_thd_is_local_transaction(const THD *thd)
+{
+ return (wsrep_thd_is_local(thd) &&
+ thd->wsrep_cs().transaction().active());
+}
+
diff --git a/sql/sp_head.cc b/sql/sp_head.cc
index b1a89998467..85a812f85d9 100644
--- a/sql/sp_head.cc
+++ b/sql/sp_head.cc
@@ -1520,7 +1520,7 @@ sp_head::execute(THD *thd, bool merge_da_on_success)
wsrep_current_error_status(thd));
thd->wsrep_cs().reset_error();
/* Reset also thd->killed if it has been set during BF abort. */
- if (thd->killed == KILL_QUERY)
+ if (killed_mask_hard(thd->killed) == KILL_QUERY)
thd->killed= NOT_KILLED;
/* if failed transaction was not replayed, must return with error from here */
if (!must_replay) err_status = 1;
diff --git a/sql/sql_plugin_services.inl b/sql/sql_plugin_services.inl
index e883081e9f7..3a66e982e7b 100644
--- a/sql/sql_plugin_services.inl
+++ b/sql/sql_plugin_services.inl
@@ -162,6 +162,8 @@ static struct wsrep_service_st wsrep_handler = {
wsrep_thd_is_local,
wsrep_thd_self_abort,
wsrep_thd_append_key,
+ wsrep_thd_append_table_key,
+ wsrep_thd_is_local_transaction,
wsrep_thd_client_state_str,
wsrep_thd_client_mode_str,
wsrep_thd_transaction_state_str,
diff --git a/sql/wsrep_dummy.cc b/sql/wsrep_dummy.cc
index ac14fc4597a..9bfaf9285f3 100644
--- a/sql/wsrep_dummy.cc
+++ b/sql/wsrep_dummy.cc
@@ -101,6 +101,12 @@ void wsrep_thd_self_abort(THD *)
int wsrep_thd_append_key(THD *, const struct wsrep_key*, int, enum Wsrep_service_key_type)
{ return 0; }
+int wsrep_thd_append_table_key(THD *, const char*, const char*, enum Wsrep_service_key_type)
+{ return 0; }
+
+my_bool wsrep_thd_is_local_transaction(const THD*)
+{ return 0; }
+
const char* wsrep_thd_client_state_str(const THD*)
{ return 0; }
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index 49b363b76d3..e422ef47b89 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -332,12 +332,14 @@ SET(INNOBASE_SOURCES
include/row0upd.inl
include/row0vers.h
include/rw_lock.h
+ include/small_vector.h
include/srv0mon.h
include/srv0mon.inl
include/srv0srv.h
include/srv0start.h
include/srw_lock.h
include/sux_lock.h
+ include/transactional_lock_guard.h
include/trx0i_s.h
include/trx0purge.h
include/trx0rec.h
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 73d88596743..d12c395aa17 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2022, MariaDB Corporation.
+Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -460,6 +460,54 @@ btr_page_create(
}
}
+buf_block_t *
+mtr_t::get_already_latched(const page_id_t id, mtr_memo_type_t type) const
+{
+ ut_ad(is_active());
+ ut_ad(type == MTR_MEMO_PAGE_X_FIX || type == MTR_MEMO_PAGE_SX_FIX ||
+ type == MTR_MEMO_PAGE_S_FIX);
+ for (ulint i= 0; i < m_memo.size(); i++)
+ {
+ const mtr_memo_slot_t &slot= m_memo[i];
+ const auto slot_type= mtr_memo_type_t(slot.type & ~MTR_MEMO_MODIFY);
+ if (slot_type == MTR_MEMO_PAGE_X_FIX || slot_type == type)
+ {
+ buf_block_t *block= static_cast<buf_block_t*>(slot.object);
+ if (block->page.id() == id)
+ return block;
+ }
+ }
+ return nullptr;
+}
+
+/** Fetch an index root page that was already latched in the
+mini-transaction. */
+static buf_block_t *btr_get_latched_root(const dict_index_t &index, mtr_t *mtr)
+{
+ return mtr->get_already_latched(page_id_t{index.table->space_id, index.page},
+ MTR_MEMO_PAGE_SX_FIX);
+}
+
+/** Fetch an index page that should have been already latched in the
+mini-transaction. */
+static buf_block_t *
+btr_block_reget(mtr_t *mtr, const dict_index_t &index,
+ const page_id_t id, rw_lock_type_t rw_latch,
+ dberr_t *err)
+{
+ if (buf_block_t *block=
+ mtr->get_already_latched(id, mtr_memo_type_t(rw_latch)))
+ {
+ *err= DB_SUCCESS;
+ return block;
+ }
+
+#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */
+ ut_ad(mtr->memo_contains_flagged(&index.lock, MTR_MEMO_X_LOCK));
+#endif
+ return btr_block_get(index, id.page_no(), rw_latch, true, mtr, err);
+}
+
/**************************************************************//**
Allocates a new file page to be used in an ibuf tree. Takes the page from
the free list of the tree, which must contain pages!
@@ -472,18 +520,16 @@ btr_page_alloc_for_ibuf(
mtr_t* mtr, /*!< in: mtr */
dberr_t* err) /*!< out: error code */
{
- buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, err);
+ buf_block_t *root= btr_get_latched_root(*index, mtr);
if (UNIV_UNLIKELY(!root))
return root;
-
buf_block_t *new_block=
- buf_page_get_gen(page_id_t(index->table->space_id,
+ buf_page_get_gen(page_id_t(IBUF_SPACE_ID,
mach_read_from_4(PAGE_HEADER +
PAGE_BTR_IBUF_FREE_LIST +
FLST_FIRST + FIL_ADDR_PAGE +
root->page.frame)),
- index->table->space->zip_size(), RW_X_LATCH, nullptr,
- BUF_GET, mtr, err);
+ 0, RW_X_LATCH, nullptr, BUF_GET, mtr, err);
if (new_block)
*err= flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, new_block,
PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
@@ -523,11 +569,11 @@ btr_page_alloc_low(
#ifdef BTR_CUR_HASH_ADAPT
ut_ad(!root->index || !root->index->freed());
#endif
- mtr->release_block_at_savepoint(savepoint, root);
+ mtr->rollback_to_savepoint(savepoint);
}
else
{
- mtr->u_lock_register(savepoint);
+ mtr->lock_register(savepoint, MTR_MEMO_PAGE_SX_FIX);
root->page.lock.u_lock();
#ifdef BTR_CUR_HASH_ADAPT
btr_search_drop_page_hash_index(root, true);
@@ -579,15 +625,12 @@ btr_page_free_for_ibuf(
mtr_t* mtr) /*!< in: mtr */
{
ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
-
- dberr_t err;
- if (buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, &err))
- {
- err= flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+ buf_block_t *root= btr_get_latched_root(*index, mtr);
+ dberr_t err=
+ flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
- ut_d(if (err == DB_SUCCESS)
- flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
- }
+ ut_d(if (err == DB_SUCCESS)
+ flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
return err;
}
@@ -637,11 +680,11 @@ dberr_t btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
#ifdef BTR_CUR_HASH_ADAPT
ut_ad(!root->index || !root->index->freed());
#endif
- mtr->release_block_at_savepoint(savepoint, root);
+ mtr->rollback_to_savepoint(savepoint);
}
else
{
- mtr->u_lock_register(savepoint);
+ mtr->lock_register(savepoint, MTR_MEMO_PAGE_SX_FIX);
root->page.lock.u_lock();
#ifdef BTR_CUR_HASH_ADAPT
btr_search_drop_page_hash_index(root, true);
@@ -712,35 +755,27 @@ btr_node_ptr_get_child(
mtr, err);
}
-MY_ATTRIBUTE((nonnull(2,3,5), warn_unused_result))
+MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result))
/************************************************************//**
Returns the upper level node pointer to a page. It is assumed that mtr holds
an sx-latch on the tree.
@return rec_get_offsets() of the node pointer record */
static
rec_offs*
-btr_page_get_father_node_ptr_func(
-/*==============================*/
+btr_page_get_father_node_ptr_for_validate(
rec_offs* offsets,/*!< in: work area for the return value */
mem_heap_t* heap, /*!< in: memory heap to use */
btr_cur_t* cursor, /*!< in: cursor pointing to user record,
out: cursor on node pointer record,
its page x-latched */
- btr_latch_mode latch_mode,/*!< in: BTR_CONT_MODIFY_TREE
- or BTR_CONT_SEARCH_TREE */
mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(latch_mode == BTR_CONT_MODIFY_TREE
- || latch_mode == BTR_CONT_SEARCH_TREE);
-
const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no();
dict_index_t* index = btr_cur_get_index(cursor);
ut_ad(!dict_index_is_spatial(index));
- ut_ad(srv_read_only_mode
- || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
-
+ ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
ut_ad(dict_index_get_page(index) != page_no);
const auto level = btr_page_get_level(btr_cur_get_page(cursor));
@@ -752,12 +787,16 @@ btr_page_get_father_node_ptr_func(
dict_index_build_node_ptr(index,
user_rec, 0,
heap, level),
- PAGE_CUR_LE, latch_mode,
+ RW_S_LATCH,
cursor, mtr) != DB_SUCCESS) {
return nullptr;
}
const rec_t* node_ptr = btr_cur_get_rec(cursor);
+#if 0 /* MDEV-29835 FIXME */
+ ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive()
+ || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
+#endif
offsets = rec_get_offsets(node_ptr, index, offsets, 0,
ULINT_UNDEFINED, &heap);
@@ -769,13 +808,65 @@ btr_page_get_father_node_ptr_func(
return(offsets);
}
-#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \
- btr_page_get_father_node_ptr_func( \
- of,heap,cur,BTR_CONT_MODIFY_TREE,mtr)
+MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result))
+/** Return the node pointer to a page.
+@param offsets work area for the return value
+@param heap memory heap
+@param cursor in: child page; out: node pointer to it
+@param mtr mini-transaction
+@return rec_get_offsets() of the node pointer record
+@retval nullptr if the parent page had not been latched in mtr */
+static rec_offs *btr_page_get_parent(rec_offs *offsets, mem_heap_t *heap,
+ btr_cur_t *cursor, mtr_t *mtr)
+{
+ const uint32_t page_no= cursor->block()->page.id().page_no();
+ const dict_index_t *index= cursor->index();
+ ut_ad(!index->is_spatial());
+ ut_ad(index->page != page_no);
+
+ uint32_t p= index->page;
+ auto level= btr_page_get_level(cursor->block()->page.frame);
+ const dtuple_t *tuple=
+ dict_index_build_node_ptr(index, btr_cur_get_rec(cursor), 0, heap, level);
+ level++;
+
+ ulint i;
+ for (i= 0; i < mtr->get_savepoint(); i++)
+ if (buf_block_t *block= mtr->block_at_savepoint(i))
+ if (block->page.id().page_no() == p)
+ {
+ ut_ad(block->page.lock.have_u_or_x() ||
+ (!block->page.lock.have_s() && index->lock.have_x()));
+ ulint up_match= 0, low_match= 0;
+ cursor->page_cur.block= block;
+ if (page_cur_search_with_match(tuple, PAGE_CUR_LE, &up_match,
+ &low_match, &cursor->page_cur,
+ nullptr))
+ return nullptr;
+ offsets= rec_get_offsets(cursor->page_cur.rec, index, offsets, 0,
+ ULINT_UNDEFINED, &heap);
+ p= btr_node_ptr_get_child_page_no(cursor->page_cur.rec, offsets);
+ if (p != page_no)
+ {
+ if (btr_page_get_level(block->page.frame) == level)
+ return nullptr;
+ i= 0; // MDEV-29835 FIXME: require all pages to be latched in order!
+ continue;
+ }
+ ut_ad(block->page.lock.have_u_or_x());
+ if (block->page.lock.have_u_not_x())
+ {
+ /* btr_cur_t::search_leaf(BTR_MODIFY_TREE) only U-latches the
+ root page initially. */
+ ut_ad(block->page.id().page_no() == index->page);
+ block->page.lock.u_x_upgrade();
+ mtr->page_lock_upgrade(*block);
+ }
+ return offsets;
+ }
-#define btr_page_get_father_node_ptr_for_validate(of,heap,cur,mtr) \
- btr_page_get_father_node_ptr_func( \
- of,heap,cur,BTR_CONT_SEARCH_TREE,mtr)
+ return nullptr;
+}
/************************************************************//**
Returns the upper level node pointer to a page. It is assumed that mtr holds
@@ -796,7 +887,7 @@ btr_page_get_father_block(
if (UNIV_UNLIKELY(!rec))
return nullptr;
cursor->page_cur.rec= rec;
- return btr_page_get_father_node_ptr(offsets, heap, cursor, mtr);
+ return btr_page_get_parent(offsets, heap, cursor, mtr);
}
/** Seek to the parent page of a B-tree page.
@@ -811,7 +902,7 @@ bool btr_page_get_father(mtr_t* mtr, btr_cur_t* cursor)
return false;
cursor->page_cur.rec= rec;
mem_heap_t *heap= mem_heap_create(100);
- const bool got= btr_page_get_father_node_ptr(nullptr, heap, cursor, mtr);
+ const bool got= btr_page_get_parent(nullptr, heap, cursor, mtr);
mem_heap_free(heap);
return got;
}
@@ -1718,48 +1809,43 @@ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr)
/** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE.
@param[in] index clustered index with instant ALTER TABLE
@param[in] all whether to reset FIL_PAGE_TYPE as well
-@param[in,out] mtr mini-transaction
-@return error code */
+@param[in,out] mtr mini-transaction */
ATTRIBUTE_COLD
-dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr)
+void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr)
{
ut_ad(!index.table->is_temporary());
ut_ad(index.is_primary());
- dberr_t err;
- if (buf_block_t *root= btr_root_block_get(&index, RW_SX_LATCH, mtr, &err))
+ buf_block_t *root= btr_get_latched_root(index, mtr);
+ byte *page_type= root->page.frame + FIL_PAGE_TYPE;
+ if (all)
{
- byte *page_type= root->page.frame + FIL_PAGE_TYPE;
- if (all)
- {
- ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT ||
- mach_read_from_2(page_type) == FIL_PAGE_INDEX);
- mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX);
- byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame;
- mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant,
- page_ptr_get_direction(instant + 1));
- }
- else
- ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT);
- static const byte supremuminfimum[8 + 8] = "supremuminfimum";
- uint16_t infimum, supremum;
- if (page_is_comp(root->page.frame))
- {
- infimum= PAGE_NEW_INFIMUM;
- supremum= PAGE_NEW_SUPREMUM;
- }
- else
- {
- infimum= PAGE_OLD_INFIMUM;
- supremum= PAGE_OLD_SUPREMUM;
- }
- ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) ==
- !memcmp(&root->page.frame[supremum], supremuminfimum, 8));
- mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[infimum],
- supremuminfimum + 8, 8);
- mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[supremum],
- supremuminfimum, 8);
+ ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT ||
+ mach_read_from_2(page_type) == FIL_PAGE_INDEX);
+ mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX);
+ byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame;
+ mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant,
+ page_ptr_get_direction(instant + 1));
}
- return err;
+ else
+ ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT);
+ static const byte supremuminfimum[8 + 8] = "supremuminfimum";
+ uint16_t infimum, supremum;
+ if (page_is_comp(root->page.frame))
+ {
+ infimum= PAGE_NEW_INFIMUM;
+ supremum= PAGE_NEW_SUPREMUM;
+ }
+ else
+ {
+ infimum= PAGE_OLD_INFIMUM;
+ supremum= PAGE_OLD_SUPREMUM;
+ }
+ ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) ==
+ !memcmp(&root->page.frame[supremum], supremuminfimum, 8));
+ mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[infimum],
+ supremuminfimum + 8, 8);
+ mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[supremum],
+ supremuminfimum, 8);
}
/*************************************************************//**
@@ -1856,11 +1942,6 @@ btr_root_raise_and_insert(
}
/* Copy the records from root to the new page one by one. */
- dberr_t e;
- if (!err) {
- err = &e;
- }
-
if (0
#ifdef UNIV_ZIP_COPY
|| new_page_zip
@@ -2004,21 +2085,15 @@ btr_root_raise_and_insert(
page_cursor->block = new_block;
page_cursor->index = index;
- if (tuple) {
- ut_ad(dtuple_check_typed(tuple));
- /* Reposition the cursor to the child node */
- ulint low_match = 0, up_match = 0;
+ ut_ad(dtuple_check_typed(tuple));
+ /* Reposition the cursor to the child node */
+ ulint low_match = 0, up_match = 0;
- if (page_cur_search_with_match(tuple, PAGE_CUR_LE,
- &up_match, &low_match,
- page_cursor, nullptr)) {
- if (err) {
- *err = DB_CORRUPTION;
- }
- return nullptr;
- }
- } else {
- page_cursor->rec = page_get_infimum_rec(new_block->page.frame);
+ if (page_cur_search_with_match(tuple, PAGE_CUR_LE,
+ &up_match, &low_match,
+ page_cursor, nullptr)) {
+ *err = DB_CORRUPTION;
+ return nullptr;
}
/* Split the child and insert tuple */
@@ -2237,6 +2312,7 @@ func_exit:
return(rec);
}
+#ifdef UNIV_DEBUG
/*************************************************************//**
Returns TRUE if the insert fits on the appropriate half-page with the
chosen split_rec.
@@ -2335,6 +2411,7 @@ got_rec:
return(false);
}
+#endif
/*******************************************************//**
Inserts a data tuple to a tree on a non-leaf level. It is assumed
@@ -2357,25 +2434,34 @@ btr_insert_on_non_leaf_level(
rtr_info_t rtr_info;
ut_ad(level > 0);
- auto mode = PAGE_CUR_LE;
+
+ flags |= BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG;
+ cursor.page_cur.index = index;
+
+ dberr_t err;
if (index->is_spatial()) {
- mode = PAGE_CUR_RTREE_INSERT;
/* For spatial index, initialize structures to track
its parents etc. */
rtr_init_rtr_info(&rtr_info, false, &cursor, index, false);
rtr_info_update_btr(&cursor, &rtr_info);
+ err = rtr_search_to_nth_level(level, tuple,
+ PAGE_CUR_RTREE_INSERT,
+ BTR_CONT_MODIFY_TREE,
+ &cursor, mtr);
+ } else {
+ err = btr_cur_search_to_nth_level(level, tuple, RW_X_LATCH,
+ &cursor, mtr);
}
- flags |= BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG;
- cursor.page_cur.index = index;
-
- dberr_t err = btr_cur_search_to_nth_level(level, tuple, mode,
- BTR_CONT_MODIFY_TREE,
- &cursor, mtr);
ut_ad(cursor.flag == BTR_CUR_BINARY);
+#if 0 /* MDEV-29835 FIXME */
+ ut_ad(!btr_cur_get_block(&cursor)->page.lock.not_recursive()
+ || index->is_spatial()
+ || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
+#endif
if (UNIV_LIKELY(err == DB_SUCCESS)) {
err = btr_cur_optimistic_insert(flags,
@@ -2471,6 +2557,7 @@ btr_attach_half_pages(
/* Get the level of the split pages */
const ulint level = btr_page_get_level(block->page.frame);
ut_ad(level == btr_page_get_level(new_block->page.frame));
+ page_id_t id{block->page.id()};
/* Get the previous and next pages of page */
const uint32_t prev_page_no = btr_page_get_prev(block->page.frame);
@@ -2478,12 +2565,32 @@ btr_attach_half_pages(
/* for consistency, both blocks should be locked, before change */
if (prev_page_no != FIL_NULL && direction == FSP_DOWN) {
- prev_block = btr_block_get(*index, prev_page_no, RW_X_LATCH,
- !level, mtr);
+ id.set_page_no(prev_page_no);
+ prev_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
+#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
+ if (!prev_block) {
+# if 0 /* MDEV-29835 FIXME */
+ ut_ad(mtr->memo_contains(index->lock,
+ MTR_MEMO_X_LOCK));
+# endif
+ prev_block = btr_block_get(*index, prev_page_no,
+ RW_X_LATCH, !level, mtr);
+ }
+#endif
}
if (next_page_no != FIL_NULL && direction != FSP_DOWN) {
- next_block = btr_block_get(*index, next_page_no, RW_X_LATCH,
- !level, mtr);
+ id.set_page_no(next_page_no);
+ next_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
+#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
+ if (!next_block) {
+# if 0 /* MDEV-29835 FIXME */
+ ut_ad(mtr->memo_contains(index->lock,
+ MTR_MEMO_X_LOCK));
+# endif
+ next_block = btr_block_get(*index, next_page_no,
+ RW_X_LATCH, !level, mtr);
+ }
+#endif
}
/* Build the node pointer (= node key and page address) for the upper
@@ -3020,6 +3127,7 @@ insert_empty:
return nullptr;
}
+#ifdef UNIV_DEBUG
/* If the split is made on the leaf level and the insert will fit
on the appropriate half-page, we may release the tree x-latch.
We can then move the records after releasing the tree latch,
@@ -3027,21 +3135,21 @@ insert_empty:
const bool insert_will_fit = !new_page_zip
&& btr_page_insert_fits(cursor, split_rec, offsets, tuple,
n_ext, heap);
+#endif
if (!split_rec && !insert_left) {
UT_DELETE_ARRAY(buf);
buf = NULL;
}
- if (!srv_read_only_mode
- && insert_will_fit
+#if 0 // FIXME: this used to be a no-op, and may cause trouble if enabled
+ if (insert_will_fit
&& page_is_leaf(page)
&& !dict_index_is_online_ddl(cursor->index())) {
-#if 0 // FIXME: this used to be a no-op, and may cause trouble if enabled
mtr->release(cursor->index()->lock);
-#endif
/* NOTE: We cannot release root block latch here, because it
has segment header and already modified in most of cases.*/
}
+#endif
/* 5. Move then the records to the new page */
if (direction == FSP_DOWN) {
@@ -3273,52 +3381,58 @@ func_exit:
dberr_t btr_level_list_remove(const buf_block_t& block,
const dict_index_t& index, mtr_t* mtr)
{
- ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(block.zip_size() == index.table->space->zip_size());
- ut_ad(index.table->space->id == block.page.id().space());
- /* Get the previous and next page numbers of page */
-
- const page_t* page = block.page.frame;
- const uint32_t prev_page_no = btr_page_get_prev(page);
- const uint32_t next_page_no = btr_page_get_next(page);
-
- /* Update page links of the level */
- dberr_t err;
+ ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(block.zip_size() == index.table->space->zip_size());
+ ut_ad(index.table->space->id == block.page.id().space());
+ /* Get the previous and next page numbers of page */
+ const uint32_t prev_page_no= btr_page_get_prev(block.page.frame);
+ const uint32_t next_page_no= btr_page_get_next(block.page.frame);
+ page_id_t id{block.page.id()};
+ buf_block_t *prev= nullptr, *next;
+ dberr_t err;
- if (prev_page_no != FIL_NULL) {
- buf_block_t* prev_block = btr_block_get(
- index, prev_page_no, RW_X_LATCH, page_is_leaf(page),
- mtr, &err);
- if (UNIV_UNLIKELY(!prev_block)) {
- return err;
- }
- if (UNIV_UNLIKELY(memcmp_aligned<4>(prev_block->page.frame
- + FIL_PAGE_NEXT,
- page + FIL_PAGE_OFFSET,
- 4))) {
- return DB_CORRUPTION;
- }
- btr_page_set_next(prev_block, next_page_no, mtr);
- }
+ /* Update page links of the level */
+ if (prev_page_no != FIL_NULL)
+ {
+ id.set_page_no(prev_page_no);
+ prev= mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
+#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
+ if (!prev)
+ {
+# if 0 /* MDEV-29835 FIXME */
+ ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK));
+# endif
+ prev= btr_block_get(index, id.page_no(), RW_X_LATCH,
+ page_is_leaf(block.page.frame), mtr, &err);
+ if (UNIV_UNLIKELY(!prev))
+ return err;
+ }
+#endif
+ }
- if (next_page_no != FIL_NULL) {
- buf_block_t* next_block = btr_block_get(
- index, next_page_no, RW_X_LATCH, page_is_leaf(page),
- mtr, &err);
+ if (next_page_no != FIL_NULL)
+ {
+ id.set_page_no(next_page_no);
+ next= mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
+#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
+ if (!next)
+ {
+# if 0 /* MDEV-29835 FIXME */
+ ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK));
+# endif
+ next= btr_block_get(index, id.page_no(), RW_X_LATCH,
+ page_is_leaf(block.page.frame), mtr, &err);
+ if (UNIV_UNLIKELY(!next))
+ return err;
+ }
+#endif
+ btr_page_set_prev(next, prev_page_no, mtr);
+ }
- if (UNIV_UNLIKELY(!next_block)) {
- return err;
- }
- if (UNIV_UNLIKELY(memcmp_aligned<4>(next_block->page.frame
- + FIL_PAGE_PREV,
- page + FIL_PAGE_OFFSET,
- 4))) {
- return DB_CORRUPTION;
- }
- btr_page_set_prev(next_block, prev_page_no, mtr);
- }
+ if (prev)
+ btr_page_set_next(prev, next_page_no, mtr);
- return DB_SUCCESS;
+ return DB_SUCCESS;
}
/*************************************************************//**
@@ -4168,23 +4282,30 @@ btr_discard_page(
const uint32_t left_page_no = btr_page_get_prev(block->page.frame);
const uint32_t right_page_no = btr_page_get_next(block->page.frame);
+ page_id_t merge_page_id{block->page.id()};
ut_d(bool parent_is_different = false);
+ dberr_t err;
if (left_page_no != FIL_NULL) {
- dberr_t err;
- merge_block = btr_block_get(*index, left_page_no, RW_X_LATCH,
- true, mtr, &err);
+ merge_page_id.set_page_no(left_page_no);
+ merge_block = btr_block_reget(mtr, *index, merge_page_id,
+ RW_X_LATCH, &err);
if (UNIV_UNLIKELY(!merge_block)) {
return err;
}
-
+#if 0 /* MDEV-29835 FIXME: Acquire the page latch upfront. */
+ ut_ad(!memcmp_aligned<4>(merge_block->page.frame
+ + FIL_PAGE_NEXT,
+ block->page.frame + FIL_PAGE_OFFSET,
+ 4));
+#else
if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame
+ FIL_PAGE_NEXT,
block->page.frame
+ FIL_PAGE_OFFSET, 4))) {
return DB_CORRUPTION;
}
-
+#endif
ut_d(parent_is_different =
(page_rec_get_next(
page_get_infimum_rec(
@@ -4192,19 +4313,25 @@ btr_discard_page(
&parent_cursor)))
== btr_cur_get_rec(&parent_cursor)));
} else if (right_page_no != FIL_NULL) {
- dberr_t err;
- merge_block = btr_block_get(*index, right_page_no, RW_X_LATCH,
- true, mtr, &err);
+ merge_page_id.set_page_no(right_page_no);
+ merge_block = btr_block_reget(mtr, *index, merge_page_id,
+ RW_X_LATCH, &err);
if (UNIV_UNLIKELY(!merge_block)) {
return err;
}
+#if 0 /* MDEV-29835 FIXME: Acquire the page latch upfront. */
+ ut_ad(!memcmp_aligned<4>(merge_block->page.frame
+ + FIL_PAGE_PREV,
+ block->page.frame + FIL_PAGE_OFFSET,
+ 4));
+#else
if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame
+ FIL_PAGE_PREV,
block->page.frame
+ FIL_PAGE_OFFSET, 4))) {
return DB_CORRUPTION;
}
-
+#endif
ut_d(parent_is_different = page_rec_is_supremum(
page_rec_get_next(btr_cur_get_rec(&parent_cursor))));
if (page_is_leaf(merge_block->page.frame)) {
@@ -4246,13 +4373,10 @@ btr_discard_page(
}
#ifdef UNIV_ZIP_DEBUG
- {
- page_zip_des_t* merge_page_zip
- = buf_block_get_page_zip(merge_block);
- ut_a(!merge_page_zip
- || page_zip_validate(merge_page_zip,
- merge_block->page.frame, index));
- }
+ if (page_zip_des_t* merge_page_zip
+ = buf_block_get_page_zip(merge_block)) /* skip validation when null */
+ ut_a(page_zip_validate(merge_page_zip,
+ merge_block->page.frame, index));
#endif /* UNIV_ZIP_DEBUG */
if (index->has_locking()) {
@@ -4271,7 +4395,7 @@ btr_discard_page(
}
/* Free the file page */
- dberr_t err = btr_page_free(index, block, mtr);
+ err = btr_page_free(index, block, mtr);
if (err == DB_SUCCESS) {
/* btr_check_node_ptr() needs parent block latched.
@@ -4464,6 +4588,8 @@ btr_check_node_ptr(
offsets = btr_page_get_father_block(NULL, heap, mtr, &cursor);
}
+ ut_ad(offsets);
+
if (page_is_leaf(page)) {
goto func_exit;
@@ -4796,19 +4922,16 @@ btr_validate_level(
page_zip_des_t* page_zip;
#endif /* UNIV_ZIP_DEBUG */
ulint savepoint = 0;
- ulint savepoint2 = 0;
uint32_t parent_page_no = FIL_NULL;
uint32_t parent_right_page_no = FIL_NULL;
bool rightmost_child = false;
mtr.start();
- if (!srv_read_only_mode) {
- if (lockout) {
- mtr_x_lock_index(index, &mtr);
- } else {
- mtr_sx_lock_index(index, &mtr);
- }
+ if (lockout) {
+ mtr_x_lock_index(index, &mtr);
+ } else {
+ mtr_sx_lock_index(index, &mtr);
}
dberr_t err;
@@ -4856,7 +4979,6 @@ corrupted:
offsets = rec_get_offsets(node_ptr, index, offsets, 0,
ULINT_UNDEFINED, &heap);
- savepoint2 = mtr_set_savepoint(&mtr);
block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr,
&err);
if (!block) {
@@ -4877,10 +4999,8 @@ corrupted:
/* To obey latch order of tree blocks,
we should release the right_block once to
obtain lock of the uncle block. */
- mtr_release_block_at_savepoint(
- &mtr, savepoint2, block);
+ mtr.release_last_page();
- savepoint2 = mtr_set_savepoint(&mtr);
block = btr_block_get(*index, left_page_no,
RW_SX_LATCH, false,
&mtr, &err);
@@ -4908,12 +5028,10 @@ func_exit:
mem_heap_empty(heap);
offsets = offsets2 = NULL;
- if (!srv_read_only_mode) {
- if (lockout) {
- mtr_x_lock_index(index, &mtr);
- } else {
- mtr_sx_lock_index(index, &mtr);
- }
+ if (lockout) {
+ mtr_x_lock_index(index, &mtr);
+ } else {
+ mtr_sx_lock_index(index, &mtr);
}
page = block->page.frame;
@@ -4958,7 +5076,7 @@ func_exit:
if (right_page_no != FIL_NULL) {
const rec_t* right_rec;
- savepoint = mtr_set_savepoint(&mtr);
+ savepoint = mtr.get_savepoint();
right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH,
!level, &mtr, &err);
@@ -5152,8 +5270,10 @@ broken_links:
/* To obey latch order of tree blocks,
we should release the right_block once to
obtain lock of the uncle block. */
- mtr_release_block_at_savepoint(
- &mtr, savepoint, right_block);
+ ut_ad(right_block
+ == mtr.at_savepoint(savepoint));
+ mtr.rollback_to_savepoint(savepoint,
+ savepoint + 1);
if (parent_right_page_no != FIL_NULL) {
btr_block_get(*index,
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 2f237bb5957..67b8a68930a 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -3,7 +3,7 @@
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -103,14 +103,14 @@ throughput clearly from about 100000. */
#define BTR_CUR_FINE_HISTORY_LENGTH 100000
#ifdef BTR_CUR_HASH_ADAPT
-/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
+/** Number of searches down the B-tree in btr_cur_t::search_leaf(). */
ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_non_sea;
/** Old value of btr_cur_n_non_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
ulint btr_cur_n_non_sea_old;
/** Number of successful adaptive hash index lookups in
-btr_cur_search_to_nth_level(). */
+btr_cur_t::search_leaf(). */
ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_sea;
/** Old value of btr_cur_n_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
@@ -187,167 +187,6 @@ btr_rec_free_externally_stored_fields(
/*==================== B-TREE SEARCH =========================*/
-/** Latches the leaf page or pages requested.
-@param[in] block leaf page where the search converged
-@param[in] latch_mode BTR_SEARCH_LEAF, ...
-@param[in] cursor cursor
-@param[in] mtr mini-transaction
-@param[out] latch_leaves latched blocks and savepoints */
-void
-btr_cur_latch_leaves(
- buf_block_t* block,
- btr_latch_mode latch_mode,
- btr_cur_t* cursor,
- mtr_t* mtr,
- btr_latch_leaves_t* latch_leaves)
-{
- compile_time_assert(int(MTR_MEMO_PAGE_S_FIX) == int(RW_S_LATCH));
- compile_time_assert(int(MTR_MEMO_PAGE_X_FIX) == int(RW_X_LATCH));
- compile_time_assert(int(MTR_MEMO_PAGE_SX_FIX) == int(RW_SX_LATCH));
- ut_ad(block->page.id().space() == cursor->index()->table->space->id);
- ut_ad(block->page.in_file());
- ut_ad(srv_read_only_mode
- || mtr->memo_contains_flagged(&cursor->index()->lock,
- MTR_MEMO_S_LOCK
- | MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
- auto rtr_info = cursor->rtr_info;
- if (UNIV_LIKELY_NULL(rtr_info) && !cursor->index()->is_spatial()) {
- rtr_info = nullptr;
- }
-
- const rw_lock_type_t mode = rw_lock_type_t(
- latch_mode & (RW_X_LATCH | RW_S_LATCH));
- static_assert(ulint{RW_S_LATCH} == ulint{BTR_SEARCH_LEAF}, "");
- static_assert(ulint{RW_X_LATCH} == ulint{BTR_MODIFY_LEAF}, "");
- static_assert(BTR_SEARCH_LEAF & BTR_SEARCH_TREE, "");
-
- switch (latch_mode) {
- default:
- break;
- uint32_t left_page_no;
- uint32_t right_page_no;
- ulint save;
- case BTR_SEARCH_LEAF:
- case BTR_MODIFY_LEAF:
- case BTR_SEARCH_TREE:
- if (UNIV_LIKELY_NULL(rtr_info)) {
- rtr_info->tree_savepoints[RTR_MAX_LEVELS]
- = mtr->get_savepoint();
- }
-latch_block:
- if (latch_leaves) {
- latch_leaves->savepoints[1] = mtr->get_savepoint();
- latch_leaves->blocks[1] = block;
- }
- block->page.fix();
- mtr->page_lock(block, mode);
- if (UNIV_LIKELY_NULL(rtr_info)) {
- rtr_info->tree_blocks[RTR_MAX_LEVELS] = block;
- }
- return;
- case BTR_MODIFY_TREE:
- /* It is exclusive for other operations which calls
- btr_page_set_prev() */
- ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock,
- MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
- save = mtr->get_savepoint();
- /* x-latch also siblings from left to right */
- left_page_no = btr_page_get_prev(block->page.frame);
-
- if (left_page_no != FIL_NULL) {
- buf_block_t *b = btr_block_get(
- *cursor->index(), left_page_no, RW_X_LATCH,
- true, mtr);
-
- if (latch_leaves) {
- latch_leaves->savepoints[0] = save;
- latch_leaves->blocks[0] = b;
- }
-
- if (UNIV_LIKELY_NULL(rtr_info)) {
- rtr_info->tree_savepoints[RTR_MAX_LEVELS]
- = save;
- rtr_info->tree_blocks[RTR_MAX_LEVELS] = b;
- }
-
- save = mtr->get_savepoint();
- }
-
- if (latch_leaves) {
- latch_leaves->savepoints[1] = mtr->get_savepoint();
- latch_leaves->blocks[1] = block;
- }
-
- block->page.fix();
- block->page.lock.x_lock();
-
- mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
-#ifdef BTR_CUR_HASH_ADAPT
- ut_ad(!btr_search_check_marked_free_index(block));
-#endif
-
- if (UNIV_LIKELY_NULL(rtr_info)) {
- rtr_info->tree_savepoints[RTR_MAX_LEVELS + 1] = save;
- rtr_info->tree_blocks[RTR_MAX_LEVELS + 1] = block;
- }
-
- right_page_no = btr_page_get_next(block->page.frame);
-
- if (right_page_no != FIL_NULL) {
- save = mtr->get_savepoint();
-
- buf_block_t* b = btr_block_get(
- *cursor->index(), right_page_no, RW_X_LATCH,
- true, mtr);
- if (latch_leaves) {
- latch_leaves->savepoints[2] = save;
- latch_leaves->blocks[2] = b;
- }
-
- if (UNIV_LIKELY_NULL(rtr_info)) {
- rtr_info->tree_savepoints[RTR_MAX_LEVELS + 2]
- = save;
- rtr_info->tree_blocks[RTR_MAX_LEVELS + 2] = b;
- }
- }
-
- return;
-
- case BTR_SEARCH_PREV:
- case BTR_MODIFY_PREV:
- ut_ad(!rtr_info);
- static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, "");
- static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, "");
- static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV)
- == (RW_S_LATCH ^ RW_X_LATCH), "");
-
- /* Because we are holding index->lock, no page splits
- or merges may run concurrently, and we may read
- FIL_PAGE_PREV from a buffer-fixed, unlatched page. */
- left_page_no = btr_page_get_prev(block->page.frame);
-
- if (left_page_no != FIL_NULL) {
- save = mtr->get_savepoint();
- cursor->left_block = btr_block_get(
- *cursor->index(), left_page_no,
- mode, true, mtr);
- if (latch_leaves) {
- latch_leaves->savepoints[0] = save;
- latch_leaves->blocks[0] = cursor->left_block;
- }
- }
-
- goto latch_block;
- case BTR_CONT_MODIFY_TREE:
- ut_ad(cursor->index()->is_spatial());
- return;
- }
-
- MY_ASSERT_UNREACHABLE();
-}
-
/** Load the instant ALTER TABLE metadata from the clustered index
when loading a table definition.
@param[in,out] index clustered index definition
@@ -729,98 +568,6 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page)
return index->n_core_null_bytes > 128;
}
-/** Optimistically latches the leaf page or pages requested.
-@param[in] block guessed buffer block
-@param[in] modify_clock modify clock value
-@param[in,out] latch_mode BTR_SEARCH_LEAF, ...
-@param[in,out] cursor cursor
-@param[in] mtr mini-transaction
-@return true if success */
-TRANSACTIONAL_TARGET
-bool
-btr_cur_optimistic_latch_leaves(
- buf_block_t* block,
- ib_uint64_t modify_clock,
- btr_latch_mode* latch_mode,
- btr_cur_t* cursor,
- mtr_t* mtr)
-{
- ut_ad(block->page.buf_fix_count());
- ut_ad(block->page.in_file());
- ut_ad(block->page.frame);
-
- switch (*latch_mode) {
- default:
- MY_ASSERT_UNREACHABLE();
- return(false);
- case BTR_SEARCH_LEAF:
- case BTR_MODIFY_LEAF:
- return(buf_page_optimistic_get(*latch_mode, block,
- modify_clock, mtr));
- case BTR_SEARCH_PREV: /* btr_pcur_move_backward_from_page() */
- case BTR_MODIFY_PREV: /* Ditto, or ibuf_insert() */
- uint32_t curr_page_no, left_page_no;
- {
- transactional_shared_lock_guard<block_lock> g{
- block->page.lock};
- if (block->modify_clock != modify_clock) {
- return false;
- }
- curr_page_no = block->page.id().page_no();
- left_page_no = btr_page_get_prev(block->page.frame);
- }
-
- static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, "");
- static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, "");
- static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV)
- == (RW_S_LATCH ^ RW_X_LATCH), "");
-
- const rw_lock_type_t mode = rw_lock_type_t(
- *latch_mode & (RW_X_LATCH | RW_S_LATCH));
-
- if (left_page_no != FIL_NULL) {
- cursor->left_block = buf_page_get_gen(
- page_id_t(cursor->index()->table->space_id,
- left_page_no),
- cursor->index()->table->space->zip_size(),
- mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr);
-
- if (cursor->left_block
- && btr_page_get_next(
- cursor->left_block->page.frame)
- != curr_page_no) {
-release_left_block:
- mtr->release_last_page();
- return false;
- }
- } else {
- cursor->left_block = nullptr;
- }
-
- if (buf_page_optimistic_get(mode, block, modify_clock, mtr)) {
- if (btr_page_get_prev(block->page.frame)
- == left_page_no) {
- /* block was already buffer-fixed while
- entering the function and
- buf_page_optimistic_get() buffer-fixes
- it again. */
- ut_ad(2 <= block->page.buf_fix_count());
- *latch_mode = btr_latch_mode(mode);
- return(true);
- }
-
- mtr->release_last_page();
- }
-
- ut_ad(block->page.buf_fix_count());
- if (cursor->left_block) {
- goto release_left_block;
- }
- }
-
- return false;
-}
-
/**
Gets intention in btr_intention_t from latch_mode, and clears the intention
at the latch_mode.
@@ -848,38 +595,6 @@ btr_intention_t btr_cur_get_and_clear_intention(btr_latch_mode *latch_mode)
return(intention);
}
-/**
-Gets the desired latch type for the root leaf (root page is root leaf)
-at the latch mode.
-@param latch_mode in: BTR_SEARCH_LEAF, ...
-@return latch type */
-static
-rw_lock_type_t
-btr_cur_latch_for_root_leaf(
- ulint latch_mode)
-{
- switch (latch_mode) {
- case BTR_SEARCH_LEAF:
- case BTR_SEARCH_TREE:
- case BTR_SEARCH_PREV:
- return(RW_S_LATCH);
- case BTR_MODIFY_LEAF:
- case BTR_MODIFY_TREE:
- case BTR_MODIFY_PREV:
- return(RW_X_LATCH);
- case BTR_CONT_MODIFY_TREE:
- case BTR_CONT_SEARCH_TREE:
- /* A root page should be latched already,
- and don't need to be latched here.
- fall through (RW_NO_LATCH) */
- case BTR_NO_LATCHES:
- return(RW_NO_LATCH);
- }
-
- MY_ASSERT_UNREACHABLE();
- return(RW_NO_LATCH); /* avoid compiler warnings */
-}
-
/** @return whether the distance between two records is at most the
specified value */
static bool
@@ -1197,1221 +912,879 @@ static ulint btr_node_ptr_max_size(const dict_index_t* index)
return rec_max_size;
}
-/********************************************************************//**
-Searches an index tree and positions a tree cursor on a given level.
-NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
-to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
-cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will a have a sensible value.
-
-If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
-search tuple should be performed in the B-tree. InnoDB does an insert
-immediately after the cursor. Thus, the cursor may end up on a user record,
-or on a page infimum record.
-@param level the tree level of search
-@param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that
- it cannot get compared to the node ptr page number field!
-@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a
- unique prefix of a record, mode should be PAGE_CUR_LE, not
- PAGE_CUR_GE, as the latter may end up on the previous page of
- the record! Inserts should always be made using PAGE_CUR_LE
- to search the position!
-@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT,
- BTR_DELETE_MARK, or BTR_DELETE;
- cursor->left_block is used to store a pointer to the left
- neighbor page
-@param cursor tree cursor; the cursor page is s- or x-latched, but see also
- above!
-@param mtr mini-transaction
-@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none)
-@return DB_SUCCESS on success or error code otherwise */
-TRANSACTIONAL_TARGET
-dberr_t btr_cur_search_to_nth_level(ulint level,
- const dtuple_t *tuple,
- page_cur_mode_t mode,
- btr_latch_mode latch_mode,
- btr_cur_t *cursor, mtr_t *mtr,
- ib_uint64_t autoinc)
+/** @return a B-tree search mode suitable for non-leaf pages
+@param mode leaf page search mode */
+static inline page_cur_mode_t btr_cur_nonleaf_mode(page_cur_mode_t mode)
{
- page_t* page = NULL; /* remove warning */
- buf_block_t* block;
- buf_block_t* guess;
- ulint height;
- ulint up_match;
- ulint up_bytes;
- ulint low_match;
- ulint low_bytes;
- ulint rw_latch;
- page_cur_mode_t page_mode;
- page_cur_mode_t search_mode = PAGE_CUR_UNSUPP;
- ulint buf_mode;
- ulint node_ptr_max_size = srv_page_size / 2;
- page_cur_t* page_cursor;
- btr_op_t btr_op;
- ulint root_height = 0; /* remove warning */
-
- btr_intention_t lock_intention;
- buf_block_t* tree_blocks[BTR_MAX_LEVELS];
- ulint tree_savepoints[BTR_MAX_LEVELS];
- ulint n_blocks = 0;
- ulint n_releases = 0;
- bool detected_same_key_root = false;
-
- ulint leftmost_from_level = 0;
- buf_block_t** prev_tree_blocks = NULL;
- ulint* prev_tree_savepoints = NULL;
- ulint prev_n_blocks = 0;
- ulint prev_n_releases = 0;
- bool need_path = true;
- bool rtree_parent_modified = false;
- bool mbr_adj = false;
- bool found = false;
- dict_index_t * const index = cursor->index();
-
- DBUG_ENTER("btr_cur_search_to_nth_level");
-
-#ifdef BTR_CUR_ADAPT
- btr_search_t* info;
-#endif /* BTR_CUR_ADAPT */
- mem_heap_t* heap = NULL;
- rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs* offsets = offsets_;
- rec_offs offsets2_[REC_OFFS_NORMAL_SIZE];
- rec_offs* offsets2 = offsets2_;
- rec_offs_init(offsets_);
- rec_offs_init(offsets2_);
- /* Currently, PAGE_CUR_LE is the only search mode used for searches
- ending to upper levels */
-
- ut_ad(level == 0 || mode == PAGE_CUR_LE
- || RTREE_SEARCH_MODE(mode));
- ut_ad(dict_index_check_search_tuple(index, tuple));
- ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr));
- ut_ad(dtuple_check_typed(tuple));
- ut_ad(!(index->type & DICT_FTS));
- ut_ad(index->page != FIL_NULL);
-
- MEM_UNDEFINED(&cursor->up_match, sizeof cursor->up_match);
- MEM_UNDEFINED(&cursor->up_bytes, sizeof cursor->up_bytes);
- MEM_UNDEFINED(&cursor->low_match, sizeof cursor->low_match);
- MEM_UNDEFINED(&cursor->low_bytes, sizeof cursor->low_bytes);
-#ifdef UNIV_DEBUG
- cursor->up_match = ULINT_UNDEFINED;
- cursor->low_match = ULINT_UNDEFINED;
-#endif /* UNIV_DEBUG */
-
- const bool latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED;
-
- ut_ad(!latch_by_caller
- || srv_read_only_mode
- || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK
- | MTR_MEMO_SX_LOCK));
-
- /* These flags are mutually exclusive, they are lumped together
- with the latch mode for historical reasons. It's possible for
- none of the flags to be set. */
- switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) {
- default:
- btr_op = BTR_NO_OP;
- break;
- case BTR_INSERT:
- btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE)
- ? BTR_INSERT_IGNORE_UNIQUE_OP
- : BTR_INSERT_OP;
- break;
- case BTR_DELETE:
- btr_op = BTR_DELETE_OP;
- ut_a(cursor->purge_node);
- break;
- case BTR_DELETE_MARK:
- btr_op = BTR_DELMARK_OP;
- break;
- }
+ if (mode > PAGE_CUR_GE)
+ {
+ ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
+ return mode;
+ }
+ if (mode == PAGE_CUR_GE)
+ return PAGE_CUR_L;
+ ut_ad(mode == PAGE_CUR_G);
+ return PAGE_CUR_LE;
+}
- /* Operations on the insert buffer tree cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index));
- /* Operations on the clustered index cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index));
- /* Operations on the temporary table(indexes) cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !index->table->is_temporary());
- /* Operation on the spatial index cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !dict_index_is_spatial(index));
+dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
+ btr_latch_mode latch_mode, mtr_t *mtr)
+{
+ ut_ad(index()->is_btree() || index()->is_ibuf());
+ ut_ad(!index()->is_ibuf() || ibuf_inside(mtr));
- lock_intention = btr_cur_get_and_clear_intention(&latch_mode);
+ buf_block_t *guess;
+ btr_op_t btr_op;
+ btr_intention_t lock_intention;
+ bool detected_same_key_root= false;
- /* Turn the flags unrelated to the latch mode off. */
- latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+ mem_heap_t* heap = NULL;
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets = offsets_;
+ rec_offs offsets2_[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets2 = offsets2_;
+ rec_offs_init(offsets_);
+ rec_offs_init(offsets2_);
+
+ ut_ad(dict_index_check_search_tuple(index(), tuple));
+ ut_ad(dtuple_check_typed(tuple));
+ ut_ad(index()->page != FIL_NULL);
+
+ MEM_UNDEFINED(&up_match, sizeof up_match);
+ MEM_UNDEFINED(&up_bytes, sizeof up_bytes);
+ MEM_UNDEFINED(&low_match, sizeof low_match);
+ MEM_UNDEFINED(&low_bytes, sizeof low_bytes);
+ ut_d(up_match= ULINT_UNDEFINED);
+ ut_d(low_match= ULINT_UNDEFINED);
+
+ ut_ad(!(latch_mode & BTR_ALREADY_S_LATCHED) ||
+ mtr->memo_contains_flagged(&index()->lock,
+ MTR_MEMO_S_LOCK | MTR_MEMO_SX_LOCK |
+ MTR_MEMO_X_LOCK));
+
+ /* These flags are mutually exclusive, they are lumped together
+ with the latch mode for historical reasons. It's possible for
+ none of the flags to be set. */
+ switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) {
+ default:
+ btr_op= BTR_NO_OP;
+ break;
+ case BTR_INSERT:
+ btr_op= (latch_mode & BTR_IGNORE_SEC_UNIQUE)
+ ? BTR_INSERT_IGNORE_UNIQUE_OP
+ : BTR_INSERT_OP;
+ break;
+ case BTR_DELETE:
+ btr_op= BTR_DELETE_OP;
+ ut_a(purge_node);
+ break;
+ case BTR_DELETE_MARK:
+ btr_op= BTR_DELMARK_OP;
+ break;
+ }
- ut_ad(!latch_by_caller
- || latch_mode == BTR_SEARCH_LEAF
- || latch_mode == BTR_SEARCH_TREE
- || latch_mode == BTR_MODIFY_LEAF);
+ /* Operations on the insert buffer tree cannot be buffered. */
+ ut_ad(btr_op == BTR_NO_OP || !index()->is_ibuf());
+ /* Operations on the clustered index cannot be buffered. */
+ ut_ad(btr_op == BTR_NO_OP || !index()->is_clust());
+ /* Operations on the temporary table(indexes) cannot be buffered. */
+ ut_ad(btr_op == BTR_NO_OP || !index()->table->is_temporary());
- ut_ad(autoinc == 0 || dict_index_is_clust(index));
- ut_ad(autoinc == 0
- || latch_mode == BTR_MODIFY_TREE
- || latch_mode == BTR_MODIFY_LEAF);
- ut_ad(autoinc == 0 || level == 0);
+ const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED;
+ lock_intention= btr_cur_get_and_clear_intention(&latch_mode);
+ latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
- cursor->flag = BTR_CUR_BINARY;
+ ut_ad(!latch_by_caller
+ || latch_mode == BTR_SEARCH_LEAF
+ || latch_mode == BTR_MODIFY_LEAF
+ || latch_mode == BTR_MODIFY_TREE
+ || latch_mode == BTR_MODIFY_ROOT_AND_LEAF);
+ flag= BTR_CUR_BINARY;
#ifndef BTR_CUR_ADAPT
- guess = NULL;
+ guess= nullptr;
#else
- info = btr_search_get_info(index);
- guess = info->root_guess;
-
-#ifdef BTR_CUR_HASH_ADAPT
+ btr_search_t *info= btr_search_get_info(index());
+ guess= info->root_guess;
+
+# ifdef BTR_CUR_HASH_ADAPT
+# ifdef UNIV_SEARCH_PERF_STAT
+ info->n_searches++;
+# endif
+ /* We do a dirty read of btr_search_enabled below,
+ and btr_search_guess_on_hash() will have to check it again. */
+ if (!btr_search_enabled);
+ else if (btr_search_guess_on_hash(index(), info, tuple, mode,
+ latch_mode, this, mtr))
+ {
+ /* Search using the hash index succeeded */
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE);
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ ++btr_cur_n_sea;
-# ifdef UNIV_SEARCH_PERF_STAT
- info->n_searches++;
+ return DB_SUCCESS;
+ }
+ else
+ ++btr_cur_n_non_sea;
# endif
- /* We do a dirty read of btr_search_enabled below,
- and btr_search_guess_on_hash() will have to check it again. */
- if (!btr_search_enabled) {
- } else if (autoinc == 0
- && latch_mode <= BTR_MODIFY_LEAF
-# ifdef PAGE_CUR_LE_OR_EXTENDS
- && mode != PAGE_CUR_LE_OR_EXTENDS
-# endif /* PAGE_CUR_LE_OR_EXTENDS */
- && info->last_hash_succ
- && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG)
- && index->is_btree() && !index->table->is_temporary()
- && btr_search_guess_on_hash(index, info, tuple, mode,
- latch_mode, cursor, mtr)) {
-
- /* Search using the hash index succeeded */
-
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_GE);
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- ut_ad(cursor->low_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- ++btr_cur_n_sea;
-
- DBUG_RETURN(DB_SUCCESS);
- } else {
- ++btr_cur_n_non_sea;
- }
-# endif /* BTR_CUR_HASH_ADAPT */
-#endif /* BTR_CUR_ADAPT */
-
- /* If the hash search did not succeed, do binary search down the
- tree */
-
- /* Store the position of the tree latch we push to mtr so that we
- know how to release it when we have latched leaf node(s) */
-
- ulint savepoint = mtr_set_savepoint(mtr);
-
- rw_lock_type_t upper_rw_latch;
-
- switch (latch_mode) {
- case BTR_MODIFY_TREE:
- /* Most of delete-intended operations are purging.
- Free blocks and read IO bandwidth should be prior
- for them, when the history list is glowing huge. */
- if (lock_intention == BTR_INTENTION_DELETE
- && buf_pool.n_pend_reads
- && trx_sys.history_size_approx()
- > BTR_CUR_FINE_HISTORY_LENGTH) {
-x_latch_index:
- mtr_x_lock_index(index, mtr);
- } else if (index->is_spatial()
- && lock_intention <= BTR_INTENTION_BOTH) {
- /* X lock the if there is possibility of
- pessimistic delete on spatial index. As we could
- lock upward for the tree */
- goto x_latch_index;
- } else {
- mtr_sx_lock_index(index, mtr);
- }
- upper_rw_latch = RW_X_LATCH;
- break;
- case BTR_CONT_MODIFY_TREE:
- ut_ad(srv_read_only_mode
- || mtr->memo_contains_flagged(&index->lock,
- MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
- if (index->is_spatial()) {
- /* If we are about to locate parent page for split
- and/or merge operation for R-Tree index, X latch
- the parent */
- upper_rw_latch = RW_X_LATCH;
- break;
- }
- /* fall through */
- case BTR_CONT_SEARCH_TREE:
- /* Do nothing */
- ut_ad(srv_read_only_mode
- || mtr->memo_contains_flagged(&index->lock,
- MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
- upper_rw_latch = RW_NO_LATCH;
- break;
- default:
- if (!srv_read_only_mode) {
- if (!latch_by_caller) {
- ut_ad(latch_mode != BTR_SEARCH_TREE);
- mtr_s_lock_index(index, mtr);
- }
- upper_rw_latch = RW_S_LATCH;
- } else {
- upper_rw_latch = RW_NO_LATCH;
- }
- }
- const rw_lock_type_t root_leaf_rw_latch = btr_cur_latch_for_root_leaf(
- latch_mode);
-
- page_cursor = btr_cur_get_page_cur(cursor);
- page_cursor->index = index;
-
- const ulint zip_size = index->table->space->zip_size();
-
- /* Start with the root page. */
- page_id_t page_id(index->table->space_id, index->page);
-
- if (root_leaf_rw_latch == RW_X_LATCH) {
- node_ptr_max_size = btr_node_ptr_max_size(index);
- }
-
- up_match = 0;
- up_bytes = 0;
- low_match = 0;
- low_bytes = 0;
-
- height = ULINT_UNDEFINED;
-
- /* We use these modified search modes on non-leaf levels of the
- B-tree. These let us end up in the right B-tree leaf. In that leaf
- we use the original search mode. */
-
- switch (mode) {
- case PAGE_CUR_GE:
- page_mode = PAGE_CUR_L;
- break;
- case PAGE_CUR_G:
- page_mode = PAGE_CUR_LE;
- break;
- default:
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
- || RTREE_SEARCH_MODE(mode)
- || mode == PAGE_CUR_LE_OR_EXTENDS);
-#else /* PAGE_CUR_LE_OR_EXTENDS */
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
- || RTREE_SEARCH_MODE(mode));
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- page_mode = mode;
- break;
- }
-
- /* Loop and search until we arrive at the desired level */
- btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}};
-
-search_loop:
- buf_mode = BUF_GET;
- rw_latch = RW_NO_LATCH;
- rtree_parent_modified = false;
-
- if (height != 0) {
- /* We are about to fetch the root or a non-leaf page. */
- if ((latch_mode != BTR_MODIFY_TREE || height == level)
- && !prev_tree_blocks) {
- /* If doesn't have SX or X latch of index,
- each pages should be latched before reading. */
- if (height == ULINT_UNDEFINED
- && upper_rw_latch == RW_S_LATCH
- && autoinc) {
- /* needs sx-latch of root page
- for writing PAGE_ROOT_AUTO_INC */
- rw_latch = RW_SX_LATCH;
- } else {
- rw_latch = upper_rw_latch;
- }
- }
- } else if (latch_mode <= BTR_MODIFY_LEAF) {
- rw_latch = latch_mode;
-
- if (btr_op != BTR_NO_OP
- && ibuf_should_try(index, btr_op != BTR_INSERT_OP)) {
-
- /* Try to buffer the operation if the leaf
- page is not in the buffer pool. */
-
- buf_mode = btr_op == BTR_DELETE_OP
- ? BUF_GET_IF_IN_POOL_OR_WATCH
- : BUF_GET_IF_IN_POOL;
- }
- }
-
-retry_page_get:
- ut_ad(n_blocks < BTR_MAX_LEVELS);
- tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
- dberr_t err;
- block = buf_page_get_gen(page_id, zip_size, rw_latch, guess,
- buf_mode, mtr, &err,
- height == 0 && !index->is_clust());
- if (!block) {
- switch (err) {
- case DB_SUCCESS:
- /* change buffering */
- break;
- case DB_DECRYPTION_FAILED:
- btr_decryption_failed(*index);
- /* fall through */
- default:
- goto func_exit;
- }
-
- /* This must be a search to perform an insert/delete
- mark/ delete; try using the insert/delete buffer */
-
- ut_ad(height == 0);
- ut_ad(cursor->thr);
-
- switch (btr_op) {
- default:
- MY_ASSERT_UNREACHABLE();
- break;
- case BTR_INSERT_OP:
- case BTR_INSERT_IGNORE_UNIQUE_OP:
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
-
- if (ibuf_insert(IBUF_OP_INSERT, tuple, index,
- page_id, zip_size, cursor->thr)) {
-
- cursor->flag = BTR_CUR_INSERT_TO_IBUF;
-
- goto func_exit;
- }
- break;
-
- case BTR_DELMARK_OP:
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
-
- if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
- index, page_id, zip_size,
- cursor->thr)) {
-
- cursor->flag = BTR_CUR_DEL_MARK_IBUF;
-
- goto func_exit;
- }
-
- break;
-
- case BTR_DELETE_OP:
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
- ut_ad(index->is_btree());
- auto& chain = buf_pool.page_hash.cell_get(
- page_id.fold());
-
- if (!row_purge_poss_sec(cursor->purge_node,
- index, tuple)) {
-
- /* The record cannot be purged yet. */
- cursor->flag = BTR_CUR_DELETE_REF;
- } else if (ibuf_insert(IBUF_OP_DELETE, tuple,
- index, page_id, zip_size,
- cursor->thr)) {
+#endif
- /* The purge was buffered. */
- cursor->flag = BTR_CUR_DELETE_IBUF;
- } else {
- /* The purge could not be buffered. */
- buf_pool.watch_unset(page_id, chain);
- break;
- }
+ /* If the hash search did not succeed, do binary search down the
+ tree */
- buf_pool.watch_unset(page_id, chain);
- goto func_exit;
- }
+ /* Store the position of the tree latch we push to mtr so that we
+ know how to release it when we have latched leaf node(s) */
- /* Insert to the insert/delete buffer did not succeed, we
- must read the page from disk. */
+ const ulint savepoint= mtr->get_savepoint();
- buf_mode = BUF_GET;
+ ulint node_ptr_max_size= 0;
+ rw_lock_type_t rw_latch= RW_S_LATCH;
- goto retry_page_get;
- }
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ rw_latch= RW_X_LATCH;
+ node_ptr_max_size= btr_node_ptr_max_size(index());
+ if (latch_by_caller)
+ {
+ ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK));
+ break;
+ }
+ if (lock_intention == BTR_INTENTION_DELETE && buf_pool.n_pend_reads &&
+ trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH)
+ /* Most delete-intended operations are due to the purge of history.
+ Prioritize them when the history list is growing huge. */
+ mtr_x_lock_index(index(), mtr);
+ else
+ mtr_sx_lock_index(index(), mtr);
+ break;
+#ifdef UNIV_DEBUG
+ case BTR_CONT_MODIFY_TREE:
+ ut_ad("invalid mode" == 0);
+ break;
+#endif
+ case BTR_MODIFY_ROOT_AND_LEAF:
+ rw_latch= RW_SX_LATCH;
+ /* fall through */
+ default:
+ if (!latch_by_caller)
+ mtr_s_lock_index(index(), mtr);
+ }
- tree_blocks[n_blocks] = block;
+ const ulint zip_size= index()->table->space->zip_size();
- if (height && prev_tree_blocks) {
- /* also latch left sibling */
- ut_ad(rw_latch == RW_NO_LATCH);
+ /* Start with the root page. */
+ page_id_t page_id(index()->table->space_id, index()->page);
- rw_latch = upper_rw_latch;
+ const page_cur_mode_t page_mode= btr_cur_nonleaf_mode(mode);
+ ulint height= ULINT_UNDEFINED;
+ up_match= 0;
+ up_bytes= 0;
+ low_match= 0;
+ low_bytes= 0;
+ ulint buf_mode= BUF_GET;
+ search_loop:
+ dberr_t err;
+ auto block_savepoint= mtr->get_savepoint();
+ buf_block_t *block=
+ buf_page_get_gen(page_id, zip_size, rw_latch, guess, buf_mode, mtr,
+ &err, height == 0 && !index()->is_clust());
+ if (!block)
+ {
+ switch (err) {
+ case DB_DECRYPTION_FAILED:
+ btr_decryption_failed(*index());
+ /* fall through */
+ default:
+ func_exit:
+ if (UNIV_LIKELY_NULL(heap))
+ mem_heap_free(heap);
+ return err;
+ case DB_SUCCESS:
+ /* This must be a search to perform an insert, delete mark, or delete;
+ try using the change buffer */
+ ut_ad(height == 0);
+ ut_ad(thr);
+ break;
+ }
- /* Because we are holding index->lock, no page splits
- or merges may run concurrently, and we may read
- FIL_PAGE_PREV from a buffer-fixed, unlatched page. */
- uint32_t left_page_no = btr_page_get_prev(block->page.frame);
+ switch (btr_op) {
+ default:
+ MY_ASSERT_UNREACHABLE();
+ break;
+ case BTR_INSERT_OP:
+ case BTR_INSERT_IGNORE_UNIQUE_OP:
+ ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
- if (left_page_no != FIL_NULL) {
- ut_ad(prev_n_blocks < leftmost_from_level);
+ if (ibuf_insert(IBUF_OP_INSERT, tuple, index(), page_id, zip_size, thr))
+ {
+ flag= BTR_CUR_INSERT_TO_IBUF;
+ goto func_exit;
+ }
+ break;
- prev_tree_savepoints[prev_n_blocks]
- = mtr_set_savepoint(mtr);
- buf_block_t* get_block = buf_page_get_gen(
- page_id_t(page_id.space(), left_page_no),
- zip_size, rw_latch, NULL, buf_mode,
- mtr, &err);
- if (!get_block) {
- if (err == DB_DECRYPTION_FAILED) {
- btr_decryption_failed(*index);
- }
- goto func_exit;
- }
+ case BTR_DELMARK_OP:
+ ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
- prev_tree_blocks[prev_n_blocks++] = get_block;
- /* BTR_MODIFY_TREE doesn't update prev/next_page_no,
- without their parent page's lock. So, not needed to
- retry here, because we have the parent page's lock. */
- }
+ if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
+ index(), page_id, zip_size, thr))
+ {
+ flag = BTR_CUR_DEL_MARK_IBUF;
+ goto func_exit;
+ }
- mtr->s_lock_register(tree_savepoints[n_blocks]);
- block->page.lock.s_lock();
- }
+ break;
- page = buf_block_get_frame(block);
+ case BTR_DELETE_OP:
+ ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
+ auto& chain = buf_pool.page_hash.cell_get(page_id.fold());
+
+ if (!row_purge_poss_sec(purge_node, index(), tuple))
+ /* The record cannot be purged yet. */
+ flag= BTR_CUR_DELETE_REF;
+ else if (ibuf_insert(IBUF_OP_DELETE, tuple, index(),
+ page_id, zip_size, thr))
+ /* The purge was buffered. */
+ flag= BTR_CUR_DELETE_IBUF;
+ else
+ {
+ /* The purge could not be buffered. */
+ buf_pool.watch_unset(page_id, chain);
+ break;
+ }
- if (height == ULINT_UNDEFINED
- && page_is_leaf(page)
- && rw_latch != RW_NO_LATCH
- && rw_latch != root_leaf_rw_latch) {
- /* The root page is also a leaf page (root_leaf).
- We should reacquire the page, because the root page
- is latched differently from leaf pages. */
- ut_ad(root_leaf_rw_latch != RW_NO_LATCH);
- ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH);
- ut_ad(rw_latch == RW_S_LATCH || autoinc);
- ut_ad(!autoinc || root_leaf_rw_latch == RW_X_LATCH);
+ buf_pool.watch_unset(page_id, chain);
+ goto func_exit;
+ }
- ut_ad(n_blocks == 0);
- mtr_release_block_at_savepoint(
- mtr, tree_savepoints[n_blocks],
- tree_blocks[n_blocks]);
+ /* Change buffering did not succeed, we must read the page. */
+ buf_mode= BUF_GET;
+ goto search_loop;
+ }
- upper_rw_latch = root_leaf_rw_latch;
- goto search_loop;
- }
+ if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() ||
+ btr_page_get_index_id(block->page.frame) != index()->id ||
+ fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE ||
+ !fil_page_index_page_check(block->page.frame))
+ {
+ corrupted:
+ ut_ad("corrupted" == 0); // FIXME: remove this
+ err= DB_CORRUPTION;
+ goto func_exit;
+ }
+ page_cur.block= block;
+ ut_ad(block == mtr->at_savepoint(block_savepoint));
#ifdef UNIV_ZIP_DEBUG
- if (rw_latch != RW_NO_LATCH) {
- const page_zip_des_t* page_zip
- = buf_block_get_page_zip(block);
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
- }
+ if (rw_latch == RW_NO_LATCH);
+ else if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block))
+ ut_a(page_zip_validate(page_zip, block->page.frame, index()));
#endif /* UNIV_ZIP_DEBUG */
+ const uint32_t page_level= btr_page_get_level(block->page.frame);
- ut_ad(fil_page_index_page_check(page));
- ut_ad(index->id == btr_page_get_index_id(page));
-
- if (height == ULINT_UNDEFINED) {
- /* We are in the root node */
-
- height = btr_page_get_level(page);
- root_height = height;
- cursor->tree_height = root_height + 1;
-
- if (dict_index_is_spatial(index)) {
- ut_ad(cursor->rtr_info);
-
- /* If SSN in memory is not initialized, fetch
- it from root page */
- if (!rtr_get_current_ssn_id(index)) {
- /* FIXME: do this in dict_load_table_one() */
- index->set_ssn(page_get_ssn_id(page) + 1);
- }
-
- /* Save the MBR */
- cursor->rtr_info->thr = cursor->thr;
- rtr_get_mbr_from_tuple(tuple, &cursor->rtr_info->mbr);
- }
-
+ if (height == ULINT_UNDEFINED)
+ {
+ /* We are in the B-tree index root page. */
#ifdef BTR_CUR_ADAPT
- info->root_guess = block;
+ info->root_guess= block;
#endif
- }
-
- if (height == 0) {
- if (rw_latch == RW_NO_LATCH) {
- btr_cur_latch_leaves(block, latch_mode, cursor, mtr,
- &latch_leaves);
- }
-
- switch (latch_mode) {
- case BTR_MODIFY_TREE:
- case BTR_CONT_MODIFY_TREE:
- case BTR_CONT_SEARCH_TREE:
- break;
- default:
- if (!latch_by_caller
- && !srv_read_only_mode) {
- /* Release the tree s-latch */
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint,
- &index->lock);
- }
-
- /* release upper blocks */
- if (prev_tree_blocks) {
- ut_ad(!autoinc);
- for (;
- prev_n_releases < prev_n_blocks;
- prev_n_releases++) {
- mtr_release_block_at_savepoint(
- mtr,
- prev_tree_savepoints[
- prev_n_releases],
- prev_tree_blocks[
- prev_n_releases]);
- }
- }
-
- for (; n_releases < n_blocks; n_releases++) {
- if (n_releases == 0
- && (autoinc)) {
- /* keep the root page latch */
- ut_ad(mtr->memo_contains_flagged(
- tree_blocks[n_releases],
- MTR_MEMO_PAGE_SX_FIX
- | MTR_MEMO_PAGE_X_FIX));
- continue;
- }
-
- mtr_release_block_at_savepoint(
- mtr, tree_savepoints[n_releases],
- tree_blocks[n_releases]);
- }
- }
-
- page_mode = mode;
- }
-
- if (dict_index_is_spatial(index)) {
- /* Remember the page search mode */
- search_mode = page_mode;
-
- /* Some adjustment on search mode, when the
- page search mode is PAGE_CUR_RTREE_LOCATE
- or PAGE_CUR_RTREE_INSERT, as we are searching
- with MBRs. When it is not the target level, we
- should search all sub-trees that "CONTAIN" the
- search range/MBR. When it is at the target
- level, the search becomes PAGE_CUR_LE */
- if (page_mode == PAGE_CUR_RTREE_LOCATE
- && level == height) {
- if (level == 0) {
- page_mode = PAGE_CUR_LE;
- } else {
- page_mode = PAGE_CUR_RTREE_GET_FATHER;
- }
- }
+ height= page_level;
+ tree_height= height + 1;
- if (page_mode == PAGE_CUR_RTREE_INSERT) {
- page_mode = (level == height)
- ? PAGE_CUR_LE
- : PAGE_CUR_RTREE_INSERT;
-
- ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE);
- }
-
- /* "need_path" indicates if we need to tracking the parent
- pages, if it is not spatial comparison, then no need to
- track it */
- if (page_mode < PAGE_CUR_CONTAIN) {
- need_path = false;
- }
-
- up_match = 0;
- low_match = 0;
-
- if (latch_mode == BTR_MODIFY_TREE
- || latch_mode == BTR_CONT_MODIFY_TREE
- || latch_mode == BTR_CONT_SEARCH_TREE) {
- /* Tree are locked, no need for Page Lock to protect
- the "path" */
- cursor->rtr_info->need_page_lock = false;
- }
+ if (!height)
+ {
+ /* The root page is also a leaf page.
+ We may have to reacquire the page latch in a different mode. */
+ switch (rw_latch) {
+ case RW_S_LATCH:
+ if ((latch_mode & ~12) != RW_S_LATCH)
+ {
+ ut_ad(rw_lock_type_t(latch_mode & ~12) == RW_X_LATCH);
+ goto relatch_x;
}
+ if (latch_mode != BTR_MODIFY_PREV)
+ {
+ if (!latch_by_caller)
+ /* Release the tree s-latch */
+ mtr->rollback_to_savepoint(savepoint, savepoint + 1);
+ goto reached_latched_leaf;
+ }
+ /* fall through */
+ case RW_SX_LATCH:
+ ut_ad(rw_latch == RW_S_LATCH ||
+ latch_mode == BTR_MODIFY_ROOT_AND_LEAF);
+ relatch_x:
+ mtr->rollback_to_savepoint(block_savepoint);
+ height= ULINT_UNDEFINED;
+ rw_latch= RW_X_LATCH;
+ goto search_loop;
+ case RW_X_LATCH:
+ if (latch_mode == BTR_MODIFY_TREE)
+ goto reached_index_root_and_leaf;
+ goto reached_root_and_leaf;
+ case RW_NO_LATCH:
+ ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK));
+ }
+ goto reached_leaf;
+ }
+ }
+ else if (UNIV_UNLIKELY(height != page_level))
+ goto corrupted;
+ else
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ break;
+ case BTR_MODIFY_ROOT_AND_LEAF:
+ ut_ad((mtr->at_savepoint(block_savepoint - 1)->page.id().page_no() ==
+ index()->page) == (tree_height <= height + 2));
+ if (tree_height <= height + 2)
+ /* Retain the root page latch. */
+ break;
+ goto release_parent_page;
+ default:
+ if (rw_latch == RW_NO_LATCH)
+ {
+ ut_ad(!height);
+ break;
+ }
+ release_parent_page:
+ ut_ad(block_savepoint > savepoint);
+ mtr->rollback_to_savepoint(block_savepoint - 1, block_savepoint);
+ block_savepoint--;
+ }
- page_cursor->block = block;
-
- if (dict_index_is_spatial(index) && page_mode >= PAGE_CUR_CONTAIN) {
- ut_ad(need_path);
- found = rtr_cur_search_with_match(
- block, index, tuple, page_mode, page_cursor,
- cursor->rtr_info);
+ if (!height)
+ {
+ reached_leaf:
+ /* We reached the leaf level. */
+ ut_ad(block == mtr->at_savepoint(block_savepoint));
- /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */
- if (search_mode == PAGE_CUR_RTREE_INSERT
- && cursor->rtr_info->mbr_adj) {
- static_assert(BTR_MODIFY_TREE
- == (8 | BTR_MODIFY_LEAF), "");
+ if (latch_mode == BTR_MODIFY_ROOT_AND_LEAF)
+ {
+ reached_root_and_leaf:
+ if (!latch_by_caller)
+ mtr->rollback_to_savepoint(savepoint, savepoint + 1);
+ reached_index_root_and_leaf:
+ ut_ad(rw_latch == RW_X_LATCH);
+#ifdef BTR_CUR_HASH_ADAPT
+ btr_search_drop_page_hash_index(block, true);
+#endif
+ if (page_cur_search_with_match(tuple, mode, &up_match, &low_match,
+ &page_cur, nullptr))
+ goto corrupted;
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE);
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ goto func_exit;
+ }
- if (!(latch_mode & 8)) {
- /* Parent MBR needs updated, should retry
- with BTR_MODIFY_TREE */
- goto func_exit;
- }
+ switch (latch_mode) {
+ case BTR_SEARCH_PREV:
+ case BTR_MODIFY_PREV:
+ static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, "");
+ static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, "");
+ ut_ad(!latch_by_caller);
- rtree_parent_modified = true;
- cursor->rtr_info->mbr_adj = false;
- mbr_adj = true;
- }
+ if (rw_latch == RW_NO_LATCH)
+ {
+ /* latch also siblings from left to right */
+ rw_latch= rw_lock_type_t(latch_mode & (RW_X_LATCH | RW_S_LATCH));
+ if (page_has_prev(block->page.frame) &&
+ !btr_block_get(*index(), btr_page_get_prev(block->page.frame),
+ rw_latch, false, mtr, &err))
+ goto func_exit;
+ mtr->upgrade_buffer_fix(block_savepoint, rw_latch);
+ if (page_has_next(block->page.frame) &&
+ !btr_block_get(*index(), btr_page_get_next(block->page.frame),
+ rw_latch, false, mtr, &err))
+ goto func_exit;
+ }
+ goto release_tree;
+ case BTR_SEARCH_LEAF:
+ case BTR_MODIFY_LEAF:
+ if (rw_latch == RW_NO_LATCH)
+ {
+ ut_ad(index()->is_ibuf());
+ mtr->upgrade_buffer_fix(block_savepoint, rw_lock_type_t(latch_mode));
+ }
+ if (!latch_by_caller)
+ {
+release_tree:
+ /* Release the tree s-latch */
+ block_savepoint--;
+ mtr->rollback_to_savepoint(savepoint, savepoint + 1);
+ }
+ /* release upper blocks */
+ if (savepoint < block_savepoint)
+ mtr->rollback_to_savepoint(savepoint, block_savepoint);
+ break;
+ default:
+ ut_ad(latch_mode == BTR_MODIFY_TREE);
+ ut_ad(rw_latch == RW_NO_LATCH);
+ /* x-latch also siblings from left to right */
+ if (page_has_prev(block->page.frame) &&
+ !btr_block_get(*index(), btr_page_get_prev(block->page.frame),
+ RW_X_LATCH, false, mtr, &err))
+ goto func_exit;
+ mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH);
+ if (page_has_next(block->page.frame) &&
+ !btr_block_get(*index(), btr_page_get_next(block->page.frame),
+ RW_X_LATCH, false, mtr, &err))
+ goto func_exit;
+ }
- if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) {
- cursor->low_match =
- DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1;
- }
+ reached_latched_leaf:
#ifdef BTR_CUR_HASH_ADAPT
- } else if (height == 0 && btr_search_enabled
- && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG)
- && index->is_btree()) {
- /* The adaptive hash index is only used when searching
- for leaf pages (height==0), but not in r-trees.
- We only need the byte prefix comparison for the purpose
- of updating the adaptive hash index. */
- if (page_cur_search_with_match_bytes(
- tuple, page_mode, &up_match, &up_bytes,
- &low_match, &low_bytes, page_cursor)) {
- err = DB_CORRUPTION;
- goto func_exit;
- }
+ if (btr_search_enabled && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG))
+ {
+ if (page_cur_search_with_match_bytes(tuple, mode,
+ &up_match, &up_bytes,
+ &low_match, &low_bytes, &page_cur))
+ goto corrupted;
+ }
+ else
#endif /* BTR_CUR_HASH_ADAPT */
- } else {
- /* Search for complete index fields. */
- up_bytes = low_bytes = 0;
- if (page_cur_search_with_match(
- tuple, page_mode, &up_match,
- &low_match, page_cursor,
- need_path ? cursor->rtr_info : nullptr)) {
- err = DB_CORRUPTION;
- goto func_exit;
- }
- }
-
- /* If this is the desired level, leave the loop */
-
- ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor)));
-
- /* Add Predicate lock if it is serializable isolation
- and only if it is in the search case */
- if (dict_index_is_spatial(index)
- && cursor->rtr_info->need_prdt_lock
- && mode != PAGE_CUR_RTREE_INSERT
- && mode != PAGE_CUR_RTREE_LOCATE
- && mode >= PAGE_CUR_CONTAIN) {
- lock_prdt_t prdt;
-
- {
- trx_t* trx = thr_get_trx(cursor->thr);
- TMLockTrxGuard g{TMLockTrxArgs(*trx)};
- lock_init_prdt_from_mbr(
- &prdt, &cursor->rtr_info->mbr, mode,
- trx->lock.lock_heap);
- }
-
- if (rw_latch == RW_NO_LATCH && height != 0) {
- block->page.lock.s_lock();
- }
-
- lock_prdt_lock(block, &prdt, index, LOCK_S,
- LOCK_PREDICATE, cursor->thr);
-
- if (rw_latch == RW_NO_LATCH && height != 0) {
- block->page.lock.s_unlock();
- }
- }
-
- if (level != height) {
-
- const rec_t* node_ptr;
- ut_ad(height > 0);
-
- height--;
- guess = NULL;
-
- node_ptr = page_cur_get_rec(page_cursor);
-
- offsets = rec_get_offsets(node_ptr, index, offsets, 0,
- ULINT_UNDEFINED, &heap);
-
- /* If the rec is the first or last in the page for
- pessimistic delete intention, it might cause node_ptr insert
- for the upper level. We should change the intention and retry.
- */
- if (latch_mode == BTR_MODIFY_TREE
- && btr_cur_need_opposite_intention(
- page, lock_intention, node_ptr)) {
-
-need_opposite_intention:
- ut_ad(upper_rw_latch == RW_X_LATCH);
-
- if (n_releases > 0) {
- /* release root block */
- mtr_release_block_at_savepoint(
- mtr, tree_savepoints[0],
- tree_blocks[0]);
- }
-
- /* release all blocks */
- for (; n_releases <= n_blocks; n_releases++) {
- mtr_release_block_at_savepoint(
- mtr, tree_savepoints[n_releases],
- tree_blocks[n_releases]);
- }
-
- lock_intention = BTR_INTENTION_BOTH;
-
- page_id.set_page_no(index->page);
- up_match = 0;
- low_match = 0;
- height = ULINT_UNDEFINED;
+ if (page_cur_search_with_match(tuple, mode, &up_match, &low_match,
+ &page_cur, nullptr))
+ goto corrupted;
- n_blocks = 0;
- n_releases = 0;
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE);
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
- goto search_loop;
- }
+#ifdef BTR_CUR_HASH_ADAPT
+ /* We do a dirty read of btr_search_enabled here. We will
+ properly check btr_search_enabled again in
+ btr_search_build_page_hash_index() before building a page hash
+ index, while holding search latch. */
+ if (!btr_search_enabled);
+ else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG)
+ /* This may be a search tuple for btr_pcur_t::restore_position(). */
+ ut_ad(tuple->is_metadata() ||
+ (tuple->is_metadata(tuple->info_bits ^ REC_STATUS_INSTANT)));
+ else if (index()->table->is_temporary());
+ else if (!rec_is_metadata(page_cur.rec, *index()))
+ btr_search_info_update(index(), this);
+#endif /* BTR_CUR_HASH_ADAPT */
- if (dict_index_is_spatial(index)) {
- if (page_rec_is_supremum(node_ptr)) {
- cursor->low_match = 0;
- cursor->up_match = 0;
- goto func_exit;
- }
+ goto func_exit;
+ }
- /* If we are doing insertion or record locating,
- remember the tree nodes we visited */
- if (page_mode == PAGE_CUR_RTREE_INSERT
- || (search_mode == PAGE_CUR_RTREE_LOCATE
- && (latch_mode != BTR_MODIFY_LEAF))) {
- bool add_latch = false;
-
- if (latch_mode == BTR_MODIFY_TREE
- && rw_latch == RW_NO_LATCH) {
- ut_ad(mtr->memo_contains_flagged(
- &index->lock, MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
- block->page.lock.s_lock();
- add_latch = true;
- }
+ guess= nullptr;
+ if (page_cur_search_with_match(tuple, page_mode, &up_match, &low_match,
+ &page_cur, nullptr))
+ goto corrupted;
+ offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, ULINT_UNDEFINED,
+ &heap);
- /* Store the parent cursor location */
-#ifdef UNIV_DEBUG
- ulint num_stored = rtr_store_parent_path(
- block, cursor, latch_mode,
- height + 1, mtr);
-#else
- rtr_store_parent_path(
- block, cursor, latch_mode,
- height + 1, mtr);
-#endif
+ ut_ad(block == mtr->at_savepoint(block_savepoint));
- if (page_mode == PAGE_CUR_RTREE_INSERT) {
- btr_pcur_t* r_cursor =
- rtr_get_parent_cursor(
- cursor, height + 1,
- true);
- /* If it is insertion, there should
- be only one parent for each level
- traverse */
-#ifdef UNIV_DEBUG
- ut_ad(num_stored == 1);
-#endif
-
- node_ptr = btr_pcur_get_rec(r_cursor);
+ switch (latch_mode) {
+ default:
+ break;
+ case BTR_MODIFY_TREE:
+ if (btr_cur_need_opposite_intention(block->page.frame, lock_intention,
+ page_cur.rec))
+ /* If the rec is the first or last in the page for pessimistic
+ delete intention, it might cause node_ptr insert for the upper
+ level. We should change the intention and retry. */
+ need_opposite_intention:
+ return pessimistic_search_leaf(tuple, mode, mtr);
- }
+ if (detected_same_key_root || lock_intention != BTR_INTENTION_BOTH ||
+ index()->is_unique() ||
+ (up_match <= rec_offs_n_fields(offsets) &&
+ low_match <= rec_offs_n_fields(offsets)))
+ break;
- if (add_latch) {
- block->page.lock.s_unlock();
- }
+ /* If the record is the first or the last record of the page, or has
+ the same key value as the first or the last record, then another page
+ might be chosen under BTR_CONT_MODIFY_TREE. So, the parent page should
+ not be released, in order to avoid a deadlock by blocking another
+ search with the same key value. */
+ const rec_t *first=
+ page_rec_get_next_const(page_get_infimum_rec(block->page.frame));
+ ulint matched_fields;
- ut_ad(!page_rec_is_supremum(node_ptr));
- }
+ if (UNIV_UNLIKELY(!first))
+ goto corrupted;
+ if (page_cur.rec == first ||
+ page_rec_is_last(page_cur.rec, block->page.frame))
+ {
+ same_key_root:
+ detected_same_key_root= true;
+ break;
+ }
- ut_ad(page_mode == search_mode
- || (page_mode == PAGE_CUR_WITHIN
- && search_mode == PAGE_CUR_RTREE_LOCATE));
+ matched_fields= 0;
+ offsets2= rec_get_offsets(first, index(), offsets2, 0, ULINT_UNDEFINED,
+ &heap);
+ cmp_rec_rec(page_cur.rec, first, offsets, offsets2, index(), false,
+ &matched_fields);
+ if (matched_fields >= rec_offs_n_fields(offsets) - 1)
+ goto same_key_root;
+ if (const rec_t* last=
+ page_rec_get_prev_const(page_get_supremum_rec(block->page.frame)))
+ {
+ matched_fields= 0;
+ offsets2= rec_get_offsets(last, index(), offsets2, 0, ULINT_UNDEFINED,
+ &heap);
+ cmp_rec_rec(page_cur.rec, last, offsets, offsets2, index(), false,
+ &matched_fields);
+ if (matched_fields >= rec_offs_n_fields(offsets) - 1)
+ goto same_key_root;
+ }
+ else
+ goto corrupted;
- page_mode = search_mode;
- }
+ /* Release the non-root parent page unless it may need to be modified. */
+ if (tree_height > height + 1 &&
+ !btr_cur_will_modify_tree(index(), block->page.frame, lock_intention,
+ page_cur.rec, node_ptr_max_size,
+ zip_size, mtr))
+ {
+ mtr->rollback_to_savepoint(block_savepoint - 1, block_savepoint);
+ block_savepoint--;
+ }
+ }
- /* If the first or the last record of the page
- or the same key value to the first record or last record,
- the another page might be chosen when BTR_CONT_MODIFY_TREE.
- So, the parent page should not released to avoiding deadlock
- with blocking the another search with the same key value. */
- if (!detected_same_key_root
- && lock_intention == BTR_INTENTION_BOTH
- && !dict_index_is_unique(index)
- && latch_mode == BTR_MODIFY_TREE
- && (up_match >= rec_offs_n_fields(offsets) - 1
- || low_match >= rec_offs_n_fields(offsets) - 1)) {
- const rec_t* first_rec = page_rec_get_next_const(
- page_get_infimum_rec(page));
- ulint matched_fields;
+ /* Go to the child node */
+ page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, offsets));
- ut_ad(upper_rw_latch == RW_X_LATCH);
+ if (!--height)
+ {
+ /* We are about to access the leaf level. */
- if (UNIV_UNLIKELY(!first_rec)) {
- corrupted:
- err = DB_CORRUPTION;
- goto func_exit;
- }
- if (node_ptr == first_rec
- || page_rec_is_last(node_ptr, page)) {
- detected_same_key_root = true;
- } else {
- matched_fields = 0;
-
- offsets2 = rec_get_offsets(
- first_rec, index, offsets2,
- 0, ULINT_UNDEFINED, &heap);
- cmp_rec_rec(node_ptr, first_rec,
- offsets, offsets2, index, false,
- &matched_fields);
-
- if (matched_fields
- >= rec_offs_n_fields(offsets) - 1) {
- detected_same_key_root = true;
- } else if (const rec_t* last_rec
- = page_rec_get_prev_const(
- page_get_supremum_rec(
- page))) {
- matched_fields = 0;
-
- offsets2 = rec_get_offsets(
- last_rec, index, offsets2,
- 0, ULINT_UNDEFINED, &heap);
- cmp_rec_rec(
- node_ptr, last_rec,
- offsets, offsets2, index,
- false, &matched_fields);
- if (matched_fields
- >= rec_offs_n_fields(offsets) - 1) {
- detected_same_key_root = true;
- }
- } else {
- goto corrupted;
- }
- }
- }
+ switch (latch_mode) {
+ case BTR_MODIFY_ROOT_AND_LEAF:
+ rw_latch= RW_X_LATCH;
+ break;
+ case BTR_MODIFY_PREV: /* ibuf_insert() or btr_pcur_move_to_prev() */
+ case BTR_SEARCH_PREV: /* btr_pcur_move_to_prev() */
+ ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH);
- /* If the page might cause modify_tree,
- we should not release the parent page's lock. */
- if (!detected_same_key_root
- && latch_mode == BTR_MODIFY_TREE
- && !btr_cur_will_modify_tree(
- index, page, lock_intention, node_ptr,
- node_ptr_max_size, zip_size, mtr)
- && !rtree_parent_modified) {
- ut_ad(upper_rw_latch == RW_X_LATCH);
- ut_ad(n_releases <= n_blocks);
-
- /* we can release upper blocks */
- for (; n_releases < n_blocks; n_releases++) {
- if (n_releases == 0) {
- /* we should not release root page
- to pin to same block. */
- continue;
- }
+ if (page_has_prev(block->page.frame) &&
+ page_rec_is_first(page_cur.rec, block->page.frame))
+ {
+ ut_ad(block_savepoint + 1 == mtr->get_savepoint());
+ /* Latch the previous page if the node pointer is the leftmost
+ of the current page. */
+ buf_block_t *left= btr_block_get(*index(),
+ btr_page_get_prev(block->page.frame),
+ RW_NO_LATCH, false, mtr, &err);
+ if (UNIV_UNLIKELY(!left))
+ goto func_exit;
+ ut_ad(block_savepoint + 2 == mtr->get_savepoint());
+ if (UNIV_LIKELY(left->page.lock.s_lock_try()))
+ mtr->lock_register(block_savepoint + 1, MTR_MEMO_PAGE_S_FIX);
+ else
+ {
+ if (rw_latch == RW_S_LATCH)
+ block->page.lock.s_unlock();
+ else
+ block->page.lock.x_unlock();
+ mtr->upgrade_buffer_fix(block_savepoint + 1, RW_S_LATCH);
+ mtr->lock_register(block_savepoint, MTR_MEMO_BUF_FIX);
+ mtr->upgrade_buffer_fix(block_savepoint, RW_S_LATCH);
+ /* While our latch on the level-2 page prevents splits or
+ merges of this level-1 block, other threads may have
+ modified it due to splitting or merging some level-0 (leaf)
+ pages underneath it. Thus, we must search again. */
+ if (page_cur_search_with_match(tuple, page_mode,
+ &up_match, &low_match,
+ &page_cur, nullptr))
+ goto corrupted;
+ offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0,
+ ULINT_UNDEFINED, &heap);
+ page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec,
+ offsets));
+ }
+ }
+ goto leaf_with_no_latch;
+ case BTR_MODIFY_LEAF:
+ case BTR_SEARCH_LEAF:
+ if (index()->is_ibuf())
+ goto leaf_with_no_latch;
+ rw_latch= rw_lock_type_t(latch_mode);
+ if (btr_op != BTR_NO_OP &&
+ ibuf_should_try(index(), btr_op != BTR_INSERT_OP))
+ /* Try to buffer the operation if the leaf page
+ is not in the buffer pool. */
+ buf_mode= btr_op == BTR_DELETE_OP
+ ? BUF_GET_IF_IN_POOL_OR_WATCH
+ : BUF_GET_IF_IN_POOL;
+ break;
+ case BTR_MODIFY_TREE:
+ ut_ad(rw_latch == RW_X_LATCH);
- /* release unused blocks to unpin */
- mtr_release_block_at_savepoint(
- mtr, tree_savepoints[n_releases],
- tree_blocks[n_releases]);
- }
- }
+ if (lock_intention == BTR_INTENTION_INSERT &&
+ page_has_next(block->page.frame) &&
+ page_rec_is_last(page_cur.rec, block->page.frame))
+ {
+ /* btr_insert_into_right_sibling() might cause deleting node_ptr
+ at upper level */
+ mtr->rollback_to_savepoint(block_savepoint);
+ goto need_opposite_intention;
+ }
+ /* fall through */
+ default:
+ leaf_with_no_latch:
+ rw_latch= RW_NO_LATCH;
+ }
+ }
- if (height == level
- && latch_mode == BTR_MODIFY_TREE) {
- ut_ad(upper_rw_latch == RW_X_LATCH);
- /* we should sx-latch root page, if released already.
- It contains seg_header. */
- if (n_releases > 0) {
- mtr->sx_latch_at_savepoint(
- tree_savepoints[0],
- tree_blocks[0]);
- }
+ goto search_loop;
+}
- /* x-latch the branch blocks not released yet. */
- for (ulint i = n_releases; i <= n_blocks; i++) {
- mtr->x_latch_at_savepoint(
- tree_savepoints[i],
- tree_blocks[i]);
- }
- }
+ATTRIBUTE_COLD
+dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
+ page_cur_mode_t mode, mtr_t *mtr)
+{
+ ut_ad(index()->is_btree() || index()->is_ibuf());
+ ut_ad(!index()->is_ibuf() || ibuf_inside(mtr));
- /* We should consider prev_page of parent page, if the node_ptr
- is the leftmost of the page. because BTR_SEARCH_PREV and
- BTR_MODIFY_PREV latches prev_page of the leaf page. */
- if ((latch_mode == BTR_SEARCH_PREV
- || latch_mode == BTR_MODIFY_PREV)
- && !prev_tree_blocks) {
- /* block should be latched for consistent
- btr_page_get_prev() */
- ut_ad(mtr->memo_contains_flagged(
- block, MTR_MEMO_PAGE_S_FIX
- | MTR_MEMO_PAGE_X_FIX));
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets = offsets_;
+ rec_offs_init(offsets_);
- if (page_has_prev(page)
- && page_rec_is_first(node_ptr, page)) {
+ ut_ad(flag == BTR_CUR_BINARY);
+ ut_ad(dict_index_check_search_tuple(index(), tuple));
+ ut_ad(dtuple_check_typed(tuple));
+ buf_block_t *block= mtr->at_savepoint(1);
+ ut_ad(block->page.id().page_no() == index()->page);
+ block->page.fix();
+ mtr->rollback_to_savepoint(1);
+ ut_ad(mtr->memo_contains_flagged(&index()->lock,
+ MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK));
+
+ const page_cur_mode_t page_mode{btr_cur_nonleaf_mode(mode)};
+
+ mtr->page_lock(block, RW_X_LATCH);
+
+ up_match= 0;
+ up_bytes= 0;
+ low_match= 0;
+ low_bytes= 0;
+ ulint height= btr_page_get_level(block->page.frame);
+ tree_height= height + 1;
+ mem_heap_t *heap= nullptr;
- if (leftmost_from_level == 0) {
- leftmost_from_level = height + 1;
- }
- } else {
- leftmost_from_level = 0;
- }
+ search_loop:
+ dberr_t err;
+ page_cur.block= block;
- if (height == 0 && leftmost_from_level > 0) {
- /* should retry to get also prev_page
- from level==leftmost_from_level. */
- prev_tree_blocks = static_cast<buf_block_t**>(
- ut_malloc_nokey(sizeof(buf_block_t*)
- * leftmost_from_level));
-
- prev_tree_savepoints = static_cast<ulint*>(
- ut_malloc_nokey(sizeof(ulint)
- * leftmost_from_level));
-
- /* back to the level (leftmost_from_level+1) */
- ulint idx = n_blocks
- - (leftmost_from_level - 1);
-
- page_id.set_page_no(
- tree_blocks[idx]->page.id().page_no());
-
- for (ulint i = n_blocks
- - (leftmost_from_level - 1);
- i <= n_blocks; i++) {
- mtr_release_block_at_savepoint(
- mtr, tree_savepoints[i],
- tree_blocks[i]);
- }
+ if (UNIV_UNLIKELY(!height))
+ {
+ if (page_cur_search_with_match(tuple, mode, &up_match, &low_match,
+ &page_cur, nullptr))
+ corrupted:
+ err= DB_CORRUPTION;
+ else
+ {
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE);
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
- n_blocks -= (leftmost_from_level - 1);
- height = leftmost_from_level;
- ut_ad(n_releases == 0);
-
- /* replay up_match, low_match */
- up_match = 0;
- low_match = 0;
- rtr_info_t* rtr_info = need_path
- ? cursor->rtr_info : NULL;
-
- for (ulint i = 0; i < n_blocks; i++) {
- page_cursor->block = tree_blocks[i];
- if (page_cur_search_with_match(
- tuple,
- page_mode, &up_match,
- &low_match, page_cursor,
- rtr_info)) {
- err = DB_CORRUPTION;
- goto func_exit;
- }
- }
+#ifdef BTR_CUR_HASH_ADAPT
+ /* We do a dirty read of btr_search_enabled here. We will
+ properly check btr_search_enabled again in
+ btr_search_build_page_hash_index() before building a page hash
+ index, while holding search latch. */
+ if (!btr_search_enabled);
+ else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG)
+ /* This may be a search tuple for btr_pcur_t::restore_position(). */
+ ut_ad(tuple->is_metadata() ||
+ (tuple->is_metadata(tuple->info_bits ^ REC_STATUS_INSTANT)));
+ else if (index()->table->is_temporary());
+ else if (!rec_is_metadata(page_cur.rec, *index()))
+ btr_search_info_update(index(), this);
+#endif /* BTR_CUR_HASH_ADAPT */
+ err= DB_SUCCESS;
+ }
- goto search_loop;
- }
- }
+ func_exit:
+ if (UNIV_LIKELY_NULL(heap))
+ mem_heap_free(heap);
+ return err;
+ }
- /* Go to the child node */
- page_id.set_page_no(
- btr_node_ptr_get_child_page_no(node_ptr, offsets));
+ if (page_cur_search_with_match(tuple, page_mode, &up_match, &low_match,
+ &page_cur, nullptr))
+ goto corrupted;
- n_blocks++;
+ page_id_t page_id{block->page.id()};
- if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) {
- /* We're doing a search on an ibuf tree and we're one
- level above the leaf page. */
+ offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, ULINT_UNDEFINED,
+ &heap);
+ /* Go to the child node */
+ page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, offsets));
- ut_ad(level == 0);
+ const auto block_savepoint= mtr->get_savepoint();
+ block=
+ buf_page_get_gen(page_id, block->zip_size(), RW_NO_LATCH, nullptr, BUF_GET,
+ mtr, &err, !--height && !index()->is_clust());
- buf_mode = BUF_GET;
- rw_latch = RW_NO_LATCH;
- goto retry_page_get;
- }
+ if (!block)
+ {
+ if (err == DB_DECRYPTION_FAILED)
+ btr_decryption_failed(*index());
+ goto func_exit;
+ }
- if (dict_index_is_spatial(index)
- && page_mode >= PAGE_CUR_CONTAIN
- && page_mode != PAGE_CUR_RTREE_INSERT) {
- ut_ad(need_path);
- rtr_node_path_t* path =
- cursor->rtr_info->path;
-
- if (!path->empty() && found) {
- ut_ad(path->back().page_no
- == page_id.page_no());
- path->pop_back();
-#ifdef UNIV_DEBUG
- if (page_mode == PAGE_CUR_RTREE_LOCATE
- && (latch_mode != BTR_MODIFY_LEAF)) {
- btr_pcur_t* cur
- = cursor->rtr_info->parent_path->back(
- ).cursor;
- rec_t* my_node_ptr
- = btr_pcur_get_rec(cur);
-
- offsets = rec_get_offsets(
- my_node_ptr, index, offsets,
- 0, ULINT_UNDEFINED, &heap);
-
- ulint my_page_no
- = btr_node_ptr_get_child_page_no(
- my_node_ptr, offsets);
-
- ut_ad(page_id.page_no() == my_page_no);
- }
-#endif
- }
- }
+ if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() ||
+ btr_page_get_index_id(block->page.frame) != index()->id ||
+ fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE ||
+ !fil_page_index_page_check(block->page.frame))
+ goto corrupted;
- goto search_loop;
- } else if (!dict_index_is_spatial(index)
- && latch_mode == BTR_MODIFY_TREE
- && lock_intention == BTR_INTENTION_INSERT
- && page_has_next(page)
- && page_rec_is_last(page_cur_get_rec(page_cursor), page)) {
-
- /* btr_insert_into_right_sibling() might cause
- deleting node_ptr at upper level */
-
- guess = NULL;
-
- if (height == 0) {
- /* release the leaf pages if latched */
- for (uint i = 0; i < 3; i++) {
- if (latch_leaves.blocks[i] != NULL) {
- mtr_release_block_at_savepoint(
- mtr, latch_leaves.savepoints[i],
- latch_leaves.blocks[i]);
- latch_leaves.blocks[i] = NULL;
- }
- }
- }
+ if (height != btr_page_get_level(block->page.frame))
+ goto corrupted;
- goto need_opposite_intention;
- }
+ if (page_has_prev(block->page.frame) &&
+ !btr_block_get(*index(), btr_page_get_prev(block->page.frame),
+ RW_X_LATCH, false, mtr, &err))
+ goto func_exit;
+ mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH);
+#ifdef UNIV_ZIP_DEBUG
+ const page_zip_des_t *page_zip= buf_block_get_page_zip(block);
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index()));
+#endif /* UNIV_ZIP_DEBUG */
+ if (page_has_next(block->page.frame) &&
+ !btr_block_get(*index(), btr_page_get_next(block->page.frame),
+ RW_X_LATCH, false, mtr, &err))
+ goto func_exit;
+ goto search_loop;
+}
- if (level != 0) {
- ut_ad(!autoinc);
+/********************************************************************//**
+Searches an index tree and positions a tree cursor on a given non-leaf level.
+NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
+to node pointer page number fields on the upper levels of the tree!
+cursor->up_match and cursor->low_match both will have sensible values.
+Cursor is left at the place where an insert of the
+search tuple should be performed in the B-tree. InnoDB does an insert
+immediately after the cursor. Thus, the cursor may end up on a user record,
+or on a page infimum record.
+@param level the tree level of search
+@param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that
+ it cannot get compared to the node ptr page number field!
+@param rw_latch RW_S_LATCH or RW_X_LATCH
+@param cursor tree cursor; the cursor page is s- or x-latched, but see also
+ above!
+@param mtr mini-transaction
+@return DB_SUCCESS on success or error code otherwise */
+TRANSACTIONAL_TARGET
+dberr_t btr_cur_search_to_nth_level(ulint level,
+ const dtuple_t *tuple,
+ rw_lock_type_t rw_latch,
+ btr_cur_t *cursor, mtr_t *mtr)
+{
+ dict_index_t *const index= cursor->index();
- if (upper_rw_latch == RW_NO_LATCH) {
- ut_ad(latch_mode == BTR_CONT_MODIFY_TREE
- || latch_mode == BTR_CONT_SEARCH_TREE);
- btr_block_get(
- *index, page_id.page_no(),
- latch_mode == BTR_CONT_MODIFY_TREE
- ? RW_X_LATCH : RW_SX_LATCH, false, mtr, &err);
- } else {
- ut_ad(mtr->memo_contains_flagged(block,
- upper_rw_latch));
-
- if (latch_by_caller) {
- ut_ad(latch_mode == BTR_SEARCH_TREE);
- /* to exclude modifying tree operations
- should sx-latch the index. */
- ut_ad(mtr->memo_contains(index->lock,
- MTR_MEMO_SX_LOCK));
- /* because has sx-latch of index,
- can release upper blocks. */
- for (; n_releases < n_blocks; n_releases++) {
- mtr_release_block_at_savepoint(
- mtr,
- tree_savepoints[n_releases],
- tree_blocks[n_releases]);
- }
- }
- }
+ ut_ad(index->is_btree() || index->is_ibuf());
+ mem_heap_t *heap= nullptr;
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs *offsets= offsets_;
+ rec_offs_init(offsets_);
+ ut_ad(level);
+ ut_ad(dict_index_check_search_tuple(index, tuple));
+ ut_ad(index->is_ibuf() ? ibuf_inside(mtr) : index->is_btree());
+ ut_ad(dtuple_check_typed(tuple));
+ ut_ad(index->page != FIL_NULL);
+
+ MEM_UNDEFINED(&cursor->up_bytes, sizeof cursor->up_bytes);
+ MEM_UNDEFINED(&cursor->low_bytes, sizeof cursor->low_bytes);
+ cursor->up_match= 0;
+ cursor->low_match= 0;
+ cursor->flag= BTR_CUR_BINARY;
- if (page_mode <= PAGE_CUR_LE) {
- cursor->low_match = low_match;
- cursor->up_match = up_match;
- }
- } else {
- cursor->low_match = low_match;
- cursor->low_bytes = low_bytes;
- cursor->up_match = up_match;
- cursor->up_bytes = up_bytes;
+#ifndef BTR_CUR_ADAPT
+ buf_block_t *block= nullptr;
+#else
+ btr_search_t *info= btr_search_get_info(index);
+ buf_block_t *block= info->root_guess;
+#endif /* BTR_CUR_ADAPT */
- if (autoinc) {
- page_set_autoinc(tree_blocks[0], autoinc, mtr, false);
- }
+ ut_ad(mtr->memo_contains_flagged(&index->lock,
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
-#ifdef BTR_CUR_HASH_ADAPT
- /* We do a dirty read of btr_search_enabled here. We
- will properly check btr_search_enabled again in
- btr_search_build_page_hash_index() before building a
- page hash index, while holding search latch. */
- if (!btr_search_enabled) {
- } else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) {
- /* This may be a search tuple for
- btr_pcur_t::restore_position(). */
- ut_ad(tuple->is_metadata()
- || (tuple->is_metadata(tuple->info_bits
- ^ REC_STATUS_INSTANT)));
- } else if (index->is_spatial()) {
- } else if (index->table->is_temporary()) {
- } else if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) {
- /* Only user records belong in the adaptive
- hash index. */
- } else {
- btr_search_info_update(index, cursor);
- }
-#endif /* BTR_CUR_HASH_ADAPT */
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_GE);
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- ut_ad(cursor->low_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- }
-
- /* For spatial index, remember what blocks are still latched */
- if (dict_index_is_spatial(index)
- && (latch_mode == BTR_MODIFY_TREE
- || latch_mode == BTR_MODIFY_LEAF)) {
- for (ulint i = 0; i < n_releases; i++) {
- cursor->rtr_info->tree_blocks[i] = NULL;
- cursor->rtr_info->tree_savepoints[i] = 0;
- }
+ const ulint zip_size= index->table->space->zip_size();
- for (ulint i = n_releases; i <= n_blocks; i++) {
- cursor->rtr_info->tree_blocks[i] = tree_blocks[i];
- cursor->rtr_info->tree_savepoints[i] = tree_savepoints[i];
- }
- }
+ /* Start with the root page. */
+ page_id_t page_id(index->table->space_id, index->page);
+ ulint height= ULINT_UNDEFINED;
-func_exit:
+search_loop:
+ dberr_t err= DB_SUCCESS;
+ if (buf_block_t *b=
+ mtr->get_already_latched(page_id, mtr_memo_type_t(rw_latch)))
+ block= b;
+ else if (!(block= buf_page_get_gen(page_id, zip_size, rw_latch,
+ block, BUF_GET, mtr, &err)))
+ {
+ if (err == DB_DECRYPTION_FAILED)
+ btr_decryption_failed(*index);
+ goto func_exit;
+ }
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
+#ifdef UNIV_ZIP_DEBUG
+ if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block))
+ ut_a(page_zip_validate(page_zip, block->page.frame, index));
+#endif /* UNIV_ZIP_DEBUG */
- ut_free(prev_tree_blocks);
- ut_free(prev_tree_savepoints);
+ if (!!page_is_comp(block->page.frame) != index->table->not_redundant() ||
+ btr_page_get_index_id(block->page.frame) != index->id ||
+ fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE ||
+ !fil_page_index_page_check(block->page.frame))
+ {
+ corrupted:
+ err= DB_CORRUPTION;
+ func_exit:
+ if (UNIV_LIKELY_NULL(heap))
+ mem_heap_free(heap);
+ return err;
+ }
- if (mbr_adj) {
- /* remember that we will need to adjust parent MBR */
- cursor->rtr_info->mbr_adj = true;
- }
+ const uint32_t page_level= btr_page_get_level(block->page.frame);
- DBUG_RETURN(err);
+ if (height == ULINT_UNDEFINED)
+ {
+ /* We are in the root node */
+ height= page_level;
+ if (!height)
+ goto corrupted;
+ cursor->tree_height= height + 1;
+ }
+ else if (height != ulint{page_level})
+ goto corrupted;
+
+ cursor->page_cur.block= block;
+
+ /* Search for complete index fields. */
+ if (page_cur_search_with_match(tuple, PAGE_CUR_LE, &cursor->up_match,
+ &cursor->low_match, &cursor->page_cur,
+ nullptr))
+ goto corrupted;
+
+ /* If this is the desired level, leave the loop */
+ if (level == height)
+ goto func_exit;
+
+ ut_ad(height > level);
+ height--;
+
+ offsets = rec_get_offsets(cursor->page_cur.rec, index, offsets, 0,
+ ULINT_UNDEFINED, &heap);
+ /* Go to the child node */
+ page_id.set_page_no(btr_node_ptr_get_child_page_no(cursor->page_cur.rec,
+ offsets));
+ block= nullptr;
+ goto search_loop;
}
dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
btr_latch_mode latch_mode, mtr_t *mtr)
{
- ulint node_ptr_max_size= srv_page_size / 2;
btr_intention_t lock_intention;
ulint n_blocks= 0;
mem_heap_t *heap= nullptr;
@@ -2422,29 +1795,21 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
rec_offs_init(offsets_);
const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED;
- latch_mode = btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED);
+ latch_mode= btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED);
lock_intention= btr_cur_get_and_clear_intention(&latch_mode);
- /* This function doesn't need to lock left page of the leaf page */
- if (latch_mode == BTR_SEARCH_PREV)
- latch_mode= BTR_SEARCH_LEAF;
- else if (latch_mode == BTR_MODIFY_PREV)
- latch_mode= BTR_MODIFY_LEAF;
-
/* Store the position of the tree latch we push to mtr so that we
know how to release it when we have latched the leaf node */
auto savepoint= mtr->get_savepoint();
rw_lock_type_t upper_rw_latch= RW_X_LATCH;
+ ulint node_ptr_max_size= 0;
- switch (latch_mode) {
- case BTR_CONT_MODIFY_TREE:
- case BTR_CONT_SEARCH_TREE:
- abort();
- break;
- case BTR_MODIFY_TREE:
+ if (latch_mode == BTR_MODIFY_TREE)
+ {
+ node_ptr_max_size= btr_node_ptr_max_size(index);
/* Most of delete-intended operations are purging. Free blocks
and read IO bandwidth should be prioritized for them, when the
history list is growing huge. */
@@ -2455,32 +1820,35 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
mtr_x_lock_index(index, mtr);
else
mtr_sx_lock_index(index, mtr);
- break;
- default:
+ }
+ else
+ {
+ static_assert(int{BTR_CONT_MODIFY_TREE} == (12 | BTR_MODIFY_LEAF), "");
+ ut_ad(!(latch_mode & 8));
+ /* This function doesn't need to lock left page of the leaf page */
+ static_assert(int{BTR_SEARCH_PREV} == (4 | BTR_SEARCH_LEAF), "");
+ static_assert(int{BTR_MODIFY_PREV} == (4 | BTR_MODIFY_LEAF), "");
+ latch_mode= btr_latch_mode(latch_mode & ~4);
ut_ad(!latch_by_caller ||
mtr->memo_contains_flagged(&index->lock,
MTR_MEMO_SX_LOCK | MTR_MEMO_S_LOCK));
upper_rw_latch= RW_S_LATCH;
- if (latch_by_caller)
- break;
- ut_ad(latch_mode != BTR_SEARCH_TREE);
- savepoint++;
- mtr_s_lock_index(index, mtr);
+ if (!latch_by_caller)
+ {
+ savepoint++;
+ mtr_s_lock_index(index, mtr);
+ }
}
ut_ad(savepoint == mtr->get_savepoint());
- const rw_lock_type_t root_leaf_rw_latch=
- btr_cur_latch_for_root_leaf(latch_mode);
+ const rw_lock_type_t root_leaf_rw_latch= rw_lock_type_t(latch_mode & ~12);
page_cur.index = index;
uint32_t page= index->page;
const auto zip_size= index->table->space->zip_size();
- if (root_leaf_rw_latch == RW_X_LATCH)
- node_ptr_max_size= btr_node_ptr_max_size(index);
-
for (ulint height= ULINT_UNDEFINED;;)
{
ut_ad(n_blocks < BTR_MAX_LEVELS);
@@ -2529,16 +1897,27 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
reached_leaf:
const auto leaf_savepoint= mtr->get_savepoint();
ut_ad(leaf_savepoint);
+ ut_ad(block == mtr->at_savepoint(leaf_savepoint - 1));
- if (rw_latch == RW_NO_LATCH)
- btr_cur_latch_leaves(block, latch_mode, this, mtr);
-
- switch (latch_mode) {
- case BTR_MODIFY_TREE:
- case BTR_CONT_MODIFY_TREE:
- case BTR_CONT_SEARCH_TREE:
- break;
- default:
+ if (latch_mode == BTR_MODIFY_TREE)
+ {
+ ut_ad(rw_latch == RW_NO_LATCH);
+ /* x-latch also siblings from left to right */
+ if (page_has_prev(block->page.frame) &&
+ !btr_block_get(*index, btr_page_get_prev(block->page.frame),
+ RW_X_LATCH, false, mtr, &err))
+ break;
+ mtr->upgrade_buffer_fix(leaf_savepoint - 1, RW_X_LATCH);
+ if (page_has_next(block->page.frame) &&
+ !btr_block_get(*index, btr_page_get_next(block->page.frame),
+ RW_X_LATCH, false, mtr, &err))
+ break;
+ }
+ else
+ {
+ if (rw_latch == RW_NO_LATCH)
+ mtr->upgrade_buffer_fix(leaf_savepoint - 1,
+ rw_lock_type_t(latch_mode));
/* Release index->lock if needed, and the non-leaf pages. */
mtr->rollback_to_savepoint(savepoint - !latch_by_caller,
leaf_savepoint - 1);
@@ -4667,16 +4046,15 @@ btr_cur_pessimistic_update(
}
}
- if (!srv_read_only_mode
- && !big_rec_vec
+#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled
+ if (!big_rec_vec
&& page_is_leaf(block->page.frame)
&& !dict_index_is_online_ddl(index)) {
-#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled
mtr->release(index->lock);
-#endif
/* NOTE: We cannot release root block latch here, because it
has segment header and already modified in most of cases.*/
}
+#endif
err = DB_SUCCESS;
goto return_after_reservations;
@@ -5418,15 +4796,14 @@ return_after_reservations:
err_exit:
mem_heap_free(heap);
- if (!srv_read_only_mode
- && page_is_leaf(page)
- && !dict_index_is_online_ddl(index)) {
#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled
+ if (page_is_leaf(page)
+ && !dict_index_is_online_ddl(index)) {
mtr->release(index->lock);
-#endif
/* NOTE: We cannot release root block latch here, because it
has segment header and already modified in most of cases.*/
}
+#endif
index->table->space->release_free_extents(n_reserved);
return(ret);
@@ -5543,16 +4920,18 @@ public:
buf_block_t *parent_block= m_block;
ulint parent_savepoint= m_savepoint;
- m_savepoint= mtr_set_savepoint(&mtr);
m_block= btr_block_get(*index(), m_page_id.page_no(), RW_S_LATCH, !level,
&mtr, nullptr);
+ if (!m_block)
+ return false;
if (parent_block && parent_block != right_parent)
- mtr_release_block_at_savepoint(&mtr, parent_savepoint, parent_block);
+ mtr.rollback_to_savepoint(parent_savepoint, parent_savepoint + 1);
- return m_block &&
- (level == ULINT_UNDEFINED ||
- btr_page_get_level(buf_block_get_frame(m_block)) == level);
+ m_savepoint= mtr.get_savepoint() - 1;
+
+ return level == ULINT_UNDEFINED ||
+ btr_page_get_level(m_block->page.frame) == level;
}
/** Sets page mode for leaves */
@@ -5759,14 +5138,18 @@ static ha_rows btr_estimate_n_rows_in_range_on_level(
buf_block_t *prev_block= block;
ulint prev_savepoint= savepoint;
- savepoint= mtr_set_savepoint(&mtr);
+ savepoint= mtr.get_savepoint();
/* Fetch the page. */
block= btr_block_get(*index, page_id.page_no(), RW_S_LATCH, !level, &mtr,
nullptr);
if (prev_block)
- mtr_release_block_at_savepoint(&mtr, prev_savepoint, prev_block);
+ {
+ mtr.rollback_to_savepoint(prev_savepoint, prev_savepoint + 1);
+ if (block)
+ savepoint--;
+ }
if (!block || btr_page_get_level(buf_block_get_frame(block)) != level)
goto inexact;
@@ -5795,14 +5178,20 @@ static ha_rows btr_estimate_n_rows_in_range_on_level(
} while (page_id.page_no() != right_page_no);
if (block)
- mtr_release_block_at_savepoint(&mtr, savepoint, block);
+ {
+ ut_ad(block == mtr.at_savepoint(savepoint));
+ mtr.rollback_to_savepoint(savepoint, savepoint + 1);
+ }
return (n_rows);
inexact:
if (block)
- mtr_release_block_at_savepoint(&mtr, savepoint, block);
+ {
+ ut_ad(block == mtr.at_savepoint(savepoint));
+ mtr.rollback_to_savepoint(savepoint, savepoint + 1);
+ }
is_n_rows_exact= false;
@@ -5861,9 +5250,7 @@ ha_rows btr_estimate_n_rows_in_range(dict_index_t *index,
mtr.start();
- /* Store the position of the tree latch we push to mtr so that we
- know how to release it when we have latched leaf node(s) */
- ulint savepoint= mtr_set_savepoint(&mtr);
+ ut_ad(mtr.get_savepoint() == 0);
mtr_s_lock_index(index, &mtr);
ha_rows table_n_rows= dict_table_get_n_rows(index->table);
@@ -5918,10 +5305,10 @@ search_loop:
}
if (height == 0)
- /* There is no need to unlach non-leaf pages here as they must already be
+ /* There is no need to release non-leaf pages here as they must already be
unlatched in btr_est_cur_t::fetch_child(). Try to search on pages after
- index->lock unlatching to decrease contention. */
- mtr_release_s_latch_at_savepoint(&mtr, savepoint, &index->lock);
+ releasing the index latch, to decrease contention. */
+ mtr.rollback_to_savepoint(0, 1);
/* There is no need to search on left page if
divergence_height != ULINT_UNDEFINED, as it was already searched before
@@ -6367,16 +5754,21 @@ struct btr_blob_log_check_t {
DEBUG_SYNC_C("blob_write_middle");
- log_free_check();
-
- DEBUG_SYNC_C("blob_write_middle_after_check");
-
const mtr_log_t log_mode = m_mtr->get_log_mode();
m_mtr->start();
m_mtr->set_log_mode(log_mode);
index->set_modified(*m_mtr);
+ log_free_check();
+
+ DEBUG_SYNC_C("blob_write_middle_after_check");
+
if (UNIV_UNLIKELY(page_no != FIL_NULL)) {
+ dberr_t err;
+ if (UNIV_LIKELY(index->page != page_no)) {
+ ut_a(btr_root_block_get(index, RW_SX_LATCH,
+ m_mtr, &err));
+ }
m_pcur->btr_cur.page_cur.block = btr_block_get(
*index, page_no, RW_X_LATCH, false, m_mtr);
/* The page should not be evicted or corrupted while
@@ -6389,7 +5781,7 @@ struct btr_blob_log_check_t {
ut_ad(m_pcur->rel_pos == BTR_PCUR_ON);
mtr_sx_lock_index(index, m_mtr);
ut_a(m_pcur->restore_position(
- BTR_MODIFY_LEAF_ALREADY_LATCHED,
+ BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED,
m_mtr) == btr_pcur_t::SAME_ALL);
}
@@ -6556,6 +5948,10 @@ btr_store_big_rec_extern_fields(
page_zip = buf_block_get_page_zip(rec_block);
}
+ ut_ad(btr_mtr->get_already_latched(
+ page_id_t{index->table->space_id, index->page},
+ MTR_MEMO_PAGE_SX_FIX));
+
mtr.start();
index->set_modified(mtr);
mtr.set_log_mode_sub(*btr_mtr);
diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc
index 76b173359da..642db0e9f1c 100644
--- a/storage/innobase/btr/btr0defragment.cc
+++ b/storage/innobase/btr/btr0defragment.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved.
-Copyright (C) 2014, 2022, MariaDB Corporation.
+Copyright (C) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -280,6 +280,70 @@ btr_defragment_calc_n_recs_for_size(
return n_recs;
}
+MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result))
+/************************************************************//**
+Searches for the upper level node pointer to a page. It is assumed that mtr
+holds an x-latch or sx-latch on index->lock.
+@return rec_get_offsets() of the node pointer record; nullptr on failure */
+static
+rec_offs*
+btr_page_search_father_node_ptr(
+ rec_offs* offsets,/*!< in: work area for the return value */
+ mem_heap_t* heap, /*!< in: memory heap for the search tuple and offsets */
+ btr_cur_t* cursor, /*!< in: cursor pointing to user record,
+ out: cursor on node pointer record,
+ its page x-latched */
+ mtr_t* mtr) /*!< in: mini-transaction holding index->lock (X or SX) */
+{
+ const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no();
+ dict_index_t* index = btr_cur_get_index(cursor);
+ ut_ad(!index->is_spatial());
+
+ ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
+ ut_ad(dict_index_get_page(index) != page_no);
+
+ const auto level = btr_page_get_level(btr_cur_get_page(cursor));
+
+ const rec_t* user_rec = btr_cur_get_rec(cursor);
+ ut_a(page_rec_is_user_rec(user_rec));
+
+ if (btr_cur_search_to_nth_level(level + 1,
+ dict_index_build_node_ptr(index,
+ user_rec, 0,
+ heap, level),
+ RW_X_LATCH,
+ cursor, mtr) != DB_SUCCESS) {
+ return nullptr; /* the search on the parent level failed */
+ }
+
+ const rec_t* node_ptr = btr_cur_get_rec(cursor);
+ ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive()
+ || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
+
+ offsets = rec_get_offsets(node_ptr, index, offsets, 0,
+ ULINT_UNDEFINED, &heap);
+
+ if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) {
+ offsets = nullptr; /* the record found does not point to our page */
+ }
+
+ return(offsets);
+}
+
+static bool btr_page_search_father(mtr_t *mtr, btr_cur_t *cursor)
+{
+ rec_t *rec= /* the record that follows the page infimum */
+ page_rec_get_next(page_get_infimum_rec(cursor->block()->page.frame));
+ if (UNIV_UNLIKELY(!rec))
+ return false; /* no successor of the infimum could be obtained */
+ cursor->page_cur.rec= rec;
+ mem_heap_t *heap= mem_heap_create(100);
+ const bool got= btr_page_search_father_node_ptr(nullptr, heap, cursor, mtr);
+ mem_heap_free(heap); /* only the null/non-null outcome is kept in got */
+ return got;
+}
+
/*************************************************************//**
Merge as many records from the from_block to the to_block. Delete
the from_block if all records are successfully merged to to_block.
@@ -408,7 +472,7 @@ btr_defragment_merge_pages(
parent.page_cur.index = index;
parent.page_cur.block = from_block;
- if (!btr_page_get_father(mtr, &parent)) {
+ if (!btr_page_search_father(mtr, &parent)) {
to_block = nullptr;
} else if (n_recs_to_move == n_recs) {
/* The whole page is merged with the previous page,
@@ -699,10 +763,9 @@ processed:
acquire index->lock X-latch. This entitles us to
acquire page latches in any order for the index. */
mtr_x_lock_index(index, &mtr);
- /* This will acquire index->lock U latch, which is allowed
- when we are already holding the X-latch. */
if (buf_block_t *last_block =
- item->pcur->restore_position(BTR_MODIFY_TREE, &mtr)
+ item->pcur->restore_position(
+ BTR_PURGE_TREE_ALREADY_LATCHED, &mtr)
== btr_pcur_t::CORRUPTED
? nullptr
: btr_defragment_n_pages(btr_pcur_get_block(item->pcur),
diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc
index 46b8d487850..1dd26f8c467 100644
--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -212,24 +212,98 @@ btr_pcur_copy_stored_position(
pcur_receive->old_n_fields = pcur_donate->old_n_fields;
}
+/** Optimistically latches the guessed leaf page and, for BTR_SEARCH_PREV or BTR_MODIFY_PREV, first its left sibling.
+@param[in] block guessed buffer block; asserted to be buffer-fixed, in a file, and to have a frame
+@param[in,out] pcur cursor; pcur->modify_clock is compared with that of block
+@param[in,out] latch_mode BTR_SEARCH_LEAF, ...; a successful _PREV request rewrites it to its S/X latch bits only
+@param[in,out] mtr mini-transaction in which the pages are acquired and, on failure, released
+@return true if success */
+TRANSACTIONAL_TARGET
+static bool btr_pcur_optimistic_latch_leaves(buf_block_t *block,
+ btr_pcur_t *pcur,
+ btr_latch_mode *latch_mode,
+ mtr_t *mtr)
+{
+ ut_ad(block->page.buf_fix_count());
+ ut_ad(block->page.in_file());
+ ut_ad(block->page.frame);
+
+ static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, "");
+ static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, "");
+ static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) ==
+ (RW_S_LATCH ^ RW_X_LATCH), "");
+
+ const rw_lock_type_t mode= /* the S or X latch bits contained in *latch_mode */
+ rw_lock_type_t(*latch_mode & (RW_X_LATCH | RW_S_LATCH));
+
+ switch (*latch_mode) {
+ default: /* plain leaf modes: only block itself is latched */
+ ut_ad(*latch_mode == BTR_SEARCH_LEAF || *latch_mode == BTR_MODIFY_LEAF);
+ return buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr);
+ case BTR_SEARCH_PREV:
+ case BTR_MODIFY_PREV:
+ page_id_t id{0};
+ uint32_t left_page_no;
+ ulint zip_size;
+ buf_block_t *left_block= nullptr;
+ { /* snapshot the page id, zip size and left link while holding the shared lock guard */
+ transactional_shared_lock_guard<block_lock> g{block->page.lock};
+ if (block->modify_clock != pcur->modify_clock)
+ return false; /* modify_clock mismatch: the guessed block cannot be used */
+ id= block->page.id();
+ zip_size= block->zip_size();
+ left_page_no= btr_page_get_prev(block->page.frame);
+ }
+
+ if (left_page_no != FIL_NULL)
+ { /* a left sibling exists: acquire it before latching block */
+ left_block=
+ buf_page_get_gen(page_id_t(id.space(), left_page_no), zip_size,
+ mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr);
+
+ if (left_block &&
+ btr_page_get_next(left_block->page.frame) != id.page_no())
+ { /* the page that was fetched does not link forward to block */
+release_left_block:
+ mtr->release_last_page();
+ return false;
+ }
+ }
+
+ if (buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr))
+ {
+ if (btr_page_get_prev(block->page.frame) == left_page_no)
+ { /* the left link still matches the snapshot taken above */
+ /* block was already buffer-fixed while entering the function and
+ buf_page_optimistic_get() buffer-fixes it again. */
+ ut_ad(2 <= block->page.buf_fix_count());
+ *latch_mode= btr_latch_mode(mode);
+ return true;
+ }
+
+ mtr->release_last_page(); /* the left link changed: give up the acquisition of block made just above */
+ }
+
+ ut_ad(block->page.buf_fix_count());
+ if (left_block)
+ goto release_left_block; /* release the left sibling as well before reporting failure */
+ return false;
+ }
+}
+
/** Structure acts as functor to do the latching of leaf pages.
It returns true if latching of leaf pages succeeded and false
otherwise. */
struct optimistic_latch_leaves
{
btr_pcur_t *const cursor;
- btr_latch_mode *latch_mode;
+ btr_latch_mode *const latch_mode;
mtr_t *const mtr;
- optimistic_latch_leaves(btr_pcur_t *cursor, btr_latch_mode *latch_mode,
- mtr_t *mtr)
- : cursor(cursor), latch_mode(latch_mode), mtr(mtr) {}
-
- bool operator() (buf_block_t *hint) const
+ bool operator()(buf_block_t *hint) const
{
- return hint && btr_cur_optimistic_latch_leaves(
- hint, cursor->modify_clock, latch_mode,
- btr_pcur_get_btr_cur(cursor), mtr);
+ return hint &&
+ btr_pcur_optimistic_latch_leaves(hint, cursor, latch_mode, mtr);
}
};
@@ -246,8 +320,8 @@ record GREATER than the user record which was the predecessor of the
supremum.
(4) cursor was positioned before the first or after the last in an
empty tree: restores to before first or after the last in the tree.
-@param restore_latch_mode BTR_SEARCH_LEAF, ...
-@param mtr mtr
+@param latch_mode BTR_SEARCH_LEAF, ...
+@param mtr mini-transaction
@return btr_pcur_t::SAME_ALL cursor position on user rec and points on
the record with the same field values as in the stored record,
btr_pcur_t::SAME_UNIQ cursor position is on user rec and points on the
@@ -301,10 +375,9 @@ btr_pcur_t::restore_position(btr_latch_mode restore_latch_mode, mtr_t *mtr)
case BTR_SEARCH_PREV:
case BTR_MODIFY_PREV:
/* Try optimistic restoration. */
-
if (block_when_stored.run_with_hint(
- optimistic_latch_leaves(this, &restore_latch_mode,
- mtr))) {
+ optimistic_latch_leaves{this, &restore_latch_mode,
+ mtr})) {
pos_state = BTR_PCUR_IS_POSITIONED;
latch_mode = restore_latch_mode;
@@ -465,18 +538,9 @@ btr_pcur_move_to_next_page(
return DB_CORRUPTION;
}
- ulint mode = cursor->latch_mode;
- switch (mode) {
- case BTR_SEARCH_TREE:
- mode = BTR_SEARCH_LEAF;
- break;
- case BTR_MODIFY_TREE:
- mode = BTR_MODIFY_LEAF;
- }
-
dberr_t err;
buf_block_t* next_block = btr_block_get(
- *cursor->index(), next_page_no, mode,
+ *cursor->index(), next_page_no, cursor->latch_mode & ~12,
page_is_leaf(page), mtr, &err);
if (UNIV_UNLIKELY(!next_block)) {
@@ -538,26 +602,42 @@ btr_pcur_move_backward_from_page(
return true;
}
- buf_block_t* release_block = nullptr;
-
- if (!page_has_prev(btr_pcur_get_page(cursor))) {
- } else if (btr_pcur_is_before_first_on_page(cursor)) {
- release_block = btr_pcur_get_block(cursor);
- page_cur_set_after_last(cursor->btr_cur.left_block,
- btr_pcur_get_page_cur(cursor));
- } else {
- /* The repositioned cursor did not end on an infimum
- record on a page. Cursor repositioning acquired a latch
- also on the previous page, but we do not need the latch:
- release it. */
- release_block = cursor->btr_cur.left_block;
+ buf_block_t* block = btr_pcur_get_block(cursor);
+
+ if (page_has_prev(block->page.frame)) {
+ buf_block_t* left_block
+ = mtr->at_savepoint(mtr->get_savepoint() - 1);
+ const page_t* const left = left_block->page.frame;
+ if (memcmp_aligned<4>(left + FIL_PAGE_NEXT,
+ block->page.frame
+ + FIL_PAGE_OFFSET, 4)) {
+ /* This should be the right sibling page, or
+ if there is none, the current block. */
+ ut_ad(left_block == block
+ || !memcmp_aligned<4>(left + FIL_PAGE_PREV,
+ block->page.frame
+ + FIL_PAGE_OFFSET, 4));
+ /* The previous one must be the left sibling. */
+ left_block
+ = mtr->at_savepoint(mtr->get_savepoint() - 2);
+ ut_ad(!memcmp_aligned<4>(left_block->page.frame
+ + FIL_PAGE_NEXT,
+ block->page.frame
+ + FIL_PAGE_OFFSET, 4));
+ }
+ if (btr_pcur_is_before_first_on_page(cursor)) {
+ page_cur_set_after_last(left_block,
+ &cursor->btr_cur.page_cur);
+ /* Release the right sibling. */
+ } else {
+ /* Release the left sibling. */
+ block = left_block;
+ }
+ mtr->release(*block);
}
cursor->latch_mode = latch_mode;
cursor->old_rec = nullptr;
- if (release_block) {
- mtr->release(*release_block);
- }
return false;
}
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc
index c61cdd9f604..300276ff3a6 100644
--- a/storage/innobase/btr/btr0sea.cc
+++ b/storage/innobase/btr/btr0sea.cc
@@ -1057,26 +1057,24 @@ btr_search_guess_on_hash(
index_id_t index_id;
ut_ad(mtr->is_active());
+ ut_ad(index->is_btree() || index->is_ibuf());
- if (!btr_search_enabled) {
+ /* Note that, for efficiency, the struct info may not be protected by
+ any latch here! */
+
+ if (latch_mode > BTR_MODIFY_LEAF
+ || !info->last_hash_succ || !info->n_hash_potential
+ || (tuple->info_bits & REC_INFO_MIN_REC_FLAG)) {
return false;
}
- ut_ad(!index->is_ibuf());
+ ut_ad(index->is_btree());
+ ut_ad(!index->table->is_temporary());
+
ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
compile_time_assert(ulint{BTR_SEARCH_LEAF} == ulint{RW_S_LATCH});
compile_time_assert(ulint{BTR_MODIFY_LEAF} == ulint{RW_X_LATCH});
- /* Not supported for spatial index */
- ut_ad(!dict_index_is_spatial(index));
-
- /* Note that, for efficiency, the struct info may not be protected by
- any latch here! */
-
- if (info->n_hash_potential == 0) {
- return false;
- }
-
cursor->n_fields = info->n_fields;
cursor->n_bytes = info->n_bytes;
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 9b8e843eab7..f87888d90da 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -2689,6 +2689,18 @@ re_evict:
&& mode != BUF_GET_IF_IN_POOL_OR_WATCH) {
} else if (!ibuf_debug || recv_recovery_is_on()) {
} else if (fil_space_t* space = fil_space_t::get(page_id.space())) {
+ for (ulint i = 0; i < mtr->get_savepoint(); i++) {
+ if (buf_block_t* b = mtr->block_at_savepoint(i)) {
+ if (b->page.oldest_modification() > 2
+ && b->page.lock.have_any()) {
+ /* We are holding a dirty page latch
+ that would hang buf_flush_sync(). */
+ space->release();
+ goto re_evict_fail;
+ }
+ }
+ }
+
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */
@@ -2730,9 +2742,9 @@ re_evict:
/* Failed to evict the page; change it directly */
}
+re_evict_fail:
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
- ut_ad(state > buf_page_t::FREED);
if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) {
goto ignore_block;
}
@@ -2788,8 +2800,7 @@ ibuf_merge_corrupted:
}
if (rw_latch == RW_X_LATCH) {
- mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
- goto got_latch;
+ goto get_latch_valid;
} else {
block->page.lock.x_unlock();
goto get_latch;
@@ -2797,12 +2808,10 @@ ibuf_merge_corrupted:
} else {
get_latch:
switch (rw_latch) {
- mtr_memo_type_t fix_type;
case RW_NO_LATCH:
mtr->memo_push(block, MTR_MEMO_BUF_FIX);
return block;
case RW_S_LATCH:
- fix_type = MTR_MEMO_PAGE_S_FIX;
block->page.lock.s_lock();
ut_ad(!block->page.is_read_fixed());
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
@@ -2811,13 +2820,12 @@ get_latch:
goto page_id_mismatch;
}
get_latch_valid:
- mtr->memo_push(block, fix_type);
+ mtr->memo_push(block, mtr_memo_type_t(rw_latch));
#ifdef BTR_CUR_HASH_ADAPT
btr_search_drop_page_hash_index(block, true);
#endif /* BTR_CUR_HASH_ADAPT */
break;
case RW_SX_LATCH:
- fix_type = MTR_MEMO_PAGE_SX_FIX;
block->page.lock.u_lock();
ut_ad(!block->page.is_io_fixed());
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
@@ -2827,7 +2835,6 @@ get_latch_valid:
goto get_latch_valid;
default:
ut_ad(rw_latch == RW_X_LATCH);
- fix_type = MTR_MEMO_PAGE_X_FIX;
if (block->page.lock.x_lock_upgraded()) {
ut_ad(block->page.id() == page_id);
block->unfix();
@@ -2840,7 +2847,6 @@ get_latch_valid:
goto get_latch_valid;
}
-got_latch:
ut_ad(page_id_t(page_get_space_id(block->page.frame),
page_get_page_no(block->page.frame))
== page_id);
@@ -3029,8 +3035,7 @@ bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block,
ut_ad(!block->page.is_read_fixed());
block->page.set_accessed();
buf_page_make_young_if_needed(&block->page);
- mtr->memo_push(block, rw_latch == RW_S_LATCH
- ? MTR_MEMO_PAGE_S_FIX : MTR_MEMO_PAGE_X_FIX);
+ mtr->memo_push(block, mtr_memo_type_t(rw_latch));
}
ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate());
diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc
index e2afe17f892..cce5f2f24d0 100644
--- a/storage/innobase/dict/dict0crea.cc
+++ b/storage/innobase/dict/dict0crea.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -725,7 +725,7 @@ dict_build_field_def_step(
}
/***************************************************************//**
-Creates an index tree for the index if it is not a member of a cluster.
+Creates an index tree for the index.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
@@ -758,9 +758,8 @@ dict_create_index_tree_step(
pcur.btr_cur.page_cur.index =
UT_LIST_GET_FIRST(dict_sys.sys_indexes->indexes);
- dberr_t err =
- btr_pcur_open(search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
- &pcur, 0, &mtr);
+ dberr_t err = btr_pcur_open(search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
+ &pcur, &mtr);
if (err != DB_SUCCESS) {
func_exit:
@@ -771,10 +770,25 @@ func_exit:
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
if (UNIV_UNLIKELY(btr_pcur_is_after_last_on_page(&pcur))) {
+corrupted:
err = DB_CORRUPTION;
goto func_exit;
}
+ ulint len;
+ byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur),
+ DICT_FLD__SYS_INDEXES__ID,
+ &len);
+ if (UNIV_UNLIKELY(len != 8 || mach_read_from_8(data) != index->id)) {
+ goto corrupted;
+ }
+
+ data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur),
+ DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
+ if (len != 4) {
+ goto corrupted;
+ }
+
if (index->is_readable()) {
index->set_modified(mtr);
@@ -787,11 +801,6 @@ func_exit:
err = DB_OUT_OF_FILE_SPACE; );
}
- ulint len;
- byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur),
- DICT_FLD__SYS_INDEXES__PAGE_NO,
- &len);
- ut_ad(len == 4);
mtr.write<4,mtr_t::MAYBE_NOP>(*btr_pcur_get_block(&pcur), data,
node->page_no);
goto func_exit;
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index e90dff03e16..d2fa8555e43 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -2,7 +2,7 @@
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -4149,8 +4149,7 @@ void dict_set_corrupted(dict_index_t *index, const char *ctx)
dict_index_copy_types(tuple, sys_index, 2);
cursor.page_cur.index = sys_index;
- if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_LE,
- BTR_MODIFY_LEAF, &cursor, &mtr)
+ if (cursor.search_leaf(tuple, PAGE_CUR_LE, BTR_MODIFY_LEAF, &mtr)
!= DB_SUCCESS) {
goto fail;
}
@@ -4225,8 +4224,7 @@ dict_index_set_merge_threshold(
dict_index_copy_types(tuple, sys_index, 2);
cursor.page_cur.index = sys_index;
- if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_GE,
- BTR_MODIFY_LEAF, &cursor, &mtr)
+ if (cursor.search_leaf(tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &mtr)
!= DB_SUCCESS) {
goto func_exit;
}
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index 1ee10ec8232..bd3bd71544a 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1319,7 +1319,7 @@ static dberr_t dict_load_columns(dict_table_t *table, unsigned use_uncommitted,
dict_index_copy_types(&tuple, sys_index, 1);
pcur.btr_cur.page_cur.index = sys_index;
- dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
+ dberr_t err = btr_pcur_open_on_user_rec(&tuple,
BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
@@ -1450,7 +1450,7 @@ dict_load_virtual_col(dict_table_t *table, bool uncommitted, ulint nth_v_col)
dict_index_copy_types(&tuple, sys_virtual_index, 2);
pcur.btr_cur.page_cur.index = sys_virtual_index;
- dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
+ dberr_t err = btr_pcur_open_on_user_rec(&tuple,
BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
@@ -1690,8 +1690,7 @@ static dberr_t dict_load_fields(dict_index_t *index, bool uncommitted,
dict_index_copy_types(&tuple, sys_index, 1);
pcur.btr_cur.page_cur.index = sys_index;
- dberr_t error = btr_pcur_open_on_user_rec(&tuple,
- PAGE_CUR_GE, BTR_SEARCH_LEAF,
+ dberr_t error = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF,
&pcur, &mtr);
if (error != DB_SUCCESS) {
goto func_exit;
@@ -1949,8 +1948,7 @@ dberr_t dict_load_indexes(dict_table_t *table, bool uncommitted,
dict_index_copy_types(&tuple, sys_index, 1);
pcur.btr_cur.page_cur.index = sys_index;
- dberr_t error = btr_pcur_open_on_user_rec(&tuple,
- PAGE_CUR_GE, BTR_SEARCH_LEAF,
+ dberr_t error = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF,
&pcur, &mtr);
if (error != DB_SUCCESS) {
goto func_exit;
@@ -2347,7 +2345,7 @@ static dict_table_t *dict_load_table_one(const span<const char> &name,
bool uncommitted = false;
reload:
mtr.start();
- dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
+ dberr_t err = btr_pcur_open_on_user_rec(&tuple,
BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS || !btr_pcur_is_on_user_rec(&pcur)) {
@@ -2605,8 +2603,7 @@ dict_load_table_on_id(
dict_table_t* table = nullptr;
- if (btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr)
+ if (btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr)
== DB_SUCCESS
&& btr_pcur_is_on_user_rec(&pcur)) {
/*---------------------------------------------------*/
@@ -2712,7 +2709,7 @@ static dberr_t dict_load_foreign_cols(dict_foreign_t *foreign, trx_id_t trx_id)
pcur.btr_cur.page_cur.index = sys_index;
mem_heap_t* heap = nullptr;
- dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
+ dberr_t err = btr_pcur_open_on_user_rec(&tuple,
BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
@@ -2889,7 +2886,7 @@ dict_load_foreign(
mtr.start();
mem_heap_t* heap = nullptr;
- dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
+ dberr_t err = btr_pcur_open_on_user_rec(&tuple,
BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS) {
goto err_exit;
@@ -3100,7 +3097,7 @@ start_load:
dict_index_copy_types(&tuple, sec_index, 1);
pcur.btr_cur.page_cur.index = sec_index;
- dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
+ dberr_t err = btr_pcur_open_on_user_rec(&tuple,
BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS) {
DBUG_RETURN(err);
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index 06d30515229..7f453b1d8e0 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1697,7 +1697,7 @@ static dberr_t page_cur_open_level(page_cur_t *page_cur, ulint level,
static dberr_t btr_pcur_open_level(btr_pcur_t *pcur, ulint level, mtr_t *mtr,
dict_index_t *index)
{
- pcur->latch_mode= BTR_SEARCH_TREE;
+ pcur->latch_mode= BTR_SEARCH_LEAF;
pcur->search_mode= PAGE_CUR_G;
pcur->pos_state= BTR_PCUR_IS_POSITIONED;
pcur->btr_cur.page_cur.index= index;
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 481a2dbce53..e482abeb848 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -1429,7 +1429,7 @@ inline void mtr_t::log_file_op(mfile_type_t type, uint32_t space_id,
ut_ad(strchr(path, '/'));
ut_ad(!strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD));
- flag_modified();
+ m_modifications= true;
if (!is_logged())
return;
m_last= nullptr;
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index 09583e157b7..d63febf01f1 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -122,15 +122,22 @@ MY_ATTRIBUTE((nonnull, warn_unused_result))
static buf_block_t *fsp_get_header(const fil_space_t *space, mtr_t *mtr,
dberr_t *err)
{
- buf_block_t *block= buf_page_get_gen(page_id_t(space->id, 0),
- space->zip_size(), RW_SX_LATCH,
- nullptr, BUF_GET_POSSIBLY_FREED,
- mtr, err);
- if (block && space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID +
- block->page.frame))
+ const page_id_t id{space->id, 0};
+ buf_block_t *block= mtr->get_already_latched(id, MTR_MEMO_PAGE_SX_FIX);
+ if (block)
+ *err= DB_SUCCESS;
+ else
{
- *err= DB_CORRUPTION;
- block= nullptr;
+ block= buf_page_get_gen(id, space->zip_size(), RW_SX_LATCH,
+ nullptr, BUF_GET_POSSIBLY_FREED,
+ mtr, err);
+ if (block &&
+ space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID +
+ block->page.frame))
+ {
+ *err= DB_CORRUPTION;
+ block= nullptr;
+ }
}
return block;
}
diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc
index 207d49abeba..8ca8681bce9 100644
--- a/storage/innobase/gis/gis0sea.cc
+++ b/storage/innobase/gis/gis0sea.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2016, 2018, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -44,7 +44,6 @@ Created 2014/01/16 Jimmy Yang
static
bool
rtr_cur_restore_position(
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /*!< in: detached persistent cursor */
ulint level, /*!< in: index level */
mtr_t* mtr); /*!< in: mtr */
@@ -74,6 +73,70 @@ rtr_adjust_parent_path(
}
}
+/** Latches the leaf page or pages requested.
+@param[in] block_savepoint mtr savepoint that holds the leaf page where the search converged
+@param[in] latch_mode BTR_SEARCH_LEAF, BTR_MODIFY_LEAF, BTR_MODIFY_TREE or BTR_CONT_MODIFY_TREE
+@param[in] cursor cursor; only cursor->index() is used here
+@param[in] mtr mini-transaction; asserted to hold an S, X or SX latch on the index */
+static void
+rtr_latch_leaves(
+ ulint block_savepoint,
+ btr_latch_mode latch_mode,
+ btr_cur_t* cursor,
+ mtr_t* mtr)
+{
+ compile_time_assert(int(MTR_MEMO_PAGE_S_FIX) == int(RW_S_LATCH));
+ compile_time_assert(int(MTR_MEMO_PAGE_X_FIX) == int(RW_X_LATCH));
+ compile_time_assert(int(MTR_MEMO_PAGE_SX_FIX) == int(RW_SX_LATCH));
+
+ buf_block_t* block = mtr->at_savepoint(block_savepoint);
+
+ ut_ad(block->page.id().space() == cursor->index()->table->space->id);
+ ut_ad(block->page.in_file());
+ ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock,
+ MTR_MEMO_S_LOCK
+ | MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
+
+ switch (latch_mode) {
+ uint32_t left_page_no;
+ uint32_t right_page_no;
+ default: /* BTR_CONT_MODIFY_TREE: no page latch is acquired here */
+ ut_ad(latch_mode == BTR_CONT_MODIFY_TREE);
+ break;
+ case BTR_MODIFY_TREE:
+ /* It is exclusive for other operations which call
+ btr_page_set_prev() */
+ ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock,
+ MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
+ /* x-latch also siblings from left to right */
+ left_page_no = btr_page_get_prev(block->page.frame);
+
+ if (left_page_no != FIL_NULL) {
+ btr_block_get(*cursor->index(), left_page_no, RW_X_LATCH,
+ true, mtr);
+ }
+
+ mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH); /* then the page at block_savepoint itself */
+
+ right_page_no = btr_page_get_next(block->page.frame);
+
+ if (right_page_no != FIL_NULL) {
+ btr_block_get(*cursor->index(), right_page_no,
+ RW_X_LATCH, true, mtr);
+ }
+ break;
+ case BTR_SEARCH_LEAF:
+ case BTR_MODIFY_LEAF:
+ rw_lock_type_t mode = /* these two latch modes equal the S and X page latch types (asserted below) */
+ rw_lock_type_t(latch_mode & (RW_X_LATCH | RW_S_LATCH));
+ static_assert(int{RW_S_LATCH} == int{BTR_SEARCH_LEAF}, "");
+ static_assert(int{RW_X_LATCH} == int{BTR_MODIFY_LEAF}, "");
+ mtr->upgrade_buffer_fix(block_savepoint, mode);
+ }
+}
+
/*************************************************************//**
Find the next matching record. This function is used by search
or record locating during index delete/update.
@@ -135,6 +198,7 @@ rtr_pcur_getnext_from_path(
&& (my_latch_mode | 4) == BTR_CONT_MODIFY_TREE;
if (!index_locked) {
+ ut_ad(mtr->is_empty());
mtr_s_lock_index(index, mtr);
} else {
ut_ad(mtr->memo_contains_flagged(&index->lock,
@@ -154,14 +218,12 @@ rtr_pcur_getnext_from_path(
node_seq_t path_ssn;
const page_t* page;
rw_lock_type_t rw_latch;
- ulint tree_idx;
mysql_mutex_lock(&rtr_info->rtr_path_mutex);
next_rec = rtr_info->path->back();
rtr_info->path->pop_back();
level = next_rec.level;
path_ssn = next_rec.seq_no;
- tree_idx = btr_cur->tree_height - level - 1;
/* Maintain the parent path info as well, if needed */
if (need_parent && !skip_parent && !new_split) {
@@ -223,37 +285,15 @@ rtr_pcur_getnext_from_path(
rw_latch = RW_X_LATCH;
}
- /* Release previous locked blocks */
- if (my_latch_mode != BTR_SEARCH_LEAF) {
- for (ulint idx = 0; idx < btr_cur->tree_height;
- idx++) {
- if (rtr_info->tree_blocks[idx]) {
- mtr_release_block_at_savepoint(
- mtr,
- rtr_info->tree_savepoints[idx],
- rtr_info->tree_blocks[idx]);
- rtr_info->tree_blocks[idx] = NULL;
- }
- }
- for (ulint idx = RTR_MAX_LEVELS; idx < RTR_MAX_LEVELS + 3;
- idx++) {
- if (rtr_info->tree_blocks[idx]) {
- mtr_release_block_at_savepoint(
- mtr,
- rtr_info->tree_savepoints[idx],
- rtr_info->tree_blocks[idx]);
- rtr_info->tree_blocks[idx] = NULL;
- }
- }
+ if (my_latch_mode == BTR_MODIFY_LEAF) {
+ mtr->rollback_to_savepoint(1);
}
- /* set up savepoint to record any locks to be taken */
- rtr_info->tree_savepoints[tree_idx] = mtr_set_savepoint(mtr);
-
ut_ad((my_latch_mode | 4) == BTR_CONT_MODIFY_TREE
|| !page_is_leaf(btr_cur_get_page(btr_cur))
|| !btr_cur->page_cur.block->page.lock.have_any());
+ const auto block_savepoint = mtr->get_savepoint();
block = buf_page_get_gen(
page_id_t(index->table->space_id,
next_rec.page_no), zip_size,
@@ -264,8 +304,6 @@ rtr_pcur_getnext_from_path(
break;
}
- rtr_info->tree_blocks[tree_idx] = block;
-
page = buf_block_get_frame(block);
page_ssn = page_get_ssn_id(page);
@@ -396,24 +434,23 @@ rtr_pcur_getnext_from_path(
if (found) {
if (level == target_level) {
- page_cur_t* r_cur;;
+ ut_ad(block
+ == mtr->at_savepoint(block_savepoint));
if (my_latch_mode == BTR_MODIFY_TREE
&& level == 0) {
ut_ad(rw_latch == RW_NO_LATCH);
- btr_cur_latch_leaves(
- block,
+ rtr_latch_leaves(
+ block_savepoint,
BTR_MODIFY_TREE,
btr_cur, mtr);
}
- r_cur = btr_cur_get_page_cur(btr_cur);
-
page_cur_position(
page_cur_get_rec(page_cursor),
page_cur_get_block(page_cursor),
- r_cur);
+ btr_cur_get_page_cur(btr_cur));
btr_cur->low_match = level != 0 ?
DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1
@@ -425,13 +462,7 @@ rtr_pcur_getnext_from_path(
last node just located */
skip_parent = true;
} else {
- /* Release latch on the current page */
- ut_ad(rtr_info->tree_blocks[tree_idx]);
-
- mtr_release_block_at_savepoint(
- mtr, rtr_info->tree_savepoints[tree_idx],
- rtr_info->tree_blocks[tree_idx]);
- rtr_info->tree_blocks[tree_idx] = NULL;
+ mtr->release_last_page();
}
} while (!rtr_info->path->empty());
@@ -509,50 +540,524 @@ static void rtr_compare_cursor_rec(const rec_t *rec, dict_index_t *index,
}
#endif
+/** Search a spatial index and position a tree cursor on the given level.
+The descent starts from the root page and follows node pointers until
+the requested level is reached.
+@param level the tree level to stop at (0 = leaf level)
+@param tuple search tuple; its MBR is saved in cur->rtr_info->mbr
+@param mode page cursor search mode (PAGE_CUR_LE or an R-tree search mode
+ when level != 0, as asserted in debug builds)
+@param latch_mode latch mode, possibly ORed with BTR_ALREADY_S_LATCHED
+@param cur tree cursor; cur->rtr_info must already be set up
+@param mtr mini-transaction
+@return the error code reported by buf_page_get_gen() for the last page
+fetched, or DB_CORRUPTION if page_cur_search_with_match() fails */
+TRANSACTIONAL_TARGET
+dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
+ page_cur_mode_t mode,
+ btr_latch_mode latch_mode,
+ btr_cur_t *cur, mtr_t *mtr)
+{
+ page_cur_mode_t page_mode;
+ page_cur_mode_t search_mode= PAGE_CUR_UNSUPP;
+
+ bool mbr_adj= false;
+ bool found= false;
+ dict_index_t *const index= cur->index();
+
+ mem_heap_t *heap= nullptr;
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs *offsets= offsets_;
+ rec_offs_init(offsets_);
+ ut_ad(level == 0 || mode == PAGE_CUR_LE || RTREE_SEARCH_MODE(mode));
+ ut_ad(dict_index_check_search_tuple(index, tuple));
+ ut_ad(dtuple_check_typed(tuple));
+ ut_ad(index->is_spatial());
+ ut_ad(index->page != FIL_NULL);
+
+ MEM_UNDEFINED(&cur->up_match, sizeof cur->up_match);
+ MEM_UNDEFINED(&cur->up_bytes, sizeof cur->up_bytes);
+ MEM_UNDEFINED(&cur->low_match, sizeof cur->low_match);
+ MEM_UNDEFINED(&cur->low_bytes, sizeof cur->low_bytes);
+ ut_d(cur->up_match= ULINT_UNDEFINED);
+ ut_d(cur->low_match= ULINT_UNDEFINED);
+
+ /* The caller may already hold the index latch; in that case it is
+ neither acquired nor released here. */
+ const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED;
+
+ ut_ad(!latch_by_caller
+ || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK
+ | MTR_MEMO_SX_LOCK));
+ latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+
+ ut_ad(!latch_by_caller || latch_mode == BTR_SEARCH_LEAF ||
+ latch_mode == BTR_MODIFY_LEAF);
+
+ cur->flag= BTR_CUR_BINARY;
+
+#ifndef BTR_CUR_ADAPT
+ buf_block_t *guess= nullptr;
+#else
+ btr_search_t *const info= btr_search_get_info(index);
+ buf_block_t *guess= info->root_guess;
+#endif
+
+ /* Store the position of the tree latch we push to mtr so that we
+ know how to release it when we have latched leaf node(s) */
+
+ const ulint savepoint= mtr->get_savepoint();
+
+ rw_lock_type_t upper_rw_latch, root_leaf_rw_latch= RW_NO_LATCH;
+
+ /* Acquire the index latch if needed, and choose the page latch type
+ for the upper levels (upper_rw_latch) and for a root page that is
+ also a leaf page (root_leaf_rw_latch). */
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ mtr_x_lock_index(index, mtr);
+ upper_rw_latch= root_leaf_rw_latch= RW_X_LATCH;
+ break;
+ case BTR_CONT_MODIFY_TREE:
+ ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK |
+ MTR_MEMO_SX_LOCK));
+ upper_rw_latch= RW_X_LATCH;
+ break;
+ default:
+ ut_ad(latch_mode != BTR_MODIFY_PREV);
+ ut_ad(latch_mode != BTR_SEARCH_PREV);
+ if (!latch_by_caller)
+ mtr_s_lock_index(index, mtr);
+ upper_rw_latch= root_leaf_rw_latch= RW_S_LATCH;
+ if (latch_mode == BTR_MODIFY_LEAF)
+ root_leaf_rw_latch= RW_X_LATCH;
+ }
+
+ auto root_savepoint= mtr->get_savepoint();
+ const ulint zip_size= index->table->space->zip_size();
+
+ /* Start with the root page. */
+ page_id_t page_id(index->table->space_id, index->page);
+
+ ulint up_match= 0, up_bytes= 0, low_match= 0, low_bytes= 0;
+ ulint height= ULINT_UNDEFINED;
+
+ /* We use these modified search modes on non-leaf levels of the
+ B-tree. These let us end up in the right B-tree leaf. In that leaf
+ we use the original search mode. */
+
+ switch (mode) {
+ case PAGE_CUR_GE:
+ page_mode= PAGE_CUR_L;
+ break;
+ case PAGE_CUR_G:
+ page_mode= PAGE_CUR_LE;
+ break;
+ default:
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
+ || RTREE_SEARCH_MODE(mode)
+ || mode == PAGE_CUR_LE_OR_EXTENDS);
+#else /* PAGE_CUR_LE_OR_EXTENDS */
+ ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
+ || RTREE_SEARCH_MODE(mode));
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+ page_mode= mode;
+ break;
+ }
+
+ /* Each pass through search_loop fetches the page identified by page_id
+ and either stops on it or descends to one of its children. */
+ search_loop:
+ auto buf_mode= BUF_GET;
+ ulint rw_latch= RW_NO_LATCH;
+
+ if (height)
+ {
+ /* We are about to fetch the root or a non-leaf page. */
+ if (latch_mode != BTR_MODIFY_TREE || height == level)
+ /* If we do not hold an SX or X latch on the index,
+ each page must be latched before reading. */
+ rw_latch= upper_rw_latch;
+ }
+ else if (latch_mode <= BTR_MODIFY_LEAF)
+ rw_latch= latch_mode;
+
+ dberr_t err;
+ auto block_savepoint= mtr->get_savepoint();
+ buf_block_t *block= buf_page_get_gen(page_id, zip_size, rw_latch, guess,
+ buf_mode, mtr, &err, false);
+ if (!block)
+ {
+ if (err == DB_DECRYPTION_FAILED)
+ btr_decryption_failed(*index);
+ /* Common exit: free the offsets heap (if any), publish the pending
+ mbr_adj flag and return err. */
+ func_exit:
+ if (UNIV_LIKELY_NULL(heap))
+ mem_heap_free(heap);
+
+ if (mbr_adj)
+ /* remember that we will need to adjust parent MBR */
+ cur->rtr_info->mbr_adj= true;
+
+ return err;
+ }
+
+ const page_t *page= buf_block_get_frame(block);
+#ifdef UNIV_ZIP_DEBUG
+ if (rw_latch != RW_NO_LATCH) {
+ const page_zip_des_t *page_zip= buf_block_get_page_zip(block);
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+ }
+#endif /* UNIV_ZIP_DEBUG */
+
+ ut_ad(fil_page_index_page_check(page));
+ ut_ad(index->id == btr_page_get_index_id(page));
+
+ /* The height is already known: this is not the root page. */
+ if (height != ULINT_UNDEFINED);
+ else if (page_is_leaf(page) &&
+ rw_latch != RW_NO_LATCH && rw_latch != root_leaf_rw_latch)
+ {
+ /* The root page is also a leaf page (root_leaf).
+ We should reacquire the page, because the root page
+ is latched differently from leaf pages. */
+ ut_ad(root_leaf_rw_latch != RW_NO_LATCH);
+ ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH);
+
+ ut_ad(block == mtr->at_savepoint(block_savepoint));
+ mtr->rollback_to_savepoint(block_savepoint);
+
+ upper_rw_latch= root_leaf_rw_latch;
+ goto search_loop;
+ }
+ else
+ {
+ /* We are in the root node */
+
+ height= btr_page_get_level(page);
+ cur->tree_height= height + 1;
+
+ ut_ad(cur->rtr_info);
+
+ /* If SSN in memory is not initialized, fetch it from root page */
+ if (!rtr_get_current_ssn_id(index))
+ /* FIXME: do this in dict_load_table_one() */
+ index->set_ssn(page_get_ssn_id(page) + 1);
+
+ /* Save the MBR */
+ cur->rtr_info->thr= cur->thr;
+ rtr_get_mbr_from_tuple(tuple, &cur->rtr_info->mbr);
+
+#ifdef BTR_CUR_ADAPT
+ info->root_guess= block;
+#endif
+ }
+
+ if (height == 0) {
+ /* The leaf page was fetched without a latch; latch it now. */
+ if (rw_latch == RW_NO_LATCH)
+ {
+ ut_ad(block == mtr->at_savepoint(block_savepoint));
+ rtr_latch_leaves(block_savepoint, latch_mode, cur, mtr);
+ }
+
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ case BTR_CONT_MODIFY_TREE:
+ break;
+ default:
+ if (!latch_by_caller)
+ {
+ /* Release the tree s-latch */
+ mtr->rollback_to_savepoint(savepoint,
+ savepoint + 1);
+ /* One memo entry was removed, so the later savepoints shift down */
+ block_savepoint--;
+ root_savepoint--;
+ }
+ /* release upper blocks */
+ if (savepoint < block_savepoint)
+ mtr->rollback_to_savepoint(savepoint, block_savepoint);
+ }
+
+ page_mode= mode;
+ }
+
+ /* Remember the page search mode */
+ search_mode= page_mode;
+
+ /* Some adjustment on search mode, when the page search mode is
+ PAGE_CUR_RTREE_LOCATE or PAGE_CUR_RTREE_INSERT, as we are searching
+ with MBRs. When it is not the target level, we should search all
+ sub-trees that "CONTAIN" the search range/MBR. When it is at the
+ target level, the search becomes PAGE_CUR_LE */
+
+ if (page_mode == PAGE_CUR_RTREE_INSERT)
+ {
+ page_mode= (level == height)
+ ? PAGE_CUR_LE
+ : PAGE_CUR_RTREE_INSERT;
+
+ ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE);
+ }
+ else if (page_mode == PAGE_CUR_RTREE_LOCATE && level == height)
+ page_mode= level == 0 ? PAGE_CUR_LE : PAGE_CUR_RTREE_GET_FATHER;
+
+ up_match= 0;
+ low_match= 0;
+
+ if (latch_mode == BTR_MODIFY_TREE || latch_mode == BTR_CONT_MODIFY_TREE)
+ /* The tree is locked; no page lock is needed to protect the "path" */
+ cur->rtr_info->need_page_lock= false;
+
+ cur->page_cur.block= block;
+
+ if (page_mode >= PAGE_CUR_CONTAIN)
+ {
+ found= rtr_cur_search_with_match(block, index, tuple, page_mode,
+ &cur->page_cur, cur->rtr_info);
+
+ /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */
+ if (search_mode == PAGE_CUR_RTREE_INSERT && cur->rtr_info->mbr_adj) {
+ static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), "");
+
+ if (!(latch_mode & 8))
+ /* The parent MBR needs to be updated; the caller should retry
+ with BTR_MODIFY_TREE */
+ goto func_exit;
+
+ cur->rtr_info->mbr_adj= false;
+ mbr_adj= true;
+ }
+
+ if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER)
+ cur->low_match= DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1;
+ }
+ else
+ {
+ /* Search for complete index fields. */
+ up_bytes= low_bytes= 0;
+ if (page_cur_search_with_match(tuple, page_mode, &up_match,
+ &low_match, &cur->page_cur, nullptr)) {
+ err= DB_CORRUPTION;
+ goto func_exit;
+ }
+ }
+
+ /* If this is the desired level, leave the loop */
+
+ ut_ad(height == btr_page_get_level(btr_cur_get_page(cur)));
+
+ /* Add Predicate lock if it is serializable isolation
+ and only if it is in the search case */
+ if (mode >= PAGE_CUR_CONTAIN && mode != PAGE_CUR_RTREE_INSERT &&
+ mode != PAGE_CUR_RTREE_LOCATE && cur->rtr_info->need_prdt_lock)
+ {
+ lock_prdt_t prdt;
+
+ {
+ trx_t* trx= thr_get_trx(cur->thr);
+ TMLockTrxGuard g{TMLockTrxArgs(*trx)};
+ lock_init_prdt_from_mbr(&prdt, &cur->rtr_info->mbr, mode,
+ trx->lock.lock_heap);
+ }
+
+ /* A non-leaf page fetched without a latch is S-latched only for
+ the duration of the lock_prdt_lock() call. */
+ if (rw_latch == RW_NO_LATCH && height != 0)
+ block->page.lock.s_lock();
+
+ lock_prdt_lock(block, &prdt, index, LOCK_S, LOCK_PREDICATE, cur->thr);
+
+ if (rw_latch == RW_NO_LATCH && height != 0)
+ block->page.lock.s_unlock();
+ }
+
+ if (level != height)
+ {
+ /* Not yet on the requested level: descend to the child page. */
+ ut_ad(height > 0);
+
+ height--;
+ guess= nullptr;
+
+ const rec_t *node_ptr= btr_cur_get_rec(cur);
+
+ offsets= rec_get_offsets(node_ptr, index, offsets, 0,
+ ULINT_UNDEFINED, &heap);
+
+ if (page_rec_is_supremum(node_ptr))
+ {
+ cur->low_match= 0;
+ cur->up_match= 0;
+ goto func_exit;
+ }
+
+ /* If we are doing insertion or record locating,
+ remember the tree nodes we visited */
+ if (page_mode == PAGE_CUR_RTREE_INSERT ||
+ (search_mode == PAGE_CUR_RTREE_LOCATE &&
+ latch_mode != BTR_MODIFY_LEAF))
+ {
+ const bool add_latch= latch_mode == BTR_MODIFY_TREE &&
+ rw_latch == RW_NO_LATCH;
+
+ if (add_latch)
+ {
+ ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK |
+ MTR_MEMO_SX_LOCK));
+ block->page.lock.s_lock();
+ }
+
+ /* Store the parent cursor location */
+ ut_d(auto num_stored=)
+ rtr_store_parent_path(block, cur, latch_mode, height + 1, mtr);
+
+ if (page_mode == PAGE_CUR_RTREE_INSERT)
+ {
+ btr_pcur_t *r_cursor= rtr_get_parent_cursor(cur, height + 1, true);
+ /* For an insertion, there should be only one parent on
+ each level traversed */
+ ut_ad(num_stored == 1);
+ node_ptr= btr_pcur_get_rec(r_cursor);
+ }
+
+ if (add_latch)
+ block->page.lock.s_unlock();
+
+ ut_ad(!page_rec_is_supremum(node_ptr));
+ }
+
+ ut_ad(page_mode == search_mode ||
+ (page_mode == PAGE_CUR_WITHIN &&
+ search_mode == PAGE_CUR_RTREE_LOCATE));
+ page_mode= search_mode;
+
+ if (height == level && latch_mode == BTR_MODIFY_TREE)
+ {
+ /* The next page is on the requested level: upgrade every memo
+ entry from root_savepoint onwards to RW_X_LATCH. */
+ ut_ad(upper_rw_latch == RW_X_LATCH);
+ for (auto i= root_savepoint, n= mtr->get_savepoint(); i < n; i++)
+ mtr->upgrade_buffer_fix(i, RW_X_LATCH);
+ }
+
+ /* Go to the child node */
+ page_id.set_page_no(btr_node_ptr_get_child_page_no(node_ptr, offsets));
+
+ if (page_mode >= PAGE_CUR_CONTAIN && page_mode != PAGE_CUR_RTREE_INSERT)
+ {
+ rtr_node_path_t *path= cur->rtr_info->path;
+
+ if (found && !path->empty())
+ {
+ /* The last path entry is the child we are about to visit;
+ remove it from the path. */
+ ut_ad(path->back().page_no == page_id.page_no());
+ path->pop_back();
+#ifdef UNIV_DEBUG
+ if (page_mode == PAGE_CUR_RTREE_LOCATE &&
+ latch_mode != BTR_MODIFY_LEAF)
+ {
+ btr_pcur_t* pcur= cur->rtr_info->parent_path->back().cursor;
+ rec_t *my_node_ptr= btr_pcur_get_rec(pcur);
+
+ offsets= rec_get_offsets(my_node_ptr, index, offsets,
+ 0, ULINT_UNDEFINED, &heap);
+
+ ut_ad(page_id.page_no() ==
+ btr_node_ptr_get_child_page_no(my_node_ptr, offsets));
+ }
+#endif
+ }
+ }
+
+ goto search_loop;
+ }
+
+ if (level)
+ {
+ /* The search stopped on a non-leaf level. */
+ /* NOTE(review): the latch_mode switch above never assigns
+ RW_NO_LATCH to upper_rw_latch, so confirm whether this branch is
+ still reachable. The block returned by btr_block_get() is not
+ used here. */
+ if (upper_rw_latch == RW_NO_LATCH)
+ {
+ ut_ad(latch_mode == BTR_CONT_MODIFY_TREE);
+ btr_block_get(*index, page_id.page_no(), RW_X_LATCH, false, mtr, &err);
+ }
+ else
+ {
+ ut_ad(mtr->memo_contains_flagged(block, upper_rw_latch));
+ ut_ad(!latch_by_caller);
+ }
+
+ if (page_mode <= PAGE_CUR_LE)
+ {
+ cur->low_match= low_match;
+ cur->up_match= up_match;
+ }
+ }
+ else
+ {
+ /* The search stopped on the leaf level: report the match counts. */
+ cur->low_match= low_match;
+ cur->low_bytes= low_bytes;
+ cur->up_match= up_match;
+ cur->up_bytes= up_bytes;
+
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE);
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ }
+
+ goto func_exit;
+}
+
+/** Search for a spatial index leaf page record.
+Thin wrapper that calls rtr_search_to_nth_level() with level 0.
+@param cur tree cursor
+@param tuple search tuple
+@param latch_mode latch mode, passed through unchanged
+@param mtr mini-transaction
+@param mode page cursor search mode, passed through unchanged
+@return the error code returned by rtr_search_to_nth_level() */
+dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple,
+ btr_latch_mode latch_mode,
+ mtr_t *mtr, page_cur_mode_t mode)
+{
+ return rtr_search_to_nth_level(0, tuple, mode, latch_mode, cur, mtr);
+}
+
+/** Search for a spatial index leaf page record.
+Sets up the persistent cursor state (BTR_SEARCH_LEAF latch mode, the given
+search mode, positioned, no known transaction) and then searches with the
+embedded tree cursor.
+@param pcur cursor
+@param tuple search tuple
+@param mode search mode; debug builds assert that it is one of
+ PAGE_CUR_CONTAIN, PAGE_CUR_INTERSECT, PAGE_CUR_WITHIN,
+ PAGE_CUR_DISJOINT or PAGE_CUR_MBR_EQUAL
+@param mtr mini-transaction
+@return the error code of the underlying rtr_search_leaf() call */
+dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple,
+ page_cur_mode_t mode, mtr_t *mtr)
+{
+#ifdef UNIV_DEBUG
+ /* Only the MBR comparison modes listed below are accepted. */
+ switch (mode) {
+ case PAGE_CUR_CONTAIN:
+ case PAGE_CUR_INTERSECT:
+ case PAGE_CUR_WITHIN:
+ case PAGE_CUR_DISJOINT:
+ case PAGE_CUR_MBR_EQUAL:
+ break;
+ default:
+ ut_ad("invalid mode" == 0);
+ }
+#endif
+ /* The cursor is marked as positioned before the search is performed,
+ regardless of the search result. */
+ pcur->latch_mode= BTR_SEARCH_LEAF;
+ pcur->search_mode= mode;
+ pcur->pos_state= BTR_PCUR_IS_POSITIONED;
+ pcur->trx_if_known= nullptr;
+ return rtr_search_leaf(&pcur->btr_cur, tuple, BTR_SEARCH_LEAF, mtr, mode);
+}
+
/**************************************************************//**
Initializes and opens a persistent cursor to an index tree. It should be
-closed with btr_pcur_close. Mainly called by row_search_index_entry() */
-bool
-rtr_pcur_open(
- dict_index_t* index, /*!< in: index */
+closed with btr_pcur_close. */
+bool rtr_search(
const dtuple_t* tuple, /*!< in: tuple on which search done */
- btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+ btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
mtr_t* mtr) /*!< in: mtr */
{
static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), "");
ut_ad(latch_mode & BTR_MODIFY_LEAF);
+ ut_ad(!(latch_mode & BTR_ALREADY_S_LATCHED));
+ ut_ad(mtr->is_empty());
/* Initialize the cursor */
btr_pcur_init(cursor);
cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
- cursor->search_mode = PAGE_CUR_RTREE_LOCATE;
- cursor->trx_if_known = NULL;
+ cursor->search_mode = PAGE_CUR_RTREE_LOCATE;
+ cursor->trx_if_known = nullptr;
+
+ if (latch_mode & 8) {
+ mtr_x_lock_index(cursor->index(), mtr);
+ } else {
+ latch_mode
+ = btr_latch_mode(latch_mode | BTR_ALREADY_S_LATCHED);
+ mtr_sx_lock_index(cursor->index(), mtr);
+ }
/* Search with the tree cursor */
btr_cur_t* btr_cursor = btr_pcur_get_btr_cur(cursor);
- btr_cursor->page_cur.index = index;
- btr_cursor->rtr_info = rtr_create_rtr_info(false, false,
- btr_cursor, index);
+ btr_cursor->rtr_info
+ = rtr_create_rtr_info(false, false,
+ btr_cursor, cursor->index());
- /* Purge will SX lock the tree instead of take Page Locks */
if (btr_cursor->thr) {
btr_cursor->rtr_info->need_page_lock = true;
btr_cursor->rtr_info->thr = btr_cursor->thr;
}
- if ((latch_mode & 8) && index->lock.have_u_not_x()) {
- index->lock.u_x_upgrade(SRW_LOCK_CALL);
- mtr->lock_upgrade(index->lock);
- }
-
- if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_RTREE_LOCATE,
- latch_mode,
- btr_cursor, mtr) != DB_SUCCESS) {
+ if (rtr_search_leaf(btr_cursor, tuple, latch_mode, mtr)
+ != DB_SUCCESS) {
return true;
}
@@ -560,7 +1065,8 @@ rtr_pcur_open(
const rec_t* rec = btr_pcur_get_rec(cursor);
- const bool d= rec_get_deleted_flag(rec, index->table->not_redundant());
+ const bool d= rec_get_deleted_flag(
+ rec, cursor->index()->table->not_redundant());
if (page_rec_is_infimum(rec)
|| btr_pcur_get_low_match(cursor) != dtuple_get_n_fields(tuple)
@@ -571,26 +1077,12 @@ rtr_pcur_open(
btr_cursor->rtr_info->fd_del = true;
btr_cursor->low_match = 0;
}
- /* Did not find matched row in first dive. Release
- latched block if any before search more pages */
- if (!(latch_mode & 8)) {
- ulint tree_idx = btr_cursor->tree_height - 1;
- rtr_info_t* rtr_info = btr_cursor->rtr_info;
-
- if (rtr_info->tree_blocks[tree_idx]) {
- mtr_release_block_at_savepoint(
- mtr,
- rtr_info->tree_savepoints[tree_idx],
- rtr_info->tree_blocks[tree_idx]);
- rtr_info->tree_blocks[tree_idx] = NULL;
- }
- }
+
+ mtr->rollback_to_savepoint(1);
if (!rtr_pcur_getnext_from_path(tuple, PAGE_CUR_RTREE_LOCATE,
btr_cursor, 0, latch_mode,
- latch_mode
- & (8 | BTR_ALREADY_S_LATCHED),
- mtr)) {
+ true, mtr)) {
return true;
}
@@ -598,6 +1090,10 @@ rtr_pcur_open(
== dtuple_get_n_fields(tuple));
}
+ if (!(latch_mode & 8)) {
+ mtr->rollback_to_savepoint(0, 1);
+ }
+
return false;
}
@@ -641,8 +1137,7 @@ static const rec_t* rtr_get_father_node(
if (sea_cur && sea_cur->tree_height > level) {
ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
| MTR_MEMO_SX_LOCK));
- if (rtr_cur_restore_position(BTR_CONT_MODIFY_TREE, sea_cur,
- level, mtr)) {
+ if (rtr_cur_restore_position(sea_cur, level, mtr)) {
btr_pcur_t* r_cursor = rtr_get_parent_cursor(
sea_cur, level, false);
@@ -668,9 +1163,8 @@ static const rec_t* rtr_get_father_node(
btr_cur->rtr_info = rtr_create_rtr_info(false, false, btr_cur, index);
- if (btr_cur_search_to_nth_level(level, tuple,
- PAGE_CUR_RTREE_LOCATE,
- BTR_CONT_MODIFY_TREE, btr_cur, mtr)
+ if (rtr_search_to_nth_level(level, tuple, PAGE_CUR_RTREE_LOCATE,
+ BTR_CONT_MODIFY_TREE, btr_cur, mtr)
!= DB_SUCCESS) {
} else if (sea_cur && sea_cur->tree_height == level) {
rec = btr_cur_get_rec(btr_cur);
@@ -729,9 +1223,8 @@ rtr_page_get_father_node_ptr(
page_no = btr_cur_get_block(cursor)->page.id().page_no();
index = btr_cur_get_index(cursor);
- ut_ad(srv_read_only_mode
- || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
+ ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
ut_ad(dict_index_get_page(index) != page_no);
@@ -879,32 +1372,10 @@ rtr_init_rtr_info(
if (!reinit) {
/* Reset all members. */
- rtr_info->path = NULL;
- rtr_info->parent_path = NULL;
- rtr_info->matches = NULL;
-
+ memset(rtr_info, 0, sizeof *rtr_info);
+ static_assert(PAGE_CUR_UNSUPP == 0, "compatibility");
mysql_mutex_init(rtr_path_mutex_key, &rtr_info->rtr_path_mutex,
nullptr);
-
- memset(rtr_info->tree_blocks, 0x0,
- sizeof(rtr_info->tree_blocks));
- memset(rtr_info->tree_savepoints, 0x0,
- sizeof(rtr_info->tree_savepoints));
- rtr_info->mbr.xmin = 0.0;
- rtr_info->mbr.xmax = 0.0;
- rtr_info->mbr.ymin = 0.0;
- rtr_info->mbr.ymax = 0.0;
- rtr_info->thr = NULL;
- rtr_info->heap = NULL;
- rtr_info->cursor = NULL;
- rtr_info->index = NULL;
- rtr_info->need_prdt_lock = false;
- rtr_info->need_page_lock = false;
- rtr_info->allocated = false;
- rtr_info->mbr_adj = false;
- rtr_info->fd_del = false;
- rtr_info->search_tuple = NULL;
- rtr_info->search_mode = PAGE_CUR_UNSUPP;
}
ut_ad(!rtr_info->matches || rtr_info->matches->matched_recs->empty());
@@ -1130,7 +1601,6 @@ struct optimistic_get
static
bool
rtr_cur_restore_position(
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* btr_cur, /*!< in: detached persistent cursor */
ulint level, /*!< in: index level */
mtr_t* mtr) /*!< in: mtr */
@@ -1158,8 +1628,6 @@ rtr_cur_restore_position(
r_cursor->modify_clock = 100;
);
- ut_ad(latch_mode == BTR_CONT_MODIFY_TREE);
-
if (r_cursor->block_when_stored.run_with_hint(
optimistic_get(r_cursor, mtr))) {
ut_ad(r_cursor->pos_state == BTR_PCUR_IS_POSITIONED);
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index daeb2df64b7..8542ae793e9 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -1543,8 +1543,7 @@ static void innodb_drop_database(handlerton*, char *path)
mtr_t mtr;
mtr.start();
pcur.btr_cur.page_cur.index = sys_index;
- err= btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
+ err= btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS)
goto err_exit;
@@ -7977,6 +7976,7 @@ report_error:
#ifdef WITH_WSREP
if (!error_result && trx->is_wsrep()
+ && !trx->is_bulk_insert()
&& wsrep_thd_is_local(m_user_thd)
&& !wsrep_thd_ignore_table(m_user_thd)
&& !wsrep_consistency_check(m_user_thd)
@@ -10080,6 +10080,8 @@ wsrep_append_key(
(shared, exclusive, semi...) */
)
{
+ ut_ad(!trx->is_bulk_insert());
+
DBUG_ENTER("wsrep_append_key");
DBUG_PRINT("enter",
("thd: %lu trx: %lld", thd_get_thread_id(thd),
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 91c1ff53d2d..4cd1505e0d8 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -6097,7 +6097,8 @@ func_exit:
que_thr_t* thr = pars_complete_graph_for_exec(
NULL, trx, ctx->heap, NULL);
- const bool is_root = block->page.id().page_no() == index->page;
+ page_id_t id{block->page.id()};
+ const bool is_root = id.page_no() == index->page;
if (rec_is_metadata(rec, *index)) {
ut_ad(page_rec_is_user_rec(rec));
@@ -6114,8 +6115,10 @@ func_exit:
}
/* Ensure that the root page is in the correct format. */
- buf_block_t* root = btr_root_block_get(index, RW_X_LATCH,
- &mtr, &err);
+ id.set_page_no(index->page);
+ buf_block_t* root = mtr.get_already_latched(
+ id, MTR_MEMO_PAGE_SX_FIX);
+
if (UNIV_UNLIKELY(!root)) {
goto func_exit;
}
@@ -11293,7 +11296,8 @@ err_index:
}
DBUG_EXECUTE_IF("stats_lock_fail",
- error = DB_LOCK_WAIT_TIMEOUT;);
+ error = DB_LOCK_WAIT_TIMEOUT;
+ trx_rollback_for_mysql(trx););
if (error == DB_SUCCESS) {
error = lock_sys_tables(trx);
@@ -11311,6 +11315,18 @@ err_index:
if (fts_exist) {
purge_sys.resume_FTS();
}
+
+ if (trx->state == TRX_STATE_NOT_STARTED) {
+ /* Transaction may have been rolled back
+ due to a lock wait timeout, deadlock,
+ or a KILL statement. So restart the
+ transaction to remove the newly created
+ table or index stubs from data dictionary
+ and table cache in
+ rollback_inplace_alter_table() */
+ trx_start_for_ddl(trx);
+ }
+
DBUG_RETURN(true);
}
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index 66e330a0592..e55835425be 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -2298,7 +2298,7 @@ loop:
btr_pcur_t pcur;
pcur.btr_cur.page_cur.index= ibuf.index;
ibuf_mtr_start(&mtr);
- if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, 0, &mtr))
+ if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, &mtr))
goto func_exit;
if (!btr_pcur_is_on_user_rec(&pcur))
{
@@ -2494,8 +2494,8 @@ ibuf_merge_space(
/* Position the cursor on the first matching record. */
pcur.btr_cur.page_cur.index = ibuf.index;
- dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, 0, &mtr);
+ dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF,
+ &pcur, &mtr);
ut_ad(err != DB_SUCCESS || page_validate(btr_pcur_get_page(&pcur),
ibuf.index));
@@ -3239,7 +3239,7 @@ ibuf_insert_low(
ibuf_mtr_start(&mtr);
pcur.btr_cur.page_cur.index = ibuf.index;
- err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, 0, &mtr);
+ err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
if (err != DB_SUCCESS) {
func_exit:
ibuf_mtr_commit(&mtr);
@@ -3956,8 +3956,6 @@ ibuf_restore_pos(
position is to be restored */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_PURGE_TREE);
-
if (UNIV_LIKELY(pcur->restore_position(mode, mtr) ==
btr_pcur_t::SAME_ALL)) {
return true;
@@ -4038,12 +4036,11 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur,
ibuf_mtr_start(mtr);
mysql_mutex_lock(&ibuf_mutex);
+ mtr_x_lock_index(ibuf.index, mtr);
- if (!ibuf_restore_pos(page_id, search_tuple, BTR_PURGE_TREE,
- pcur, mtr)) {
-
+ if (!ibuf_restore_pos(page_id, search_tuple,
+ BTR_PURGE_TREE_ALREADY_LATCHED, pcur, mtr)) {
mysql_mutex_unlock(&ibuf_mutex);
- ut_ad(mtr->has_committed());
goto func_exit;
}
@@ -4054,13 +4051,10 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur,
ut_a(err == DB_SUCCESS);
ibuf_size_update(ibuf_root->page.frame);
- mysql_mutex_unlock(&ibuf_mutex);
-
ibuf.empty = page_is_empty(ibuf_root->page.frame);
- } else {
- mysql_mutex_unlock(&ibuf_mutex);
}
+ mysql_mutex_unlock(&ibuf_mutex);
ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
func_exit:
@@ -4238,7 +4232,7 @@ loop:
/* Position pcur in the insert buffer at the first entry for this
index page */
- if (btr_pcur_open_on_user_rec(search_tuple, PAGE_CUR_GE,
+ if (btr_pcur_open_on_user_rec(search_tuple,
BTR_MODIFY_LEAF, &pcur, &mtr)
!= DB_SUCCESS) {
err = DB_CORRUPTION;
@@ -4455,7 +4449,7 @@ loop:
/* Position pcur in the insert buffer at the first entry for the
space */
- if (btr_pcur_open_on_user_rec(&search_tuple, PAGE_CUR_GE,
+ if (btr_pcur_open_on_user_rec(&search_tuple,
BTR_MODIFY_LEAF, &pcur, &mtr)
!= DB_SUCCESS) {
goto leave_loop;
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index a2aa46b62da..a1cc10b05db 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2022, MariaDB Corporation.
+Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -211,13 +211,12 @@ btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset = false)
@param[in,out] mtr mini-transaction */
void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr);
-ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result))
+ATTRIBUTE_COLD __attribute__((nonnull))
/** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE.
@param[in] index clustered index with instant ALTER TABLE
@param[in] all whether to reset FIL_PAGE_TYPE as well
-@param[in,out] mtr mini-transaction
-@return error code */
-dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr);
+@param[in,out] mtr mini-transaction */
+void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr);
/*************************************************************//**
Makes tree one level higher by splitting the root, and inserts
@@ -241,7 +240,7 @@ btr_root_raise_and_insert(
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr, /*!< in: mtr */
dberr_t* err) /*!< out: error code */
- MY_ATTRIBUTE((warn_unused_result));
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
/*************************************************************//**
Reorganizes an index page.
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 49bc8a4ff1b..f6abc9f5e52 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -63,12 +63,6 @@ enum {
BTR_KEEP_IBUF_BITMAP = 32
};
-/* btr_cur_latch_leaves() returns latched blocks and savepoints. */
-struct btr_latch_leaves_t {
- buf_block_t* blocks[3];
- ulint savepoints[3];
-};
-
#include "que0types.h"
#include "row0types.h"
@@ -126,51 +120,28 @@ bool
btr_cur_instant_root_init(dict_index_t* index, const page_t* page)
ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result));
-/** Optimistically latches the leaf page or pages requested.
-@param[in] block guessed buffer block
-@param[in] modify_clock modify clock value
-@param[in,out] latch_mode BTR_SEARCH_LEAF, ...
-@param[in,out] cursor cursor
-@param[in] mtr mini-transaction
-@return true if success */
-bool
-btr_cur_optimistic_latch_leaves(
- buf_block_t* block,
- ib_uint64_t modify_clock,
- btr_latch_mode* latch_mode,
- btr_cur_t* cursor,
- mtr_t* mtr);
-
MY_ATTRIBUTE((warn_unused_result))
-/** Searches an index tree and positions a tree cursor on a given level.
+/********************************************************************//**
+Searches an index tree and positions a tree cursor on a given non-leaf level.
NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will a have a sensible value.
+Cursor is left at the place where an insert of the
+search tuple should be performed in the B-tree. InnoDB does an insert
+immediately after the cursor. Thus, the cursor may end up on a user record,
+or on a page infimum record.
@param level the tree level of search
@param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that
it cannot get compared to the node ptr page number field!
-@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a
- unique prefix of a record, mode should be PAGE_CUR_LE, not
- PAGE_CUR_GE, as the latter may end up on the previous page of
- the record! Inserts should always be made using PAGE_CUR_LE
- to search the position!
-@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT,
- BTR_DELETE_MARK, or BTR_DELETE;
- cursor->left_block is used to store a pointer to the left
- neighbor page
+@param latch RW_S_LATCH or RW_X_LATCH
@param cursor tree cursor; the cursor page is s- or x-latched, but see also
above!
@param mtr mini-transaction
-@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none)
@return DB_SUCCESS on success or error code otherwise */
dberr_t btr_cur_search_to_nth_level(ulint level,
const dtuple_t *tuple,
- page_cur_mode_t mode,
- btr_latch_mode latch_mode,
- btr_cur_t *cursor, mtr_t *mtr,
- ib_uint64_t autoinc= 0);
+ rw_lock_type_t rw_latch,
+ btr_cur_t *cursor, mtr_t *mtr);
/*************************************************************//**
Tries to perform an insert to a page in an index tree, next to cursor.
@@ -653,20 +624,6 @@ btr_rec_copy_externally_stored_field(
ulint* len,
mem_heap_t* heap);
-/** Latches the leaf page or pages requested.
-@param[in] block leaf page where the search converged
-@param[in] latch_mode BTR_SEARCH_LEAF, ...
-@param[in] cursor cursor
-@param[in,out] mtr mini-transaction
-@param[out] latch_leaves latched blocks and savepoints */
-void
-btr_cur_latch_leaves(
- buf_block_t* block,
- btr_latch_mode latch_mode,
- btr_cur_t* cursor,
- mtr_t* mtr,
- btr_latch_leaves_t* latch_leaves = nullptr);
-
/*######################################################################*/
/** In the pessimistic delete, if the page data size drops below this
@@ -727,21 +684,16 @@ to know struct size! */
struct btr_cur_t {
page_cur_t page_cur; /*!< page cursor */
purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */
- buf_block_t* left_block; /*!< this field is used to store
- a pointer to the left neighbor
- page, in the cases
- BTR_SEARCH_PREV and
- BTR_MODIFY_PREV */
/*------------------------------*/
que_thr_t* thr; /*!< this field is only used
- when btr_cur_search_to_nth_level
+ when search_leaf()
is called for an index entry
insertion: the calling query
thread is passed here to be
used in the insert buffer */
/*------------------------------*/
/** The following fields are used in
- btr_cur_search_to_nth_level to pass information: */
+ search_leaf() to pass information: */
/* @{ */
enum btr_cur_method flag; /*!< Search method used */
ulint tree_height; /*!< Tree height if the search is done
@@ -750,8 +702,7 @@ struct btr_cur_t {
ulint up_match; /*!< If the search mode was PAGE_CUR_LE,
the number of matched fields to the
first user record to the right of
- the cursor record after
- btr_cur_search_to_nth_level;
+ the cursor record after search_leaf();
for the mode PAGE_CUR_GE, the matched
fields to the first user record AT THE
CURSOR or to the right of it;
@@ -768,8 +719,7 @@ struct btr_cur_t {
ulint low_match; /*!< if search mode was PAGE_CUR_LE,
the number of matched fields to the
first user record AT THE CURSOR or
- to the left of it after
- btr_cur_search_to_nth_level;
+ to the left of it after search_leaf();
NOT defined for PAGE_CUR_GE or any
other search modes; see also the NOTE
in up_match! */
@@ -803,6 +753,24 @@ struct btr_cur_t {
dberr_t open_leaf(bool first, dict_index_t *index, btr_latch_mode latch_mode,
mtr_t *mtr);
+ /** Search the leaf page record corresponding to a key.
+ @param tuple key to search for, with correct n_fields_cmp
+ @param mode search mode; PAGE_CUR_LE for unique prefix or for inserting
+ @param latch_mode latch mode
+ @param mtr mini-transaction
+ @return error code */
+ dberr_t search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
+ btr_latch_mode latch_mode, mtr_t *mtr);
+
+ /** Search the leaf page record corresponding to a key, exclusively latching
+ all sibling pages on the way.
+ @param tuple key to search for, with correct n_fields_cmp
+ @param mode search mode; PAGE_CUR_LE for unique prefix or for inserting
+ @param mtr mini-transaction
+ @return error code */
+ dberr_t pessimistic_search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
+ mtr_t *mtr);
+
/** Open the cursor at a random leaf page record.
@param offsets temporary memory for rec_get_offsets()
@param heap memory heap for rec_get_offsets()
@@ -862,14 +830,14 @@ inherited external field. */
#define BTR_EXTERN_INHERITED_FLAG 64U
#ifdef BTR_CUR_HASH_ADAPT
-/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
+/** Number of searches down the B-tree in btr_cur_t::search_leaf(). */
extern ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_non_sea;
/** Old value of btr_cur_n_non_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_non_sea_old;
/** Number of successful adaptive hash index lookups in
-btr_cur_search_to_nth_level(). */
+btr_cur_t::search_leaf(). */
extern ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_sea;
/** Old value of btr_cur_n_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
index cd8eacdc212..c66a3bfa329 100644
--- a/storage/innobase/include/btr0pcur.h
+++ b/storage/innobase/include/btr0pcur.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -70,24 +70,6 @@ btr_pcur_init(
/*==========*/
btr_pcur_t* pcur); /*!< in: persistent cursor */
-/**************************************************************//**
-Initializes and opens a persistent cursor to an index tree. */
-inline
-dberr_t
-btr_pcur_open(
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page from the
- record! */
- btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written
- (0 if none) */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Opens a persistent cursor to an index tree without initializing the
cursor.
@param tuple tuple on which search done
@@ -100,8 +82,7 @@ cursor.
@param mtr mini-transaction
@return DB_SUCCESS on success or error code otherwise. */
inline
-dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple,
- page_cur_mode_t mode,
+dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode,
btr_latch_mode latch_mode,
btr_pcur_t *cursor, mtr_t *mtr);
@@ -356,7 +337,7 @@ struct btr_pcur_t
/** the modify clock value of the buffer block when the cursor position
was stored */
ib_uint64_t modify_clock= 0;
- /** btr_pcur_store_position() and btr_pcur_restore_position() state. */
+ /** btr_pcur_store_position() and restore_position() state. */
enum pcur_pos_t pos_state= BTR_PCUR_NOT_POSITIONED;
page_cur_mode_t search_mode= PAGE_CUR_UNSUPP;
/** the transaction, if we know it; otherwise this field is not defined;
@@ -383,8 +364,8 @@ struct btr_pcur_t
supremum.
(4) cursor was positioned before the first or after the last in an
empty tree: restores to before first or after the last in the tree.
- @param restore_latch_mode BTR_SEARCH_LEAF, ...
- @param mtr mtr
+ @param latch_mode BTR_SEARCH_LEAF, ...
+ @param mtr mini-transaction
@retval SAME_ALL cursor position on user rec and points on
the record with the same field values as in the stored record,
@retval SAME_UNIQ cursor position is on user rec and points on the
@@ -409,8 +390,7 @@ struct btr_pcur_t
pos_state= BTR_PCUR_IS_POSITIONED;
old_rec= nullptr;
- return btr_cur.open_leaf(first, index,
- BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode), mtr);
+ return btr_cur.open_leaf(first, index, this->latch_mode, mtr);
}
};
@@ -433,6 +413,24 @@ inline rec_t *btr_pcur_get_rec(const btr_pcur_t *cursor)
return cursor->btr_cur.page_cur.rec;
}
+/**************************************************************//**
+Initializes and opens a persistent cursor to an index tree. */
+inline
+dberr_t
+btr_pcur_open(
+ const dtuple_t* tuple, /*!< in: tuple on which search done */
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_LE, ... */
+ btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+ cursor->search_mode= mode;
+ cursor->pos_state= BTR_PCUR_IS_POSITIONED;
+ cursor->trx_if_known= nullptr;
+ return cursor->btr_cur.search_leaf(tuple, mode, latch_mode, mtr);
+}
+
/** Open a cursor on the first user record satisfying the search condition;
in case of no match, after the last index record. */
MY_ATTRIBUTE((nonnull, warn_unused_result))
@@ -440,16 +438,15 @@ inline
dberr_t
btr_pcur_open_on_user_rec(
const dtuple_t* tuple, /*!< in: tuple on which search done */
- page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... */
btr_latch_mode latch_mode, /*!< in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent
cursor */
mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(mode == PAGE_CUR_GE || mode == PAGE_CUR_G);
ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
- if (dberr_t err= btr_pcur_open(tuple, mode, latch_mode, cursor, 0, mtr))
+ if (dberr_t err=
+ btr_pcur_open(tuple, PAGE_CUR_GE, latch_mode, cursor, mtr))
return err;
if (!btr_pcur_is_after_last_on_page(cursor) ||
btr_pcur_is_after_last_in_tree(cursor))
diff --git a/storage/innobase/include/btr0pcur.inl b/storage/innobase/include/btr0pcur.inl
index 551f8f20fca..b827d70dc47 100644
--- a/storage/innobase/include/btr0pcur.inl
+++ b/storage/innobase/include/btr0pcur.inl
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -299,38 +299,10 @@ btr_pcur_init(
pcur->btr_cur.rtr_info = NULL;
}
-/**************************************************************//**
-Initializes and opens a persistent cursor to an index tree. */
-inline
-dberr_t
-btr_pcur_open(
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page from the
- record! */
- btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written
- (0 if none) */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(!cursor->index()->is_spatial());
- cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
- cursor->search_mode= mode;
- cursor->pos_state= BTR_PCUR_IS_POSITIONED;
- cursor->trx_if_known= nullptr;
- return btr_cur_search_to_nth_level(0, tuple, mode, latch_mode,
- btr_pcur_get_btr_cur(cursor),
- mtr, autoinc);
-}
-
/** Opens a persistent cursor to an index tree without initializing the
cursor.
@param tuple tuple on which search done
-@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a
+@param mode search mode; NOTE that if the search is made using a
unique prefix of a record, mode should be PAGE_CUR_LE, not
PAGE_CUR_GE, as the latter may end up on the previous page of
the record!
@@ -339,8 +311,7 @@ cursor.
@param mtr mini-transaction
@return DB_SUCCESS on success or error code otherwise. */
inline
-dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple,
- page_cur_mode_t mode,
+dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode,
btr_latch_mode latch_mode,
btr_pcur_t *cursor, mtr_t *mtr)
{
@@ -348,10 +319,7 @@ dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple,
cursor->search_mode= mode;
cursor->pos_state= BTR_PCUR_IS_POSITIONED;
cursor->trx_if_known= nullptr;
-
- /* Search with the tree cursor */
- return btr_cur_search_to_nth_level(0, tuple, mode, latch_mode,
- btr_pcur_get_btr_cur(cursor), mtr);
+ return cursor->btr_cur.search_leaf(tuple, mode, latch_mode, mtr);
}
/**************************************************************//**
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
index 6118bfbc128..912c022c64f 100644
--- a/storage/innobase/include/btr0types.h
+++ b/storage/innobase/include/btr0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2022, MariaDB Corporation.
+Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -55,25 +55,26 @@ in the index record. */
#define BTR_EXTERN_LOCAL_STORED_MAX_SIZE \
(BTR_EXTERN_FIELD_REF_SIZE * 2)
-/** Latching modes for btr_cur_search_to_nth_level(). */
+/** Latching modes for btr_cur_t::search_leaf(). */
enum btr_latch_mode {
/** Search a record on a leaf page and S-latch it. */
BTR_SEARCH_LEAF = RW_S_LATCH,
/** (Prepare to) modify a record on a leaf page and X-latch it. */
BTR_MODIFY_LEAF = RW_X_LATCH,
+ /** U-latch root and X-latch a leaf page */
+ BTR_MODIFY_ROOT_AND_LEAF = RW_SX_LATCH,
/** Obtain no latches. */
BTR_NO_LATCHES = RW_NO_LATCH,
- /** Search the previous record. */
+ /** Search the previous record.
+ Used in btr_pcur_move_backward_from_page(). */
BTR_SEARCH_PREV = 4 | BTR_SEARCH_LEAF,
- /** Modify the previous record. */
+ /** Modify the previous record.
+ Used in btr_pcur_move_backward_from_page() and ibuf_insert(). */
BTR_MODIFY_PREV = 4 | BTR_MODIFY_LEAF,
- /** Start searching the entire B-tree. */
- BTR_SEARCH_TREE = 8 | BTR_SEARCH_LEAF,
- /** Start modifying1 the entire B-tree. */
+ /** Start modifying the entire B-tree. */
BTR_MODIFY_TREE = 8 | BTR_MODIFY_LEAF,
- /** Continue searching the entire B-tree. */
- BTR_CONT_SEARCH_TREE = 4 | BTR_SEARCH_TREE,
- /** Continue modifying the entire B-tree. */
+ /** Continue modifying the entire R-tree.
+ Only used by rtr_search_to_nth_level(). */
BTR_CONT_MODIFY_TREE = 4 | BTR_MODIFY_TREE,
/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually
@@ -98,14 +99,14 @@ enum btr_latch_mode {
dict_index_t::lock S-latch is being held. */
BTR_SEARCH_LEAF_ALREADY_S_LATCHED = BTR_SEARCH_LEAF
| BTR_ALREADY_S_LATCHED,
- /** Search the entire index tree, assuming that the
- dict_index_t::lock S-latch is being held. */
- BTR_SEARCH_TREE_ALREADY_S_LATCHED = BTR_SEARCH_TREE
- | BTR_ALREADY_S_LATCHED,
/** Search and X-latch a leaf page, assuming that the
dict_index_t::lock is being held in non-exclusive mode. */
BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF
| BTR_ALREADY_S_LATCHED,
+ /** U-latch root and X-latch a leaf page, assuming that
+ dict_index_t::lock is being held in U mode. */
+ BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF
+ | BTR_ALREADY_S_LATCHED,
/** Attempt to delete-mark a secondary index record. */
BTR_DELETE_MARK_LEAF = BTR_MODIFY_LEAF | BTR_DELETE_MARK,
@@ -132,6 +133,9 @@ enum btr_latch_mode {
/** Attempt to delete a record in the tree. */
BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
+ /** Attempt to delete a record in an x-latched tree. */
+ BTR_PURGE_TREE_ALREADY_LATCHED = BTR_PURGE_TREE
+ | BTR_ALREADY_S_LATCHED,
/** Attempt to insert a record into the tree. */
BTR_INSERT_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_INSERT,
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 24571483d05..5eb245b5d95 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -787,7 +787,7 @@ public:
{
ut_ad(fsp_is_system_temporary(id().space()));
ut_ad(in_file());
- ut_ad(!oldest_modification() || oldest_modification() == 2);
+ ut_ad((oldest_modification() | 2) == 2);
oldest_modification_= 2;
}
diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h
index 777f2432c93..b07261ce042 100644
--- a/storage/innobase/include/gis0rtree.h
+++ b/storage/innobase/include/gis0rtree.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -59,6 +59,44 @@ Created 2013/03/27 Jimmy Yang and Allen Lai
/* Geometry data header */
#define GEO_DATA_HEADER_SIZE 4
+
+/** Search for a spatial index leaf page record.
+@param cur cursor
+@param tuple search tuple
+@param latch_mode latching mode
+@param mtr mini-transaction
+@param mode search mode */
+dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple,
+ btr_latch_mode latch_mode, mtr_t *mtr,
+ page_cur_mode_t mode= PAGE_CUR_RTREE_LOCATE)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Search for inserting a spatial index leaf page record.
+@param cur cursor
+@param tuple search tuple
+@param latch_mode latching mode
+@param mtr mini-transaction */
+inline dberr_t rtr_insert_leaf(btr_cur_t *cur, const dtuple_t *tuple,
+ btr_latch_mode latch_mode, mtr_t *mtr)
+{
+ return rtr_search_leaf(cur, tuple, latch_mode, mtr, PAGE_CUR_RTREE_INSERT);
+}
+
+/** Search for a spatial index leaf page record.
+@param pcur cursor
+@param tuple search tuple
+@param mode search mode
+@param mtr mini-transaction */
+dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple,
+ page_cur_mode_t mode, mtr_t *mtr)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
+ page_cur_mode_t mode,
+ btr_latch_mode latch_mode,
+ btr_cur_t *cur, mtr_t *mtr)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/**********************************************************************//**
Builds a Rtree node pointer out of a physical record and a page number.
@return own: node pointer */
@@ -295,11 +333,9 @@ rtr_store_parent_path(
/**************************************************************//**
Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
-bool
-rtr_pcur_open(
- dict_index_t* index, /*!< in: index */
+bool rtr_search(
const dtuple_t* tuple, /*!< in: tuple on which search done */
- btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+ btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((warn_unused_result));
diff --git a/storage/innobase/include/gis0type.h b/storage/innobase/include/gis0type.h
index 4fccfdb6c26..d6a4ef67a38 100644
--- a/storage/innobase/include/gis0type.h
+++ b/storage/innobase/include/gis0type.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2020, MariaDB Corporation.
+Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -105,12 +105,6 @@ typedef struct rtr_info{
matched_rec_t* matches;/*!< struct holding matching leaf records */
mysql_mutex_t rtr_path_mutex;
/*!< mutex protect the "path" vector */
- buf_block_t* tree_blocks[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM];
- /*!< tracking pages that would be locked
- at leaf level, for future free */
- ulint tree_savepoints[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM];
- /*!< savepoint used to release latches/blocks
- on each level and leaf level */
rtr_mbr_t mbr; /*!< the search MBR */
que_thr_t* thr; /*!< the search thread */
mem_heap_t* heap; /*!< memory heap */
diff --git a/storage/innobase/include/ibuf0ibuf.inl b/storage/innobase/include/ibuf0ibuf.inl
index 12aa1ca6837..003bf22a047 100644
--- a/storage/innobase/include/ibuf0ibuf.inl
+++ b/storage/innobase/include/ibuf0ibuf.inl
@@ -100,8 +100,8 @@ ibuf_should_try(
a secondary index when we
decide */
{
- if (!innodb_change_buffering || !ibuf.max_size || index->is_clust() ||
- index->is_spatial())
+ if (index->type & (DICT_CLUSTERED | DICT_IBUF | DICT_SPATIAL) ||
+ !innodb_change_buffering || !ibuf.max_size)
return false;
if (!ignore_sec_unique && index->is_unique())
return false;
diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h
index ca194f905b5..e2419309764 100644
--- a/storage/innobase/include/mtr0log.h
+++ b/storage/innobase/include/mtr0log.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2019, 2022, MariaDB Corporation.
+Copyright (c) 2019, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -402,7 +402,8 @@ inline byte *mtr_t::log_write(const page_id_t id, const buf_page_t *bpage,
ut_ad(have_offset || offset == 0);
ut_ad(offset + len <= srv_page_size);
static_assert(MIN_4BYTE >= UNIV_PAGE_SIZE_MAX, "consistency");
-
+ ut_ad(type == FREE_PAGE || type == OPTION || (type == EXTENDED && !bpage) ||
+ memo_contains_flagged(bpage, MTR_MEMO_MODIFY));
size_t max_len;
if (!have_len)
max_len= 1 + 5 + 5;
@@ -512,33 +513,6 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str,
memcpy(b, ut_align_offset(d, srv_page_size), len);
}
-/** Initialize an entire page.
-@param[in,out] b buffer page */
-inline void mtr_t::init(buf_block_t *b)
-{
- const page_id_t id{b->page.id()};
- ut_ad(is_named_space(id.space()));
- ut_ad(!m_freed_pages == !m_freed_space);
-
- if (UNIV_LIKELY_NULL(m_freed_space) &&
- m_freed_space->id == id.space() &&
- m_freed_pages->remove_if_exists(b->page.id().page_no()) &&
- m_freed_pages->empty())
- {
- delete m_freed_pages;
- m_freed_pages= nullptr;
- m_freed_space= nullptr;
- }
-
- b->page.set_reinit(b->page.state() & buf_page_t::LRU_MASK);
-
- if (!is_logged())
- return;
-
- m_log.close(log_write<INIT_PAGE>(b->page.id(), &b->page));
- m_last_offset= FIL_PAGE_TYPE;
-}
-
/** Write an EXTENDED log record.
@param block buffer pool page
@param type extended record subtype; @see mrec_ext_t */
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index abc1f65e692..299f658e98a 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,7 @@ Created 11/26/1995 Heikki Tuuri
#include "fil0fil.h"
#include "dyn0buf.h"
#include "buf0buf.h"
-#include <vector>
+#include "small_vector.h"
/** Start a mini-transaction. */
#define mtr_start(m) (m)->start()
@@ -37,15 +37,6 @@ Created 11/26/1995 Heikki Tuuri
/** Commit a mini-transaction. */
#define mtr_commit(m) (m)->commit()
-/** Set and return a savepoint in mtr.
-@return savepoint */
-#define mtr_set_savepoint(m) (m)->get_savepoint()
-
-/** Release the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-#define mtr_release_s_latch_at_savepoint(m, s, l) \
- (m)->release_s_latch_at_savepoint((s), (l))
-
/** Change the logging mode of a mini-transaction.
@return old mode */
#define mtr_set_log_mode(m, d) (m)->set_log_mode((d))
@@ -60,13 +51,10 @@ savepoint. */
# define mtr_sx_lock_index(i,m) (m)->u_lock(&(i)->lock)
#endif
-#define mtr_release_block_at_savepoint(m, s, b) \
- (m)->release_block_at_savepoint((s), (b))
-
/** Mini-transaction memo stack slot. */
struct mtr_memo_slot_t
{
- /** pointer to the object, or nullptr if released */
+ /** pointer to the object */
void *object;
/** type of the stored object */
mtr_memo_type_t type;
@@ -77,6 +65,9 @@ struct mtr_memo_slot_t
/** Mini-transaction handle and buffer */
struct mtr_t {
+ mtr_t();
+ ~mtr_t();
+
/** Start a mini-transaction. */
void start();
@@ -91,11 +82,11 @@ struct mtr_t {
/** Release latches of unmodified buffer pages.
@param begin first slot to release */
void rollback_to_savepoint(ulint begin)
- { rollback_to_savepoint(begin, m_memo->size()); }
+ { rollback_to_savepoint(begin, m_memo.size()); }
/** Release the last acquired buffer page latch. */
void release_last_page()
- { auto s= m_memo->size(); rollback_to_savepoint(s - 1, s); }
+ { auto s= m_memo.size(); rollback_to_savepoint(s - 1, s); }
/** Commit a mini-transaction that is shrinking a tablespace.
@param space tablespace that is being shrunk */
@@ -120,86 +111,39 @@ struct mtr_t {
ulint get_savepoint() const
{
ut_ad(is_active());
- return m_memo ? m_memo->size() : 0;
+ return m_memo.size();
}
- /** Release the (index tree) s-latch stored in an mtr memo after a savepoint.
- @param savepoint value returned by get_savepoint()
- @param lock index latch to release */
- void release_s_latch_at_savepoint(ulint savepoint, index_lock *lock)
+ /** Get the block at a savepoint */
+ buf_block_t *at_savepoint(ulint savepoint) const
{
ut_ad(is_active());
- mtr_memo_slot_t &slot= m_memo->at(savepoint);
- ut_ad(slot.object == lock);
- ut_ad(slot.type == MTR_MEMO_S_LOCK);
- slot.object= nullptr;
- lock->s_unlock();
- }
- /** Release the block in an mtr memo after a savepoint. */
- void release_block_at_savepoint(ulint savepoint, buf_block_t *block)
- {
- ut_ad(is_active());
- mtr_memo_slot_t &slot= m_memo->at(savepoint);
- ut_ad(slot.object == block);
- ut_ad(!(slot.type & MTR_MEMO_MODIFY));
- slot.object= nullptr;
- block->page.unfix();
-
- switch (slot.type) {
- case MTR_MEMO_PAGE_S_FIX:
- block->page.lock.s_unlock();
- break;
- case MTR_MEMO_PAGE_SX_FIX:
- case MTR_MEMO_PAGE_X_FIX:
- block->page.lock.u_or_x_unlock(slot.type == MTR_MEMO_PAGE_SX_FIX);
- break;
- default:
- break;
- }
- }
-
- /** @return if we are about to make a clean buffer block dirty */
- static bool is_block_dirtied(const buf_page_t &b)
- {
- ut_ad(b.in_file());
- ut_ad(b.frame);
- ut_ad(b.buf_fix_count());
- return b.oldest_modification() <= 1 && b.id().space() < SRV_TMP_SPACE_ID;
+ const mtr_memo_slot_t &slot= m_memo[savepoint];
+ ut_ad(slot.type < MTR_MEMO_S_LOCK);
+ ut_ad(slot.object);
+ return static_cast<buf_block_t*>(slot.object);
}
- /** X-latch a not yet latched block after a savepoint. */
- void x_latch_at_savepoint(ulint savepoint, buf_block_t *block)
+ /** Try to get a block at a savepoint.
+ @param savepoint the savepoint right before the block was acquired
+ @return the block at the savepoint
+ @retval nullptr if no buffer block was registered at that savepoint */
+ buf_block_t *block_at_savepoint(ulint savepoint) const
{
ut_ad(is_active());
- ut_ad(!memo_contains_flagged(block, MTR_MEMO_PAGE_S_FIX |
- MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
- mtr_memo_slot_t &slot= m_memo->at(savepoint);
- ut_ad(slot.object == block);
- ut_ad(slot.type == MTR_MEMO_BUF_FIX);
- slot.type= MTR_MEMO_PAGE_X_FIX;
- block->page.lock.x_lock();
- ut_ad(!block->page.is_io_fixed());
-
- if (!m_made_dirty)
- m_made_dirty= is_block_dirtied(block->page);
+ const mtr_memo_slot_t &slot= m_memo[savepoint];
+ return slot.type < MTR_MEMO_S_LOCK
+ ? static_cast<buf_block_t*>(slot.object)
+ : nullptr;
}
- /** U-latch a not yet latched block after a savepoint. */
- void sx_latch_at_savepoint(ulint savepoint, buf_block_t *block)
- {
- ut_ad(is_active());
- ut_ad(!memo_contains_flagged(block, MTR_MEMO_PAGE_S_FIX |
- MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
- mtr_memo_slot_t &slot= m_memo->at(savepoint);
- ut_ad(slot.object == block);
- ut_ad(slot.type == MTR_MEMO_BUF_FIX);
- slot.type= MTR_MEMO_PAGE_SX_FIX;
- block->page.lock.u_lock();
- ut_ad(!block->page.is_io_fixed());
-
- if (!m_made_dirty)
- m_made_dirty= is_block_dirtied(block->page);
- }
+ /** Retrieve a page that has already been latched.
+ @param id page identifier
+ @param type page latch type
+ @return block
+ @retval nullptr if the block had not been latched yet */
+ buf_block_t *get_already_latched(const page_id_t id, mtr_memo_type_t type)
+ const;
/** @return the logging mode */
mtr_log_t get_log_mode() const
@@ -358,23 +302,17 @@ struct mtr_t {
void release(const index_lock &lock) { release(&lock); }
/** Release a latch to an unmodified page. */
void release(const buf_block_t &block) { release(&block); }
-
- /** Note that the mini-transaction will modify data. */
- void flag_modified() { m_modifications = true; }
private:
/** Release an unmodified object. */
void release(const void *object);
+public:
/** Mark the given latched page as modified.
@param block page that will be modified */
- void modify(const buf_block_t& block);
-public:
- /** Note that the mini-transaction will modify a block. */
- void set_modified(const buf_block_t &block)
- { flag_modified(); if (m_log_mode != MTR_LOG_NONE) modify(block); }
+ void set_modified(const buf_block_t &block);
/** Set the state to not-modified. This will not log the changes.
This is only used during redo log apply, to avoid logging the changes. */
- void discard_modifications() { m_modifications = false; }
+ void discard_modifications() { m_modifications= false; }
/** Get the LSN of commit().
@return the commit LSN
@@ -403,28 +341,17 @@ public:
@param rw_latch latch to acquire */
void upgrade_buffer_fix(ulint savepoint, rw_lock_type_t rw_latch);
- /** Register a page latch on a buffer-fixed block was buffer-fixed.
- @param latch latch type */
- void u_lock_register(ulint savepoint)
+ /** Register a change to the page latch state. */
+ void lock_register(ulint savepoint, mtr_memo_type_t type)
{
- mtr_memo_slot_t &slot= m_memo->at(savepoint);
- ut_ad(slot.type == MTR_MEMO_BUF_FIX);
- slot.type= MTR_MEMO_PAGE_SX_FIX;
- }
-
- /** Register a page latch on a buffer-fixed block was buffer-fixed.
- @param latch latch type */
- void s_lock_register(ulint savepoint)
- {
- mtr_memo_slot_t &slot= m_memo->at(savepoint);
- ut_ad(slot.type == MTR_MEMO_BUF_FIX);
- slot.type= MTR_MEMO_PAGE_S_FIX;
+ mtr_memo_slot_t &slot= m_memo[savepoint];
+ ut_ad(slot.type <= MTR_MEMO_BUF_FIX);
+ ut_ad(type <= MTR_MEMO_BUF_FIX);
+ slot.type= type;
}
/** Upgrade U locks on a block to X */
void page_lock_upgrade(const buf_block_t &block);
- /** Upgrade U lock to X */
- void lock_upgrade(const index_lock &lock);
/** Check if we are holding tablespace latch
@param space tablespace to search for
@@ -454,31 +381,66 @@ public:
@retval nullptr if not found */
buf_block_t *memo_contains_page_flagged(const byte *ptr, ulint flags) const;
- /** @return true if mini-transaction contains modifications. */
+ /** @return whether this mini-transaction modifies persistent data */
bool has_modifications() const { return m_modifications; }
#endif /* UNIV_DEBUG */
- /** Push an object to an mtr memo stack.
- @param object object
+ /** Push a buffer page to the memo.
+ @param block buffer block
@param type object type: MTR_MEMO_S_LOCK, ... */
- void memo_push(void *object, mtr_memo_type_t type) __attribute__((nonnull))
+ void memo_push(buf_block_t *block, mtr_memo_type_t type)
+ __attribute__((nonnull))
{
ut_ad(is_active());
- /* If this mtr has U or X latched a clean page then we set
- the m_made_dirty flag. This tells us if we need to
- grab log_sys.flush_order_mutex at mtr_t::commit() so that we
- can insert the dirtied page into the buf_pool.flush_list.
-
- FIXME: Do this only when the MTR_MEMO_MODIFY flag is set! */
- if (!m_made_dirty &&
- (type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)))
- m_made_dirty=
- is_block_dirtied(*static_cast<const buf_page_t*>(object));
-
- if (!m_memo)
- m_memo= new std::vector<mtr_memo_slot_t>(1, {object, type});
+ ut_ad(type <= MTR_MEMO_PAGE_SX_MODIFY);
+ ut_ad(block->page.buf_fix_count());
+ ut_ad(block->page.in_file());
+#ifdef UNIV_DEBUG
+ switch (type) {
+ case MTR_MEMO_PAGE_S_FIX:
+ ut_ad(block->page.lock.have_s());
+ break;
+ case MTR_MEMO_PAGE_X_FIX: case MTR_MEMO_PAGE_X_MODIFY:
+ ut_ad(block->page.lock.have_x());
+ break;
+ case MTR_MEMO_PAGE_SX_FIX: case MTR_MEMO_PAGE_SX_MODIFY:
+ ut_ad(block->page.lock.have_u_or_x());
+ break;
+ case MTR_MEMO_BUF_FIX:
+ break;
+ case MTR_MEMO_MODIFY:
+ case MTR_MEMO_S_LOCK: case MTR_MEMO_X_LOCK: case MTR_MEMO_SX_LOCK:
+ case MTR_MEMO_SPACE_X_LOCK: case MTR_MEMO_SPACE_S_LOCK:
+ ut_ad("invalid type" == 0);
+ }
+#endif
+ if (!(type & MTR_MEMO_MODIFY));
+ else if (block->page.id().space() >= SRV_TMP_SPACE_ID)
+ {
+ block->page.set_temp_modified();
+ type= mtr_memo_type_t(type & ~MTR_MEMO_MODIFY);
+ }
else
- m_memo->emplace_back(mtr_memo_slot_t{object, type});
+ {
+ m_modifications= true;
+ if (!m_made_dirty)
+ /* If we are going to modify a previously clean persistent page,
+ we must set m_made_dirty, so that commit() will acquire
+ log_sys.flush_order_mutex and insert the block into
+ buf_pool.flush_list. */
+ m_made_dirty= block->page.oldest_modification() <= 1;
+ }
+ m_memo.emplace_back(mtr_memo_slot_t{block, type});
+ }
+
+ /** Push an index lock or tablespace latch to the memo.
+ @param object index lock or tablespace latch
+ @param type object type: MTR_MEMO_S_LOCK, ... */
+ void memo_push(void *object, mtr_memo_type_t type) __attribute__((nonnull))
+ {
+ ut_ad(is_active());
+ ut_ad(type >= MTR_MEMO_S_LOCK);
+ m_memo.emplace_back(mtr_memo_slot_t{object, type});
}
/** @return the size of the log is empty */
@@ -783,7 +745,7 @@ private:
/** specifies which operations should be logged; default MTR_LOG_ALL */
uint16_t m_log_mode:2;
- /** whether at least one buffer pool page was written to */
+ /** whether at least one persistent page was written to */
uint16_t m_modifications:1;
/** whether at least one previously clean buffer pool page was written to */
@@ -809,7 +771,7 @@ private:
#endif /* UNIV_DEBUG */
/** acquired dict_index_t::lock, fil_space_t::latch, buf_block_t */
- std::vector<mtr_memo_slot_t> *m_memo= nullptr;
+ small_vector<mtr_memo_slot_t, 16> m_memo;
/** mini-transaction log */
mtr_buf_t m_log;
diff --git a/storage/innobase/include/small_vector.h b/storage/innobase/include/small_vector.h
new file mode 100644
index 00000000000..d28a36184b8
--- /dev/null
+++ b/storage/innobase/include/small_vector.h
@@ -0,0 +1,100 @@
+/*****************************************************************************
+
+Copyright (c) 2023, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+#pragma once
+/* A normally small vector, inspired by llvm::SmallVector */
+#include "my_global.h"
+#include <iterator>
+#include <memory>
+#include <utility>
+
+/** The element-type independent part of small_vector: an untyped pointer
+to the element storage, the number of elements and the capacity. */
+class small_vector_base
+{
+protected:
+  /** compact type for storing the element count and the capacity */
+  using Size_T= uint32_t;
+  /** start of the element storage; initially the buffer that was passed
+  to the constructor */
+  void *BeginX;
+  /** number of elements in use */
+  Size_T Size= 0;
+  /** number of elements that fit in the storage at BeginX */
+  Size_T Capacity;
+
+  small_vector_base()= delete;
+  /** Constructor.
+  @param small       initial element storage
+  @param small_size  capacity of the initial storage, in elements */
+  small_vector_base(void *small, size_t small_size)
+    : BeginX(small), Capacity(static_cast<Size_T>(small_size)) {}
+  /** Enlarge the element storage.
+  @param small         the initial element storage
+  @param element_size  size of one element, in bytes */
+  ATTRIBUTE_COLD void grow_by_1(void *small, size_t element_size);
+  /** Set the number of elements in use, without touching the storage.
+  @param N  new number of elements */
+  void set_size(size_t N) { Size= static_cast<Size_T>(N); }
+public:
+  /** @return the number of elements in use */
+  size_t size() const { return Size; }
+  /** @return the number of elements that fit in the current storage */
+  size_t capacity() const { return Capacity; }
+  /** @return whether no elements are in use */
+  bool empty() const { return Size == 0; }
+  /** Forget all elements. The storage and the capacity are retained. */
+  void clear() { Size= 0; }
+};
+
+/** A vector that keeps up to N elements in inline storage and moves to
+heap storage once it grows beyond that.
+
+When the vector grows, small_vector_base::grow_by_1() relocates the
+elements with memcpy() or my_realloc(), and the destructor frees the
+storage without running element destructors.
+NOTE(review): this presumably restricts T to trivially copyable and
+trivially destructible types; confirm before using other element types.
+NOTE(review): no copy constructor or assignment operator is declared, so
+the implicitly generated ones would duplicate BeginX verbatim; the vector
+should presumably never be copied.
+@tparam T  element type
+@tparam N  number of elements in the inline storage */
+template <typename T, unsigned N>
+class small_vector : public small_vector_base
+{
+  /** The fixed storage allocation */
+  T small[N];
+
+  using small_vector_base::set_size;
+
+  /** Ensure that there is room for appending one element. */
+  void grow_if_needed()
+  {
+    if (unlikely(size() >= capacity()))
+      grow_by_1(small, sizeof *small);
+  }
+
+public:
+  /** Construct an empty vector that uses the inline storage. */
+  small_vector() : small_vector_base(small, N)
+  {
+    TRASH_ALLOC(small, sizeof small);
+  }
+  /** Free the heap storage, if the vector has outgrown the inline storage. */
+  ~small_vector()
+  {
+    if (small != begin())
+      my_free(begin());
+    MEM_MAKE_ADDRESSABLE(small, sizeof small);
+  }
+
+  using iterator= T *;
+  using const_iterator= const T *;
+  using reverse_iterator= std::reverse_iterator<iterator>;
+  using reference= T &;
+  using const_reference= const T&;
+
+  /** @return iterator to the first element */
+  iterator begin() { return static_cast<iterator>(BeginX); }
+  const_iterator begin() const { return static_cast<const_iterator>(BeginX); }
+  /** @return iterator past the last element */
+  iterator end() { return begin() + size(); }
+  const_iterator end() const { return begin() + size(); }
+
+  /** @return reverse iterator to the last element */
+  reverse_iterator rbegin() { return reverse_iterator(end()); }
+  /** @return reverse iterator before the first element */
+  reverse_iterator rend() { return reverse_iterator(begin()); }
+
+  /** Access an element.
+  @param i  index of the element; must be less than size()
+  @return reference to the element */
+  reference operator[](size_t i) { assert(i < size()); return begin()[i]; }
+  const_reference operator[](size_t i) const
+  { return const_cast<small_vector&>(*this)[i]; }
+
+  /** Remove the elements [S,E), shifting the subsequent elements down.
+  @param S  first element to remove
+  @param E  end of the range to remove */
+  void erase(const_iterator S, const_iterator E)
+  {
+    set_size(std::move(const_cast<iterator>(E), end(),
+                       const_cast<iterator>(S)) - begin());
+  }
+
+  /** Append an element.
+  @param arg  the element to move from */
+  void emplace_back(T &&arg)
+  {
+    grow_if_needed();
+    /* A named rvalue reference is an lvalue; without std::move() the
+    element would be copy-constructed. */
+    ::new (end()) T(std::move(arg));
+    set_size(size() + 1);
+  }
+};
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 3eacd3ab7e1..91d11b8b478 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -37,6 +37,8 @@ Created 11/26/1995 Heikki Tuuri
void mtr_memo_slot_t::release() const
{
+ ut_ad(object);
+
switch (type) {
case MTR_MEMO_S_LOCK:
static_cast<index_lock*>(object)->s_unlock();
@@ -153,10 +155,13 @@ inline void buf_pool_t::insert_into_flush_list(buf_page_t *prev,
block->page.set_oldest_modification(lsn);
}
+mtr_t::mtr_t()= default;
+mtr_t::~mtr_t()= default;
+
/** Start a mini-transaction. */
void mtr_t::start()
{
- ut_ad(!m_memo);
+ ut_ad(m_memo.empty());
ut_ad(!m_freed_pages);
ut_ad(!m_freed_space);
MEM_UNDEFINED(this, sizeof *this);
@@ -188,7 +193,7 @@ void mtr_t::start()
inline void mtr_t::release_resources()
{
ut_ad(is_active());
- ut_ad(!m_memo);
+ ut_ad(m_memo.empty());
m_log.erase();
ut_d(m_commit= true);
}
@@ -243,15 +248,13 @@ void mtr_t::release_unlogged()
{
ut_ad(m_log_mode == MTR_LOG_NO_REDO);
ut_ad(m_log.size() == 0);
- ut_ad(m_memo);
process_freed_pages();
- for (auto it= m_memo->rbegin(); it != m_memo->rend(); it++)
+ for (auto it= m_memo.rbegin(); it != m_memo.rend(); it++)
{
mtr_memo_slot_t &slot= *it;
- if (!slot.object)
- continue;
+ ut_ad(slot.object);
switch (slot.type) {
case MTR_MEMO_S_LOCK:
static_cast<index_lock*>(slot.object)->s_unlock();
@@ -278,10 +281,8 @@ void mtr_t::release_unlogged()
{
ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY ||
slot.type == MTR_MEMO_PAGE_SX_MODIFY);
- if (UNIV_LIKELY(block->page.id() >= end_page_id))
- block->page.set_temp_modified();
- else
- insert_imported(block);
+ ut_ad(block->page.id() < end_page_id);
+ insert_imported(block);
}
switch (slot.type) {
@@ -300,23 +301,14 @@ void mtr_t::release_unlogged()
}
}
- delete m_memo;
- m_memo= nullptr;
+ m_memo.clear();
}
void mtr_t::release()
{
- if (m_memo)
- {
- for (auto it= m_memo->rbegin(); it != m_memo->rend(); it++)
- {
- mtr_memo_slot_t &slot= *it;
- if (slot.object)
- slot.release();
- }
- delete m_memo;
- m_memo= nullptr;
- }
+ for (auto it= m_memo.rbegin(); it != m_memo.rend(); it++)
+ it->release();
+ m_memo.clear();
}
/** Commit a mini-transaction. */
@@ -344,19 +336,18 @@ void mtr_t::commit()
if (m_made_dirty)
{
- ut_ad(m_memo);
size_t modified= 0;
- auto it= m_memo->rbegin();
+ auto it= m_memo.rbegin();
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_page_t *const prev=
buf_pool.prepare_insert_into_flush_list(lsns.first);
- while (it != m_memo->rend())
+ while (it != m_memo.rend())
{
const mtr_memo_slot_t &slot= *it++;
- if (slot.object && slot.type & MTR_MEMO_MODIFY)
+ if (slot.type & MTR_MEMO_MODIFY)
{
ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY ||
slot.type == MTR_MEMO_PAGE_SX_MODIFY);
@@ -401,72 +392,67 @@ void mtr_t::commit()
else
log_sys.latch.rd_unlock();
- if (m_memo)
- {
- size_t modified= 0;
+ size_t modified= 0;
- for (auto it= m_memo->rbegin(); it != m_memo->rend(); )
- {
- const mtr_memo_slot_t &slot= *it++;
- if (!slot.object)
- continue;
- switch (slot.type) {
- case MTR_MEMO_S_LOCK:
- static_cast<index_lock*>(slot.object)->s_unlock();
- break;
- case MTR_MEMO_SPACE_X_LOCK:
- static_cast<fil_space_t*>(slot.object)->set_committed_size();
- static_cast<fil_space_t*>(slot.object)->x_unlock();
- break;
- case MTR_MEMO_SPACE_S_LOCK:
- static_cast<fil_space_t*>(slot.object)->s_unlock();
- break;
- case MTR_MEMO_X_LOCK:
- case MTR_MEMO_SX_LOCK:
- static_cast<index_lock*>(slot.object)->
- u_or_x_unlock(slot.type == MTR_MEMO_SX_LOCK);
- break;
- default:
- buf_page_t *bpage= static_cast<buf_page_t*>(slot.object);
- const auto s= bpage->unfix();
- if (slot.type & MTR_MEMO_MODIFY)
+ for (auto it= m_memo.rbegin(); it != m_memo.rend(); )
+ {
+ const mtr_memo_slot_t &slot= *it++;
+ ut_ad(slot.object);
+ switch (slot.type) {
+ case MTR_MEMO_S_LOCK:
+ static_cast<index_lock*>(slot.object)->s_unlock();
+ break;
+ case MTR_MEMO_SPACE_X_LOCK:
+ static_cast<fil_space_t*>(slot.object)->set_committed_size();
+ static_cast<fil_space_t*>(slot.object)->x_unlock();
+ break;
+ case MTR_MEMO_SPACE_S_LOCK:
+ static_cast<fil_space_t*>(slot.object)->s_unlock();
+ break;
+ case MTR_MEMO_X_LOCK:
+ case MTR_MEMO_SX_LOCK:
+ static_cast<index_lock*>(slot.object)->
+ u_or_x_unlock(slot.type == MTR_MEMO_SX_LOCK);
+ break;
+ default:
+ buf_page_t *bpage= static_cast<buf_page_t*>(slot.object);
+ const auto s= bpage->unfix();
+ if (slot.type & MTR_MEMO_MODIFY)
+ {
+ ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY ||
+ slot.type == MTR_MEMO_PAGE_SX_MODIFY);
+ ut_ad(bpage->oldest_modification() > 1);
+ ut_ad(bpage->oldest_modification() < m_commit_lsn);
+ ut_ad(bpage->id() < end_page_id);
+ ut_ad(s >= buf_page_t::FREED);
+ ut_ad(s < buf_page_t::READ_FIX);
+ ut_ad(mach_read_from_8(bpage->frame + FIL_PAGE_LSN) <=
+ m_commit_lsn);
+ if (s >= buf_page_t::UNFIXED)
{
- ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY ||
- slot.type == MTR_MEMO_PAGE_SX_MODIFY);
- ut_ad(bpage->oldest_modification() > 1);
- ut_ad(bpage->oldest_modification() < m_commit_lsn);
- ut_ad(bpage->id() < end_page_id);
- ut_ad(s >= buf_page_t::FREED);
- ut_ad(s < buf_page_t::READ_FIX);
- ut_ad(mach_read_from_8(bpage->frame + FIL_PAGE_LSN) <=
- m_commit_lsn);
- if (s >= buf_page_t::UNFIXED)
- {
- mach_write_to_8(bpage->frame + FIL_PAGE_LSN, m_commit_lsn);
- if (UNIV_LIKELY_NULL(bpage->zip.data))
- memcpy_aligned<8>(FIL_PAGE_LSN + bpage->zip.data,
- FIL_PAGE_LSN + bpage->frame, 8);
- }
- modified++;
- }
- switch (auto latch= slot.type & ~MTR_MEMO_MODIFY) {
- case MTR_MEMO_PAGE_S_FIX:
- bpage->lock.s_unlock();
- continue;
- case MTR_MEMO_PAGE_SX_FIX:
- case MTR_MEMO_PAGE_X_FIX:
- bpage->lock.u_or_x_unlock(latch == MTR_MEMO_PAGE_SX_FIX);
- continue;
- default:
- ut_ad(latch == MTR_MEMO_BUF_FIX);
+ mach_write_to_8(bpage->frame + FIL_PAGE_LSN, m_commit_lsn);
+ if (UNIV_LIKELY_NULL(bpage->zip.data))
+ memcpy_aligned<8>(FIL_PAGE_LSN + bpage->zip.data,
+ FIL_PAGE_LSN + bpage->frame, 8);
}
+ modified++;
+ }
+ switch (auto latch= slot.type & ~MTR_MEMO_MODIFY) {
+ case MTR_MEMO_PAGE_S_FIX:
+ bpage->lock.s_unlock();
+ continue;
+ case MTR_MEMO_PAGE_SX_FIX:
+ case MTR_MEMO_PAGE_X_FIX:
+ bpage->lock.u_or_x_unlock(latch == MTR_MEMO_PAGE_SX_FIX);
+ continue;
+ default:
+ ut_ad(latch == MTR_MEMO_BUF_FIX);
}
}
-
- buf_pool.add_flush_list_requests(modified);
- delete m_memo;
- m_memo= nullptr;
}
+
+ buf_pool.add_flush_list_requests(modified);
+ m_memo.clear();
}
if (UNIV_UNLIKELY(lsns.second != PAGE_FLUSH_NO))
@@ -481,16 +467,14 @@ func_exit:
void mtr_t::rollback_to_savepoint(ulint begin, ulint end)
{
- ut_ad(m_memo);
- ut_ad(end <= m_memo->size());
+ ut_ad(end <= m_memo.size());
ut_ad(begin <= end);
ulint s= end;
while (s-- > begin)
{
- const mtr_memo_slot_t &slot= (*m_memo)[s];
- if (!slot.object)
- continue;
+ const mtr_memo_slot_t &slot= m_memo[s];
+ ut_ad(slot.object);
/* This is intended for releasing latches on indexes or unmodified
buffer pool pages. */
ut_ad(slot.type <= MTR_MEMO_SX_LOCK);
@@ -498,7 +482,7 @@ void mtr_t::rollback_to_savepoint(ulint begin, ulint end)
slot.release();
}
- m_memo->erase(m_memo->begin() + begin, m_memo->begin() + end);
+ m_memo.erase(m_memo.begin() + begin, m_memo.begin() + end);
}
/** Commit a mini-transaction that is shrinking a tablespace.
@@ -510,9 +494,10 @@ void mtr_t::commit_shrink(fil_space_t &space)
ut_ad(!high_level_read_only);
ut_ad(m_modifications);
ut_ad(m_made_dirty);
- ut_ad(m_memo);
+ ut_ad(!m_memo.empty());
ut_ad(!recv_recovery_is_on());
ut_ad(m_log_mode == MTR_LOG_ALL);
+ ut_ad(!m_freed_pages);
ut_ad(UT_LIST_GET_LEN(space.chain) == 1);
log_write_and_flush_prepare();
@@ -531,22 +516,21 @@ void mtr_t::commit_shrink(fil_space_t &space)
os_file_truncate(space.chain.start->name, space.chain.start->handle,
os_offset_t{space.size} << srv_page_size_shift, true);
- ut_ad(!m_freed_pages || m_freed_space == &space);
- process_freed_pages();
+ space.clear_freed_ranges();
const page_id_t high{space.id, space.size};
size_t modified= 0;
- auto it= m_memo->rbegin();
+ auto it= m_memo.rbegin();
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_page_t *const prev= buf_pool.prepare_insert_into_flush_list(start_lsn);
- while (it != m_memo->rend())
+ while (it != m_memo.rend())
{
mtr_memo_slot_t &slot= *it++;
- if (!slot.object);
- else if (slot.type == MTR_MEMO_SPACE_X_LOCK)
+ ut_ad(slot.object);
+ if (slot.type == MTR_MEMO_SPACE_X_LOCK)
ut_ad(high.space() == static_cast<fil_space_t*>(slot.object)->id);
else
{
@@ -727,7 +711,7 @@ lsn_t mtr_t::commit_files(lsn_t checkpoint_lsn)
ut_ad(!is_inside_ibuf());
ut_ad(m_log_mode == MTR_LOG_ALL);
ut_ad(!m_made_dirty);
- ut_ad(!m_memo);
+ ut_ad(m_memo.empty());
ut_ad(!srv_read_only_mode);
ut_ad(!m_freed_space);
ut_ad(!m_freed_pages);
@@ -837,19 +821,18 @@ void mtr_t::x_lock_space(fil_space_t *space)
+/** Release the first memo entry that refers to the given object, and
+remove that entry from the memo. The entry must exist and must not carry
+the MTR_MEMO_MODIFY flag (both are asserted in debug builds).
+@param object  the object whose memo entry is to be released */
void mtr_t::release(const void *object)
{
ut_ad(is_active());
- ut_ad(m_memo);
auto it=
- std::find_if(m_memo->begin(), m_memo->end(),
+ std::find_if(m_memo.begin(), m_memo.end(),
[object](const mtr_memo_slot_t& slot)
{ return slot.object == object; });
- ut_ad(it != m_memo->end());
+ ut_ad(it != m_memo.end());
ut_ad(!(it->type & MTR_MEMO_MODIFY));
it->release();
- m_memo->erase(it);
- ut_ad(std::find_if(m_memo->begin(), m_memo->end(),
+ m_memo.erase(it, it + 1);
+ /* NOTE(review): the predicate below compares slot.object with &object,
+ that is, with the address of the parameter itself rather than with its
+ value. No slot can match that, so this assertion can never fail.
+ Presumably "object" was intended, to assert that no further entry for
+ the object remains; confirm that no caller registers the same object
+ more than once before tightening the check. */
+ ut_ad(std::find_if(m_memo.begin(), m_memo.end(),
[object](const mtr_memo_slot_t& slot)
- { return slot.object == &object; }) == m_memo->end());
+ { return slot.object == &object; }) == m_memo.end());
}
static time_t log_close_warn_time;
@@ -1028,11 +1011,11 @@ std::pair<lsn_t,mtr_t::page_flush_ahead> mtr_t::do_write()
#ifndef DBUG_OFF
do
{
- if (!m_memo || m_log_mode != MTR_LOG_ALL)
+ if (m_log_mode != MTR_LOG_ALL)
continue;
DBUG_EXECUTE_IF("skip_page_checksum", continue;);
- for (const mtr_memo_slot_t& slot : *m_memo)
+ for (const mtr_memo_slot_t& slot : m_memo)
if (slot.type & MTR_MEMO_MODIFY)
{
const buf_page_t &b= *static_cast<const buf_page_t*>(slot.object);
@@ -1284,12 +1267,9 @@ mtr_t::finish_write(size_t len)
bool mtr_t::have_x_latch(const buf_block_t &block) const
{
- if (!m_memo)
- return false;
-
ut_d(const mtr_memo_slot_t *found= nullptr);
- for (const mtr_memo_slot_t &slot : *m_memo)
+ for (const mtr_memo_slot_t &slot : m_memo)
{
if (slot.object != &block)
continue;
@@ -1309,16 +1289,13 @@ bool mtr_t::have_x_latch(const buf_block_t &block) const
bool mtr_t::have_u_or_x_latch(const buf_block_t &block) const
{
- if (m_memo)
+ for (const mtr_memo_slot_t &slot : m_memo)
{
- for (const mtr_memo_slot_t &slot : *m_memo)
+ if (slot.object == &block &&
+ slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX))
{
- if (slot.object == &block &&
- slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX))
- {
- ut_ad(block.page.lock.have_u_or_x());
- return true;
- }
+ ut_ad(block.page.lock.have_u_or_x());
+ return true;
}
}
return false;
@@ -1330,18 +1307,15 @@ bool mtr_t::have_u_or_x_latch(const buf_block_t &block) const
@return whether space.latch is being held */
bool mtr_t::memo_contains(const fil_space_t& space, bool shared) const
{
- if (m_memo)
- {
- const mtr_memo_type_t type= shared
- ? MTR_MEMO_SPACE_S_LOCK : MTR_MEMO_SPACE_X_LOCK;
+ const mtr_memo_type_t type= shared
+ ? MTR_MEMO_SPACE_S_LOCK : MTR_MEMO_SPACE_X_LOCK;
- for (const mtr_memo_slot_t &slot : *m_memo)
+ for (const mtr_memo_slot_t &slot : m_memo)
+ {
+ if (slot.object == &space && slot.type == type)
{
- if (slot.object == &space && slot.type == type)
- {
- ut_ad(shared || space.is_owner());
- return true;
- }
+ ut_ad(shared || space.is_owner());
+ return true;
}
}
@@ -1351,9 +1325,8 @@ bool mtr_t::memo_contains(const fil_space_t& space, bool shared) const
void mtr_t::page_lock_upgrade(const buf_block_t &block)
{
ut_ad(block.page.lock.have_x());
- ut_ad(m_memo);
- for (mtr_memo_slot_t &slot : *m_memo)
+ for (mtr_memo_slot_t &slot : m_memo)
if (slot.object == &block && slot.type & MTR_MEMO_PAGE_SX_FIX)
slot.type= mtr_memo_type_t(slot.type ^
(MTR_MEMO_PAGE_SX_FIX | MTR_MEMO_PAGE_X_FIX));
@@ -1363,16 +1336,6 @@ void mtr_t::page_lock_upgrade(const buf_block_t &block)
#endif /* BTR_CUR_HASH_ADAPT */
}
-void mtr_t::lock_upgrade(const index_lock &lock)
-{
- ut_ad(lock.have_x());
- ut_ad(m_memo);
-
- for (mtr_memo_slot_t &slot : *m_memo)
- if (slot.object == &lock && slot.type == MTR_MEMO_SX_LOCK)
- slot.type= MTR_MEMO_X_LOCK;
-}
-
/** Latch a buffer pool block.
@param block block to be latched
@param rw_latch RW_S_LATCH, RW_SX_LATCH, RW_X_LATCH, RW_NO_LATCH */
@@ -1421,27 +1384,29 @@ done:
void mtr_t::upgrade_buffer_fix(ulint savepoint, rw_lock_type_t rw_latch)
{
ut_ad(is_active());
- ut_ad(m_memo);
- ut_ad(savepoint < m_memo->size());
-
- mtr_memo_slot_t &slot= (*m_memo)[savepoint];
+ mtr_memo_slot_t &slot= m_memo[savepoint];
ut_ad(slot.type == MTR_MEMO_BUF_FIX);
buf_block_t *block= static_cast<buf_block_t*>(slot.object);
ut_d(const auto state= block->page.state());
ut_ad(state > buf_page_t::UNFIXED);
ut_ad(state > buf_page_t::WRITE_FIX || state < buf_page_t::READ_FIX);
+ static_assert(int{MTR_MEMO_PAGE_S_FIX} == int{RW_S_LATCH}, "");
+ static_assert(int{MTR_MEMO_PAGE_X_FIX} == int{RW_X_LATCH}, "");
+ static_assert(int{MTR_MEMO_PAGE_SX_FIX} == int{RW_SX_LATCH}, "");
+ slot.type= mtr_memo_type_t(rw_latch);
switch (rw_latch) {
default:
ut_ad("invalid state" == 0);
break;
+ case RW_S_LATCH:
+ block->page.lock.s_lock();
+ break;
case RW_SX_LATCH:
- slot.type= MTR_MEMO_PAGE_SX_FIX;
block->page.lock.u_lock();
ut_ad(!block->page.is_io_fixed());
break;
case RW_X_LATCH:
- slot.type= MTR_MEMO_PAGE_X_FIX;
block->page.lock.x_lock();
ut_ad(!block->page.is_io_fixed());
}
@@ -1463,27 +1428,24 @@ bool mtr_t::memo_contains(const index_lock &lock, mtr_memo_type_t type) const
ut_ad(type == MTR_MEMO_X_LOCK || type == MTR_MEMO_S_LOCK ||
type == MTR_MEMO_SX_LOCK);
- if (m_memo)
+ for (const mtr_memo_slot_t &slot : m_memo)
{
- for (const mtr_memo_slot_t &slot : *m_memo)
+ if (slot.object == &lock && slot.type == type)
{
- if (slot.object == &lock && slot.type == type)
- {
- switch (type) {
- case MTR_MEMO_X_LOCK:
- ut_ad(lock.have_x());
- break;
- case MTR_MEMO_SX_LOCK:
- ut_ad(lock.have_u_or_x());
- break;
- case MTR_MEMO_S_LOCK:
- ut_ad(lock.have_s());
- break;
- default:
- break;
- }
- return true;
+ switch (type) {
+ case MTR_MEMO_X_LOCK:
+ ut_ad(lock.have_x());
+ break;
+ case MTR_MEMO_SX_LOCK:
+ ut_ad(lock.have_u_or_x());
+ break;
+ case MTR_MEMO_S_LOCK:
+ ut_ad(lock.have_s());
+ break;
+ default:
+ break;
}
+ return true;
}
}
@@ -1511,7 +1473,7 @@ bool mtr_t::memo_contains_flagged(const void *object, ulint flags) const
MTR_MEMO_MODIFY)) ==
!!(flags & (MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK | MTR_MEMO_S_LOCK)));
- for (const mtr_memo_slot_t &slot : *m_memo)
+ for (const mtr_memo_slot_t &slot : m_memo)
{
if (object != slot.object)
continue;
@@ -1546,9 +1508,10 @@ buf_block_t* mtr_t::memo_contains_page_flagged(const byte *ptr, ulint flags)
{
ptr= page_align(ptr);
- for (const mtr_memo_slot_t &slot : *m_memo)
+ for (const mtr_memo_slot_t &slot : m_memo)
{
- if (!slot.object || !(flags & slot.type))
+ ut_ad(slot.object);
+ if (!(flags & slot.type))
continue;
buf_page_t *bpage= static_cast<buf_page_t*>(slot.object);
@@ -1569,35 +1532,84 @@ buf_block_t* mtr_t::memo_contains_page_flagged(const byte *ptr, ulint flags)
/** Mark the given latched page as modified.
@param block page that will be modified */
-void mtr_t::modify(const buf_block_t &block)
+void mtr_t::set_modified(const buf_block_t &block)
{
- if (UNIV_UNLIKELY(!m_memo))
+ if (block.page.id().space() >= SRV_TMP_SPACE_ID)
{
- /* This must be PageConverter::update_page() in IMPORT TABLESPACE. */
- ut_ad(!block.page.in_LRU_list);
+ const_cast<buf_block_t&>(block).page.set_temp_modified();
return;
}
- mtr_memo_slot_t *found= nullptr;
+ m_modifications= true;
- for (mtr_memo_slot_t &slot : *m_memo)
+ if (UNIV_UNLIKELY(m_log_mode == MTR_LOG_NONE))
+ return;
+
+ for (mtr_memo_slot_t &slot : m_memo)
{
if (slot.object == &block &&
slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX))
{
- found= &slot;
- break;
+ if (slot.type & MTR_MEMO_MODIFY)
+ ut_ad(m_made_dirty || block.page.oldest_modification() > 1);
+ else
+ {
+ slot.type= static_cast<mtr_memo_type_t>(slot.type | MTR_MEMO_MODIFY);
+ if (!m_made_dirty)
+ m_made_dirty= block.page.oldest_modification() <= 1;
+ }
+ return;
}
}
- if (UNIV_UNLIKELY(!found))
+ /* This must be PageConverter::update_page() in IMPORT TABLESPACE. */
+ ut_ad(m_memo.empty());
+ ut_ad(!block.page.in_LRU_list);
+}
+
+/** Mark a page as being reinitialized: flag it as modified, remove it
+from the set of pages freed by this mini-transaction, update the page
+state with set_reinit(), and write an INIT_PAGE record if logging is
+enabled.
+@param b  the block; asserted to be registered with MTR_MEMO_PAGE_X_FIX */
+void mtr_t::init(buf_block_t *b)
+{
+ const page_id_t id{b->page.id()};
+ ut_ad(is_named_space(id.space()));
+ ut_ad(!m_freed_pages == !m_freed_space);
+ ut_ad(memo_contains_flagged(b, MTR_MEMO_PAGE_X_FIX));
+
+ /* For space >= SRV_TMP_SPACE_ID only the page itself is flagged;
+ no memo slot and none of m_modifications, m_made_dirty is changed. */
+ if (id.space() >= SRV_TMP_SPACE_ID)
+ b->page.set_temp_modified();
+ else
{
- ut_ad("modifying an unlatched page" == 0);
- return;
+ /* Find the X-latch slot of the block and flag it as modified. */
+ for (mtr_memo_slot_t &slot : m_memo)
+ {
+ if (slot.object == b && slot.type & MTR_MEMO_PAGE_X_FIX)
+ {
+ slot.type= MTR_MEMO_PAGE_X_MODIFY;
+ m_modifications= true;
+ if (!m_made_dirty)
+ m_made_dirty= b->page.oldest_modification() <= 1;
+ goto found;
+ }
+ }
+ /* Reached only if no matching slot exists; debug builds fail here. */
+ ut_ad("block not X-latched" == 0);
}
- found->type= static_cast<mtr_memo_type_t>(found->type | MTR_MEMO_MODIFY);
- if (!m_made_dirty)
- m_made_dirty= is_block_dirtied(block.page);
+
+ found:
+ /* If the page had been recorded in m_freed_pages for this tablespace,
+ remove it; when that leaves the set empty, discard the set. */
+ if (UNIV_LIKELY_NULL(m_freed_space) &&
+ m_freed_space->id == id.space() &&
+ m_freed_pages->remove_if_exists(id.page_no()) &&
+ m_freed_pages->empty())
+ {
+ delete m_freed_pages;
+ m_freed_pages= nullptr;
+ m_freed_space= nullptr;
+ }
+
+ b->page.set_reinit(b->page.state() & buf_page_t::LRU_MASK);
+
+ /* Nothing is written to the log unless is_logged() holds. */
+ if (!is_logged())
+ return;
+
+ m_log.close(log_write<INIT_PAGE>(id, &b->page));
+ m_last_offset= FIL_PAGE_TYPE;
}
/** Free a page.
@@ -1610,24 +1622,26 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset)
if (is_logged())
{
- ut_ad(m_memo);
buf_block_t *freed= nullptr;
const page_id_t id{space.id, offset};
- for (auto it= m_memo->rbegin(); it != m_memo->rend(); it++)
+ for (auto it= m_memo.end(); it != m_memo.begin(); )
{
+ it--;
+ next:
mtr_memo_slot_t &slot= *it;
buf_block_t *block= static_cast<buf_block_t*>(slot.object);
- if (!block);
- else if (block == freed)
+ ut_ad(block);
+ if (block == freed)
{
if (slot.type & (MTR_MEMO_PAGE_SX_FIX | MTR_MEMO_PAGE_X_FIX))
slot.type= MTR_MEMO_PAGE_X_FIX;
else
{
ut_ad(slot.type == MTR_MEMO_BUF_FIX);
- slot.object= nullptr;
block->page.unfix();
+ m_memo.erase(it, it + 1);
+ goto next;
}
}
else if (slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX) &&
@@ -1641,7 +1655,17 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset)
ut_d(bool upgraded=) block->page.lock.x_lock_upgraded();
ut_ad(upgraded);
}
- slot.type= MTR_MEMO_PAGE_X_MODIFY;
+ if (id.space() >= SRV_TMP_SPACE_ID)
+ {
+ block->page.set_temp_modified();
+ slot.type= MTR_MEMO_PAGE_X_FIX;
+ }
+ else
+ {
+ slot.type= MTR_MEMO_PAGE_X_MODIFY;
+ if (!m_made_dirty)
+ m_made_dirty= block->page.oldest_modification() <= 1;
+ }
#ifdef BTR_CUR_HASH_ADAPT
if (block->index)
btr_search_drop_page_hash_index(block, false);
@@ -1650,8 +1674,22 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset)
}
}
- if (freed && !m_made_dirty)
- m_made_dirty= is_block_dirtied(freed->page);
m_log.close(log_write<FREE_PAGE>(id, nullptr));
}
}
+
+/** Double the capacity of the vector, relocating the elements.
+Running out of memory is treated as a fatal error.
+@param small         the initial (non-heap) element storage of the vector
+@param element_size  size of one element, in bytes */
+void small_vector_base::grow_by_1(void *small, size_t element_size)
+{
+  const size_t cap= Capacity*= 2, s= cap * element_size;
+  void *new_begin;
+  if (BeginX == small)
+  {
+    /* The elements still reside in the initial storage, which was not
+    allocated from the heap and thus cannot be passed to my_realloc(). */
+    new_begin= my_malloc(PSI_NOT_INSTRUMENTED, s, MYF(0));
+    /* With MYF(0), a failed allocation is reported by returning NULL. */
+    ut_a(new_begin);
+    memcpy(new_begin, BeginX, size() * element_size);
+    TRASH_FREE(small, size() * element_size);
+  }
+  else
+  {
+    new_begin= my_realloc(PSI_NOT_INSTRUMENTED, BeginX, s, MYF(0));
+    /* Do not replace BeginX with NULL; the caller would write through it. */
+    ut_a(new_begin);
+  }
+
+  BeginX= new_begin;
+}
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index c4ee55e79b3..028f73db9c9 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1617,6 +1617,9 @@ inline dberr_t IndexPurge::purge_pessimistic_delete() noexcept
dberr_t IndexPurge::purge() noexcept
{
btr_pcur_store_position(&m_pcur, &m_mtr);
+ m_mtr.commit();
+ m_mtr.start();
+ m_mtr.set_log_mode(MTR_LOG_NO_REDO);
dberr_t err= purge_pessimistic_delete();
m_mtr.start();
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index de58e3896b0..3b21b0315cd 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -314,8 +314,10 @@ row_ins_clust_index_entry_by_modify(
}
if (mode != BTR_MODIFY_TREE) {
- ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED))
- == BTR_MODIFY_LEAF);
+ ut_ad(mode == BTR_MODIFY_LEAF
+ || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED
+ || mode == BTR_MODIFY_ROOT_AND_LEAF
+ || mode == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED);
/* Try optimistic updating of the record, keeping changes
within the page */
@@ -1621,8 +1623,7 @@ row_ins_check_foreign_constraint(
dtuple_set_n_fields_cmp(entry, foreign->n_fields);
pcur.btr_cur.page_cur.index = check_index;
- err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, 0,
- &mtr);
+ err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
goto end_scan;
}
@@ -2119,7 +2120,7 @@ row_ins_scan_sec_index_for_duplicate(
pcur.btr_cur.page_cur.index = index;
trx_t* const trx = thr_get_trx(thr);
dberr_t err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF,
- &pcur, 0, mtr);
+ &pcur, mtr);
if (err != DB_SUCCESS) {
goto end_scan;
}
@@ -2543,8 +2544,8 @@ row_ins_index_entry_big_rec(
index->set_modified(mtr);
}
- dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &pcur, 0, &mtr);
+ dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_MODIFY_TREE,
+ &pcur, &mtr);
if (error != DB_SUCCESS) {
return error;
}
@@ -2577,6 +2578,42 @@ but GCC 4.8.5 does not support pop_options. */
# pragma GCC optimize ("O0")
#endif
+#ifdef WITH_WSREP
+/** Prepare a bulk insert for Galera by appending a table-level exclusive
+key for the table that the index belongs to.
+On failure, an error is logged and trx->error_state is set to DB_ROLLBACK.
+@param trx    transaction
+@param index  index
+@retval false on success
+@retval true  on failure */
+ATTRIBUTE_COLD static
+bool row_ins_wsrep_start_bulk(trx_t *trx, const dict_index_t &index)
+{
+  char db_name[NAME_LEN + 1];
+  char table_name[NAME_LEN + 1];
+  ulint db_name_len, table_name_len;
+
+  if (index.table->parse_name(db_name, table_name,
+                              &db_name_len, &table_name_len))
+  {
+    /* Append table-level exclusive key for bulk insert. */
+    const int rcode= wsrep_thd_append_table_key(trx->mysql_thd, db_name,
+                                                table_name,
+                                                WSREP_SERVICE_KEY_EXCLUSIVE);
+    if (!rcode)
+      return false;
+
+    WSREP_ERROR("Appending table key for bulk insert failed: %s, %d",
+                wsrep_thd_query(trx->mysql_thd), rcode);
+  }
+  else
+  {
+    WSREP_ERROR("Parse_name for bulk insert failed: %s",
+                wsrep_thd_query(trx->mysql_thd));
+  }
+
+  /* Both failure paths request a rollback of the transaction. */
+  trx->error_state= DB_ROLLBACK;
+  return true;
+}
+#endif
+
/***************************************************************//**
Tries to insert an entry into a clustered index, ignoring foreign key
constraints. If a record with the same unique key is found, the other
@@ -2602,11 +2639,10 @@ row_ins_clust_index_entry_low(
que_thr_t* thr) /*!< in: query thread */
{
btr_pcur_t pcur;
- btr_cur_t* cursor;
dberr_t err = DB_SUCCESS;
big_rec_t* big_rec = NULL;
mtr_t mtr;
- ib_uint64_t auto_inc = 0;
+ uint64_t auto_inc = 0;
mem_heap_t* offsets_heap = NULL;
rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs* offsets = offsets_;
@@ -2622,7 +2658,7 @@ row_ins_clust_index_entry_low(
ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index));
ut_ad(!trx->in_rollback);
- mtr_start(&mtr);
+ mtr.start();
if (index->table->is_temporary()) {
/* Disable REDO logging as the lifetime of temp-tables is
@@ -2662,6 +2698,13 @@ row_ins_clust_index_entry_low(
dfield->type.mtype,
dfield->type.prtype
& DATA_UNSIGNED);
+ if (auto_inc
+ && mode != BTR_MODIFY_TREE) {
+ mode = btr_latch_mode(
+ BTR_MODIFY_ROOT_AND_LEAF
+ ^ BTR_MODIFY_LEAF
+ ^ mode);
+ }
}
}
}
@@ -2671,20 +2714,26 @@ row_ins_clust_index_entry_low(
the function will return in both low_match and up_match of the
cursor sensible values */
pcur.btr_cur.page_cur.index = index;
- err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, auto_inc, &mtr);
+ err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, &mtr);
if (err != DB_SUCCESS) {
index->table->file_unreadable = true;
-commit_exit:
+err_exit:
mtr.commit();
goto func_exit;
}
- cursor = btr_pcur_get_btr_cur(&pcur);
- cursor->thr = thr;
+ if (auto_inc) {
+ buf_block_t* root
+ = mtr.at_savepoint(mode != BTR_MODIFY_ROOT_AND_LEAF);
+ ut_ad(index->page == root->page.id().page_no());
+ page_set_autoinc(root, auto_inc, &mtr, false);
+ }
+
+ btr_pcur_get_btr_cur(&pcur)->thr = thr;
#ifdef UNIV_DEBUG
{
- page_t* page = btr_cur_get_page(cursor);
+ page_t* page = btr_pcur_get_page(&pcur);
rec_t* first_rec = page_rec_get_next(
page_get_infimum_rec(page));
@@ -2693,7 +2742,7 @@ commit_exit:
}
#endif /* UNIV_DEBUG */
- block = btr_cur_get_block(cursor);
+ block = btr_pcur_get_block(&pcur);
DBUG_EXECUTE_IF("row_ins_row_level", goto skip_bulk_insert;);
@@ -2707,7 +2756,7 @@ commit_exit:
&& !index->table->n_rec_locks
&& !index->table->is_active_ddl()
&& !index->table->has_spatial_index()
- && !trx->is_wsrep() /* FIXME: MDEV-24623 */
+ && !index->table->versioned()
&& !thd_is_slave(trx->mysql_thd) /* FIXME: MDEV-24622 */) {
DEBUG_SYNC_C("empty_root_page_insert");
@@ -2719,7 +2768,7 @@ commit_exit:
if (err != DB_SUCCESS) {
trx->error_state = err;
trx->bulk_insert = false;
- goto commit_exit;
+ goto err_exit;
}
if (index->table->n_rec_locks) {
@@ -2728,6 +2777,16 @@ avoid_bulk:
goto skip_bulk_insert;
}
+#ifdef WITH_WSREP
+ if (trx->is_wsrep())
+ {
+ if (!wsrep_thd_is_local_transaction(trx->mysql_thd))
+ goto skip_bulk_insert;
+ if (row_ins_wsrep_start_bulk(trx, *index))
+ goto err_exit;
+ }
+#endif /* WITH_WSREP */
+
#ifdef BTR_CUR_HASH_ADAPT
if (btr_search_enabled) {
btr_search_x_lock_all();
@@ -2751,7 +2810,7 @@ avoid_bulk:
goto avoid_bulk;
}
- goto commit_exit;
+ goto err_exit;
}
}
@@ -2762,7 +2821,7 @@ skip_bulk_insert:
ut_ad(index->is_instant());
ut_ad(!dict_index_is_online_ddl(index));
- const rec_t* rec = btr_cur_get_rec(cursor);
+ const rec_t* rec = btr_pcur_get_rec(&pcur);
if (rec_get_info_bits(rec, page_rec_is_comp(rec))
& REC_INFO_MIN_REC_FLAG) {
@@ -2771,16 +2830,17 @@ skip_bulk_insert:
goto err_exit;
}
- ut_ad(!row_ins_must_modify_rec(cursor));
+ ut_ad(!row_ins_must_modify_rec(&pcur.btr_cur));
goto do_insert;
}
- if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) {
+ if (rec_is_metadata(btr_pcur_get_rec(&pcur), *index)) {
goto do_insert;
}
if (n_uniq
- && (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) {
+ && (pcur.btr_cur.up_match >= n_uniq
+ || pcur.btr_cur.low_match >= n_uniq)) {
if (flags
== (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
@@ -2788,7 +2848,7 @@ skip_bulk_insert:
/* Set no locks when applying log
in online table rebuild. Only check for duplicates. */
err = row_ins_duplicate_error_in_clust_online(
- n_uniq, entry, cursor,
+ n_uniq, entry, &pcur.btr_cur,
&offsets, &offsets_heap);
switch (err) {
@@ -2799,26 +2859,24 @@ skip_bulk_insert:
/* fall through */
case DB_SUCCESS_LOCKED_REC:
case DB_DUPLICATE_KEY:
- trx->error_info = cursor->index();
+ trx->error_info = index;
}
} else {
/* Note that the following may return also
DB_LOCK_WAIT */
err = row_ins_duplicate_error_in_clust(
- flags, cursor, entry, thr);
+ flags, &pcur.btr_cur, entry, thr);
}
if (err != DB_SUCCESS) {
-err_exit:
- mtr_commit(&mtr);
- goto func_exit;
+ goto err_exit;
}
}
/* Note: Allowing duplicates would qualify for modification of
an existing record as the new entry is exactly same as old entry. */
- if (row_ins_must_modify_rec(cursor)) {
+ if (row_ins_must_modify_rec(&pcur.btr_cur)) {
/* There is already an index entry with a long enough common
prefix, we must convert the insert into a modify of an
existing record */
@@ -2836,10 +2894,13 @@ do_insert:
rec_t* insert_rec;
if (mode != BTR_MODIFY_TREE) {
- ut_ad(mode == BTR_MODIFY_LEAF ||
- mode == BTR_MODIFY_LEAF_ALREADY_LATCHED);
+ ut_ad(mode == BTR_MODIFY_LEAF
+ || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED
+ || mode == BTR_MODIFY_ROOT_AND_LEAF
+ || mode
+ == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED);
err = btr_cur_optimistic_insert(
- flags, cursor, &offsets, &offsets_heap,
+ flags, &pcur.btr_cur, &offsets, &offsets_heap,
entry, &insert_rec, &big_rec,
n_ext, thr, &mtr);
} else {
@@ -2848,17 +2909,15 @@ do_insert:
goto err_exit;
}
- DEBUG_SYNC_C("before_insert_pessimitic_row_ins_clust");
-
err = btr_cur_optimistic_insert(
- flags, cursor,
+ flags, &pcur.btr_cur,
&offsets, &offsets_heap,
entry, &insert_rec, &big_rec,
n_ext, thr, &mtr);
if (err == DB_FAIL) {
err = btr_cur_pessimistic_insert(
- flags, cursor,
+ flags, &pcur.btr_cur,
&offsets, &offsets_heap,
entry, &insert_rec, &big_rec,
n_ext, thr, &mtr);
@@ -2970,9 +3029,7 @@ row_ins_sec_index_entry_low(
rtr_init_rtr_info(&rtr_info, false, &cursor, index, false);
rtr_info_update_btr(&cursor, &rtr_info);
- err = btr_cur_search_to_nth_level(0, entry,
- PAGE_CUR_RTREE_INSERT,
- search_mode, &cursor, &mtr);
+ err = rtr_insert_leaf(&cursor, entry, search_mode, &mtr);
if (err == DB_SUCCESS && search_mode == BTR_MODIFY_LEAF
&& rtr_info.mbr_adj) {
@@ -2988,9 +3045,8 @@ row_ins_sec_index_entry_low(
} else {
index->set_modified(mtr);
}
- err = btr_cur_search_to_nth_level(
- 0, entry, PAGE_CUR_RTREE_INSERT,
- search_mode, &cursor, &mtr);
+ err = rtr_insert_leaf(&cursor, entry,
+ search_mode, &mtr);
}
DBUG_EXECUTE_IF(
@@ -3006,8 +3062,8 @@ row_ins_sec_index_entry_low(
: BTR_INSERT));
}
- err = btr_cur_search_to_nth_level(0, entry, PAGE_CUR_LE,
- search_mode, &cursor, &mtr);
+ err = cursor.search_leaf(entry, PAGE_CUR_LE, search_mode,
+ &mtr);
}
if (err != DB_SUCCESS) {
@@ -3083,12 +3139,12 @@ row_ins_sec_index_entry_low(
prevent any insertion of a duplicate by another
transaction. Let us now reposition the cursor and
continue the insertion (bypassing the change buffer). */
- err = btr_cur_search_to_nth_level(
- 0, entry, PAGE_CUR_LE,
+ err = cursor.search_leaf(
+ entry, PAGE_CUR_LE,
btr_latch_mode(search_mode
& ~(BTR_INSERT
| BTR_IGNORE_SEC_UNIQUE)),
- &cursor, &mtr);
+ &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
}
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 64b4d02fa18..60eda9b14a1 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1696,8 +1696,8 @@ err_exit:
mtr->start();
index->set_modified(*mtr);
pcur->btr_cur.page_cur.index = index;
- error = btr_pcur_open(entry, PAGE_CUR_LE,
- BTR_PURGE_TREE, pcur, 0, mtr);
+ error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_PURGE_TREE, pcur,
+ mtr);
if (error) {
goto err_exit;
}
@@ -1780,8 +1780,8 @@ row_log_table_apply_delete(
mtr_start(&mtr);
index->set_modified(mtr);
- dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE,
- BTR_PURGE_TREE, &pcur, 0, &mtr);
+ dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_PURGE_TREE, &pcur,
+ &mtr);
if (err != DB_SUCCESS) {
goto all_done;
}
@@ -1917,8 +1917,8 @@ row_log_table_apply_update(
mtr.start();
index->set_modified(mtr);
- error = btr_pcur_open(old_pk, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &pcur, 0, &mtr);
+ error = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_MODIFY_TREE, &pcur,
+ &mtr);
if (error != DB_SUCCESS) {
func_exit:
mtr.commit();
@@ -3084,11 +3084,8 @@ row_log_apply_op_low(
record. The operation may already have been performed,
depending on when the row in the clustered index was
scanned. */
- *error = btr_cur_search_to_nth_level(0, entry, PAGE_CUR_LE,
- has_index_lock
- ? BTR_MODIFY_TREE
- : BTR_MODIFY_LEAF,
- &cursor, &mtr);
+ *error = cursor.search_leaf(entry, PAGE_CUR_LE, has_index_lock
+ ? BTR_MODIFY_TREE : BTR_MODIFY_LEAF, &mtr);
if (UNIV_UNLIKELY(*error != DB_SUCCESS)) {
goto func_exit;
}
@@ -3138,9 +3135,9 @@ row_log_apply_op_low(
mtr_commit(&mtr);
mtr_start(&mtr);
index->set_modified(mtr);
- *error = btr_cur_search_to_nth_level(
- 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, &mtr);
+ *error = cursor.search_leaf(entry, PAGE_CUR_LE,
+ BTR_MODIFY_TREE,
+ &mtr);
if (UNIV_UNLIKELY(*error != DB_SUCCESS)) {
goto func_exit;
}
@@ -3242,9 +3239,9 @@ insert_the_rec:
mtr_commit(&mtr);
mtr_start(&mtr);
index->set_modified(mtr);
- *error = btr_cur_search_to_nth_level(
- 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, &mtr);
+ *error = cursor.search_leaf(entry, PAGE_CUR_LE,
+ BTR_MODIFY_TREE,
+ &mtr);
if (*error != DB_SUCCESS) {
break;
}
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index 723b4b1d387..5601a786555 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -150,9 +150,8 @@ public:
false);
rtr_info_update_btr(&ins_cur, &rtr_info);
- error = btr_cur_search_to_nth_level(
- 0, dtuple, PAGE_CUR_RTREE_INSERT,
- BTR_MODIFY_LEAF, &ins_cur, &mtr);
+ error = rtr_insert_leaf(&ins_cur, dtuple,
+ BTR_MODIFY_LEAF, &mtr);
/* It need to update MBR in parent entry,
so change search mode to BTR_MODIFY_TREE */
@@ -164,10 +163,8 @@ public:
rtr_info_update_btr(&ins_cur, &rtr_info);
mtr.start();
index->set_modified(mtr);
- error = btr_cur_search_to_nth_level(
- 0, dtuple,
- PAGE_CUR_RTREE_INSERT,
- BTR_MODIFY_TREE, &ins_cur, &mtr);
+ error = rtr_insert_leaf(&ins_cur, dtuple,
+ BTR_MODIFY_TREE, &mtr);
}
if (error == DB_SUCCESS) {
@@ -189,11 +186,8 @@ public:
&ins_cur, index, false);
rtr_info_update_btr(&ins_cur, &rtr_info);
- error = btr_cur_search_to_nth_level(
- 0, dtuple,
- PAGE_CUR_RTREE_INSERT,
- BTR_MODIFY_TREE,
- &ins_cur, &mtr);
+ error = rtr_insert_leaf(&ins_cur, dtuple,
+ BTR_MODIFY_TREE, &mtr);
if (error == DB_SUCCESS) {
error = btr_cur_pessimistic_insert(
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index df042f66521..65d26e0a733 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -104,7 +104,7 @@ bool
row_purge_remove_clust_if_poss_low(
/*===============================*/
purge_node_t* node, /*!< in/out: row purge node */
- btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+ btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_PURGE_TREE */
{
dict_index_t* index = dict_table_get_first_index(node->table);
table_id_t table_id = 0;
@@ -342,17 +342,20 @@ row_purge_remove_sec_if_poss_tree(
ibool success = TRUE;
dberr_t err;
mtr_t mtr;
- enum row_search_result search_result;
log_free_check();
mtr.start();
index->set_modified(mtr);
pcur.btr_cur.page_cur.index = index;
- search_result = row_search_index_entry(entry, BTR_PURGE_TREE,
- &pcur, &mtr);
+ if (index->is_spatial()) {
+ if (!rtr_search(entry, BTR_PURGE_TREE, &pcur, &mtr)) {
+ goto found;
+ }
+ goto func_exit;
+ }
- switch (search_result) {
+ switch (row_search_index_entry(entry, BTR_PURGE_TREE, &pcur, &mtr)) {
case ROW_NOT_FOUND:
/* Not found. This is a legitimate condition. In a
rollback, InnoDB will remove secondary recs that would
@@ -381,6 +384,7 @@ row_purge_remove_sec_if_poss_tree(
which cannot be purged yet, requires its existence. If some requires,
we should do nothing. */
+found:
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) {
/* Remove the index record, which should have been
@@ -439,8 +443,6 @@ row_purge_remove_sec_if_poss_leaf(
{
mtr_t mtr;
btr_pcur_t pcur;
- enum btr_latch_mode mode;
- enum row_search_result search_result;
bool success = true;
log_free_check();
@@ -449,31 +451,27 @@ row_purge_remove_sec_if_poss_leaf(
mtr.start();
index->set_modified(mtr);
- /* Change buffering is disabled for spatial index and
- virtual index. */
- mode = (index->type & (DICT_SPATIAL | DICT_VIRTUAL))
- ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF;
pcur.btr_cur.page_cur.index = index;
/* Set the purge node for the call to row_purge_poss_sec(). */
pcur.btr_cur.purge_node = node;
if (index->is_spatial()) {
pcur.btr_cur.thr = NULL;
- index->lock.u_lock(SRW_LOCK_CALL);
- search_result = row_search_index_entry(
- entry, mode, &pcur, &mtr);
- index->lock.u_unlock();
- } else {
- /* Set the query thread, so that ibuf_insert_low() will be
- able to invoke thd_get_trx(). */
- pcur.btr_cur.thr = static_cast<que_thr_t*>(
- que_node_get_parent(node));
- search_result = row_search_index_entry(
- entry, mode, &pcur, &mtr);
+ if (!rtr_search(entry, BTR_MODIFY_LEAF, &pcur, &mtr)) {
+ goto found;
+ }
+ goto func_exit;
}
- switch (search_result) {
+ /* Set the query thread, so that ibuf_insert_low() will be
+ able to invoke thd_get_trx(). */
+ pcur.btr_cur.thr = static_cast<que_thr_t*>(que_node_get_parent(node));
+
+ switch (row_search_index_entry(entry, index->has_virtual()
+ ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF,
+ &pcur, &mtr)) {
case ROW_FOUND:
+found:
/* Before attempting to purge a record, check
if it is safe to do so. */
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, false)) {
diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc
index b998d27d836..4a00b2a430e 100644
--- a/storage/innobase/row/row0row.cc
+++ b/storage/innobase/row/row0row.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2022, MariaDB Corporation.
+Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1216,7 +1216,7 @@ row_search_on_row_ref(
& REC_INFO_MIN_REC_FLAG;
} else {
ut_a(ref->n_fields == index->n_uniq);
- if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, 0, mtr)
+ if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, mtr)
!= DB_SUCCESS) {
return false;
}
@@ -1278,21 +1278,13 @@ row_search_index_entry(
ut_ad(dtuple_check_typed(entry));
- if (pcur->index()->is_spatial()) {
- if (rtr_pcur_open(pcur->index(), entry, mode, pcur, mtr)) {
- return ROW_NOT_FOUND;
- }
- } else {
- if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, 0, mtr)
- != DB_SUCCESS) {
- return ROW_NOT_FOUND;
- }
+ if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) {
+ return ROW_NOT_FOUND;
}
switch (btr_pcur_get_btr_cur(pcur)->flag) {
case BTR_CUR_DELETE_REF:
ut_ad(!(~mode & BTR_DELETE));
- ut_ad(!pcur->index()->is_spatial());
return(ROW_NOT_DELETED_REF);
case BTR_CUR_DEL_MARK_IBUF:
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index 23100a96ebd..716e5351446 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -2,7 +2,7 @@
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -4775,7 +4775,7 @@ wait_table_again:
pcur->btr_cur.thr = thr;
pcur->old_rec = nullptr;
- if (dict_index_is_spatial(index)) {
+ if (index->is_spatial()) {
if (!prebuilt->rtr_info) {
prebuilt->rtr_info = rtr_create_rtr_info(
set_also_gap_locks, true,
@@ -4791,10 +4791,13 @@ wait_table_again:
prebuilt->rtr_info->search_tuple = search_tuple;
prebuilt->rtr_info->search_mode = mode;
}
- }
- err = btr_pcur_open_with_no_init(search_tuple, mode,
- BTR_SEARCH_LEAF, pcur, &mtr);
+ err = rtr_search_leaf(pcur, search_tuple, mode, &mtr);
+ } else {
+ err = btr_pcur_open_with_no_init(search_tuple, mode,
+ BTR_SEARCH_LEAF,
+ pcur, &mtr);
+ }
if (err != DB_SUCCESS) {
page_corrupted:
@@ -5771,8 +5774,7 @@ next_rec_after_check:
if (spatial_search) {
/* No need to do store restore for R-tree */
- mtr.commit();
- mtr.start();
+ mtr.rollback_to_savepoint(0);
} else if (mtr_extra_clust_savepoint) {
/* We must release any clustered index latches
if we are moving to the next non-clustered
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 6567019a33d..50196e78092 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -233,7 +233,7 @@ func_exit:
if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_INSERT_METADATA) {
/* When rolling back the very first instant ADD COLUMN
operation, reset the root page to the basic state. */
- err = btr_reset_instant(*index, true, &mtr);
+ btr_reset_instant(*index, true, &mtr);
}
btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
@@ -268,21 +268,32 @@ row_undo_ins_remove_sec_low(
pcur.btr_cur.page_cur.index = index;
row_mtr_start(&mtr, index, !modify_leaf);
- if (modify_leaf) {
- mode = BTR_MODIFY_LEAF_ALREADY_LATCHED;
- mtr_s_lock_index(index, &mtr);
- } else {
- ut_ad(mode == BTR_PURGE_TREE);
- mtr_sx_lock_index(index, &mtr);
- }
-
if (index->is_spatial()) {
mode = modify_leaf
- ? btr_latch_mode(BTR_MODIFY_LEAF_ALREADY_LATCHED
+ ? btr_latch_mode(BTR_MODIFY_LEAF
| BTR_RTREE_DELETE_MARK
| BTR_RTREE_UNDO_INS)
: btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS);
btr_pcur_get_btr_cur(&pcur)->thr = thr;
+ if (rtr_search(entry, mode, &pcur, &mtr)) {
+ goto func_exit;
+ }
+
+ if (rec_get_deleted_flag(
+ btr_pcur_get_rec(&pcur),
+ dict_table_is_comp(index->table))) {
+ ib::error() << "Record found in index " << index->name
+ << " is deleted marked on insert rollback.";
+ ut_ad(0);
+ }
+ goto found;
+ } else if (modify_leaf) {
+ mode = BTR_MODIFY_LEAF_ALREADY_LATCHED;
+ mtr_s_lock_index(index, &mtr);
+ } else {
+ ut_ad(mode == BTR_PURGE_TREE);
+ mode = BTR_PURGE_TREE_ALREADY_LATCHED;
+ mtr_x_lock_index(index, &mtr);
}
switch (row_search_index_entry(entry, mode, &pcur, &mtr)) {
@@ -295,15 +306,7 @@ row_undo_ins_remove_sec_low(
case ROW_NOT_FOUND:
break;
case ROW_FOUND:
- if (dict_index_is_spatial(index)
- && rec_get_deleted_flag(
- btr_pcur_get_rec(&pcur),
- dict_table_is_comp(index->table))) {
- ib::error() << "Record found in index " << index->name
- << " is deleted marked on insert rollback.";
- ut_ad(0);
- }
-
+ found:
btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
if (modify_leaf) {
@@ -318,6 +321,7 @@ row_undo_ins_remove_sec_low(
}
}
+func_exit:
btr_pcur_close(&pcur);
mtr_commit(&mtr);
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 2d04dca4003..50e15e03cc9 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -133,8 +133,7 @@ row_undo_mod_clust_low(
&& node->ref == &trx_undo_metadata
&& btr_cur_get_index(btr_cur)->table->instant
&& node->update->info_bits == REC_INFO_METADATA_ADD) {
- err = btr_reset_instant(*btr_cur_get_index(btr_cur),
- false, mtr);
+ btr_reset_instant(*btr_cur->index(), false, mtr);
}
}
@@ -490,7 +489,6 @@ row_undo_mod_del_mark_or_remove_sec_low(
dberr_t err = DB_SUCCESS;
mtr_t mtr;
mtr_t mtr_vers;
- row_search_result search_result;
const bool modify_leaf = mode == BTR_MODIFY_LEAF;
row_mtr_start(&mtr, index, !modify_leaf);
@@ -505,6 +503,11 @@ row_undo_mod_del_mark_or_remove_sec_low(
| BTR_RTREE_UNDO_INS)
: btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS);
btr_cur->thr = thr;
+ if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) {
+ goto found;
+ } else {
+ goto func_exit;
+ }
} else if (!index->is_committed()) {
/* The index->online_status may change if the index is
or was being created online, but not committed yet. It
@@ -514,7 +517,8 @@ row_undo_mod_del_mark_or_remove_sec_low(
mtr_s_lock_index(index, &mtr);
} else {
ut_ad(mode == BTR_PURGE_TREE);
- mtr_sx_lock_index(index, &mtr);
+ mode = BTR_PURGE_TREE_ALREADY_LATCHED;
+ mtr_x_lock_index(index, &mtr);
}
} else {
/* For secondary indexes,
@@ -523,9 +527,8 @@ row_undo_mod_del_mark_or_remove_sec_low(
ut_ad(!dict_index_is_online_ddl(index));
}
- search_result = row_search_index_entry(entry, mode, &pcur, &mtr);
-
- switch (UNIV_EXPECT(search_result, ROW_FOUND)) {
+ switch (UNIV_EXPECT(row_search_index_entry(entry, mode, &pcur, &mtr),
+ ROW_FOUND)) {
case ROW_NOT_FOUND:
/* In crash recovery, the secondary index record may
be missing if the UPDATE did not have time to insert
@@ -547,6 +550,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
ut_error;
}
+found:
/* We should remove the index record if no prior version of the row,
which cannot be purged yet, requires its existence. If some requires,
we should delete mark the record. */
@@ -665,13 +669,12 @@ row_undo_mod_del_unmark_sec_and_undo_update(
trx_t* trx = thr_get_trx(thr);
const ulint flags
= BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG;
- row_search_result search_result;
const auto orig_mode = mode;
pcur.btr_cur.page_cur.index = index;
ut_ad(trx->id != 0);
- if (dict_index_is_spatial(index)) {
+ if (index->is_spatial()) {
/* FIXME: Currently we do a 2-pass search for the undo
due to avoid undel-mark a wrong rec in rolling back in
partial update. Later, we could log some info in
@@ -686,9 +689,22 @@ try_again:
btr_cur->thr = thr;
- search_result = row_search_index_entry(entry, mode, &pcur, &mtr);
+ if (index->is_spatial()) {
+ if (!rtr_search(entry, mode, &pcur, &mtr)) {
+ goto found;
+ }
- switch (search_result) {
+ if (mode != orig_mode && btr_cur->rtr_info->fd_del) {
+ mode = orig_mode;
+ btr_pcur_close(&pcur);
+ mtr.commit();
+ goto try_again;
+ }
+
+ goto not_found;
+ }
+
+ switch (row_search_index_entry(entry, mode, &pcur, &mtr)) {
mem_heap_t* heap;
mem_heap_t* offsets_heap;
rec_offs* offsets;
@@ -699,17 +715,7 @@ try_again:
flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
ut_error;
case ROW_NOT_FOUND:
- /* For spatial index, if first search didn't find an
- undel-marked rec, try to find a del-marked rec. */
- if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) {
- if (mode != orig_mode) {
- mode = orig_mode;
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- goto try_again;
- }
- }
-
+not_found:
if (btr_cur->up_match >= dict_index_get_n_unique(index)
|| btr_cur->low_match >= dict_index_get_n_unique(index)) {
ib::warn() << "Record in index " << index->name
@@ -767,6 +773,7 @@ try_again:
break;
case ROW_FOUND:
+found:
btr_rec_set_deleted<false>(btr_cur_get_block(btr_cur),
btr_cur_get_rec(btr_cur), &mtr);
heap = mem_heap_create(
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index a3f940adff5..fe88fce58a2 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1832,12 +1832,10 @@ row_upd_sec_index_entry(
que_thr_t* thr) /*!< in: query thread */
{
mtr_t mtr;
- const rec_t* rec;
btr_pcur_t pcur;
mem_heap_t* heap;
dtuple_t* entry;
dict_index_t* index;
- btr_cur_t* btr_cur;
dberr_t err = DB_SUCCESS;
trx_t* trx = thr_get_trx(thr);
btr_latch_mode mode;
@@ -1876,10 +1874,6 @@ row_upd_sec_index_entry(
case SRV_TMP_SPACE_ID:
mtr.set_log_mode(MTR_LOG_NO_REDO);
flags = BTR_NO_LOCKING_FLAG;
- if (index->is_spatial()) {
- mode = btr_latch_mode(BTR_MODIFY_LEAF
- | BTR_RTREE_DELETE_MARK);
- }
break;
default:
index->set_modified(mtr);
@@ -1888,26 +1882,35 @@ row_upd_sec_index_entry(
flags = index->table->no_rollback() ? BTR_NO_ROLLBACK : 0;
/* We can only buffer delete-mark operations if there
are no foreign key constraints referring to the index. */
- mode = index->is_spatial()
- ? btr_latch_mode(BTR_MODIFY_LEAF
- | BTR_RTREE_DELETE_MARK)
- : referenced
- ? BTR_MODIFY_LEAF : BTR_DELETE_MARK_LEAF;
+ if (!referenced) {
+ mode = BTR_DELETE_MARK_LEAF;
+ }
break;
}
/* Set the query thread, so that ibuf_insert_low() will be
able to invoke thd_get_trx(). */
- btr_pcur_get_btr_cur(&pcur)->thr = thr;
+ pcur.btr_cur.thr = thr;
pcur.btr_cur.page_cur.index = index;
- search_result = row_search_index_entry(entry, mode, &pcur, &mtr);
+ if (index->is_spatial()) {
+ mode = btr_latch_mode(BTR_MODIFY_LEAF | BTR_RTREE_DELETE_MARK);
+ if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) {
+ goto found;
+ }
- btr_cur = btr_pcur_get_btr_cur(&pcur);
+ if (pcur.btr_cur.rtr_info->fd_del) {
+ /* We found the record, but it is delete-marked */
+ goto close;
+ }
- rec = btr_cur_get_rec(btr_cur);
+ goto not_found;
+ }
+
+ search_result = row_search_index_entry(entry, mode, &pcur, &mtr);
switch (search_result) {
+ const rec_t* rec;
case ROW_NOT_DELETED_REF: /* should only occur for BTR_DELETE */
ut_error;
break;
@@ -1916,11 +1919,8 @@ row_upd_sec_index_entry(
break;
case ROW_NOT_FOUND:
- if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) {
- /* We found the record, but a delete marked */
- break;
- }
-
+not_found:
+ rec = btr_pcur_get_rec(&pcur);
ib::error()
<< "Record in index " << index->name
<< " of table " << index->table->name
@@ -1934,7 +1934,9 @@ row_upd_sec_index_entry(
#endif /* UNIV_DEBUG */
break;
case ROW_FOUND:
+found:
ut_ad(err == DB_SUCCESS);
+ rec = btr_pcur_get_rec(&pcur);
/* Delete mark the old index record; it can already be
delete marked if we return after a lock wait in
@@ -1943,14 +1945,14 @@ row_upd_sec_index_entry(
rec, dict_table_is_comp(index->table))) {
err = lock_sec_rec_modify_check_and_lock(
flags,
- btr_cur_get_block(btr_cur),
- btr_cur_get_rec(btr_cur), index, thr, &mtr);
+ btr_pcur_get_block(&pcur),
+ btr_pcur_get_rec(&pcur), index, thr, &mtr);
if (err != DB_SUCCESS) {
break;
}
- btr_rec_set_deleted<true>(btr_cur_get_block(btr_cur),
- btr_cur_get_rec(btr_cur),
+ btr_rec_set_deleted<true>(btr_pcur_get_block(&pcur),
+ btr_pcur_get_rec(&pcur),
&mtr);
#ifdef WITH_WSREP
if (!referenced && foreign
@@ -2009,6 +2011,7 @@ row_upd_sec_index_entry(
}
}
+close:
btr_pcur_close(&pcur);
mtr_commit(&mtr);
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 22782cc0d1e..a1d569571c8 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -424,7 +424,6 @@ static dberr_t trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr)
block->fix();
mtr.commit();
mtr.start();
- mtr.flag_modified();
rseg->latch.wr_lock(SRW_LOCK_CALL);
rseg_hdr->page.lock.x_lock();
block->page.lock.x_lock();
diff --git a/storage/spider/ha_spider.cc b/storage/spider/ha_spider.cc
index 141af463d1a..53f74d5fc0d 100644
--- a/storage/spider/ha_spider.cc
+++ b/storage/spider/ha_spider.cc
@@ -996,10 +996,8 @@ int ha_spider::reset()
result_list.snap_direct_aggregate = FALSE;
result_list.direct_distinct = FALSE;
store_error_num = 0;
- if (
- wide_handler &&
- wide_handler->sql_command != SQLCOM_END
- ) {
+ if (wide_handler)
+ {
wide_handler->sql_command = SQLCOM_END;
wide_handler->between_flg = FALSE;
wide_handler->idx_bitmap_is_set = FALSE;
diff --git a/storage/spider/mysql-test/spider/bugfix/r/mdev_30191.result b/storage/spider/mysql-test/spider/bugfix/r/mdev_30191.result
new file mode 100644
index 00000000000..941f210cbbe
--- /dev/null
+++ b/storage/spider/mysql-test/spider/bugfix/r/mdev_30191.result
@@ -0,0 +1,44 @@
+#
+# MDEV-30191 SIGSEGV & heap-use-after-free in spider_db_print_item_type, SIGABRT in __cxa_pure_virtual/spider_db_print_item_type, Got error 128 "Out of memory in engine", 56/112 memory not freed, and Assertion `fixed()' failed in Item_sp_variable::val_str on SP call
+#
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
+connection child2_1;
+CREATE DATABASE auto_test_remote;
+USE auto_test_remote;
+CREATE TABLE tbl_a (c INT);
+connection master_1;
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+CREATE TABLE tbl_a (
+c INT
+) ENGINE=Spider DEFAULT CHARSET=utf8 COMMENT='table "tbl_a", srv "s_2_1"';
+CREATE TABLE tbl_b (c INT);
+CREATE PROCEDURE sp() BEGIN
+DECLARE v1 DATE;
+WHILE EXISTS (SELECT 1 FROM tbl_a WHERE c>v1 AND c<=v1) DO
+SELECT 1;
+END WHILE;
+WHILE EXISTS (SELECT 1
+FROM tbl_a
+WHERE c<v1 AND EXISTS (SELECT 1
+FROM tbl_b
+WHERE tbl_a.c=tbl_b.c)) DO
+SELECT 1;
+END WHILE;
+END $$
+CALL sp();
+connection master_1;
+DROP DATABASE IF EXISTS auto_test_local;
+connection child2_1;
+DROP DATABASE IF EXISTS auto_test_remote;
+for master_1
+for child2
+child2_1
+child2_2
+child2_3
+for child3
diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_30191.cnf b/storage/spider/mysql-test/spider/bugfix/t/mdev_30191.cnf
new file mode 100644
index 00000000000..05dfd8a0bce
--- /dev/null
+++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_30191.cnf
@@ -0,0 +1,3 @@
+!include include/default_mysqld.cnf
+!include ../my_1_1.cnf
+!include ../my_2_1.cnf
diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_30191.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_30191.test
new file mode 100644
index 00000000000..8d19a3515cf
--- /dev/null
+++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_30191.test
@@ -0,0 +1,51 @@
+--echo #
+--echo # MDEV-30191 SIGSEGV & heap-use-after-free in spider_db_print_item_type, SIGABRT in __cxa_pure_virtual/spider_db_print_item_type, Got error 128 "Out of memory in engine", 56/112 memory not freed, and Assertion `fixed()' failed in Item_sp_variable::val_str on SP call
+--echo #
+
+--disable_query_log
+--disable_result_log
+--source ../../t/test_init.inc
+--enable_result_log
+--enable_query_log
+# On child2_1: create tbl_a (presumably the remote table behind srv "s_2_1").
+--connection child2_1
+CREATE DATABASE auto_test_remote;
+USE auto_test_remote;
+CREATE TABLE tbl_a (c INT);
+# On master_1: tbl_a uses $MASTER_1_ENGINE with a table/srv link in COMMENT; tbl_b has no engine clause.
+--connection master_1
+CREATE DATABASE auto_test_local;
+USE auto_test_local;
+
+eval CREATE TABLE tbl_a (
+ c INT
+) $MASTER_1_ENGINE $MASTER_1_CHARSET COMMENT='table "tbl_a", srv "s_2_1"';
+CREATE TABLE tbl_b (c INT);
+# MDEV-30191 scenario: call a procedure whose WHILE EXISTS subqueries compare c with the SP variable v1.
+--delimiter $$
+CREATE PROCEDURE sp() BEGIN
+ DECLARE v1 DATE;
+ WHILE EXISTS (SELECT 1 FROM tbl_a WHERE c>v1 AND c<=v1) DO
+ SELECT 1;
+ END WHILE;
+ WHILE EXISTS (SELECT 1
+ FROM tbl_a
+ WHERE c<v1 AND EXISTS (SELECT 1
+ FROM tbl_b
+ WHERE tbl_a.c=tbl_b.c)) DO
+ SELECT 1;
+ END WHILE;
+END $$
+--delimiter ;
+CALL sp();
+# Drop the databases created above on both connections.
+--connection master_1
+DROP DATABASE IF EXISTS auto_test_local;
+--connection child2_1
+DROP DATABASE IF EXISTS auto_test_remote;
+# Quiet teardown; the deinit include uses the same ../../t/ prefix as test_init.inc.
+--disable_query_log
+--disable_result_log
+--source ../../t/test_deinit.inc
+--enable_query_log
+--enable_result_log