From 7d3d3838c1b8af98a9704120b79c481a4113b862 Mon Sep 17 00:00:00 2001 From: Julius Goryavsky Date: Mon, 23 May 2022 13:11:14 +0200 Subject: MDEV-28583: post-merge fixes --- .../galera/r/galera_ist_MDEV-28423,debug.rdiff | 202 +++----- .../suite/galera/r/galera_ist_MDEV-28423.result | 558 ++++++--------------- .../galera/r/galera_ist_MDEV-28583,debug.rdiff | 202 +++----- .../suite/galera/r/galera_ist_MDEV-28583.result | 558 ++++++--------------- scripts/wsrep_sst_mariabackup.sh | 5 +- 5 files changed, 454 insertions(+), 1071 deletions(-) diff --git a/mysql-test/suite/galera/r/galera_ist_MDEV-28423,debug.rdiff b/mysql-test/suite/galera/r/galera_ist_MDEV-28423,debug.rdiff index 8c84321e774..f9f81ea3b40 100644 --- a/mysql-test/suite/galera/r/galera_ist_MDEV-28423,debug.rdiff +++ b/mysql-test/suite/galera/r/galera_ist_MDEV-28423,debug.rdiff @@ -1,27 +1,27 @@ --- r/galera_ist_MDEV-28423.result -+++ r/galera_ist_MDEV-28423,debug.reject -@@ -517,3 +517,187 @@ - 1 ++++ r/galera_ist_MDEV-28423.reject +@@ -286,3 +286,111 @@ DROP TABLE t1; COMMIT; + SET AUTOCOMMIT=ON; +Performing State Transfer on a server that has been killed and restarted +while a DDL was in progress on it +connection node_1; -+CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; ++CREATE TABLE t1 (f1 CHAR(255)) ENGINE=InnoDB; +SET AUTOCOMMIT=OFF; +START TRANSACTION; -+INSERT INTO t1 VALUES (1,'node1_committed_before'); -+INSERT INTO t1 VALUES (2,'node1_committed_before'); -+INSERT INTO t1 VALUES (3,'node1_committed_before'); -+INSERT INTO t1 VALUES (4,'node1_committed_before'); -+INSERT INTO t1 VALUES (5,'node1_committed_before'); ++INSERT INTO t1 VALUES ('node1_committed_before'); ++INSERT INTO t1 VALUES ('node1_committed_before'); ++INSERT INTO t1 VALUES ('node1_committed_before'); ++INSERT INTO t1 VALUES ('node1_committed_before'); ++INSERT INTO t1 VALUES ('node1_committed_before'); +connection node_2; +START TRANSACTION; -+INSERT INTO t1 VALUES (6,'node2_committed_before'); 
-+INSERT INTO t1 VALUES (7,'node2_committed_before'); -+INSERT INTO t1 VALUES (8,'node2_committed_before'); -+INSERT INTO t1 VALUES (9,'node2_committed_before'); -+INSERT INTO t1 VALUES (10,'node2_committed_before'); ++INSERT INTO t1 VALUES ('node2_committed_before'); ++INSERT INTO t1 VALUES ('node2_committed_before'); ++INSERT INTO t1 VALUES ('node2_committed_before'); ++INSERT INTO t1 VALUES ('node2_committed_before'); ++INSERT INTO t1 VALUES ('node2_committed_before'); +COMMIT; +SET GLOBAL debug_dbug = 'd,sync.alter_opened_table'; +connection node_1; @@ -32,26 +32,26 @@ +connection node_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; -+INSERT INTO t1 (id,f1) VALUES (11,'node1_committed_during'); -+INSERT INTO t1 (id,f1) VALUES (12,'node1_committed_during'); -+INSERT INTO t1 (id,f1) VALUES (13,'node1_committed_during'); -+INSERT INTO t1 (id,f1) VALUES (14,'node1_committed_during'); -+INSERT INTO t1 (id,f1) VALUES (15,'node1_committed_during'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_during'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_during'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_during'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_during'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_during'); +COMMIT; +START TRANSACTION; -+INSERT INTO t1 (id,f1) VALUES (16,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (17,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (18,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (19,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (20,'node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); +connect node_1a_galera_st_kill_slave_ddl, 127.0.0.1, root, , test, 
$NODE_MYPORT_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; -+INSERT INTO t1 (id,f1) VALUES (21,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (22,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (23,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (24,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (25,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); +connection node_2; +Performing --wsrep-recover ... +connection node_2; @@ -59,132 +59,56 @@ +Using --wsrep-start-position when starting mysqld ... +SET AUTOCOMMIT=OFF; +START TRANSACTION; -+INSERT INTO t1 (id,f1) VALUES (26,'node2_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (27,'node2_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (28,'node2_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (29,'node2_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (30,'node2_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node2_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node2_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node2_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node2_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node2_committed_after'); +COMMIT; +connection node_1; -+INSERT INTO t1 (id,f1) VALUES (31,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (32,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (33,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (34,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (35,'node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT 
INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); +COMMIT; +SET AUTOCOMMIT=OFF; +START TRANSACTION; -+INSERT INTO t1 (id,f1) VALUES (36,'node1_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (37,'node1_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (38,'node1_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (39,'node1_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (40,'node1_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_after'); +COMMIT; +connection node_1a_galera_st_kill_slave_ddl; -+INSERT INTO t1 (id,f1) VALUES (41,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (42,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (43,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (44,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (45,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); +ROLLBACK; -+SET AUTOCOMMIT=ON; -+SET SESSION wsrep_sync_wait=15; -+SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; -+EXPECT_3 -+3 -+SELECT COUNT(*) AS EXPECT_35 FROM t1; -+EXPECT_35 -+35 -+SELECT * FROM t1; -+id f1 f2 -+1 node1_committed_before NULL -+2 node1_committed_before NULL -+3 node1_committed_before NULL -+4 node1_committed_before NULL -+5 node1_committed_before NULL -+6 node2_committed_before NULL -+7 
node2_committed_before NULL -+8 node2_committed_before NULL -+9 node2_committed_before NULL -+10 node2_committed_before NULL -+11 node1_committed_during NULL -+12 node1_committed_during NULL -+13 node1_committed_during NULL -+14 node1_committed_during NULL -+15 node1_committed_during NULL -+16 node1_to_be_committed_after NULL -+17 node1_to_be_committed_after NULL -+18 node1_to_be_committed_after NULL -+19 node1_to_be_committed_after NULL -+20 node1_to_be_committed_after NULL -+26 node2_committed_after NULL -+27 node2_committed_after NULL -+28 node2_committed_after NULL -+29 node2_committed_after NULL -+30 node2_committed_after NULL -+31 node1_to_be_committed_after NULL -+32 node1_to_be_committed_after NULL -+33 node1_to_be_committed_after NULL -+34 node1_to_be_committed_after NULL -+35 node1_to_be_committed_after NULL -+36 node1_committed_after NULL -+37 node1_committed_after NULL -+38 node1_committed_after NULL -+39 node1_committed_after NULL -+40 node1_committed_after NULL ++SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; ++COUNT(*) = 2 ++1 ++SELECT COUNT(*) = 35 FROM t1; ++COUNT(*) = 35 ++1 +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +COMMIT; -+connection node_1; +SET AUTOCOMMIT=ON; -+SET SESSION wsrep_sync_wait=15; -+SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; -+EXPECT_3 -+3 -+SELECT COUNT(*) AS EXPECT_35 FROM t1; -+EXPECT_35 -+35 -+SELECT * FROM t1; -+id f1 f2 -+1 node1_committed_before NULL -+2 node1_committed_before NULL -+3 node1_committed_before NULL -+4 node1_committed_before NULL -+5 node1_committed_before NULL -+6 node2_committed_before NULL -+7 node2_committed_before NULL -+8 node2_committed_before NULL -+9 node2_committed_before NULL -+10 node2_committed_before NULL -+11 node1_committed_during NULL -+12 node1_committed_during NULL -+13 node1_committed_during NULL -+14 node1_committed_during NULL -+15 
node1_committed_during NULL -+16 node1_to_be_committed_after NULL -+17 node1_to_be_committed_after NULL -+18 node1_to_be_committed_after NULL -+19 node1_to_be_committed_after NULL -+20 node1_to_be_committed_after NULL -+26 node2_committed_after NULL -+27 node2_committed_after NULL -+28 node2_committed_after NULL -+29 node2_committed_after NULL -+30 node2_committed_after NULL -+31 node1_to_be_committed_after NULL -+32 node1_to_be_committed_after NULL -+33 node1_to_be_committed_after NULL -+34 node1_to_be_committed_after NULL -+35 node1_to_be_committed_after NULL -+36 node1_committed_after NULL -+37 node1_committed_after NULL -+38 node1_committed_after NULL -+39 node1_committed_after NULL -+40 node1_committed_after NULL ++connection node_1; ++SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; ++COUNT(*) = 2 ++1 ++SELECT COUNT(*) = 35 FROM t1; ++COUNT(*) = 35 ++1 +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +DROP TABLE t1; +COMMIT; ++SET AUTOCOMMIT=ON; +SET GLOBAL debug_dbug = $debug_orig; diff --git a/mysql-test/suite/galera/r/galera_ist_MDEV-28423.result b/mysql-test/suite/galera/r/galera_ist_MDEV-28423.result index 5a71b490a80..80a28d349ba 100644 --- a/mysql-test/suite/galera/r/galera_ist_MDEV-28423.result +++ b/mysql-test/suite/galera/r/galera_ist_MDEV-28423.result @@ -1,519 +1,287 @@ -connection node_2; -connection node_1; connection node_1; connection node_2; Performing State Transfer on a server that has been temporarily disconnected connection node_1; -CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +CREATE TABLE t1 (f1 CHAR(255)) ENGINE=InnoDB; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (1,'node1_committed_before'); -INSERT INTO t1 VALUES (2,'node1_committed_before'); -INSERT INTO t1 VALUES (3,'node1_committed_before'); -INSERT INTO t1 VALUES (4,'node1_committed_before'); -INSERT INTO t1 VALUES 
(5,'node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); COMMIT; connection node_2; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (6,'node2_committed_before'); -INSERT INTO t1 VALUES (7,'node2_committed_before'); -INSERT INTO t1 VALUES (8,'node2_committed_before'); -INSERT INTO t1 VALUES (9,'node2_committed_before'); -INSERT INTO t1 VALUES (10,'node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); COMMIT; Unloading wsrep provider ... SET GLOBAL wsrep_cluster_address = ''; connection node_1; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (11,'node1_committed_during'); -INSERT INTO t1 VALUES (12,'node1_committed_during'); -INSERT INTO t1 VALUES (13,'node1_committed_during'); -INSERT INTO t1 VALUES (14,'node1_committed_during'); -INSERT INTO t1 VALUES (15,'node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); COMMIT; START TRANSACTION; -INSERT INTO t1 VALUES (16,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (17,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (18,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (19,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (20,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES 
('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); connect node_1a_galera_st_disconnect_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (21,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (22,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (23,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (24,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (25,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); connection node_2; Loading wsrep provider ... -disconnect node_2; -connect node_2, 127.0.0.1, root, , test, $NODE_MYPORT_2; -connection node_2; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (26,'node2_committed_after'); -INSERT INTO t1 VALUES (27,'node2_committed_after'); -INSERT INTO t1 VALUES (28,'node2_committed_after'); -INSERT INTO t1 VALUES (29,'node2_committed_after'); -INSERT INTO t1 VALUES (30,'node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); COMMIT; connection node_1; -INSERT INTO t1 VALUES (31,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (32,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (33,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (34,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (35,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO 
t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); COMMIT; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (36,'node1_committed_after'); -INSERT INTO t1 VALUES (37,'node1_committed_after'); -INSERT INTO t1 VALUES (38,'node1_committed_after'); -INSERT INTO t1 VALUES (39,'node1_committed_after'); -INSERT INTO t1 VALUES (40,'node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); COMMIT; connection node_1a_galera_st_disconnect_slave; -INSERT INTO t1 VALUES (41,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (42,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (43,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (44,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (45,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); ROLLBACK; -SET AUTOCOMMIT=ON; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) AS EXPECT_35 FROM t1; -EXPECT_35 -35 -SELECT * FROM t1; -id f1 -1 node1_committed_before -2 node1_committed_before -3 node1_committed_before -4 node1_committed_before -5 node1_committed_before -6 node2_committed_before -7 node2_committed_before -8 node2_committed_before -9 node2_committed_before -10 node2_committed_before -11 node1_committed_during -12 node1_committed_during -13 node1_committed_during -14 node1_committed_during -15 node1_committed_during -16 node1_to_be_committed_after -17 node1_to_be_committed_after -18 node1_to_be_committed_after -19 
node1_to_be_committed_after -20 node1_to_be_committed_after -26 node2_committed_after -27 node2_committed_after -28 node2_committed_after -29 node2_committed_after -30 node2_committed_after -31 node1_to_be_committed_after -32 node1_to_be_committed_after -33 node1_to_be_committed_after -34 node1_to_be_committed_after -35 node1_to_be_committed_after -36 node1_committed_after -37 node1_committed_after -38 node1_committed_after -39 node1_committed_after -40 node1_committed_after +SELECT COUNT(*) = 35 FROM t1; +COUNT(*) = 35 +1 SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; COUNT(*) = 0 1 -connection node_1; +COMMIT; SET AUTOCOMMIT=ON; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) AS EXPECT_35 FROM t1; -EXPECT_35 -35 -SELECT * FROM t1; -id f1 -1 node1_committed_before -2 node1_committed_before -3 node1_committed_before -4 node1_committed_before -5 node1_committed_before -6 node2_committed_before -7 node2_committed_before -8 node2_committed_before -9 node2_committed_before -10 node2_committed_before -11 node1_committed_during -12 node1_committed_during -13 node1_committed_during -14 node1_committed_during -15 node1_committed_during -16 node1_to_be_committed_after -17 node1_to_be_committed_after -18 node1_to_be_committed_after -19 node1_to_be_committed_after -20 node1_to_be_committed_after -26 node2_committed_after -27 node2_committed_after -28 node2_committed_after -29 node2_committed_after -30 node2_committed_after -31 node1_to_be_committed_after -32 node1_to_be_committed_after -33 node1_to_be_committed_after -34 node1_to_be_committed_after -35 node1_to_be_committed_after -36 node1_committed_after -37 node1_committed_after -38 node1_committed_after -39 node1_committed_after -40 node1_committed_after +connection node_1; +SELECT COUNT(*) = 35 FROM t1; +COUNT(*) = 35 +1 SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; COUNT(*) = 0 1 DROP TABLE t1; COMMIT; +SET 
AUTOCOMMIT=ON; Performing State Transfer on a server that has been shut down cleanly and restarted connection node_1; -CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +CREATE TABLE t1 (f1 CHAR(255)) ENGINE=InnoDB; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (1,'node1_committed_before'); -INSERT INTO t1 VALUES (2,'node1_committed_before'); -INSERT INTO t1 VALUES (3,'node1_committed_before'); -INSERT INTO t1 VALUES (4,'node1_committed_before'); -INSERT INTO t1 VALUES (5,'node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); COMMIT; connection node_2; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (6,'node2_committed_before'); -INSERT INTO t1 VALUES (7,'node2_committed_before'); -INSERT INTO t1 VALUES (8,'node2_committed_before'); -INSERT INTO t1 VALUES (9,'node2_committed_before'); -INSERT INTO t1 VALUES (10,'node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); COMMIT; Shutting down server ... 
connection node_1; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (11,'node1_committed_during'); -INSERT INTO t1 VALUES (12,'node1_committed_during'); -INSERT INTO t1 VALUES (13,'node1_committed_during'); -INSERT INTO t1 VALUES (14,'node1_committed_during'); -INSERT INTO t1 VALUES (15,'node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); COMMIT; START TRANSACTION; -INSERT INTO t1 VALUES (16,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (17,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (18,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (19,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (20,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); connect node_1a_galera_st_shutdown_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (21,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (22,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (23,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (24,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (25,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); connection node_2; Starting server ... 
SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (26,'node2_committed_after'); -INSERT INTO t1 VALUES (27,'node2_committed_after'); -INSERT INTO t1 VALUES (28,'node2_committed_after'); -INSERT INTO t1 VALUES (29,'node2_committed_after'); -INSERT INTO t1 VALUES (30,'node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); COMMIT; connection node_1; -INSERT INTO t1 VALUES (31,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (32,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (33,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (34,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (35,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); COMMIT; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (36,'node1_committed_after'); -INSERT INTO t1 VALUES (37,'node1_committed_after'); -INSERT INTO t1 VALUES (38,'node1_committed_after'); -INSERT INTO t1 VALUES (39,'node1_committed_after'); -INSERT INTO t1 VALUES (40,'node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); COMMIT; connection node_1a_galera_st_shutdown_slave; -INSERT INTO t1 VALUES (41,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (42,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (43,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES 
(44,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (45,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); ROLLBACK; -SET AUTOCOMMIT=ON; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) AS EXPECT_15 FROM t1; -EXPECT_15 -35 -SELECT * from t1; -id f1 -1 node1_committed_before -2 node1_committed_before -3 node1_committed_before -4 node1_committed_before -5 node1_committed_before -6 node2_committed_before -7 node2_committed_before -8 node2_committed_before -9 node2_committed_before -10 node2_committed_before -11 node1_committed_during -12 node1_committed_during -13 node1_committed_during -14 node1_committed_during -15 node1_committed_during -16 node1_to_be_committed_after -17 node1_to_be_committed_after -18 node1_to_be_committed_after -19 node1_to_be_committed_after -20 node1_to_be_committed_after -26 node2_committed_after -27 node2_committed_after -28 node2_committed_after -29 node2_committed_after -30 node2_committed_after -31 node1_to_be_committed_after -32 node1_to_be_committed_after -33 node1_to_be_committed_after -34 node1_to_be_committed_after -35 node1_to_be_committed_after -36 node1_committed_after -37 node1_committed_after -38 node1_committed_after -39 node1_committed_after -40 node1_committed_after +SELECT COUNT(*) = 35 FROM t1; +COUNT(*) = 35 +1 SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; COUNT(*) = 0 1 COMMIT; -connection node_1; SET AUTOCOMMIT=ON; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) AS EXPECT_15 FROM t1; -EXPECT_15 -35 -SELECT * from t1; -id f1 -1 node1_committed_before -2 node1_committed_before -3 node1_committed_before -4 node1_committed_before -5 node1_committed_before -6 node2_committed_before -7 
node2_committed_before -8 node2_committed_before -9 node2_committed_before -10 node2_committed_before -11 node1_committed_during -12 node1_committed_during -13 node1_committed_during -14 node1_committed_during -15 node1_committed_during -16 node1_to_be_committed_after -17 node1_to_be_committed_after -18 node1_to_be_committed_after -19 node1_to_be_committed_after -20 node1_to_be_committed_after -26 node2_committed_after -27 node2_committed_after -28 node2_committed_after -29 node2_committed_after -30 node2_committed_after -31 node1_to_be_committed_after -32 node1_to_be_committed_after -33 node1_to_be_committed_after -34 node1_to_be_committed_after -35 node1_to_be_committed_after -36 node1_committed_after -37 node1_committed_after -38 node1_committed_after -39 node1_committed_after -40 node1_committed_after +connection node_1; +SELECT COUNT(*) = 35 FROM t1; +COUNT(*) = 35 +1 SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; COUNT(*) = 0 1 DROP TABLE t1; COMMIT; +SET AUTOCOMMIT=ON; Performing State Transfer on a server that has been killed and restarted connection node_1; -CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +CREATE TABLE t1 (f1 CHAR(255)) ENGINE=InnoDB; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (1,'node1_committed_before'); -INSERT INTO t1 VALUES (2,'node1_committed_before'); -INSERT INTO t1 VALUES (3,'node1_committed_before'); -INSERT INTO t1 VALUES (4,'node1_committed_before'); -INSERT INTO t1 VALUES (5,'node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); COMMIT; connection node_2; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (6,'node2_committed_before'); -INSERT INTO t1 VALUES (7,'node2_committed_before'); -INSERT 
INTO t1 VALUES (8,'node2_committed_before'); -INSERT INTO t1 VALUES (9,'node2_committed_before'); -INSERT INTO t1 VALUES (10,'node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); COMMIT; Killing server ... connection node_1; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (11,'node1_committed_during'); -INSERT INTO t1 VALUES (12,'node1_committed_during'); -INSERT INTO t1 VALUES (13,'node1_committed_during'); -INSERT INTO t1 VALUES (14,'node1_committed_during'); -INSERT INTO t1 VALUES (15,'node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); COMMIT; START TRANSACTION; -INSERT INTO t1 VALUES (16,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (17,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (18,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (19,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (20,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); connect node_1a_galera_st_kill_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (21,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (22,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (23,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (24,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES 
(25,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); connection node_2; Performing --wsrep-recover ... Starting server ... Using --wsrep-start-position when starting mysqld ... SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (26,'node2_committed_after'); -INSERT INTO t1 VALUES (27,'node2_committed_after'); -INSERT INTO t1 VALUES (28,'node2_committed_after'); -INSERT INTO t1 VALUES (29,'node2_committed_after'); -INSERT INTO t1 VALUES (30,'node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); COMMIT; connection node_1; -INSERT INTO t1 VALUES (31,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (32,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (33,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (34,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (35,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); COMMIT; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (36,'node1_committed_after'); -INSERT INTO t1 VALUES (37,'node1_committed_after'); -INSERT INTO t1 VALUES (38,'node1_committed_after'); -INSERT INTO t1 VALUES (39,'node1_committed_after'); -INSERT INTO t1 VALUES (40,'node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES 
('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); COMMIT; connection node_1a_galera_st_kill_slave; -INSERT INTO t1 VALUES (41,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (42,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (43,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (45,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (46,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); ROLLBACK; -SET AUTOCOMMIT=ON; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) AS EXPECT_35 FROM t1; -EXPECT_35 -35 -SELECT * FROM t1; -id f1 -1 node1_committed_before -2 node1_committed_before -3 node1_committed_before -4 node1_committed_before -5 node1_committed_before -6 node2_committed_before -7 node2_committed_before -8 node2_committed_before -9 node2_committed_before -10 node2_committed_before -11 node1_committed_during -12 node1_committed_during -13 node1_committed_during -14 node1_committed_during -15 node1_committed_during -16 node1_to_be_committed_after -17 node1_to_be_committed_after -18 node1_to_be_committed_after -19 node1_to_be_committed_after -20 node1_to_be_committed_after -26 node2_committed_after -27 node2_committed_after -28 node2_committed_after -29 node2_committed_after -30 node2_committed_after -31 node1_to_be_committed_after -32 node1_to_be_committed_after -33 node1_to_be_committed_after -34 node1_to_be_committed_after -35 node1_to_be_committed_after -36 node1_committed_after -37 node1_committed_after -38 node1_committed_after -39 node1_committed_after -40 node1_committed_after +SELECT COUNT(*) = 35 FROM t1; +COUNT(*) = 35 +1 SELECT COUNT(*) = 0 
FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; COUNT(*) = 0 1 COMMIT; -connection node_1; SET AUTOCOMMIT=ON; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) AS EXPECT_35 FROM t1; -EXPECT_35 -35 -SELECT * FROM t1; -id f1 -1 node1_committed_before -2 node1_committed_before -3 node1_committed_before -4 node1_committed_before -5 node1_committed_before -6 node2_committed_before -7 node2_committed_before -8 node2_committed_before -9 node2_committed_before -10 node2_committed_before -11 node1_committed_during -12 node1_committed_during -13 node1_committed_during -14 node1_committed_during -15 node1_committed_during -16 node1_to_be_committed_after -17 node1_to_be_committed_after -18 node1_to_be_committed_after -19 node1_to_be_committed_after -20 node1_to_be_committed_after -26 node2_committed_after -27 node2_committed_after -28 node2_committed_after -29 node2_committed_after -30 node2_committed_after -31 node1_to_be_committed_after -32 node1_to_be_committed_after -33 node1_to_be_committed_after -34 node1_to_be_committed_after -35 node1_to_be_committed_after -36 node1_committed_after -37 node1_committed_after -38 node1_committed_after -39 node1_committed_after -40 node1_committed_after +connection node_1; +SELECT COUNT(*) = 35 FROM t1; +COUNT(*) = 35 +1 SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; COUNT(*) = 0 1 DROP TABLE t1; COMMIT; +SET AUTOCOMMIT=ON; diff --git a/mysql-test/suite/galera/r/galera_ist_MDEV-28583,debug.rdiff b/mysql-test/suite/galera/r/galera_ist_MDEV-28583,debug.rdiff index 51d2a6bf157..ce78050538d 100644 --- a/mysql-test/suite/galera/r/galera_ist_MDEV-28583,debug.rdiff +++ b/mysql-test/suite/galera/r/galera_ist_MDEV-28583,debug.rdiff @@ -1,27 +1,27 @@ --- r/galera_ist_MDEV-28583.result -+++ r/galera_ist_MDEV-28583,debug.reject -@@ -517,3 +517,187 @@ - 1 ++++ r/galera_ist_MDEV-28583.reject +@@ -285,3 +285,111 @@ DROP TABLE t1; COMMIT; + SET AUTOCOMMIT=ON; 
+Performing State Transfer on a server that has been killed and restarted +while a DDL was in progress on it +connection node_1; -+CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; ++CREATE TABLE t1 (f1 CHAR(255)) ENGINE=InnoDB; +SET AUTOCOMMIT=OFF; +START TRANSACTION; -+INSERT INTO t1 VALUES (1,'node1_committed_before'); -+INSERT INTO t1 VALUES (2,'node1_committed_before'); -+INSERT INTO t1 VALUES (3,'node1_committed_before'); -+INSERT INTO t1 VALUES (4,'node1_committed_before'); -+INSERT INTO t1 VALUES (5,'node1_committed_before'); ++INSERT INTO t1 VALUES ('node1_committed_before'); ++INSERT INTO t1 VALUES ('node1_committed_before'); ++INSERT INTO t1 VALUES ('node1_committed_before'); ++INSERT INTO t1 VALUES ('node1_committed_before'); ++INSERT INTO t1 VALUES ('node1_committed_before'); +connection node_2; +START TRANSACTION; -+INSERT INTO t1 VALUES (6,'node2_committed_before'); -+INSERT INTO t1 VALUES (7,'node2_committed_before'); -+INSERT INTO t1 VALUES (8,'node2_committed_before'); -+INSERT INTO t1 VALUES (9,'node2_committed_before'); -+INSERT INTO t1 VALUES (10,'node2_committed_before'); ++INSERT INTO t1 VALUES ('node2_committed_before'); ++INSERT INTO t1 VALUES ('node2_committed_before'); ++INSERT INTO t1 VALUES ('node2_committed_before'); ++INSERT INTO t1 VALUES ('node2_committed_before'); ++INSERT INTO t1 VALUES ('node2_committed_before'); +COMMIT; +SET GLOBAL debug_dbug = 'd,sync.alter_opened_table'; +connection node_1; @@ -32,26 +32,26 @@ +connection node_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; -+INSERT INTO t1 (id,f1) VALUES (11,'node1_committed_during'); -+INSERT INTO t1 (id,f1) VALUES (12,'node1_committed_during'); -+INSERT INTO t1 (id,f1) VALUES (13,'node1_committed_during'); -+INSERT INTO t1 (id,f1) VALUES (14,'node1_committed_during'); -+INSERT INTO t1 (id,f1) VALUES (15,'node1_committed_during'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_during'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_during'); ++INSERT 
INTO t1 (f1) VALUES ('node1_committed_during'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_during'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_during'); +COMMIT; +START TRANSACTION; -+INSERT INTO t1 (id,f1) VALUES (16,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (17,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (18,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (19,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (20,'node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); +connect node_1a_galera_st_kill_slave_ddl, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; -+INSERT INTO t1 (id,f1) VALUES (21,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (22,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (23,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (24,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (25,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); +connection node_2; +Performing --wsrep-recover ... +connection node_2; @@ -59,132 +59,56 @@ +Using --wsrep-start-position when starting mysqld ... 
+SET AUTOCOMMIT=OFF; +START TRANSACTION; -+INSERT INTO t1 (id,f1) VALUES (26,'node2_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (27,'node2_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (28,'node2_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (29,'node2_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (30,'node2_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node2_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node2_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node2_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node2_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node2_committed_after'); +COMMIT; +connection node_1; -+INSERT INTO t1 (id,f1) VALUES (31,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (32,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (33,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (34,'node1_to_be_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (35,'node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_committed_after'); +COMMIT; +SET AUTOCOMMIT=OFF; +START TRANSACTION; -+INSERT INTO t1 (id,f1) VALUES (36,'node1_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (37,'node1_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (38,'node1_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (39,'node1_committed_after'); -+INSERT INTO t1 (id,f1) VALUES (40,'node1_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_after'); ++INSERT INTO t1 (f1) VALUES ('node1_committed_after'); +COMMIT; +connection 
node_1a_galera_st_kill_slave_ddl; -+INSERT INTO t1 (id,f1) VALUES (41,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (42,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (43,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (44,'node1_to_be_rollbacked_after'); -+INSERT INTO t1 (id,f1) VALUES (45,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); ++INSERT INTO t1 (f1) VALUES ('node1_to_be_rollbacked_after'); +ROLLBACK; -+SET AUTOCOMMIT=ON; -+SET SESSION wsrep_sync_wait=15; -+SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; -+EXPECT_3 -+3 -+SELECT COUNT(*) AS EXPECT_35 FROM t1; -+EXPECT_35 -+35 -+SELECT * FROM t1; -+id f1 f2 -+1 node1_committed_before NULL -+2 node1_committed_before NULL -+3 node1_committed_before NULL -+4 node1_committed_before NULL -+5 node1_committed_before NULL -+6 node2_committed_before NULL -+7 node2_committed_before NULL -+8 node2_committed_before NULL -+9 node2_committed_before NULL -+10 node2_committed_before NULL -+11 node1_committed_during NULL -+12 node1_committed_during NULL -+13 node1_committed_during NULL -+14 node1_committed_during NULL -+15 node1_committed_during NULL -+16 node1_to_be_committed_after NULL -+17 node1_to_be_committed_after NULL -+18 node1_to_be_committed_after NULL -+19 node1_to_be_committed_after NULL -+20 node1_to_be_committed_after NULL -+26 node2_committed_after NULL -+27 node2_committed_after NULL -+28 node2_committed_after NULL -+29 node2_committed_after NULL -+30 node2_committed_after NULL -+31 node1_to_be_committed_after NULL -+32 node1_to_be_committed_after NULL -+33 node1_to_be_committed_after NULL -+34 node1_to_be_committed_after NULL -+35 node1_to_be_committed_after NULL -+36 
node1_committed_after NULL -+37 node1_committed_after NULL -+38 node1_committed_after NULL -+39 node1_committed_after NULL -+40 node1_committed_after NULL ++SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; ++COUNT(*) = 2 ++1 ++SELECT COUNT(*) = 35 FROM t1; ++COUNT(*) = 35 ++1 +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +COMMIT; -+connection node_1; +SET AUTOCOMMIT=ON; -+SET SESSION wsrep_sync_wait=15; -+SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; -+EXPECT_3 -+3 -+SELECT COUNT(*) AS EXPECT_35 FROM t1; -+EXPECT_35 -+35 -+SELECT * FROM t1; -+id f1 f2 -+1 node1_committed_before NULL -+2 node1_committed_before NULL -+3 node1_committed_before NULL -+4 node1_committed_before NULL -+5 node1_committed_before NULL -+6 node2_committed_before NULL -+7 node2_committed_before NULL -+8 node2_committed_before NULL -+9 node2_committed_before NULL -+10 node2_committed_before NULL -+11 node1_committed_during NULL -+12 node1_committed_during NULL -+13 node1_committed_during NULL -+14 node1_committed_during NULL -+15 node1_committed_during NULL -+16 node1_to_be_committed_after NULL -+17 node1_to_be_committed_after NULL -+18 node1_to_be_committed_after NULL -+19 node1_to_be_committed_after NULL -+20 node1_to_be_committed_after NULL -+26 node2_committed_after NULL -+27 node2_committed_after NULL -+28 node2_committed_after NULL -+29 node2_committed_after NULL -+30 node2_committed_after NULL -+31 node1_to_be_committed_after NULL -+32 node1_to_be_committed_after NULL -+33 node1_to_be_committed_after NULL -+34 node1_to_be_committed_after NULL -+35 node1_to_be_committed_after NULL -+36 node1_committed_after NULL -+37 node1_committed_after NULL -+38 node1_committed_after NULL -+39 node1_committed_after NULL -+40 node1_committed_after NULL ++connection node_1; ++SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; ++COUNT(*) = 
2 ++1 ++SELECT COUNT(*) = 35 FROM t1; ++COUNT(*) = 35 ++1 +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +DROP TABLE t1; +COMMIT; ++SET AUTOCOMMIT=ON; +SET GLOBAL debug_dbug = $debug_orig; diff --git a/mysql-test/suite/galera/r/galera_ist_MDEV-28583.result b/mysql-test/suite/galera/r/galera_ist_MDEV-28583.result index 5a71b490a80..80a28d349ba 100644 --- a/mysql-test/suite/galera/r/galera_ist_MDEV-28583.result +++ b/mysql-test/suite/galera/r/galera_ist_MDEV-28583.result @@ -1,519 +1,287 @@ -connection node_2; -connection node_1; connection node_1; connection node_2; Performing State Transfer on a server that has been temporarily disconnected connection node_1; -CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +CREATE TABLE t1 (f1 CHAR(255)) ENGINE=InnoDB; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (1,'node1_committed_before'); -INSERT INTO t1 VALUES (2,'node1_committed_before'); -INSERT INTO t1 VALUES (3,'node1_committed_before'); -INSERT INTO t1 VALUES (4,'node1_committed_before'); -INSERT INTO t1 VALUES (5,'node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); COMMIT; connection node_2; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (6,'node2_committed_before'); -INSERT INTO t1 VALUES (7,'node2_committed_before'); -INSERT INTO t1 VALUES (8,'node2_committed_before'); -INSERT INTO t1 VALUES (9,'node2_committed_before'); -INSERT INTO t1 VALUES (10,'node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES 
('node2_committed_before'); COMMIT; Unloading wsrep provider ... SET GLOBAL wsrep_cluster_address = ''; connection node_1; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (11,'node1_committed_during'); -INSERT INTO t1 VALUES (12,'node1_committed_during'); -INSERT INTO t1 VALUES (13,'node1_committed_during'); -INSERT INTO t1 VALUES (14,'node1_committed_during'); -INSERT INTO t1 VALUES (15,'node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); COMMIT; START TRANSACTION; -INSERT INTO t1 VALUES (16,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (17,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (18,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (19,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (20,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); connect node_1a_galera_st_disconnect_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (21,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (22,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (23,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (24,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (25,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); 
connection node_2; Loading wsrep provider ... -disconnect node_2; -connect node_2, 127.0.0.1, root, , test, $NODE_MYPORT_2; -connection node_2; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (26,'node2_committed_after'); -INSERT INTO t1 VALUES (27,'node2_committed_after'); -INSERT INTO t1 VALUES (28,'node2_committed_after'); -INSERT INTO t1 VALUES (29,'node2_committed_after'); -INSERT INTO t1 VALUES (30,'node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); COMMIT; connection node_1; -INSERT INTO t1 VALUES (31,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (32,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (33,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (34,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (35,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); COMMIT; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (36,'node1_committed_after'); -INSERT INTO t1 VALUES (37,'node1_committed_after'); -INSERT INTO t1 VALUES (38,'node1_committed_after'); -INSERT INTO t1 VALUES (39,'node1_committed_after'); -INSERT INTO t1 VALUES (40,'node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); COMMIT; connection node_1a_galera_st_disconnect_slave; -INSERT INTO t1 VALUES (41,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES 
(42,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (43,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (44,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (45,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); ROLLBACK; -SET AUTOCOMMIT=ON; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) AS EXPECT_35 FROM t1; -EXPECT_35 -35 -SELECT * FROM t1; -id f1 -1 node1_committed_before -2 node1_committed_before -3 node1_committed_before -4 node1_committed_before -5 node1_committed_before -6 node2_committed_before -7 node2_committed_before -8 node2_committed_before -9 node2_committed_before -10 node2_committed_before -11 node1_committed_during -12 node1_committed_during -13 node1_committed_during -14 node1_committed_during -15 node1_committed_during -16 node1_to_be_committed_after -17 node1_to_be_committed_after -18 node1_to_be_committed_after -19 node1_to_be_committed_after -20 node1_to_be_committed_after -26 node2_committed_after -27 node2_committed_after -28 node2_committed_after -29 node2_committed_after -30 node2_committed_after -31 node1_to_be_committed_after -32 node1_to_be_committed_after -33 node1_to_be_committed_after -34 node1_to_be_committed_after -35 node1_to_be_committed_after -36 node1_committed_after -37 node1_committed_after -38 node1_committed_after -39 node1_committed_after -40 node1_committed_after +SELECT COUNT(*) = 35 FROM t1; +COUNT(*) = 35 +1 SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; COUNT(*) = 0 1 -connection node_1; +COMMIT; SET AUTOCOMMIT=ON; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) AS EXPECT_35 FROM t1; -EXPECT_35 -35 -SELECT * FROM t1; -id f1 -1 node1_committed_before -2 
node1_committed_before -3 node1_committed_before -4 node1_committed_before -5 node1_committed_before -6 node2_committed_before -7 node2_committed_before -8 node2_committed_before -9 node2_committed_before -10 node2_committed_before -11 node1_committed_during -12 node1_committed_during -13 node1_committed_during -14 node1_committed_during -15 node1_committed_during -16 node1_to_be_committed_after -17 node1_to_be_committed_after -18 node1_to_be_committed_after -19 node1_to_be_committed_after -20 node1_to_be_committed_after -26 node2_committed_after -27 node2_committed_after -28 node2_committed_after -29 node2_committed_after -30 node2_committed_after -31 node1_to_be_committed_after -32 node1_to_be_committed_after -33 node1_to_be_committed_after -34 node1_to_be_committed_after -35 node1_to_be_committed_after -36 node1_committed_after -37 node1_committed_after -38 node1_committed_after -39 node1_committed_after -40 node1_committed_after +connection node_1; +SELECT COUNT(*) = 35 FROM t1; +COUNT(*) = 35 +1 SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; COUNT(*) = 0 1 DROP TABLE t1; COMMIT; +SET AUTOCOMMIT=ON; Performing State Transfer on a server that has been shut down cleanly and restarted connection node_1; -CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +CREATE TABLE t1 (f1 CHAR(255)) ENGINE=InnoDB; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (1,'node1_committed_before'); -INSERT INTO t1 VALUES (2,'node1_committed_before'); -INSERT INTO t1 VALUES (3,'node1_committed_before'); -INSERT INTO t1 VALUES (4,'node1_committed_before'); -INSERT INTO t1 VALUES (5,'node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); COMMIT; connection node_2; SET 
AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (6,'node2_committed_before'); -INSERT INTO t1 VALUES (7,'node2_committed_before'); -INSERT INTO t1 VALUES (8,'node2_committed_before'); -INSERT INTO t1 VALUES (9,'node2_committed_before'); -INSERT INTO t1 VALUES (10,'node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); COMMIT; Shutting down server ... connection node_1; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (11,'node1_committed_during'); -INSERT INTO t1 VALUES (12,'node1_committed_during'); -INSERT INTO t1 VALUES (13,'node1_committed_during'); -INSERT INTO t1 VALUES (14,'node1_committed_during'); -INSERT INTO t1 VALUES (15,'node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); COMMIT; START TRANSACTION; -INSERT INTO t1 VALUES (16,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (17,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (18,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (19,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (20,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); connect node_1a_galera_st_shutdown_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (21,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES 
(22,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (23,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (24,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (25,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); connection node_2; Starting server ... SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (26,'node2_committed_after'); -INSERT INTO t1 VALUES (27,'node2_committed_after'); -INSERT INTO t1 VALUES (28,'node2_committed_after'); -INSERT INTO t1 VALUES (29,'node2_committed_after'); -INSERT INTO t1 VALUES (30,'node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); COMMIT; connection node_1; -INSERT INTO t1 VALUES (31,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (32,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (33,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (34,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (35,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); COMMIT; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (36,'node1_committed_after'); -INSERT INTO t1 VALUES (37,'node1_committed_after'); -INSERT INTO t1 VALUES (38,'node1_committed_after'); -INSERT INTO t1 VALUES (39,'node1_committed_after'); -INSERT INTO t1 VALUES 
(40,'node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); COMMIT; connection node_1a_galera_st_shutdown_slave; -INSERT INTO t1 VALUES (41,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (42,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (43,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (44,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (45,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); ROLLBACK; -SET AUTOCOMMIT=ON; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) AS EXPECT_15 FROM t1; -EXPECT_15 -35 -SELECT * from t1; -id f1 -1 node1_committed_before -2 node1_committed_before -3 node1_committed_before -4 node1_committed_before -5 node1_committed_before -6 node2_committed_before -7 node2_committed_before -8 node2_committed_before -9 node2_committed_before -10 node2_committed_before -11 node1_committed_during -12 node1_committed_during -13 node1_committed_during -14 node1_committed_during -15 node1_committed_during -16 node1_to_be_committed_after -17 node1_to_be_committed_after -18 node1_to_be_committed_after -19 node1_to_be_committed_after -20 node1_to_be_committed_after -26 node2_committed_after -27 node2_committed_after -28 node2_committed_after -29 node2_committed_after -30 node2_committed_after -31 node1_to_be_committed_after -32 node1_to_be_committed_after -33 node1_to_be_committed_after -34 node1_to_be_committed_after -35 node1_to_be_committed_after -36 node1_committed_after -37 node1_committed_after -38 node1_committed_after -39 
node1_committed_after -40 node1_committed_after +SELECT COUNT(*) = 35 FROM t1; +COUNT(*) = 35 +1 SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; COUNT(*) = 0 1 COMMIT; -connection node_1; SET AUTOCOMMIT=ON; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) AS EXPECT_15 FROM t1; -EXPECT_15 -35 -SELECT * from t1; -id f1 -1 node1_committed_before -2 node1_committed_before -3 node1_committed_before -4 node1_committed_before -5 node1_committed_before -6 node2_committed_before -7 node2_committed_before -8 node2_committed_before -9 node2_committed_before -10 node2_committed_before -11 node1_committed_during -12 node1_committed_during -13 node1_committed_during -14 node1_committed_during -15 node1_committed_during -16 node1_to_be_committed_after -17 node1_to_be_committed_after -18 node1_to_be_committed_after -19 node1_to_be_committed_after -20 node1_to_be_committed_after -26 node2_committed_after -27 node2_committed_after -28 node2_committed_after -29 node2_committed_after -30 node2_committed_after -31 node1_to_be_committed_after -32 node1_to_be_committed_after -33 node1_to_be_committed_after -34 node1_to_be_committed_after -35 node1_to_be_committed_after -36 node1_committed_after -37 node1_committed_after -38 node1_committed_after -39 node1_committed_after -40 node1_committed_after +connection node_1; +SELECT COUNT(*) = 35 FROM t1; +COUNT(*) = 35 +1 SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; COUNT(*) = 0 1 DROP TABLE t1; COMMIT; +SET AUTOCOMMIT=ON; Performing State Transfer on a server that has been killed and restarted connection node_1; -CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +CREATE TABLE t1 (f1 CHAR(255)) ENGINE=InnoDB; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (1,'node1_committed_before'); -INSERT INTO t1 VALUES (2,'node1_committed_before'); -INSERT INTO t1 VALUES (3,'node1_committed_before'); 
-INSERT INTO t1 VALUES (4,'node1_committed_before'); -INSERT INTO t1 VALUES (5,'node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); +INSERT INTO t1 VALUES ('node1_committed_before'); COMMIT; connection node_2; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (6,'node2_committed_before'); -INSERT INTO t1 VALUES (7,'node2_committed_before'); -INSERT INTO t1 VALUES (8,'node2_committed_before'); -INSERT INTO t1 VALUES (9,'node2_committed_before'); -INSERT INTO t1 VALUES (10,'node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); +INSERT INTO t1 VALUES ('node2_committed_before'); COMMIT; Killing server ... connection node_1; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (11,'node1_committed_during'); -INSERT INTO t1 VALUES (12,'node1_committed_during'); -INSERT INTO t1 VALUES (13,'node1_committed_during'); -INSERT INTO t1 VALUES (14,'node1_committed_during'); -INSERT INTO t1 VALUES (15,'node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); +INSERT INTO t1 VALUES ('node1_committed_during'); COMMIT; START TRANSACTION; -INSERT INTO t1 VALUES (16,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (17,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (18,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (19,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (20,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT 
INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); connect node_1a_galera_st_kill_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (21,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (22,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (23,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (24,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (25,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); connection node_2; Performing --wsrep-recover ... Starting server ... Using --wsrep-start-position when starting mysqld ... SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (26,'node2_committed_after'); -INSERT INTO t1 VALUES (27,'node2_committed_after'); -INSERT INTO t1 VALUES (28,'node2_committed_after'); -INSERT INTO t1 VALUES (29,'node2_committed_after'); -INSERT INTO t1 VALUES (30,'node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); +INSERT INTO t1 VALUES ('node2_committed_after'); COMMIT; connection node_1; -INSERT INTO t1 VALUES (31,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (32,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (33,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (34,'node1_to_be_committed_after'); -INSERT INTO t1 VALUES (35,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES 
('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); +INSERT INTO t1 VALUES ('node1_to_be_committed_after'); COMMIT; SET AUTOCOMMIT=OFF; START TRANSACTION; -INSERT INTO t1 VALUES (36,'node1_committed_after'); -INSERT INTO t1 VALUES (37,'node1_committed_after'); -INSERT INTO t1 VALUES (38,'node1_committed_after'); -INSERT INTO t1 VALUES (39,'node1_committed_after'); -INSERT INTO t1 VALUES (40,'node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); +INSERT INTO t1 VALUES ('node1_committed_after'); COMMIT; connection node_1a_galera_st_kill_slave; -INSERT INTO t1 VALUES (41,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (42,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (43,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (45,'node1_to_be_rollbacked_after'); -INSERT INTO t1 VALUES (46,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES ('node1_to_be_rollbacked_after'); ROLLBACK; -SET AUTOCOMMIT=ON; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) AS EXPECT_35 FROM t1; -EXPECT_35 -35 -SELECT * FROM t1; -id f1 -1 node1_committed_before -2 node1_committed_before -3 node1_committed_before -4 node1_committed_before -5 node1_committed_before -6 node2_committed_before -7 node2_committed_before -8 node2_committed_before -9 node2_committed_before -10 node2_committed_before -11 node1_committed_during -12 node1_committed_during -13 node1_committed_during -14 node1_committed_during -15 node1_committed_during -16 node1_to_be_committed_after -17 node1_to_be_committed_after -18 node1_to_be_committed_after -19 
node1_to_be_committed_after -20 node1_to_be_committed_after -26 node2_committed_after -27 node2_committed_after -28 node2_committed_after -29 node2_committed_after -30 node2_committed_after -31 node1_to_be_committed_after -32 node1_to_be_committed_after -33 node1_to_be_committed_after -34 node1_to_be_committed_after -35 node1_to_be_committed_after -36 node1_committed_after -37 node1_committed_after -38 node1_committed_after -39 node1_committed_after -40 node1_committed_after +SELECT COUNT(*) = 35 FROM t1; +COUNT(*) = 35 +1 SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; COUNT(*) = 0 1 COMMIT; -connection node_1; SET AUTOCOMMIT=ON; -SET SESSION wsrep_sync_wait=15; -SELECT COUNT(*) AS EXPECT_35 FROM t1; -EXPECT_35 -35 -SELECT * FROM t1; -id f1 -1 node1_committed_before -2 node1_committed_before -3 node1_committed_before -4 node1_committed_before -5 node1_committed_before -6 node2_committed_before -7 node2_committed_before -8 node2_committed_before -9 node2_committed_before -10 node2_committed_before -11 node1_committed_during -12 node1_committed_during -13 node1_committed_during -14 node1_committed_during -15 node1_committed_during -16 node1_to_be_committed_after -17 node1_to_be_committed_after -18 node1_to_be_committed_after -19 node1_to_be_committed_after -20 node1_to_be_committed_after -26 node2_committed_after -27 node2_committed_after -28 node2_committed_after -29 node2_committed_after -30 node2_committed_after -31 node1_to_be_committed_after -32 node1_to_be_committed_after -33 node1_to_be_committed_after -34 node1_to_be_committed_after -35 node1_to_be_committed_after -36 node1_committed_after -37 node1_committed_after -38 node1_committed_after -39 node1_committed_after -40 node1_committed_after +connection node_1; +SELECT COUNT(*) = 35 FROM t1; +COUNT(*) = 35 +1 SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; COUNT(*) = 0 1 DROP TABLE t1; COMMIT; +SET 
AUTOCOMMIT=ON; diff --git a/scripts/wsrep_sst_mariabackup.sh b/scripts/wsrep_sst_mariabackup.sh index a3fc4b78718..a66a792b5ea 100644 --- a/scripts/wsrep_sst_mariabackup.sh +++ b/scripts/wsrep_sst_mariabackup.sh @@ -86,7 +86,6 @@ encrypt_threads="" encrypt_chunk="" readonly SECRET_TAG='secret' -readonly TOTAL_TAG='secret /total' # Required for backup locks # For backup locks it is 1 sent by joiner @@ -419,7 +418,7 @@ get_transfer() get_footprint() { - pushd "$WSREP_SST_OPT_DATA" 1>/dev/null + cd "$DATA_DIR" payload=$(find . -regex '.*\.ibd$\|.*\.MYI$\|.*\.MYD$\|.*ibdata1$' \ -type f -print0 | du --files0-from=- --block-size=1 -c -s | \ awk 'END { print $1 }') @@ -428,7 +427,7 @@ get_footprint() # When compression/compaction used, the progress is only an approximate. payload=$(( payload*1/2 )) fi - popd 1>/dev/null + cd "$OLD_PWD" pcmd="$pcmd -s $payload" adjust_progress } -- cgit v1.2.1 From af869493b4879b5c70db8005dadf59990c98014d Mon Sep 17 00:00:00 2001 From: Norio Akagi Date: Mon, 9 May 2022 21:08:33 -0700 Subject: MDEV-27892 Improve an error message for foreign server exists Improve and add more languages for the ERROR 1476: Foreign Server already exists. 
--- mysql-test/main/create_drop_binlog.result | 2 +- mysql-test/main/create_drop_server.result | 6 +++--- sql/share/errmsg-utf8.txt | 18 +++++++++++++++--- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/mysql-test/main/create_drop_binlog.result b/mysql-test/main/create_drop_binlog.result index 4a8c75a3dae..30ccc116a85 100644 --- a/mysql-test/main/create_drop_binlog.result +++ b/mysql-test/main/create_drop_binlog.result @@ -142,7 +142,7 @@ DROP SERVER s1; CREATE SERVER IF NOT EXISTS s1 FOREIGN DATA WRAPPER mysql OPTIONS (USER 'root', HOST 'localhost', DATABASE 'test'); CREATE SERVER IF NOT EXISTS s1 FOREIGN DATA WRAPPER mysql OPTIONS (USER 'root', HOST 'localhost', DATABASE 'test'); Warnings: -Note 1476 The foreign server, s1, you are trying to create already exists +Note 1476 Cannot create foreign server 's1' as it already exists DROP SERVER IF EXISTS s1; DROP SERVER IF EXISTS s1; SHOW BINLOG EVENTS; diff --git a/mysql-test/main/create_drop_server.result b/mysql-test/main/create_drop_server.result index 4f5d13b3541..dc3c5eefbf8 100644 --- a/mysql-test/main/create_drop_server.result +++ b/mysql-test/main/create_drop_server.result @@ -7,7 +7,7 @@ server_name username db server1 user1 test0 CREATE SERVER IF NOT EXISTS server1 FOREIGN DATA WRAPPER mysql OPTIONS (USER 'user2', HOST 'localhost', DATABASE 'test1'); Warnings: -Note 1476 The foreign server, server1, you are trying to create already exists +Note 1476 Cannot create foreign server 'server1' as it already exists SELECT server_name, username, db FROM mysql.servers; server_name username db server1 user1 test0 @@ -21,10 +21,10 @@ server_name username db DROP SERVER IF EXISTS server1; CREATE SERVER server_1 FOREIGN DATA WRAPPER mysql OPTIONS (USER 'mysqltest_1', HOST 'localhost', DATABASE 'test0'); CREATE SERVER server_1 FOREIGN DATA WRAPPER mysql OPTIONS (USER 'mysqltest_1', HOST 'localhost', DATABASE 'test1'); -ERROR HY000: The foreign server, server_1, you are trying to create already exists 
+ERROR HY000: Cannot create foreign server 'server_1' as it already exists CREATE SERVER IF NOT EXISTS server_1 FOREIGN DATA WRAPPER mysql OPTIONS (USER 'mysqltest_1', HOST 'localhost', DATABASE 'test2'); Warnings: -Note 1476 The foreign server, server_1, you are trying to create already exists +Note 1476 Cannot create foreign server 'server_1' as it already exists SELECT server_name, username, db FROM mysql.servers; server_name username db server_1 mysqltest_1 test0 diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 9b16247784f..11b9775a4f9 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -6257,9 +6257,21 @@ ER_AMBIGUOUS_FIELD_TERM eng "First character of the FIELDS TERMINATED string is ambiguous; please use non-optional and non-empty FIELDS ENCLOSED BY" ger "Das erste Zeichen der Zeichenkette FIELDS TERMINATED ist mehrdeutig; bitte benutzen Sie nicht optionale und nicht leere FIELDS ENCLOSED BY" ER_FOREIGN_SERVER_EXISTS - chi "您正在尝试创建的外来服务器%s已存在" - eng "The foreign server, %s, you are trying to create already exists" - ger "Der entfernte Server %s, den Sie versuchen zu erzeugen, existiert schon" + chi "无法创建外部服务器'%s',因为它已经存在" + eng "Cannot create foreign server '%s' as it already exists" + fin "Vieraata palvelinta '%s' ei voida luoda, koska se on jo olemassa" + fre "Impossible de créer le serveur étranger '%s' car il existe déjà" + ger "Der auswärtige Server '%s' kann nicht erstellt werden, da er bereits vorhanden ist" + greek "Δεν είναι δυνατή η δημιουργία ξένου διακομιστή '%s' επειδή υπάρχει ήδη" + ita "Impossibile creare il server esterno '%s' poiché esiste già" + jpn "外部サーバー '%s'は既に存在するため、作成できません" + nla "Kan geen externe server '%s' maken omdat deze al bestaat" + nor "Kan ikke opprette utenlandsk server '%s' fordi den allerede eksisterer" + pol "Nie można utworzyć obcego serwera '%s', ponieważ już istnieje" + por "Não foi possível criar o servidor externo '%s' porque ele já existe" + rus "Невозможно создать 
сторонний сервер '%s', так как он уже существует" + spa "No se puede crear el servidor externo '%s' porque ya existe" + swe "Det gick inte att skapa främmande server '%s' eftersom den redan finns" ER_FOREIGN_SERVER_DOESNT_EXIST chi "您尝试引用的外部服务器名称不存在。数据源错误:%-.64s" eng "The foreign server name you are trying to reference does not exist. Data source error: %-.64s" -- cgit v1.2.1 From d3d50570debd93acf6a85001b0a6201a4392f2f9 Mon Sep 17 00:00:00 2001 From: Tuukka Pasanen Date: Wed, 11 May 2022 11:45:57 +0300 Subject: MDEV-28376: Make sure available Perl MariaDB DBI driver is chosen Commit introduces automatic detection which supported Perl MariaDB DBI driver is available: * DBD::mysql * DBD::MariaDB If nothing is then bail out and die Current Detection prefers Perl DBD:MariaDB driver. This is mainly for older Linux distros or Windows which does not have Perl DBD:MariaDB packaged or does not want to use Perl cpan command. --- debian/additions/mariadb-report | 70 +++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/debian/additions/mariadb-report b/debian/additions/mariadb-report index 48ee7bf8f53..665add394ca 100755 --- a/debian/additions/mariadb-report +++ b/debian/additions/mariadb-report @@ -240,26 +240,56 @@ sub get_user_mycnf sub connect_to_MySQL { - print "connect_to_MySQL\n" if $op{debug}; - - my $dsn; - - if($mycnf{'socket'} && -S $mycnf{'socket'}) - { - $dsn = "DBI:MariaDB:mariadb_socket=$mycnf{socket}"; - } - elsif($mycnf{'host'}) - { - $dsn = "DBI:MariaDB:host=$mycnf{host}" . ($mycnf{port} ? 
";port=$mycnf{port}" : ""); - } - else - { - $dsn = "DBI:MariaDB:host=localhost"; - } - - print "connect_to_MySQL: DBI DSN: $dsn\n" if $op{debug}; - - $dbh = DBI->connect($dsn, $mycnf{'user'}, $mycnf{'pass'}) or die; + print "connect_to_MySQL\n" if $op{debug}; + + if(my @driverList = grep {/mariadb|mysql/i} DBI->available_drivers()) { + my $dsn; + my $driver = undef; + + if(grep {/mariadb/i} @driverList) + { + $driver = "DBI:MariaDB"; + } + elsif(grep {/mysql/i} @driverList) + { + $driver = "DBI:mysql"; + } + + if($mycnf{'socket'} && -S $mycnf{'socket'}) + { + if(grep {/mariadb/i} @driverList) + { + $dsn = $driver . ":mariadb_socket=$mycnf{socket}"; + } + elsif(grep {/mysql/i} @driverList) + { + $dsn = $driver . ":mysql_socket=$mycnf{socket}"; + } + } + elsif($mycnf{'host'}) + { + $dsn = $driver . ":host=$mycnf{host}" . ($mycnf{port} ? ";port=$mycnf{port}" : ""); + } + else + { + $dsn = $driver . ":host=localhost"; + } + + print "connect_to_MySQL: DBI DSN: " . $dsn . "\n" if $op{debug}; + + $dbh = DBI->connect($dsn, $mycnf{'user'}, $mycnf{'pass'}) or die; + } + else + { + print STDERR "Install Perl 5.x driver: DBD:mysql or DBD:MariaDB\n"; + print STDERR "currently installed Perl DBD drivers:\n"; + foreach my $driver (DBI->available_drivers()) + { + print STDERR " * " . $driver . 
"\n"; + } + print STDERR "\n"; + die("Exit as no MariaDB DBI driver found!\n"); + } } sub collect_reports -- cgit v1.2.1 From cc4384badf818f4c46c92e89e99a6996e6c96d21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 24 May 2022 07:37:08 +0300 Subject: Update galera_sr disabled.def file --- mysql-test/suite/galera_sr/disabled.def | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mysql-test/suite/galera_sr/disabled.def b/mysql-test/suite/galera_sr/disabled.def index 4e77dd50bf7..54701b1eb11 100644 --- a/mysql-test/suite/galera_sr/disabled.def +++ b/mysql-test/suite/galera_sr/disabled.def @@ -10,8 +10,5 @@ # ############################################################################## -GCF-1018B : MDEV-18534 wsrep::transaction::adopt(): Assertion `transaction.is_streaming()' failed -GCF-1060 : MDEV-20848 galera_sr.GCF_1060 -GCF-585 : MDEV-24698 galera_sr.GCF-585 MTR failed with SIGABRT: no such a transition REPLICATING -> APPLYING GCF-1060 : MDEV-26528 wrong usage of mutex LOCK_thd_kill and LOCK_thd_kill -galera_sr_shutdown_master : MDEV-23612: galera_sr.galera_sr_shutdown_master MTR failed: WSREP_SST: [ERROR] Possible timeout in receving first data from donor in gtid stage + -- cgit v1.2.1 From 82f45ef57670c46c67158a5ed3048083ffd13439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 24 May 2022 07:08:12 +0300 Subject: MDEV-18179 : Galera test failure on galera.galera_kill_largechanges MDEV-18283 : Galera test failure on galera.GCF-1081 Remove tests. 
--- mysql-test/suite/galera/r/GCF-1081.result | 47 -------------- .../suite/galera/r/galera_kill_largechanges.result | 24 -------- mysql-test/suite/galera/t/GCF-1081.test | 72 ---------------------- .../suite/galera/t/galera_kill_largechanges.test | 50 --------------- 4 files changed, 193 deletions(-) delete mode 100644 mysql-test/suite/galera/r/GCF-1081.result delete mode 100644 mysql-test/suite/galera/r/galera_kill_largechanges.result delete mode 100644 mysql-test/suite/galera/t/GCF-1081.test delete mode 100644 mysql-test/suite/galera/t/galera_kill_largechanges.test diff --git a/mysql-test/suite/galera/r/GCF-1081.result b/mysql-test/suite/galera/r/GCF-1081.result deleted file mode 100644 index ede512ec6b1..00000000000 --- a/mysql-test/suite/galera/r/GCF-1081.result +++ /dev/null @@ -1,47 +0,0 @@ -connection node_2; -connection node_1; -connection node_1; -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 INTEGER) ENGINE=InnoDB; -INSERT INTO t1 VALUES (1, 0), (3, 0); -CREATE PROCEDURE proc_update () -BEGIN -UPDATE t1 SET f2 = 1 where f1 > 0; -END| -connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; -SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; -connection node_1; -CALL proc_update ();; -connection node_1a; -SET SESSION wsrep_sync_wait = 0; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -connection node_1a; -SET GLOBAL DEBUG = 'd,sync.wsrep_before_BF_victim_unlock'; -Warnings: -Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead -connection node_2; -INSERT INTO t1 VALUES (2, 2);; -connection node_1a; -SET SESSION DEBUG_SYNC = 'now WAIT_FOR sync.wsrep_before_BF_victim_unlock_reached'; -SET GLOBAL DEBUG = ''; -Warnings: -Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead -connection node_1a; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync'; -connection node_2; -SELECT * FROM t1; -f1 f2 -1 1 -2 2 -3 1 -connection node_1; -SELECT * FROM t1; -f1 f2 -1 1 -2 2 -3 1 -wsrep_local_replays -1 -DROP PROCEDURE proc_update; -DROP TABLE t1; diff --git a/mysql-test/suite/galera/r/galera_kill_largechanges.result b/mysql-test/suite/galera/r/galera_kill_largechanges.result deleted file mode 100644 index 99a8005e03e..00000000000 --- a/mysql-test/suite/galera/r/galera_kill_largechanges.result +++ /dev/null @@ -1,24 +0,0 @@ -connection node_2; -connection node_1; -connection node_1; -connection node_2; -connection node_1; -SET GLOBAL wsrep_provider_options = 'pc.ignore_sb=true'; -CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB; -INSERT INTO ten VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10),(11); -CREATE TABLE t1 (f1 VARCHAR(128)) ENGINE=InnoDB; -connection node_2; -Killing server ... 
-connection node_1; -INSERT INTO t1 SELECT REPEAT('a', 128) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4, ten AS a5, ten AS a6; -connection node_2; -connection node_2a; -SELECT COUNT(*) FROM t1; -COUNT(*) -1771561 -SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; -VARIABLE_VALUE -2 -connection node_1; -DROP TABLE t1; -DROP TABLE ten; diff --git a/mysql-test/suite/galera/t/GCF-1081.test b/mysql-test/suite/galera/t/GCF-1081.test deleted file mode 100644 index 38553feda39..00000000000 --- a/mysql-test/suite/galera/t/GCF-1081.test +++ /dev/null @@ -1,72 +0,0 @@ -# -# GCF-1081 - Assertion `!thd->sp_runtime_ctx` -# -# Test replaying of stored procedures -# - ---source include/galera_cluster.inc ---source include/have_innodb.inc ---source include/have_debug_sync.inc ---source include/galera_have_debug_sync.inc - ---connection node_1 - ---let $wsrep_local_replays_old = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` - -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 INTEGER) ENGINE=InnoDB; -INSERT INTO t1 VALUES (1, 0), (3, 0); - -DELIMITER |; -CREATE PROCEDURE proc_update () -BEGIN - UPDATE t1 SET f2 = 1 where f1 > 0; -END| -DELIMITER ;| - -# Block the SP ---connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 ---let $galera_sync_point = commit_monitor_master_enter_sync ---source include/galera_set_sync_point.inc - ---connection node_1 ---send CALL proc_update (); - -# Wait until SP is blocked ---connection node_1a -SET SESSION wsrep_sync_wait = 0; ---source include/galera_wait_sync_point.inc - -# Issue a conflicting insert on node #2 ---connection node_1a -SET GLOBAL debug_dbug = 'd,sync.wsrep_before_BF_victim_unlock'; - ---connection node_2 ---send INSERT INTO t1 VALUES (2, 2); - -# Wait until it BF aborts the SP ---connection node_1a -SET SESSION DEBUG_SYNC = 'now WAIT_FOR sync.wsrep_before_BF_victim_unlock_reached'; -SET GLOBAL debug_dbug = ''; - -# 
Unblock the SP ---connection node_1a ---source include/galera_clear_sync_point.inc ---source include/galera_signal_sync_point.inc - ---connection node_2 ---reap -SELECT * FROM t1; - -# SP succeeds ---connection node_1 ---reap -SELECT * FROM t1; - -# wsrep_local_replays has increased by 1 ---let $wsrep_local_replays_new = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` ---disable_query_log ---eval SELECT $wsrep_local_replays_new - $wsrep_local_replays_old AS wsrep_local_replays; ---enable_query_log - -DROP PROCEDURE proc_update; -DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_kill_largechanges.test b/mysql-test/suite/galera/t/galera_kill_largechanges.test deleted file mode 100644 index c671764fa9e..00000000000 --- a/mysql-test/suite/galera/t/galera_kill_largechanges.test +++ /dev/null @@ -1,50 +0,0 @@ -# -# This test kill -9-s a slave while a large update has been performed on the master. SST is performed. -# - ---source include/big_test.inc ---source include/galera_cluster.inc - -# Save original auto_increment_offset values. ---let $node_1=node_1 ---let $node_2=node_2 ---source include/auto_increment_offset_save.inc - ---connection node_1 -# Enable the master to continue running during the split-brain situation that -# occurs when the slave is killed ---let $wsrep_provider_options_orig = `SELECT @@wsrep_provider_options` -SET GLOBAL wsrep_provider_options = 'pc.ignore_sb=true'; - -CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB; -INSERT INTO ten VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10),(11); -CREATE TABLE t1 (f1 VARCHAR(128)) ENGINE=InnoDB; - ---connection node_2 ---source include/kill_galera.inc - ---connection node_1 -# We create a 128Mb (or so) transaction that is larger than gcache. 
The size of the gcache is not adjustable dynamically -INSERT INTO t1 SELECT REPEAT('a', 128) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4, ten AS a5, ten AS a6; - ---connection node_2 ---source include/start_mysqld.inc - ---let $galera_connection_name = node_2a ---let $galera_server_number = 2 ---source include/galera_connect.inc ---connection node_2a - -SELECT COUNT(*) FROM t1; -SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; - ---connection node_1 ---disable_query_log ---eval SET GLOBAL wsrep_provider_options = '$wsrep_provider_options_orig'; ---enable_query_log - ---let $node_2=node_2a ---source include/auto_increment_offset_restore.inc - -DROP TABLE t1; -DROP TABLE ten; -- cgit v1.2.1 From 665c01d1f202595bd04af0962002fcd631125d7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 24 May 2022 07:09:27 +0300 Subject: MDEV-15794 : Test failure on galera.galera_var_retry_autocommit Add expected error. --- .../galera/r/galera_var_retry_autocommit.result | 29 +++++++++++----------- .../galera/t/galera_var_retry_autocommit.test | 16 ++++++------ 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/mysql-test/suite/galera/r/galera_var_retry_autocommit.result b/mysql-test/suite/galera/r/galera_var_retry_autocommit.result index b8943464cb7..56c2c995402 100644 --- a/mysql-test/suite/galera/r/galera_var_retry_autocommit.result +++ b/mysql-test/suite/galera/r/galera_var_retry_autocommit.result @@ -12,9 +12,9 @@ connection node_2; TRUNCATE TABLE t1; connection node_1; ERROR 40001: Deadlock found when trying to get lock; try restarting transaction -SELECT COUNT(*) = 0 FROM t1; -COUNT(*) = 0 -1 +SELECT COUNT(*) FROM t1; +COUNT(*) +0 SET DEBUG_SYNC = 'RESET'; DROP TABLE t1; connection node_1; @@ -27,9 +27,9 @@ SET DEBUG_SYNC = 'now WAIT_FOR before_cert'; connection node_2; TRUNCATE TABLE t1; connection node_1; -SELECT COUNT(*) = 1 FROM t1; -COUNT(*) = 1 -1 +SELECT COUNT(*) FROM t1; 
+COUNT(*) +0 SET DEBUG_SYNC = 'RESET'; DROP TABLE t1; connection node_1; @@ -44,18 +44,17 @@ connection node_2; TRUNCATE TABLE t1; connection node_1a; SET DEBUG_SYNC = 'now WAIT_FOR wsrep_retry_autocommit_reached'; -SELECT COUNT(*) = 0 FROM t1; -COUNT(*) = 0 -1 +SELECT COUNT(*) FROM t1; +COUNT(*) +0 SET DEBUG_SYNC = 'now SIGNAL wsrep_retry_autocommit_continue WAIT_FOR before_cert'; connection node_2; TRUNCATE TABLE t1; connection node_1a; -SELECT COUNT(*) = 0 FROM t1; -COUNT(*) = 0 -1 +SELECT COUNT(*) FROM t1; +COUNT(*) +0 connection node_1; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction SET DEBUG_SYNC = 'RESET'; SET GLOBAL debug_dbug = NULL; DROP TABLE t1; @@ -66,8 +65,8 @@ SET GLOBAL debug_dbug = '+d,sync.wsrep_retry_autocommit'; SET DEBUG_SYNC = 'wsrep_before_certification SIGNAL before_cert WAIT_FOR continue EXECUTE 64'; INSERT INTO t1 VALUES (5); connection node_1; -SELECT COUNT(*) = 1 FROM t1; -COUNT(*) = 1 +SELECT COUNT(*) FROM t1; +COUNT(*) 1 SET DEBUG_SYNC = 'RESET'; SET GLOBAL debug_dbug = NULL; diff --git a/mysql-test/suite/galera/t/galera_var_retry_autocommit.test b/mysql-test/suite/galera/t/galera_var_retry_autocommit.test index 6bed1b0120f..bd10e448e06 100644 --- a/mysql-test/suite/galera/t/galera_var_retry_autocommit.test +++ b/mysql-test/suite/galera/t/galera_var_retry_autocommit.test @@ -30,7 +30,7 @@ TRUNCATE TABLE t1; --connection node_1 --error ER_LOCK_DEADLOCK --reap -SELECT COUNT(*) = 0 FROM t1; +SELECT COUNT(*) FROM t1; SET DEBUG_SYNC = 'RESET'; DROP TABLE t1; @@ -54,8 +54,9 @@ SET DEBUG_SYNC = 'now WAIT_FOR before_cert'; TRUNCATE TABLE t1; --connection node_1 +--error 0,ER_LOCK_DEADLOCK --reap -SELECT COUNT(*) = 1 FROM t1; +SELECT COUNT(*) FROM t1; SET DEBUG_SYNC = 'RESET'; DROP TABLE t1; @@ -82,17 +83,17 @@ TRUNCATE TABLE t1; --connection node_1a SET DEBUG_SYNC = 'now WAIT_FOR wsrep_retry_autocommit_reached'; -SELECT COUNT(*) = 0 FROM t1; +SELECT COUNT(*) FROM t1; SET DEBUG_SYNC = 'now SIGNAL 
wsrep_retry_autocommit_continue WAIT_FOR before_cert'; --connection node_2 TRUNCATE TABLE t1; --connection node_1a -SELECT COUNT(*) = 0 FROM t1; +SELECT COUNT(*) FROM t1; --connection node_1 ---error ER_LOCK_DEADLOCK +--error 0,ER_LOCK_DEADLOCK --reap SET DEBUG_SYNC = 'RESET'; @@ -126,7 +127,7 @@ while ($count) --connection node_1a SET DEBUG_SYNC = 'now WAIT_FOR wsrep_retry_autocommit_reached'; - SELECT COUNT(*) = 1 FROM t1; + SELECT COUNT(*) FROM t1; SET DEBUG_SYNC = 'now SIGNAL wsrep_retry_autocommit_continue'; --dec $count @@ -135,8 +136,9 @@ while ($count) --enable_query_log --connection node_1 +--error 0,ER_LOCK_DEADLOCK --reap -SELECT COUNT(*) = 1 FROM t1; +SELECT COUNT(*) FROM t1; SET DEBUG_SYNC = 'RESET'; SET GLOBAL debug_dbug = NULL; -- cgit v1.2.1 From 0263944a7f0b8b57aaf9831b570c4d8c6db11cbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 24 May 2022 07:11:10 +0300 Subject: MDEV-20888 : Galera test failure on galera.galera_pc_ignore_sb: 2013: Lost connection to MySQL server during query Add debug info. 
--- mysql-test/suite/galera/t/galera_pc_ignore_sb.cnf | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 mysql-test/suite/galera/t/galera_pc_ignore_sb.cnf diff --git a/mysql-test/suite/galera/t/galera_pc_ignore_sb.cnf b/mysql-test/suite/galera/t/galera_pc_ignore_sb.cnf new file mode 100644 index 00000000000..ae48aa3ae0f --- /dev/null +++ b/mysql-test/suite/galera/t/galera_pc_ignore_sb.cnf @@ -0,0 +1,11 @@ +!include ../galera_2nodes.cnf + +[mysqld] +wsrep_debug=1 + +[mysqld.1] +wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' + +[mysqld.2] +wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' + -- cgit v1.2.1 From c1d380aa883f9326f24eb2faf3f2d5dba43d9c85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 24 May 2022 07:17:16 +0300 Subject: Update galera disabled.def file --- mysql-test/suite/galera/disabled.def | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def index 9f360c52ada..6727aa3d945 100644 --- a/mysql-test/suite/galera/disabled.def +++ b/mysql-test/suite/galera/disabled.def @@ -10,7 +10,6 @@ # ############################################################################## -GCF-1081 : MDEV-18283 Galera test failure on galera.GCF-1081 GCF-939 : MDEV-21520 galera.GCF-939 MW-329 : MDEV-19962 Galera test failure on MW-329 galera_as_slave_ctas : MDEV-28378 timeout @@ -19,10 +18,7 @@ galera_bf_abort_group_commit : MDEV-18282 Galera test failure on galera.galera_b galera_bf_lock_wait : MDEV-21597 wsrep::transaction::start_transaction(): Assertion `active() == false' failed galera_encrypt_tmp_files : Get error failed to enable encryption of temporary files galera_gcache_recover_manytrx : MDEV-18834 Galera test failure -galera_kill_largechanges : MDEV-18179 Galera test failure on galera.galera_kill_largechanges -galera_mdl_race : MDEV-21524 galera.galera_mdl_race 
galera_parallel_simple : MDEV-20318 galera.galera_parallel_simple fails -galera_pc_ignore_sb : MDEV-20888 galera.galera_pc_ignore_sb galera_pc_recovery : MDEV-25199 cluster fails to start up galera_shutdown_nonprim : MDEV-21493 galera.galera_shutdown_nonprim galera_var_ignore_apply_errors : MDEV-26770 galera_var_ignore_apply_errors fails Server did not transition to READY state -- cgit v1.2.1 From 771c61e9659d7fd469b91736462f5e3399f70868 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Tue, 24 May 2022 17:12:54 +1000 Subject: man: merge error in mysqld.8 --- man/mysqld.8 | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/man/mysqld.8 b/man/mysqld.8 index dd19bcb8480..83a4efb5895 100644 --- a/man/mysqld.8 +++ b/man/mysqld.8 @@ -1,10 +1,6 @@ '\" t .\" -<<<<<<< HEAD -.TH "\FBMYSQLD\FR" "8" "15 May 2020" "MariaDB 10\&.6" "MariaDB Database System" -======= -.TH "\FBMARIADBD\FR" "8" "27 June 2019" "MariaDB 10\&.5" "MariaDB Database System" ->>>>>>> 10.5-MDEV-28376-mariadb-report +.TH "\FBMARIADBD\FR" "8" "15 May 2020" "MariaDB 10\&.6" "MariaDB Database System" .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- -- cgit v1.2.1 From e8cb91943c7fde6a55382fef927efb7067f48231 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 24 May 2022 11:02:46 +0200 Subject: fix a bad merge in ec62f46a612b --- debian/mariadb-plugin-columnstore.install | 119 ----------------------------- debian/mariadb-plugin-columnstore.postinst | 8 -- debian/mariadb-plugin-columnstore.postrm | 24 ------ debian/mariadb-plugin-columnstore.prerm | 8 -- debian/mariadb-plugin-columnstore.triggers | 1 - 5 files changed, 160 deletions(-) delete mode 100644 debian/mariadb-plugin-columnstore.install delete mode 100644 debian/mariadb-plugin-columnstore.postinst delete mode 100644 debian/mariadb-plugin-columnstore.postrm delete mode 100644 
debian/mariadb-plugin-columnstore.prerm delete mode 100644 debian/mariadb-plugin-columnstore.triggers diff --git a/debian/mariadb-plugin-columnstore.install b/debian/mariadb-plugin-columnstore.install deleted file mode 100644 index 39f4e8c7279..00000000000 --- a/debian/mariadb-plugin-columnstore.install +++ /dev/null @@ -1,119 +0,0 @@ -etc/columnstore/Columnstore.xml -etc/columnstore/ErrorMessage.txt -etc/columnstore/MessageFile.txt -etc/columnstore/storagemanager.cnf -etc/mysql/mariadb.conf.d/columnstore.cnf -usr/bin/mcsRebuildEM -usr/bin/DDLProc -usr/bin/DMLProc -usr/bin/ExeMgr -usr/bin/PrimProc -usr/bin/StorageManager -usr/bin/WriteEngineServer -usr/bin/clearShm -usr/bin/cleartablelock -usr/bin/columnstore-post-install -usr/bin/columnstore-pre-uninstall -usr/bin/columnstoreDBWrite -usr/bin/columnstoreSyslogSetup.sh -usr/bin/columnstore_run.sh -usr/bin/colxml -usr/bin/controllernode -usr/bin/cpimport -usr/bin/cpimport.bin -usr/bin/cplogger -usr/bin/cspasswd -usr/bin/cskeys -usr/bin/dbbuilder -usr/bin/dbrmctl -usr/bin/ddlcleanup -usr/bin/editem -usr/bin/idbmeminfo -usr/bin/load_brm -usr/bin/mariadb-columnstore-start.sh -usr/bin/mariadb-columnstore-stop.sh -usr/bin/mcs-savebrm.py -usr/bin/mcs-loadbrm.py -usr/bin/mcs-stop-controllernode.sh -usr/bin/mcsGetConfig -usr/bin/mcsSetConfig -usr/bin/mycnfUpgrade -usr/bin/post-mysql-install -usr/bin/post-mysqld-install -usr/bin/reset_locks -usr/bin/rollback -usr/bin/save_brm -usr/bin/smcat -usr/bin/smls -usr/bin/smput -usr/bin/smrm -usr/bin/testS3Connection -usr/bin/viewtablelock -usr/bin/workernode -usr/lib/*/libbatchloader.so -usr/lib/*/libbrm.so -usr/lib/*/libcacheutils.so -usr/lib/*/libcloudio.so -usr/lib/*/libcommon.so -usr/lib/*/libcompress.so -usr/lib/*/libconfigcpp.so -usr/lib/*/libdataconvert.so -usr/lib/*/libddlcleanuputil.so -usr/lib/*/libddlpackage.so -usr/lib/*/libddlpackageproc.so -usr/lib/*/libdmlpackage.so -usr/lib/*/libdmlpackageproc.so -usr/lib/*/libexecplan.so -usr/lib/*/libfuncexp.so 
-usr/lib/*/libidbdatafile.so -usr/lib/*/libjoblist.so -usr/lib/*/libjoiner.so -usr/lib/*/liblibmysql_client.so -usr/lib/*/libloggingcpp.so -usr/lib/*/libmarias3.so -usr/lib/*/libmessageqcpp.so -usr/lib/*/liboamcpp.so -usr/lib/*/libquerystats.so -usr/lib/*/libquerytele.so -usr/lib/*/libregr.so -usr/lib/*/librowgroup.so -usr/lib/*/librwlock.so -usr/lib/*/libstoragemanager.so -usr/lib/*/libthreadpool.so -usr/lib/*/libthrift.so -usr/lib/*/libudfsdk.so -usr/lib/*/libwindowfunction.so -usr/lib/*/libwriteengine.so -usr/lib/*/libwriteengineclient.so -usr/lib/*/libwriteengineredistribute.so -usr/lib/*/libdatatypes.so -usr/lib/mysql/plugin/ha_columnstore.so -usr/lib/mysql/plugin/libregr_mysql.so -usr/lib/mysql/plugin/libudf_mysql.so -usr/sbin/install_mcs_mysql.sh -usr/share/columnstore/calremoveuserpriority.sql -usr/share/columnstore/calsetuserpriority.sql -usr/share/columnstore/calshowprocesslist.sql -usr/share/columnstore/columnstoreAlias -usr/share/columnstore/columnstoreLogRotate -usr/share/columnstore/columnstoreSyslog -usr/share/columnstore/columnstoreSyslog-ng -usr/share/columnstore/columnstoreSyslog7 -usr/share/columnstore/columnstore_info.sql -usr/share/columnstore/dumpcat_mysql.sql -usr/share/columnstore/gitversionEngine -usr/share/columnstore/mariadb-columnstore.service -usr/share/columnstore/mcs-controllernode.service -usr/share/columnstore/mcs-ddlproc.service -usr/share/columnstore/mcs-dmlproc.service -usr/share/columnstore/mcs-exemgr.service -usr/share/columnstore/mcs-loadbrm.service -usr/share/columnstore/mcs-primproc.service -usr/share/columnstore/mcs-storagemanager.service -usr/share/columnstore/mcs-workernode.service -usr/share/columnstore/mcs-writeengineserver.service -usr/share/columnstore/myCnf-exclude-args.text -usr/share/columnstore/myCnf-include-args.text -usr/share/columnstore/releasenum -usr/share/columnstore/syscatalog_mysql.sql -var/lib/columnstore/local/module diff --git a/debian/mariadb-plugin-columnstore.postinst 
b/debian/mariadb-plugin-columnstore.postinst deleted file mode 100644 index 97a1bba22c8..00000000000 --- a/debian/mariadb-plugin-columnstore.postinst +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -set -e - -# Install ColumnStore -columnstore-post-install install - -#DEBHELPER# diff --git a/debian/mariadb-plugin-columnstore.postrm b/debian/mariadb-plugin-columnstore.postrm deleted file mode 100644 index b2e19774ec1..00000000000 --- a/debian/mariadb-plugin-columnstore.postrm +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -set -e - -if [ "$1" = "purge" ]; then - rm -rf /var/lib/columnstore - rm -rf /etc/columnstore - rm -f /etc/mysql/mariadb.conf.d/columnstore.cnf.rpmsave -fi - -# Automatically restart MariaDB after ColumnStore plugin has been removed -case "$1" in - purge|remove|disappear) - if [ -d /run/systemd/system ]; then - # If systemd - deb-systemd-invoke restart mariadb.service >/dev/null - elif [ -x "/etc/init.d/mariadb" ]; then - # Fall-back to SysV init - invoke-rc.d mariadb restart || exit $? 
- fi - ;; -esac - -#DEBHELPER# diff --git a/debian/mariadb-plugin-columnstore.prerm b/debian/mariadb-plugin-columnstore.prerm deleted file mode 100644 index f6a27f14499..00000000000 --- a/debian/mariadb-plugin-columnstore.prerm +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -set -e - -columnstore-pre-uninstall - - -#DEBHELPER# diff --git a/debian/mariadb-plugin-columnstore.triggers b/debian/mariadb-plugin-columnstore.triggers deleted file mode 100644 index dd866036784..00000000000 --- a/debian/mariadb-plugin-columnstore.triggers +++ /dev/null @@ -1 +0,0 @@ -activate-noawait ldconfig -- cgit v1.2.1 From 734f10f60155acd70db9227e25242e7ad6a3700d Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Sun, 22 May 2022 19:03:54 +0300 Subject: Fix that spider test doesn't crash if my_gethwaddr() fails This can happen if one doesn't have a working network connection when running spider tests --- storage/spider/spd_table.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc index 00542f05f09..4f3bc25c92a 100644 --- a/storage/spider/spd_table.cc +++ b/storage/spider/spd_table.cc @@ -7234,8 +7234,9 @@ int spider_db_init( if (my_gethwaddr((uchar *) addr)) { - my_printf_error(ER_SPIDER_CANT_NUM, ER_SPIDER_CANT_STR1, MYF(0), + my_printf_error(ER_SPIDER_CANT_NUM, ER_SPIDER_CANT_STR1, MYF(ME_WARNING), "get hardware address with error ", errno); + bzero(addr,6); } spider_unique_id.str = spider_unique_id_buf; spider_unique_id.length = my_sprintf(spider_unique_id_buf, -- cgit v1.2.1 From 847ca89d6d352360e8ab6ffee7ef7aa35aff692d Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Thu, 19 May 2022 02:32:18 +0300 Subject: Added check for libxml2 for connect.misc Test will fail if XML support is not compiled. This can happen if XML support is disabled or if the libxml2 library is not installed. 
--- storage/connect/mysql-test/connect/t/misc.test | 1 + 1 file changed, 1 insertion(+) diff --git a/storage/connect/mysql-test/connect/t/misc.test b/storage/connect/mysql-test/connect/t/misc.test index 4dc8dded651..5cfe2a8fdbb 100644 --- a/storage/connect/mysql-test/connect/t/misc.test +++ b/storage/connect/mysql-test/connect/t/misc.test @@ -1,3 +1,4 @@ +--source have_libxml2.inc # Overlong table type --error ER_UNKNOWN_ERROR -- cgit v1.2.1 From ddb1f7c4e445c8bcab2e6ffa5473c53c62227939 Mon Sep 17 00:00:00 2001 From: Monty Date: Tue, 24 May 2022 15:08:03 +0300 Subject: Remove warning when using connect have_libxml2.inc --- storage/connect/mysql-test/connect/r/alter_xml2.result | 2 -- storage/connect/mysql-test/connect/r/infoschema2-9739.result | 2 -- storage/connect/mysql-test/connect/r/xml2.result | 2 -- storage/connect/mysql-test/connect/r/xml2_grant.result | 2 -- storage/connect/mysql-test/connect/r/xml2_html.result | 2 -- storage/connect/mysql-test/connect/r/xml2_mdev5261.result | 2 -- storage/connect/mysql-test/connect/r/xml2_mult.result | 2 -- storage/connect/mysql-test/connect/r/xml2_zip.result | 2 -- storage/connect/mysql-test/connect/t/have_libxml2.inc | 2 ++ 9 files changed, 2 insertions(+), 16 deletions(-) diff --git a/storage/connect/mysql-test/connect/r/alter_xml2.result b/storage/connect/mysql-test/connect/r/alter_xml2.result index a15be966aa8..e97e167e800 100644 --- a/storage/connect/mysql-test/connect/r/alter_xml2.result +++ b/storage/connect/mysql-test/connect/r/alter_xml2.result @@ -1,5 +1,3 @@ -Warnings: -Warning 1105 No file name. 
Table will use t1.xml # # Testing changing table type (not in-place) # diff --git a/storage/connect/mysql-test/connect/r/infoschema2-9739.result b/storage/connect/mysql-test/connect/r/infoschema2-9739.result index 32ca77dc71d..6abc576ff8e 100644 --- a/storage/connect/mysql-test/connect/r/infoschema2-9739.result +++ b/storage/connect/mysql-test/connect/r/infoschema2-9739.result @@ -1,5 +1,3 @@ -Warnings: -Warning 1105 No file name. Table will use t1.xml create table t1 (i int) engine=Connect table_type=XML option_list='xmlsup=libxml2'; Warnings: Warning 1105 No file name. Table will use t1.xml diff --git a/storage/connect/mysql-test/connect/r/xml2.result b/storage/connect/mysql-test/connect/r/xml2.result index 891c6e6f8dd..66ef5f15091 100644 --- a/storage/connect/mysql-test/connect/r/xml2.result +++ b/storage/connect/mysql-test/connect/r/xml2.result @@ -1,5 +1,3 @@ -Warnings: -Warning 1105 No file name. Table will use t1.xml SET NAMES utf8; # # Testing tag values diff --git a/storage/connect/mysql-test/connect/r/xml2_grant.result b/storage/connect/mysql-test/connect/r/xml2_grant.result index a6e6c150b59..e014f00f577 100644 --- a/storage/connect/mysql-test/connect/r/xml2_grant.result +++ b/storage/connect/mysql-test/connect/r/xml2_grant.result @@ -1,5 +1,3 @@ -Warnings: -Warning 1105 No file name. Table will use t1.xml # # Beginning of grant.inc # diff --git a/storage/connect/mysql-test/connect/r/xml2_html.result b/storage/connect/mysql-test/connect/r/xml2_html.result index 499108b724d..0ec47ad8038 100644 --- a/storage/connect/mysql-test/connect/r/xml2_html.result +++ b/storage/connect/mysql-test/connect/r/xml2_html.result @@ -1,5 +1,3 @@ -Warnings: -Warning 1105 No file name. 
Table will use t1.xml SET NAMES utf8; # # Testing HTML like XML file diff --git a/storage/connect/mysql-test/connect/r/xml2_mdev5261.result b/storage/connect/mysql-test/connect/r/xml2_mdev5261.result index 0ee5712dd02..8913663f6ae 100644 --- a/storage/connect/mysql-test/connect/r/xml2_mdev5261.result +++ b/storage/connect/mysql-test/connect/r/xml2_mdev5261.result @@ -1,5 +1,3 @@ -Warnings: -Warning 1105 No file name. Table will use t1.xml SET NAMES utf8; CREATE TABLE t1 (i INT UNIQUE NOT NULL) ENGINE=CONNECT TABLE_TYPE=XML FILE_NAME='xt1.xml' OPTION_LIST='xmlsup=libxml2,Rownode=N'; ERROR HY000: Table type XML is not indexable diff --git a/storage/connect/mysql-test/connect/r/xml2_mult.result b/storage/connect/mysql-test/connect/r/xml2_mult.result index 0146baa89c0..5d68527b592 100644 --- a/storage/connect/mysql-test/connect/r/xml2_mult.result +++ b/storage/connect/mysql-test/connect/r/xml2_mult.result @@ -1,5 +1,3 @@ -Warnings: -Warning 1105 No file name. Table will use t1.xml SET NAMES utf8; # # Testing expanded values diff --git a/storage/connect/mysql-test/connect/r/xml2_zip.result b/storage/connect/mysql-test/connect/r/xml2_zip.result index e743af32418..7e36650da71 100644 --- a/storage/connect/mysql-test/connect/r/xml2_zip.result +++ b/storage/connect/mysql-test/connect/r/xml2_zip.result @@ -1,5 +1,3 @@ -Warnings: -Warning 1105 No file name. 
Table will use t1.xml # # Testing zipped XML tables # diff --git a/storage/connect/mysql-test/connect/t/have_libxml2.inc b/storage/connect/mysql-test/connect/t/have_libxml2.inc index fc16265aa12..64722af0ec4 100644 --- a/storage/connect/mysql-test/connect/t/have_libxml2.inc +++ b/storage/connect/mysql-test/connect/t/have_libxml2.inc @@ -1,4 +1,5 @@ --disable_query_log +--disable_warnings --error 0,ER_UNKNOWN_ERROR CREATE TABLE t1 (a VARCHAR(10)) ENGINE=CONNECT TABLE_TYPE=XML OPTION_LIST='xmlsup=libxml2'; @@ -12,6 +13,7 @@ if (!`SELECT count(*) FROM INFORMATION_SCHEMA.TABLES Skip Need LIBXML2; } DROP TABLE t1; +--enable_warnings --enable_query_log -- cgit v1.2.1 From 915afddba2d831d3d0201838b941e834b599e6b9 Mon Sep 17 00:00:00 2001 From: Monty Date: Tue, 24 May 2022 15:11:34 +0300 Subject: main.alter_table_lock could fail with query "'LOCK TABLE t1 WRITE' failed" Backported fix from MariaDB 10.5 --- mysql-test/main/alter_table_lock.result | 2 -- mysql-test/main/alter_table_lock.test | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/mysql-test/main/alter_table_lock.result b/mysql-test/main/alter_table_lock.result index ce6097c9ada..620fca23315 100644 --- a/mysql-test/main/alter_table_lock.result +++ b/mysql-test/main/alter_table_lock.result @@ -11,8 +11,6 @@ ERROR 42S02: Table 'test.x' doesn't exist SET SESSION max_session_mem_used= 8192; LOCK TABLE t1 WRITE; ALTER TABLE t1 CHANGE COLUMN IF EXISTS b c INT; -Warnings: -Note 1054 Unknown column 'b' in 't1' SET SESSION max_session_mem_used = @max_session_mem_used_save; UNLOCK TABLES; DROP TABLE t1; diff --git a/mysql-test/main/alter_table_lock.test b/mysql-test/main/alter_table_lock.test index f1f60e38b61..bd26c1ac7d0 100644 --- a/mysql-test/main/alter_table_lock.test +++ b/mysql-test/main/alter_table_lock.test @@ -14,9 +14,13 @@ SELECT * FROM t1; ALTER TABLE x MODIFY xx INT; SET SESSION max_session_mem_used= 8192; +--error 0,ER_OPTION_PREVENTS_STATEMENT LOCK TABLE t1 WRITE; +--disable_warnings 
+--error 0,ER_OPTION_PREVENTS_STATEMENT ALTER TABLE t1 CHANGE COLUMN IF EXISTS b c INT; +--enable_warnings SET SESSION max_session_mem_used = @max_session_mem_used_save; UNLOCK TABLES; -- cgit v1.2.1 From 7fe474fe7e25cb503b585755c7e0a036e01e0df7 Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Thu, 12 May 2022 17:10:58 +0530 Subject: MDEV-25257 SEGV in fts_get_next_doc_id upon some INSERT - InnoDB fails to create a fts cache while loading the innodb fts table which is stored in system tablespace. InnoDB should create the fts cache while loading FTS_DOC_ID column from system column. --- mysql-test/suite/innodb_fts/r/fulltext.result | 14 ++++++++++++++ mysql-test/suite/innodb_fts/t/fulltext.test | 16 ++++++++++++++++ storage/innobase/dict/dict0load.cc | 1 + 3 files changed, 31 insertions(+) diff --git a/mysql-test/suite/innodb_fts/r/fulltext.result b/mysql-test/suite/innodb_fts/r/fulltext.result index abcdebca01d..7775a46e0d8 100644 --- a/mysql-test/suite/innodb_fts/r/fulltext.result +++ b/mysql-test/suite/innodb_fts/r/fulltext.result @@ -718,4 +718,18 @@ t1 CREATE TABLE `t1` ( FULLTEXT KEY `a_2` (`a`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 DROP TABLE t1; +# +# MDEV-25257 SEGV in fts_get_next_doc_id upon some INSERT +# +SET @save = @@global.innodb_file_per_table; +SET @@global.innodb_file_per_table = 0; +CREATE TABLE t1 ( +col_int INTEGER, col_text TEXT, +col_int_g INTEGER GENERATED ALWAYS AS (col_int) +) ENGINE = InnoDB ROW_FORMAT = Redundant ; +ALTER TABLE t1 ADD FULLTEXT KEY `ftidx` ( col_text ) ; +ALTER TABLE t1 DROP KEY `ftidx` ; +INSERT INTO t1 (col_int, col_text) VALUES ( 1255, NULL); +DROP TABLE t1; +SET @@global.innodb_file_per_table = @save; # End of 10.3 tests diff --git a/mysql-test/suite/innodb_fts/t/fulltext.test b/mysql-test/suite/innodb_fts/t/fulltext.test index f3bcaaec5cc..4878a0bee3f 100644 --- a/mysql-test/suite/innodb_fts/t/fulltext.test +++ b/mysql-test/suite/innodb_fts/t/fulltext.test @@ -741,4 +741,20 @@ ALTER TABLE t1 
ADD FULLTEXT INDEX (a); SHOW CREATE TABLE t1; DROP TABLE t1; +--echo # +--echo # MDEV-25257 SEGV in fts_get_next_doc_id upon some INSERT +--echo # +SET @save = @@global.innodb_file_per_table; +SET @@global.innodb_file_per_table = 0; + +CREATE TABLE t1 ( + col_int INTEGER, col_text TEXT, + col_int_g INTEGER GENERATED ALWAYS AS (col_int) +) ENGINE = InnoDB ROW_FORMAT = Redundant ; +ALTER TABLE t1 ADD FULLTEXT KEY `ftidx` ( col_text ) ; +ALTER TABLE t1 DROP KEY `ftidx` ; +INSERT INTO t1 (col_int, col_text) VALUES ( 1255, NULL); +DROP TABLE t1; +SET @@global.innodb_file_per_table = @save; + --echo # End of 10.3 tests diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 386b99bcaad..dade0c5b19e 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -1851,6 +1851,7 @@ dict_load_columns( the flag is set before the table is created. */ if (table->fts == NULL) { table->fts = fts_create(table); + table->fts->cache = fts_cache_create(table); fts_optimize_add_table(table); } -- cgit v1.2.1 From a0e4853eff028fa9db9ba0421309e2bd1124ab26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 25 May 2022 13:15:56 +0300 Subject: MDEV-28668 Recovery or backup of INSERT may be incorrect page_cur_insert_rec_low(): When checking for common bytes with the preceding record, exclude the header bytes of next_rec that could have been updated by this function. The scenario where this caused corruption was an insert of a node pointer record. The child page number was written as 0x203 but recovered as 0x103 because the n_owned field of next_rec was changed from 1 to 2 before the comparison was invoked. 
--- storage/innobase/include/page0cur.h | 7 +++---- storage/innobase/page/page0cur.cc | 11 +++++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h index 6ce31dea0c1..d80eb4567e5 100644 --- a/storage/innobase/include/page0cur.h +++ b/storage/innobase/include/page0cur.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2021, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -154,9 +154,8 @@ page_cur_tuple_insert( MY_ATTRIBUTE((nonnull, warn_unused_result)); /***********************************************************//** Inserts a record next to page cursor on an uncompressed page. -Returns pointer to inserted record if succeed, i.e., enough -space available, NULL otherwise. The cursor stays at the same position. -@return pointer to record if succeed, NULL otherwise */ +@return pointer to record +@retval nullptr if not enough space was available */ rec_t* page_cur_insert_rec_low( /*====================*/ diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index cc6b1797d61..31a780d8644 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2018, 2021, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1280,9 +1280,8 @@ inline void mtr_t::page_insert(const buf_block_t &block, bool reuse, /***********************************************************//** Inserts a record next to page cursor on an uncompressed page. -Returns pointer to inserted record if succeed, i.e., enough -space available, NULL otherwise. The cursor stays at the same position. -@return pointer to record if succeed, NULL otherwise */ +@return pointer to record +@retval nullptr if not enough space was available */ rec_t* page_cur_insert_rec_low( /*====================*/ @@ -1575,8 +1574,12 @@ inc_dir: const byte *r= rec; const byte *c= cur->rec; const byte *c_end= cur->rec + data_size; + static_assert(REC_N_OLD_EXTRA_BYTES == REC_N_NEW_EXTRA_BYTES + 1, ""); if (c <= insert_buf && c_end > insert_buf) c_end= insert_buf; + else if (c_end < next_rec && + c_end >= next_rec - REC_N_OLD_EXTRA_BYTES + comp) + c_end= next_rec - REC_N_OLD_EXTRA_BYTES + comp; else c_end= std::min(c_end, block->frame + srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE * -- cgit v1.2.1 From db0fde3f24b37cfac9a4125ce888f1650a20db7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 25 May 2022 13:18:24 +0300 Subject: MDEV-28665 aio_uring::thread_routine terminates prematurely, causing hang aio_uring::thread_routine(): Handle -EINTR from io_uring_wait_cqe() in the same way as aio_linux::getevent_thread_routine() does it: simply ignore it and invoke the system call again. Reviewed by: Vladislav Vaintroub --- tpool/aio_liburing.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tpool/aio_liburing.cc b/tpool/aio_liburing.cc index a4a899d08d4..2a9c79f9020 100644 --- a/tpool/aio_liburing.cc +++ b/tpool/aio_liburing.cc @@ -1,4 +1,4 @@ -/* Copyright (C) 2021, MariaDB Corporation. +/* Copyright (C) 2021, 2022, MariaDB Corporation. 
This program is free software; you can redistribute itand /or modify it under the terms of the GNU General Public License as published by @@ -137,8 +137,8 @@ private: io_uring_cqe *cqe; if (int ret= io_uring_wait_cqe(&aio->uring_, &cqe)) { - if (ret == -EINTR) // this may occur during shutdown - break; + if (ret == -EINTR) + continue; my_printf_error(ER_UNKNOWN_ERROR, "io_uring_wait_cqe() returned %d\n", ME_ERROR_LOG | ME_FATAL, ret); @@ -147,7 +147,7 @@ private: auto *iocb= static_cast(io_uring_cqe_get_data(cqe)); if (!iocb) - break; + break; // ~aio_uring() told us to terminate int res= cqe->res; if (res < 0) -- cgit v1.2.1 From 99c8aed00d832d838ee4b746dc6ae18a8f281709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 25 May 2022 14:06:04 +0300 Subject: MDEV-28601 InnoDB history list length was reverted to 32 bits srv_do_purge(): In commit edde1f6e0d5f5a0115a5253c9b8d428af132f2d1 when the de-facto 32-bit trx_sys_t::history_size() was replaced with 32-bit trx_sys.rseg_history_len, some more variables were changed from ulint (size_t) to uint32_t. The history list length is the number of committed transactions whose undo logs are waiting to be purged. Each TRX_RSEG_HISTORY list is storing the number of entries in a 32-bit field and each transaction will occupy at least one undo log page. It is thinkable that the length of each TRX_RSEG_HISTORY list may approach the maximum representable number. The number cannot be exceeded, because the rollback segment header is allocated from the same tablespace as the undo log header pages it is pointing to, and because the page numbers of a tablespace are stored in 32 bits. In any case, it is possible that the total number of unpurged committed transactions cannot be represented in 32 but 39 bits (corresponding to 128 rollback segments and undo tablespaces). 
--- storage/innobase/include/trx0sys.h | 4 ++-- storage/innobase/lock/lock0lock.cc | 4 ++-- storage/innobase/srv/srv0srv.cc | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index 323994fa14f..246af942419 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -811,7 +811,7 @@ public: /** TRX_RSEG_HISTORY list length (number of committed transactions to purge) */ - MY_ALIGNED(CACHE_LINE_SIZE) Atomic_counter rseg_history_len; + MY_ALIGNED(CACHE_LINE_SIZE) Atomic_counter rseg_history_len; /** Mutex protecting trx_list. */ MY_ALIGNED(CACHE_LINE_SIZE) mutable TrxSysMutex mutex; diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 01a31b41662..a5ac7de0a92 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -4602,14 +4602,14 @@ lock_print_info_summary( fprintf(file, "Purge done for trx's n:o < " TRX_ID_FMT " undo n:o < " TRX_ID_FMT " state: %s\n" - "History list length %u\n", + "History list length %zu\n", purge_sys.tail.trx_no, purge_sys.tail.undo_no, purge_sys.enabled() ? (purge_sys.running() ? "running" : purge_sys.paused() ? 
"stopped" : "running but idle") : "disabled", - uint32_t{trx_sys.rseg_history_len}); + size_t{trx_sys.rseg_history_len}); #ifdef PRINT_NUM_OF_LOCK_STRUCTS fprintf(file, diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 0caf5b9033f..47e0fe13053 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -2393,7 +2393,7 @@ static bool srv_purge_should_exit() return true; /* Slow shutdown was requested. */ - if (const uint32_t history_size= trx_sys.rseg_history_len) + if (const size_t history_size= trx_sys.rseg_history_len) { static time_t progress_time; time_t now= time(NULL); @@ -2402,7 +2402,7 @@ static bool srv_purge_should_exit() progress_time= now; #if defined HAVE_SYSTEMD && !defined EMBEDDED_LIBRARY service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, - "InnoDB: to purge %u transactions", + "InnoDB: to purge %zu transactions", history_size); ib::info() << "to purge " << history_size << " transactions"; #endif @@ -2516,17 +2516,17 @@ DECLARE_THREAD(srv_worker_thread)( /** Do the actual purge operation. @param[in,out] n_total_purged total number of purged pages @return length of history list before the last purge batch. 
*/ -static uint32_t srv_do_purge(ulint* n_total_purged +static size_t srv_do_purge(ulint* n_total_purged #ifdef UNIV_DEBUG - , srv_slot_t* slot /*!< purge coordinator */ + , srv_slot_t* slot /*!< purge coordinator */ #endif - ) + ) { ulint n_pages_purged; static ulint count = 0; static ulint n_use_threads = 0; - static uint32_t rseg_history_len = 0; + static size_t rseg_history_len = 0; ulint old_activity_count = srv_get_activity_count(); const ulint n_threads = srv_n_purge_threads; @@ -2606,7 +2606,7 @@ srv_purge_coordinator_suspend( /*==========================*/ srv_slot_t* slot, /*!< in/out: Purge coordinator thread slot */ - uint32_t rseg_history_len) /*!< in: history list length + size_t rseg_history_len) /*!< in: history list length before last purge */ { ut_ad(!srv_read_only_mode); @@ -2697,7 +2697,7 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( rw_lock_create(PFS_NOT_INSTRUMENTED, &slot->debug_sync_lock, SYNC_NO_ORDER_CHECK); #endif - uint32_t rseg_history_len = trx_sys.rseg_history_len; + size_t rseg_history_len = trx_sys.rseg_history_len; do { /* If there are no records to purge or the last -- cgit v1.2.1 From c1752a9f8f1ab3826a8e2b2da60ccf31d91cba8f Mon Sep 17 00:00:00 2001 From: karmengc Date: Wed, 25 May 2022 14:08:28 +0100 Subject: server.cnf: adjust major version to 10.8 --- support-files/rpm/server.cnf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/support-files/rpm/server.cnf b/support-files/rpm/server.cnf index 9b384c9c6ee..bf9ed9c4f46 100644 --- a/support-files/rpm/server.cnf +++ b/support-files/rpm/server.cnf @@ -39,8 +39,8 @@ # you can put MariaDB-only options here [mariadb] -# This group is only read by MariaDB-10.7 servers. +# This group is only read by MariaDB-10.8 servers. 
# If you use the same .cnf file for MariaDB of different versions, # use this group for options that older servers don't understand -[mariadb-10.7] +[mariadb-10.8] -- cgit v1.2.1 From f85d488ad2132db7b294cde3909cb5992b752143 Mon Sep 17 00:00:00 2001 From: kkz Date: Tue, 24 May 2022 20:02:54 +0800 Subject: remove obsolete fix_session_vcol_expr{,_for_read} function declarations --- sql/table.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/sql/table.h b/sql/table.h index 34de21a39d5..097deca46b2 100644 --- a/sql/table.h +++ b/sql/table.h @@ -3117,9 +3117,6 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, uint ha_open_flags, TABLE *outparam, bool is_create_table, List *partitions_to_open= NULL); -bool fix_session_vcol_expr(THD *thd, Virtual_column_info *vcol); -bool fix_session_vcol_expr_for_read(THD *thd, Field *field, - Virtual_column_info *vcol); bool parse_vcol_defs(THD *thd, MEM_ROOT *mem_root, TABLE *table, bool *error_reported, vcol_init_mode expr); TABLE_SHARE *alloc_table_share(const char *db, const char *table_name, -- cgit v1.2.1 From 41068a890e3695a0005778d072bd01fbd660f836 Mon Sep 17 00:00:00 2001 From: Haidong Ji Date: Tue, 17 May 2022 15:53:54 -0500 Subject: MDEV-27314 Condense innodb buffer pool resize message InnoDB buffer pool resize messages are more succinct from this change: Before: ``` 2022-05-07 17:10:33 0 [Note] InnoDB: Completed resizing buffer pool from 14745600 to 19660800 bytes. 2022-05-07 17:10:33 0 [Note] InnoDB: Completed resizing buffer pool. 2022-05-07 17:10:33 8 [Note] InnoDB: Completed resizing buffer pool. (New size: 19660800 bytes). ``` After: ``` 2022-05-07 17:10:33 0 [Note] InnoDB: Completed resizing buffer pool from 14745600 to 19660800 bytes. ``` Additionally, the INNODB_BUFFER_POOL_RESIZE_STATUS has more complete info: it contains both the old and new buffer pool size values. 
--- mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test | 2 +- .../suite/innodb/t/innodb_buffer_pool_resize_temporary.test | 2 +- .../suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test | 2 +- mysql-test/suite/innodb/t/restart.test | 2 +- mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test | 2 +- storage/innobase/buf/buf0buf.cc | 10 +++------- storage/innobase/handler/ha_innodb.cc | 3 --- 7 files changed, 8 insertions(+), 15 deletions(-) diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test index 06760e09998..051f38a572e 100644 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test @@ -7,7 +7,7 @@ let $wait_timeout = 180; let $wait_condition = - SELECT SUBSTR(variable_value, 1, 31) = 'Completed resizing buffer pool.' + SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test index 8ada5bdb183..c49ae451638 100644 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test @@ -15,7 +15,7 @@ SET GLOBAL innodb_buffer_pool_size=8388608; let $wait_timeout = 60; let $wait_condition = - SELECT SUBSTR(variable_value, 1, 31) = 'Completed resizing buffer pool.' 
+ SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' FROM information_schema.global_status WHERE variable_name = 'INNODB_BUFFER_POOL_RESIZE_STATUS'; --source include/wait_condition.inc diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test index 10ce14d4af7..78db6bf0d5a 100644 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test @@ -8,7 +8,7 @@ let $wait_timeout = 180; let $wait_condition = - SELECT SUBSTR(variable_value, 1, 31) = 'Completed resizing buffer pool.' + SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; diff --git a/mysql-test/suite/innodb/t/restart.test b/mysql-test/suite/innodb/t/restart.test index 6e80a684b68..bb3d08a8779 100644 --- a/mysql-test/suite/innodb/t/restart.test +++ b/mysql-test/suite/innodb/t/restart.test @@ -93,7 +93,7 @@ DROP TABLE tr,tc,td; let $wait_timeout = 180; let $wait_condition = - SELECT SUBSTR(variable_value, 1, 31) = 'Completed resizing buffer pool.' + SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test index ad33ada1098..dada2a9a455 100644 --- a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test @@ -25,7 +25,7 @@ --source include/have_innodb.inc let $wait_condition = - SELECT SUBSTR(variable_value, 1, 31) = 'Completed resizing buffer pool.' 
+ SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index b986b5c2325..60ede653bee 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1933,9 +1933,8 @@ calc_buf_pool_size: if (srv_buf_pool_old_size != srv_buf_pool_size) { - ib::info() << "Completed resizing buffer pool from " - << srv_buf_pool_old_size - << " to " << srv_buf_pool_size << " bytes."; + buf_resize_status("Completed resizing buffer pool from %zu to %zu bytes." + ,srv_buf_pool_old_size, srv_buf_pool_size); srv_buf_pool_old_size = srv_buf_pool_size; } @@ -1947,11 +1946,8 @@ calc_buf_pool_size: } #endif /* BTR_CUR_HASH_ADAPT */ - if (!warning) { - buf_resize_status("Completed resizing buffer pool."); - } else { + if (warning) buf_resize_status("Resizing buffer pool failed"); - } ut_d(validate()); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 74fac409ea0..98e6cdae801 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -17419,9 +17419,6 @@ innodb_buffer_pool_size_update(THD*,st_mysql_sys_var*,void*, const void* save) "Buffer pool resize requested"); buf_resize_start(); - - ib::info() << export_vars.innodb_buffer_pool_resize_status - << " (New size: " << in_val << " bytes)."; } /** The latest assigned innodb_ft_aux_table name */ -- cgit v1.2.1 From 31e30329a368a32d9de202748987592cc86db241 Mon Sep 17 00:00:00 2001 From: Ting Nian Date: Tue, 3 May 2022 17:25:48 +0000 Subject: Add option --enable-cleartext-plugin to the MariaDB client For compatibility reasons, add the option to the MariaDB client without any functional changes besides simply accepting the option and emitting a warning that it is obsolete. In MySQL this security related option is compulsory in certain use cases. 
When users switch to MariaDB, this client command that used to work starts failing without a sensible error message. In worst case users resort to re-installing the mysql client from MySQL. In MariaDB the option is obsolete and should simply be ignored. Users however don't have any opportunity to learn that unless the client program tells them so. Before: mysql --enable-cleartext-plugin ... mysql: unknown option '--enable-cleartext-plugin' (program terminates) After: mysql --enable-cleartext-plugin ... WARNING: option '--enable-cleartext-plugin' is obsolete. (program executes) All new code of the whole pull request, including one or several files that are either new files or modified ones, are contributed under the BSD-new license. I am contributing on behalf of my employer Amazon Web Services, Inc. --- client/client_priv.h | 1 + client/mysql.cc | 10 ++++++++++ man/mysql.1 | 15 +++++++++++++++ mysql-test/main/mysql.result | 5 ++++- mysql-test/main/mysql.test | 8 ++++++++ 5 files changed, 38 insertions(+), 1 deletion(-) diff --git a/client/client_priv.h b/client/client_priv.h index 5e764cc33fd..275d3c6853b 100644 --- a/client/client_priv.h +++ b/client/client_priv.h @@ -102,6 +102,7 @@ enum options_client OPT_IGNORE_DATA, OPT_PRINT_ROW_COUNT, OPT_PRINT_ROW_EVENT_POSITIONS, OPT_CHECK_IF_UPGRADE_NEEDED, + OPT_COMPATIBILTY_CLEARTEXT_PLUGIN, OPT_MAX_CLIENT_OPTION /* should be always the last */ }; diff --git a/client/mysql.cc b/client/mysql.cc index 558b54e3909..ea92c84e1d1 100644 --- a/client/mysql.cc +++ b/client/mysql.cc @@ -1521,6 +1521,8 @@ static struct my_option my_long_options[] = &delimiter_str, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"execute", 'e', "Execute command and quit. (Disables --force and history file.)", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"enable-cleartext-plugin", OPT_COMPATIBILTY_CLEARTEXT_PLUGIN, "Obsolete option. 
Exists only for MySQL compatibility.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"vertical", 'E', "Print the output of a query (rows) vertically.", &vertical, &vertical, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -1817,6 +1819,14 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), printf("WARNING: --server-arg option not supported in this configuration.\n"); #endif break; + case OPT_COMPATIBILTY_CLEARTEXT_PLUGIN: + /* + This option exists in MySQL client but not in MariaDB. Users switching from + MySQL might still have this option in their commands, and it will not work + in MariaDB unless it is handled. Therefore output a warning and continue. + */ + printf("WARNING: option '--enable-cleartext-plugin' is obsolete.\n"); + break; case 'A': opt_rehash= 0; break; diff --git a/man/mysql.1 b/man/mysql.1 index 3ae8fdf9d69..238106843ef 100644 --- a/man/mysql.1 +++ b/man/mysql.1 @@ -507,6 +507,21 @@ the section called \(lqMYSQL COMMANDS\(rq\&. .sp -1 .IP \(bu 2.3 .\} +.\" mysql: enable cleartext plugin option +.\" enable cleartext plugin option: mysql +\fB\-\-enable\-cleartext\-plugin\fR +.sp +Obsolete option\&. Exists only for MySQL compatibility\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} .\" mysql: execute option .\" execute option: mysql \fB\-\-execute=\fR\fB\fIstatement\fR\fR, diff --git a/mysql-test/main/mysql.result b/mysql-test/main/mysql.result index ec2760ce8a7..9b0936b5924 100644 --- a/mysql-test/main/mysql.result +++ b/mysql-test/main/mysql.result @@ -629,4 +629,7 @@ drop table t1; # # MDEV-15538 '-N' Produce html output wrong # -
1
\ No newline at end of file +
1
+WARNING: option '--enable-cleartext-plugin' is obsolete. +1 +1 diff --git a/mysql-test/main/mysql.test b/mysql-test/main/mysql.test index 1cb4977a32d..0f41add821a 100644 --- a/mysql-test/main/mysql.test +++ b/mysql-test/main/mysql.test @@ -708,3 +708,11 @@ drop table t1; --echo # MDEV-15538 '-N' Produce html output wrong --echo # --exec $MYSQL -NHe "select 1 as a" + + +# +# Test obsolete option --enable-cleartext-plugin +# This should proceed with a warning +# +--echo +--exec $MYSQL test --enable-cleartext-plugin -e "select 1" -- cgit v1.2.1 From c4e87cb22cd78b5539202d9dcf13954375decb67 Mon Sep 17 00:00:00 2001 From: Mathew Heard Date: Thu, 3 Feb 2022 17:39:29 +1100 Subject: MDEV-9020: Connect issues ALTER TABLE DISABLE KEYS when inserting data If the connecting user doesn't have alter table privilege this isn't allowed. This patch removes enable / disable key commands that should never have been here Closes #2002 --- storage/connect/tabmysql.cpp | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/storage/connect/tabmysql.cpp b/storage/connect/tabmysql.cpp index a701db1e8e9..0d7cb6d50c4 100644 --- a/storage/connect/tabmysql.cpp +++ b/storage/connect/tabmysql.cpp @@ -946,15 +946,6 @@ bool TDBMYSQL::OpenDB(PGLOBAL g) } // endif MakeInsert - if (m_Rc != RC_FX) { - char cmd[64]; - int w; - - sprintf(cmd, "ALTER TABLE `%s` DISABLE KEYS", TableName); - - m_Rc = Myc.ExecSQL(g, cmd, &w); // may fail for some engines - } // endif m_Rc - } else // m_Rc = (Mode == MODE_DELETE) ? MakeDelete(g) : MakeUpdate(g); m_Rc = (MakeCommand(g)) ? 
RC_FX : RC_OK; @@ -1216,16 +1207,6 @@ int TDBMYSQL::DeleteDB(PGLOBAL g, int irc) void TDBMYSQL::CloseDB(PGLOBAL g) { if (Myc.Connected()) { - if (Mode == MODE_INSERT) { - char cmd[64]; - int w; - PDBUSER dup = PlgGetUser(g); - - dup->Step = "Enabling indexes"; - sprintf(cmd, "ALTER TABLE `%s` ENABLE KEYS", TableName); - Myc.m_Rows = -1; // To execute the query - m_Rc = Myc.ExecSQL(g, cmd, &w); // May fail for some engines - } // endif m_Rc Myc.Close(); } // endif Myc -- cgit v1.2.1 From 0e0a3580efdae313fab340bbb308d371fa36c021 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Sat, 28 May 2022 09:04:28 +1000 Subject: MDEV-27314 Condense innodb buffer pool resize message (postfix) --- storage/innobase/handler/ha_innodb.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 98e6cdae801..fd168adb702 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -17412,8 +17412,6 @@ static void innodb_buffer_pool_size_update(THD*,st_mysql_sys_var*,void*, const void* save) { - longlong in_val = *static_cast(save); - snprintf(export_vars.innodb_buffer_pool_resize_status, sizeof(export_vars.innodb_buffer_pool_resize_status), "Buffer pool resize requested"); -- cgit v1.2.1 From 96329d632159f1f5a017e002c3217e19c79e5731 Mon Sep 17 00:00:00 2001 From: Monty Date: Sun, 29 May 2022 12:10:37 +0300 Subject: Fixed that CHECK TABLE on an S3 table doesn't try to write to files The symptom of the bug was that check table on an S3 table when using --s3_slave-ignore-updates=1 could print "9 when updating keyfile" --- storage/maria/ha_s3.cc | 1 + storage/maria/ma_check.c | 3 +++ storage/maria/ma_create.c | 1 + storage/maria/ma_locking.c | 12 ++++++++++++ storage/maria/maria_def.h | 3 +++ 5 files changed, 20 insertions(+) diff --git a/storage/maria/ha_s3.cc b/storage/maria/ha_s3.cc index 9a0a458bfe5..158fa8d0430 100644 --- a/storage/maria/ha_s3.cc +++
b/storage/maria/ha_s3.cc @@ -640,6 +640,7 @@ int ha_s3::open(const char *name, int mode, uint open_flags) file->dfile.big_block_size= file->s->kfile.big_block_size= file->s->bitmap.file.big_block_size= file->s->base.s3_block_size; file->s->kfile.head_blocks= file->s->base.keystart / file->s->block_size; + file->s->no_status_updates= in_alter_table == S3_NO_ALTER; } } open_args= 0; diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index f8377df8350..70a1c976f46 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -6326,6 +6326,9 @@ int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update) MARIA_SHARE *share= info->s; DBUG_ENTER("maria_update_state_info"); + if (info->s->no_status_updates) + DBUG_RETURN(0); /* S3 readonly table */ + if (update & UPDATE_OPEN_COUNT) { share->state.open_count=0; diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 83d4f584e69..43fbfa22178 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -1461,6 +1461,7 @@ int _ma_update_state_lsns_sub(MARIA_SHARE *share, LSN lsn, TrID create_trid, File file= share->kfile.file; DBUG_ENTER("_ma_update_state_lsns_sub"); DBUG_ASSERT(file >= 0); + CRASH_IF_S3_TABLE(share); if (lsn == LSN_IMPOSSIBLE) { diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c index 200a728626f..f895e66d9e0 100644 --- a/storage/maria/ma_locking.c +++ b/storage/maria/ma_locking.c @@ -303,6 +303,8 @@ int _ma_writeinfo(register MARIA_HA *info, uint operation) /* transactional tables flush their state at Checkpoint */ if (operation) { /* Two threads can't be here */ + CRASH_IF_S3_TABLE(info->s); /* S3 readonly doesn't come here */ + olderror= my_errno; /* Remember last error */ #ifdef MARIA_EXTERNAL_LOCKING @@ -446,6 +448,7 @@ int _ma_mark_file_changed_now(register MARIA_SHARE *share) */ if (!share->temporary) { + CRASH_IF_S3_TABLE(share); mi_int2store(buff,share->state.open_count); buff[2]=1; /* Mark that it's changed */ if 
(my_pwrite(share->kfile.file, buff, sizeof(buff), @@ -458,6 +461,7 @@ int _ma_mark_file_changed_now(register MARIA_SHARE *share) if (share->base.born_transactional && !(share->state.org_changed & STATE_NOT_MOVABLE)) { + CRASH_IF_S3_TABLE(share); /* Lock table to current installation */ if (_ma_set_uuid(share, 0) || (share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS && @@ -518,6 +522,7 @@ int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_tables) /* Its not fatal even if we couldn't get the lock ! */ if (share->state.open_count > 0) { + CRASH_IF_S3_TABLE(share); share->state.open_count--; share->changed= 1; /* We have to update state */ /* @@ -548,9 +553,15 @@ void _ma_mark_file_crashed(MARIA_SHARE *share) { uchar buff[2]; DBUG_ENTER("_ma_mark_file_crashed"); + CRASH_IF_S3_TABLE(share); share->state.changed|= STATE_CRASHED; + if (share->no_status_updates) + DBUG_VOID_RETURN; /* Safety */ + mi_int2store(buff, share->state.changed); + + /* We can ignore the errors, as if the mark failed, there isn't anything else we can do; The user should already have got an error that the @@ -606,6 +617,7 @@ my_bool _ma_set_uuid(MARIA_SHARE *share, my_bool reset_uuid) bzero(buff, sizeof(buff)); uuid= buff; } + CRASH_IF_S3_TABLE(share); return (my_bool) my_pwrite(share->kfile.file, uuid, MY_UUID_SIZE, mi_uint2korr(share->state.header.base_pos), MYF(MY_NABP)); diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 59f92dd91b1..2be30a22e76 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -787,6 +787,7 @@ typedef struct st_maria_share my_bool changed, /* If changed since lock */ global_changed, /* If changed since open */ not_flushed; + my_bool no_status_updates; /* Set to 1 if S3 readonly table */ my_bool internal_table; /* Internal tmp table */ my_bool lock_key_trees; /* If we have to lock trees on read */ my_bool non_transactional_concurrent_insert; @@ -1768,3 +1769,5 @@ static inline void decrement_share_in_trans(MARIA_SHARE 
*share) } C_MODE_END #endif + +#define CRASH_IF_S3_TABLE(share) DBUG_ASSERT(!share->no_status_updates) -- cgit v1.2.1 From 8b19f521f10282b75b2a1009a7a8241ca341f6f9 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 24 May 2022 17:27:18 +0200 Subject: move alter_table combinations to a separate test file no need to run all alter tests three times with no changes whatsoever --- mysql-test/main/alter_table,aria.rdiff | 22 -- mysql-test/main/alter_table,heap.rdiff | 59 ---- mysql-test/main/alter_table.combinations | 5 - mysql-test/main/alter_table.result | 324 --------------------- mysql-test/main/alter_table.test | 285 ------------------ .../main/alter_table_combinations,aria.rdiff | 22 ++ .../main/alter_table_combinations,heap.rdiff | 77 +++++ .../main/alter_table_combinations.combinations | 6 + mysql-test/main/alter_table_combinations.result | 324 +++++++++++++++++++++ mysql-test/main/alter_table_combinations.test | 263 +++++++++++++++++ 10 files changed, 692 insertions(+), 695 deletions(-) delete mode 100644 mysql-test/main/alter_table,aria.rdiff delete mode 100644 mysql-test/main/alter_table,heap.rdiff delete mode 100644 mysql-test/main/alter_table.combinations create mode 100644 mysql-test/main/alter_table_combinations,aria.rdiff create mode 100644 mysql-test/main/alter_table_combinations,heap.rdiff create mode 100644 mysql-test/main/alter_table_combinations.combinations create mode 100644 mysql-test/main/alter_table_combinations.result create mode 100644 mysql-test/main/alter_table_combinations.test diff --git a/mysql-test/main/alter_table,aria.rdiff b/mysql-test/main/alter_table,aria.rdiff deleted file mode 100644 index 40bbf95494c..00000000000 --- a/mysql-test/main/alter_table,aria.rdiff +++ /dev/null @@ -1,22 +0,0 @@ ---- ./mysql-test/main/alter_table.result 2020-02-27 19:35:41.279992329 +0300 -+++ ./mysql-test/main/alter_table,aria.reject 2020-02-27 19:37:13.251994491 +0300 -@@ -2716,8 +2716,7 @@ - t3 CREATE TABLE `t3` ( - `a` int(11) DEFAULT NULL, - 
`b` int(11) DEFAULT NULL, -- KEY `b` (`b`), -- CONSTRAINT `t3_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`bb`) -+ KEY `b` (`b`) - ) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 - ALTER TABLE t1 RENAME COLUMN bb TO b; - SHOW CREATE TABLE t1; -@@ -2733,8 +2732,7 @@ - t3 CREATE TABLE `t3` ( - `a` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, -- KEY `b` (`c`), -- CONSTRAINT `t3_ibfk_1` FOREIGN KEY (`c`) REFERENCES `t1` (`b`) -+ KEY `b` (`c`) - ) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 - CREATE TABLE t4(a int); - ALTER TABLE t4 RENAME COLUMN a TO aa, ALGORITHM = INPLACE; diff --git a/mysql-test/main/alter_table,heap.rdiff b/mysql-test/main/alter_table,heap.rdiff deleted file mode 100644 index ad6fd194cc9..00000000000 --- a/mysql-test/main/alter_table,heap.rdiff +++ /dev/null @@ -1,59 +0,0 @@ ---- ./mysql-test/main/alter_table.result 2020-02-27 19:35:41.279992329 +0300 -+++ ./mysql-test/main/alter_table,heap.reject 2020-02-27 19:39:44.175998039 +0300 -@@ -2716,8 +2716,7 @@ - t3 CREATE TABLE `t3` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, -- KEY `b` (`b`), -- CONSTRAINT `t3_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`bb`) -+ KEY `b` (`b`) - ) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 - ALTER TABLE t1 RENAME COLUMN bb TO b; - SHOW CREATE TABLE t1; -@@ -2733,8 +2732,7 @@ - t3 CREATE TABLE `t3` ( - `a` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, -- KEY `b` (`c`), -- CONSTRAINT `t3_ibfk_1` FOREIGN KEY (`c`) REFERENCES `t1` (`b`) -+ KEY `b` (`c`) - ) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 - CREATE TABLE t4(a int); - ALTER TABLE t4 RENAME COLUMN a TO aa, ALGORITHM = INPLACE; -@@ -2774,36 +2772,6 @@ - ERROR 42S22: Unknown column 'd' in 'field list' - DROP TRIGGER trg1; - DROP PROCEDURE sp1; --CREATE TABLE t_gen(a INT, b DOUBLE GENERATED ALWAYS AS (SQRT(a))); --INSERT INTO t_gen(a) VALUES(4); --SELECT * FROM t_gen; --a b --4 2 --SHOW CREATE TABLE t_gen; --Table Create Table --t_gen CREATE TABLE `t_gen` ( -- `a` int(11) DEFAULT NULL, -- `b` 
double GENERATED ALWAYS AS (sqrt(`a`)) VIRTUAL --) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 --ALTER TABLE t_gen RENAME COLUMN a TO c, CHANGE COLUMN b b DOUBLE GENERATED ALWAYS AS (SQRT(c)); --SELECT * FROM t_gen; --c b --4 2 --SHOW CREATE TABLE t_gen; --Table Create Table --t_gen CREATE TABLE `t_gen` ( -- `c` int(11) DEFAULT NULL, -- `b` double GENERATED ALWAYS AS (sqrt(`c`)) VIRTUAL --) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 --ALTER TABLE t_gen CHANGE COLUMN c x INT; --show create table t_gen; --Table Create Table --t_gen CREATE TABLE `t_gen` ( -- `x` int(11) DEFAULT NULL, -- `b` double GENERATED ALWAYS AS (sqrt(`x`)) VIRTUAL --) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 --ALTER TABLE t_gen RENAME COLUMN x TO a; --DROP TABLE t_gen; - SHOW CREATE TABLE t1; - Table Create Table - t1 CREATE TABLE `t1` ( diff --git a/mysql-test/main/alter_table.combinations b/mysql-test/main/alter_table.combinations deleted file mode 100644 index 824e0a3be04..00000000000 --- a/mysql-test/main/alter_table.combinations +++ /dev/null @@ -1,5 +0,0 @@ -[innodb] - -[aria] - -[heap] diff --git a/mysql-test/main/alter_table.result b/mysql-test/main/alter_table.result index 755de4336b9..a73fde559e5 100644 --- a/mysql-test/main/alter_table.result +++ b/mysql-test/main/alter_table.result @@ -1,5 +1,3 @@ -drop table if exists t1,t2; -drop database if exists mysqltest; set @save_max_allowed_packet=@@global.max_allowed_packet; create table t1 ( col1 int not null auto_increment primary key, @@ -2588,22 +2586,6 @@ set max_statement_time= 0; drop table t1; drop view v1; # -# MDEV-25803 Inplace ALTER breaks MyISAM/Aria tables when order of keys is changed -# -set @save_default_engine= @@default_storage_engine; -create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)) engine myisam; -alter table t1 change x xx int, algorithm=inplace; -check table t1; -Table Op Msg_type Msg_text -test.t1 check status OK -create or replace table t1 (x int, y int, unique (y), unique 
(x), primary key(x)); -alter table t1 change x xx int, algorithm=inplace; -check table t1; -Table Op Msg_type Msg_text -test.t1 check status OK -drop table t1; -set @@default_storage_engine= @save_default_engine; -# # End of 10.3 tests # # @@ -2650,296 +2632,6 @@ DROP TABLE t1; # End of 10.4 tests # # -# MDEV-16290 ALTER TABLE ... RENAME COLUMN syntax -# -SET @save_default_engine= @@DEFAULT_STORAGE_ENGINE; -CREATE TABLE t1(a INT, b VARCHAR(30), c FLOAT); -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` varchar(30) DEFAULT NULL, - `c` float DEFAULT NULL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -INSERT INTO t1 VALUES(1,'abcd',1.234); -CREATE TABLE t2(a INT, b VARCHAR(30), c FLOAT) ENGINE=MyIsam; -SHOW CREATE TABLE t2; -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` varchar(30) DEFAULT NULL, - `c` float DEFAULT NULL -) ENGINE=MyISAM DEFAULT CHARSET=latin1 -INSERT INTO t2 VALUES(1,'abcd',1.234); -ALTER TABLE t1 RENAME COLUMN a TO a; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` varchar(30) DEFAULT NULL, - `c` float DEFAULT NULL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t1 RENAME COLUMN a TO m; -ALTER TABLE t1 RENAME COLUMN a TO m; -ERROR 42S22: Unknown column 'a' in 't1' -ALTER TABLE t1 RENAME COLUMN IF EXISTS a TO m; -Warnings: -Note 1054 Unknown column 'a' in 't1' -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `m` int(11) DEFAULT NULL, - `b` varchar(30) DEFAULT NULL, - `c` float DEFAULT NULL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -SELECT * FROM t1; -m b c -1 abcd 1.234 -ALTER TABLE t1 RENAME COLUMN m TO x, -RENAME COLUMN b TO y, -RENAME COLUMN c TO z; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `x` int(11) DEFAULT NULL, - `y` varchar(30) DEFAULT NULL, - `z` float DEFAULT NULL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -SELECT * FROM t1; -x y z -1 
abcd 1.234 -ALTER TABLE t2 RENAME COLUMN a TO d, RENAME COLUMN b TO e, RENAME COLUMN c to f; -SHOW CREATE TABLE t2; -Table Create Table -t2 CREATE TABLE `t2` ( - `d` int(11) DEFAULT NULL, - `e` varchar(30) DEFAULT NULL, - `f` float DEFAULT NULL -) ENGINE=MyISAM DEFAULT CHARSET=latin1 -SELECT * FROM t2; -d e f -1 abcd 1.234 -ALTER TABLE t1 CHANGE COLUMN x a INT, RENAME COLUMN y TO b; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` varchar(30) DEFAULT NULL, - `z` float DEFAULT NULL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t1 CHANGE COLUMN z c DOUBLE, RENAME COLUMN b to b; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` varchar(30) DEFAULT NULL, - `c` double DEFAULT NULL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t1 CHANGE COLUMN a b int, RENAME COLUMN b TO c, CHANGE COLUMN c d FLOAT; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `b` int(11) DEFAULT NULL, - `c` varchar(30) DEFAULT NULL, - `d` float DEFAULT NULL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t1 ADD COLUMN zz INT, RENAME COLUMN d TO f; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `b` int(11) DEFAULT NULL, - `c` varchar(30) DEFAULT NULL, - `f` float DEFAULT NULL, - `zz` int(11) DEFAULT NULL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t1 DROP COLUMN zz, RENAME COLUMN c TO zz; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `b` int(11) DEFAULT NULL, - `zz` varchar(30) DEFAULT NULL, - `f` float DEFAULT NULL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t1 RENAME COLUMN zz to c, DROP COLUMN f; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `b` int(11) DEFAULT NULL, - `c` varchar(30) DEFAULT NULL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t1 ADD COLUMN d INT DEFAULT 5, RENAME COLUMN c TO b, DROP COLUMN b; -SHOW 
CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `b` varchar(30) DEFAULT NULL, - `d` int(11) DEFAULT 5 -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t1 RENAME COLUMN b TO d, RENAME COLUMN d TO b; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `d` varchar(30) DEFAULT NULL, - `b` int(11) DEFAULT 5 -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t1 ADD KEY(b); -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `d` varchar(30) DEFAULT NULL, - `b` int(11) DEFAULT 5, - KEY `b` (`b`) -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t1 RENAME COLUMN b TO bb; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `d` varchar(30) DEFAULT NULL, - `bb` int(11) DEFAULT 5, - KEY `b` (`bb`) -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -SELECT * FROM t1; -d bb -abcd 5 -CREATE TABLE t3(a int, b int, KEY(b)); -ALTER TABLE t3 ADD CONSTRAINT FOREIGN KEY(b) REFERENCES t1(bb); -SHOW CREATE TABLE t3; -Table Create Table -t3 CREATE TABLE `t3` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `b` (`b`), - CONSTRAINT `t3_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`bb`) -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t1 RENAME COLUMN bb TO b; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `d` varchar(30) DEFAULT NULL, - `b` int(11) DEFAULT 5, - KEY `b` (`b`) -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t3 RENAME COLUMN b TO c; -SHOW CREATE TABLE t3; -Table Create Table -t3 CREATE TABLE `t3` ( - `a` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - KEY `b` (`c`), - CONSTRAINT `t3_ibfk_1` FOREIGN KEY (`c`) REFERENCES `t1` (`b`) -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -CREATE TABLE t4(a int); -ALTER TABLE t4 RENAME COLUMN a TO aa, ALGORITHM = INPLACE; -SHOW CREATE TABLE t4; -Table Create Table -t4 CREATE TABLE `t4` ( - `aa` int(11) DEFAULT NULL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t4 
RENAME COLUMN aa TO a, ALGORITHM = COPY; -SHOW CREATE TABLE t4; -Table Create Table -t4 CREATE TABLE `t4` ( - `a` int(11) DEFAULT NULL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -DROP TABLE t4; -CREATE VIEW v1 AS SELECT d,e,f FROM t2; -CREATE TRIGGER trg1 BEFORE UPDATE on t2 FOR EACH ROW SET NEW.d=OLD.d + 10; -CREATE PROCEDURE sp1() INSERT INTO t2(d) VALUES(10); -ALTER TABLE t2 RENAME COLUMN d TO g; -SHOW CREATE TABLE t2; -Table Create Table -t2 CREATE TABLE `t2` ( - `g` int(11) DEFAULT NULL, - `e` varchar(30) DEFAULT NULL, - `f` float DEFAULT NULL -) ENGINE=MyISAM DEFAULT CHARSET=latin1 -SHOW CREATE VIEW v1; -View Create View character_set_client collation_connection -v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select `test`.`t2`.`d` AS `d`,`test`.`t2`.`e` AS `e`,`test`.`t2`.`f` AS `f` from `t2` koi8r koi8r_general_ci -Warnings: -Warning 1356 View 'test.v1' references invalid table(s) or column(s) or function(s) or definer/invoker of view lack rights to use them -SELECT * FROM v1; -ERROR HY000: View 'test.v1' references invalid table(s) or column(s) or function(s) or definer/invoker of view lack rights to use them -UPDATE t2 SET f = f + 10; -ERROR 42S22: Unknown column 'd' in 'OLD' -CALL sp1(); -ERROR 42S22: Unknown column 'd' in 'field list' -DROP TRIGGER trg1; -DROP PROCEDURE sp1; -CREATE TABLE t_gen(a INT, b DOUBLE GENERATED ALWAYS AS (SQRT(a))); -INSERT INTO t_gen(a) VALUES(4); -SELECT * FROM t_gen; -a b -4 2 -SHOW CREATE TABLE t_gen; -Table Create Table -t_gen CREATE TABLE `t_gen` ( - `a` int(11) DEFAULT NULL, - `b` double GENERATED ALWAYS AS (sqrt(`a`)) VIRTUAL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t_gen RENAME COLUMN a TO c, CHANGE COLUMN b b DOUBLE GENERATED ALWAYS AS (SQRT(c)); -SELECT * FROM t_gen; -c b -4 2 -SHOW CREATE TABLE t_gen; -Table Create Table -t_gen CREATE TABLE `t_gen` ( - `c` int(11) DEFAULT NULL, - `b` double GENERATED ALWAYS AS (sqrt(`c`)) VIRTUAL -) 
ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t_gen CHANGE COLUMN c x INT; -show create table t_gen; -Table Create Table -t_gen CREATE TABLE `t_gen` ( - `x` int(11) DEFAULT NULL, - `b` double GENERATED ALWAYS AS (sqrt(`x`)) VIRTUAL -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t_gen RENAME COLUMN x TO a; -DROP TABLE t_gen; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `d` varchar(30) DEFAULT NULL, - `b` int(11) DEFAULT 5, - KEY `b` (`b`) -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -ALTER TABLE t1 RENAME COLUMN b z; -ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'z' at line 1 -ALTER TABLE t1 RENAME COLUMN FROM b TO z; -ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'FROM b TO z' at line 1 -ALTER TABLE t1 RENAME COLUMN b TO 1; -ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '1' at line 1 -ALTER TABLE t1 RENAME COLUMN b TO e, RENAME COLUMN c TO e; -ERROR 42S22: Unknown column 'c' in 't1' -ALTER TABLE t1 ADD COLUMN z INT, RENAME COLUMN b TO z; -ERROR 42S21: Duplicate column name 'z' -ALTER TABLE t1 DROP COLUMN b, RENAME COLUMN b TO z; -ERROR 42S22: Unknown column 'b' in 't1' -ALTER TABLE t1 RENAME COLUMN b TO b, RENAME COLUMN b TO b; -ERROR 42S22: Unknown column 'b' in 't1' -ALTER TABLE t1 RENAME COLUMN b TO c3, DROP COLUMN c3; -ERROR 42000: Can't DROP COLUMN `c3`; check that it exists -ALTER TABLE t1 ADD COLUMN z INT, CHANGE COLUMN z y INT, DROP COLUMN y; -ERROR 42S22: Unknown column 'z' in 't1' -ALTER TABLE t1 ADD COLUMN z INT, RENAME COLUMN z TO y, DROP COLUMN y; -ERROR 42S22: Unknown column 'z' in 't1' -ALTER TABLE t1 RENAME COLUMN b TO `nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn`; 
-ERROR 42000: Incorrect column name 'nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn' -ALTER TABLE t1 CHANGE b `nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn` int; -ERROR 42000: Identifier name 'nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn' is too long -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `d` varchar(30) DEFAULT NULL, - `b` int(11) DEFAULT 5, - KEY `b` (`b`) -) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 -SELECT * FROM t1; -d b -abcd 5 -DROP VIEW v1; -DROP TABLE t3,t1,t2; -SET DEFAULT_STORAGE_ENGINE= @save_default_engine; -# # MDEV-7318 RENAME INDEX # # @@ -3389,22 +3081,6 @@ t1 CREATE TABLE `t1` ( ) ENGINE=MyISAM DEFAULT CHARSET=latin1 drop table t1; # -# MDEV-25803 Inplace ALTER breaks MyISAM/Aria tables when order of keys is changed -# -set @save_default_engine= @@default_storage_engine; -create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)) engine myisam; -alter table t1 change x xx int, algorithm=inplace; -check table t1; -Table Op Msg_type Msg_text -test.t1 check status OK -create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)); -alter table t1 change x xx int, algorithm=inplace; -check table t1; -Table Op Msg_type Msg_text -test.t1 check status OK -drop table t1; -set @@default_storage_engine= @save_default_engine; -# # MDEV-25555 Server crashes in tree_record_pos after INPLACE-recreating index on HEAP table # create table t1 (a int, key idx1(a), key idx2 using btree(a)) engine=memory; diff --git a/mysql-test/main/alter_table.test b/mysql-test/main/alter_table.test index 1bff20a7bd7..b339da5edb2 100644 --- a/mysql-test/main/alter_table.test +++ b/mysql-test/main/alter_table.test @@ -2,10 +2,6 @@ # # Test of alter table # ---disable_warnings -drop table if exists t1,t2; -drop database if exists mysqltest; ---enable_warnings set @save_max_allowed_packet=@@global.max_allowed_packet; create table t1 ( @@ -2099,47 
+2095,6 @@ set max_statement_time= 0; drop table t1; drop view v1; ---echo # ---echo # MDEV-25803 Inplace ALTER breaks MyISAM/Aria tables when order of keys is changed ---echo # -set @save_default_engine= @@default_storage_engine; ---disable_query_log -if ($MTR_COMBINATION_INNODB) -{ - set default_storage_engine= innodb; -} -if ($MTR_COMBINATION_ARIA) -{ - set default_storage_engine= aria; -} ---enable_query_log - -if (!$MTR_COMBINATION_INNODB) -{ - --disable_query_log - --disable_result_log - # There is no inplace ADD INDEX for MyISAM/Aria: - create or replace table t1 (x int); - --error ER_ALTER_OPERATION_NOT_SUPPORTED - alter table t1 add unique (x), algorithm=inplace; - --error ER_ALTER_OPERATION_NOT_SUPPORTED - alter table t1 add primary key(x), algorithm=inplace; - --error ER_ALTER_OPERATION_NOT_SUPPORTED - alter table t1 add index(x), algorithm=inplace; - --enable_query_log - --enable_result_log -} - -create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)) engine myisam; -alter table t1 change x xx int, algorithm=inplace; -check table t1; -create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)); -alter table t1 change x xx int, algorithm=inplace; -check table t1; -# cleanup -drop table t1; -set @@default_storage_engine= @save_default_engine; - --echo # --echo # End of 10.3 tests --echo # @@ -2192,205 +2147,6 @@ DROP TABLE t1; --echo # End of 10.4 tests --echo # ---echo # ---echo # MDEV-16290 ALTER TABLE ... 
RENAME COLUMN syntax ---echo # -SET @save_default_engine= @@DEFAULT_STORAGE_ENGINE; ---disable_query_log -if ($MTR_COMBINATION_INNODB) -{ -SET DEFAULT_STORAGE_ENGINE= INNODB; -} -if ($MTR_COMBINATION_ARIA) -{ -SET DEFAULT_STORAGE_ENGINE= ARIA; -} -if ($MTR_COMBINATION_HEAP) -{ -SET DEFAULT_STORAGE_ENGINE= MEMORY; -} ---enable_query_log -let $default_engine= `select @@default_storage_engine`; - -CREATE TABLE t1(a INT, b VARCHAR(30), c FLOAT); ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -INSERT INTO t1 VALUES(1,'abcd',1.234); -CREATE TABLE t2(a INT, b VARCHAR(30), c FLOAT) ENGINE=MyIsam; -SHOW CREATE TABLE t2; -INSERT INTO t2 VALUES(1,'abcd',1.234); - -# Rename one column -ALTER TABLE t1 RENAME COLUMN a TO a; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -ALTER TABLE t1 RENAME COLUMN a TO m; ---error ER_BAD_FIELD_ERROR -ALTER TABLE t1 RENAME COLUMN a TO m; -ALTER TABLE t1 RENAME COLUMN IF EXISTS a TO m; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -SELECT * FROM t1; - -# Rename multiple column -ALTER TABLE t1 RENAME COLUMN m TO x, - RENAME COLUMN b TO y, - RENAME COLUMN c TO z; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -SELECT * FROM t1; - -# Rename multiple columns with MyIsam Engine -ALTER TABLE t2 RENAME COLUMN a TO d, RENAME COLUMN b TO e, RENAME COLUMN c to f; -SHOW CREATE TABLE t2; -SELECT * FROM t2; - -# Mix different ALTER operations with RENAME COLUMN -ALTER TABLE t1 CHANGE COLUMN x a INT, RENAME COLUMN y TO b; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -ALTER TABLE t1 CHANGE COLUMN z c DOUBLE, RENAME COLUMN b to b; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -ALTER TABLE t1 CHANGE COLUMN a b int, RENAME COLUMN b TO c, CHANGE COLUMN c d FLOAT; ---replace_result 
$default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -ALTER TABLE t1 ADD COLUMN zz INT, RENAME COLUMN d TO f; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -ALTER TABLE t1 DROP COLUMN zz, RENAME COLUMN c TO zz; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -ALTER TABLE t1 RENAME COLUMN zz to c, DROP COLUMN f; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -ALTER TABLE t1 ADD COLUMN d INT DEFAULT 5, RENAME COLUMN c TO b, DROP COLUMN b; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; - -#Cyclic Rename -ALTER TABLE t1 RENAME COLUMN b TO d, RENAME COLUMN d TO b; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; - -# Rename with Indexes -ALTER TABLE t1 ADD KEY(b); ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -ALTER TABLE t1 RENAME COLUMN b TO bb; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -SELECT * FROM t1; - -# Rename with Foreign keys. 
-CREATE TABLE t3(a int, b int, KEY(b)); -ALTER TABLE t3 ADD CONSTRAINT FOREIGN KEY(b) REFERENCES t1(bb); ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t3; -ALTER TABLE t1 RENAME COLUMN bb TO b; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -ALTER TABLE t3 RENAME COLUMN b TO c; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t3; - -# Different Algorithm -CREATE TABLE t4(a int); -ALTER TABLE t4 RENAME COLUMN a TO aa, ALGORITHM = INPLACE; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t4; -ALTER TABLE t4 RENAME COLUMN aa TO a, ALGORITHM = COPY; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t4; -DROP TABLE t4; - -# View, Trigger and SP -CREATE VIEW v1 AS SELECT d,e,f FROM t2; -CREATE TRIGGER trg1 BEFORE UPDATE on t2 FOR EACH ROW SET NEW.d=OLD.d + 10; -CREATE PROCEDURE sp1() INSERT INTO t2(d) VALUES(10); -ALTER TABLE t2 RENAME COLUMN d TO g; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t2; -SHOW CREATE VIEW v1; ---error ER_VIEW_INVALID -SELECT * FROM v1; ---error ER_BAD_FIELD_ERROR -UPDATE t2 SET f = f + 10; ---error ER_BAD_FIELD_ERROR -CALL sp1(); -DROP TRIGGER trg1; -DROP PROCEDURE sp1; - -# Generated Columns -if (!$MTR_COMBINATION_HEAP) -{ -CREATE TABLE t_gen(a INT, b DOUBLE GENERATED ALWAYS AS (SQRT(a))); -INSERT INTO t_gen(a) VALUES(4); -SELECT * FROM t_gen; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t_gen; -ALTER TABLE t_gen RENAME COLUMN a TO c, CHANGE COLUMN b b DOUBLE GENERATED ALWAYS AS (SQRT(c)); -SELECT * FROM t_gen; ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t_gen; -#--error ER_DEPENDENT_BY_GENERATED_COLUMN -ALTER TABLE t_gen CHANGE COLUMN c x INT; ---replace_result $default_engine DEFAULT_ENGINE " 
PAGE_CHECKSUM=1" "" -show create table t_gen; -#--error ER_DEPENDENT_BY_GENERATED_COLUMN -ALTER TABLE t_gen RENAME COLUMN x TO a; -DROP TABLE t_gen; -} - - -# -# Negative tests -# ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; - -# Invalid Syntax ---error ER_PARSE_ERROR -ALTER TABLE t1 RENAME COLUMN b z; ---error ER_PARSE_ERROR -ALTER TABLE t1 RENAME COLUMN FROM b TO z; ---error ER_PARSE_ERROR -ALTER TABLE t1 RENAME COLUMN b TO 1; - -# Duplicate column name ---error ER_BAD_FIELD_ERROR -ALTER TABLE t1 RENAME COLUMN b TO e, RENAME COLUMN c TO e; ---error ER_DUP_FIELDNAME -ALTER TABLE t1 ADD COLUMN z INT, RENAME COLUMN b TO z; - -# Multiple operation on same column ---error ER_BAD_FIELD_ERROR -ALTER TABLE t1 DROP COLUMN b, RENAME COLUMN b TO z; ---error ER_BAD_FIELD_ERROR -ALTER TABLE t1 RENAME COLUMN b TO b, RENAME COLUMN b TO b; ---error ER_CANT_DROP_FIELD_OR_KEY -ALTER TABLE t1 RENAME COLUMN b TO c3, DROP COLUMN c3; ---error ER_BAD_FIELD_ERROR -ALTER TABLE t1 ADD COLUMN z INT, CHANGE COLUMN z y INT, DROP COLUMN y; ---error ER_BAD_FIELD_ERROR -ALTER TABLE t1 ADD COLUMN z INT, RENAME COLUMN z TO y, DROP COLUMN y; - -# Invalid column name while renaming ---error ER_WRONG_COLUMN_NAME -ALTER TABLE t1 RENAME COLUMN b TO `nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn`; -# This error is different compared to ALTER TABLE ... 
CHANGE command ---error ER_TOO_LONG_IDENT -ALTER TABLE t1 CHANGE b `nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn` int; - ---replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" -SHOW CREATE TABLE t1; -SELECT * FROM t1; - -# Cleanup -DROP VIEW v1; -DROP TABLE t3,t1,t2; -SET DEFAULT_STORAGE_ENGINE= @save_default_engine; - --echo # --echo # MDEV-7318 RENAME INDEX --echo # @@ -2608,47 +2364,6 @@ alter table t1 rename column abc to ABC; show create table t1; drop table t1; ---echo # ---echo # MDEV-25803 Inplace ALTER breaks MyISAM/Aria tables when order of keys is changed ---echo # -set @save_default_engine= @@default_storage_engine; ---disable_query_log -if ($MTR_COMBINATION_INNODB) -{ - set default_storage_engine= innodb; -} -if ($MTR_COMBINATION_ARIA) -{ - set default_storage_engine= aria; -} ---enable_query_log - -if (!$MTR_COMBINATION_INNODB) -{ - --disable_query_log - --disable_result_log - # There is no inplace ADD INDEX for MyISAM/Aria: - create or replace table t1 (x int); - --error ER_ALTER_OPERATION_NOT_SUPPORTED - alter table t1 add unique (x), algorithm=inplace; - --error ER_ALTER_OPERATION_NOT_SUPPORTED - alter table t1 add primary key(x), algorithm=inplace; - --error ER_ALTER_OPERATION_NOT_SUPPORTED - alter table t1 add index(x), algorithm=inplace; - --enable_query_log - --enable_result_log -} - -create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)) engine myisam; -alter table t1 change x xx int, algorithm=inplace; -check table t1; -create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)); -alter table t1 change x xx int, algorithm=inplace; -check table t1; -# cleanup -drop table t1; -set @@default_storage_engine= @save_default_engine; - --echo # --echo # MDEV-25555 Server crashes in tree_record_pos after INPLACE-recreating index on HEAP table --echo # diff --git a/mysql-test/main/alter_table_combinations,aria.rdiff 
b/mysql-test/main/alter_table_combinations,aria.rdiff new file mode 100644 index 00000000000..c549f307d1f --- /dev/null +++ b/mysql-test/main/alter_table_combinations,aria.rdiff @@ -0,0 +1,22 @@ +--- main/alter_table_combinations.result 2022-05-24 17:16:56.769146869 +0200 ++++ main/alter_table_combinations.reject 2022-05-24 17:25:20.847126357 +0200 +@@ -173,8 +173,7 @@ + t3 CREATE TABLE `t3` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, +- KEY `b` (`b`), +- CONSTRAINT `t3_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`bb`) ++ KEY `b` (`b`) + ) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 + ALTER TABLE t1 RENAME COLUMN bb TO b; + SHOW CREATE TABLE t1; +@@ -190,8 +189,7 @@ + t3 CREATE TABLE `t3` ( + `a` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, +- KEY `b` (`c`), +- CONSTRAINT `t3_ibfk_1` FOREIGN KEY (`c`) REFERENCES `t1` (`b`) ++ KEY `b` (`c`) + ) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 + CREATE TABLE t4(a int); + ALTER TABLE t4 RENAME COLUMN a TO aa, ALGORITHM = INPLACE; diff --git a/mysql-test/main/alter_table_combinations,heap.rdiff b/mysql-test/main/alter_table_combinations,heap.rdiff new file mode 100644 index 00000000000..ed84bbe73c1 --- /dev/null +++ b/mysql-test/main/alter_table_combinations,heap.rdiff @@ -0,0 +1,77 @@ +--- main/alter_table_combinations.result 2022-05-24 17:16:56.769146869 +0200 ++++ main/alter_table_combinations.reject 2022-05-24 17:25:01.216127156 +0200 +@@ -11,7 +11,7 @@ + alter table t1 change x xx int, algorithm=inplace; + check table t1; + Table Op Msg_type Msg_text +-test.t1 check status OK ++test.t1 check note The storage engine for the table doesn't support check + drop table t1; + # + # End of 10.3 tests +@@ -173,8 +173,7 @@ + t3 CREATE TABLE `t3` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, +- KEY `b` (`b`), +- CONSTRAINT `t3_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`bb`) ++ KEY `b` (`b`) + ) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 + ALTER TABLE t1 RENAME COLUMN bb TO b; + SHOW CREATE TABLE 
t1; +@@ -190,8 +189,7 @@ + t3 CREATE TABLE `t3` ( + `a` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, +- KEY `b` (`c`), +- CONSTRAINT `t3_ibfk_1` FOREIGN KEY (`c`) REFERENCES `t1` (`b`) ++ KEY `b` (`c`) + ) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 + CREATE TABLE t4(a int); + ALTER TABLE t4 RENAME COLUMN a TO aa, ALGORITHM = INPLACE; +@@ -231,36 +229,6 @@ + ERROR 42S22: Unknown column 'd' in 'field list' + DROP TRIGGER trg1; + DROP PROCEDURE sp1; +-CREATE TABLE t_gen(a INT, b DOUBLE GENERATED ALWAYS AS (SQRT(a))); +-INSERT INTO t_gen(a) VALUES(4); +-SELECT * FROM t_gen; +-a b +-4 2 +-SHOW CREATE TABLE t_gen; +-Table Create Table +-t_gen CREATE TABLE `t_gen` ( +- `a` int(11) DEFAULT NULL, +- `b` double GENERATED ALWAYS AS (sqrt(`a`)) VIRTUAL +-) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +-ALTER TABLE t_gen RENAME COLUMN a TO c, CHANGE COLUMN b b DOUBLE GENERATED ALWAYS AS (SQRT(c)); +-SELECT * FROM t_gen; +-c b +-4 2 +-SHOW CREATE TABLE t_gen; +-Table Create Table +-t_gen CREATE TABLE `t_gen` ( +- `c` int(11) DEFAULT NULL, +- `b` double GENERATED ALWAYS AS (sqrt(`c`)) VIRTUAL +-) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +-ALTER TABLE t_gen CHANGE COLUMN c x INT; +-show create table t_gen; +-Table Create Table +-t_gen CREATE TABLE `t_gen` ( +- `x` int(11) DEFAULT NULL, +- `b` double GENERATED ALWAYS AS (sqrt(`x`)) VIRTUAL +-) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +-ALTER TABLE t_gen RENAME COLUMN x TO a; +-DROP TABLE t_gen; + SHOW CREATE TABLE t1; + Table Create Table + t1 CREATE TABLE `t1` ( +@@ -316,7 +284,7 @@ + alter table t1 change x xx int, algorithm=inplace; + check table t1; + Table Op Msg_type Msg_text +-test.t1 check status OK ++test.t1 check note The storage engine for the table doesn't support check + drop table t1; + # + # End of 10.5 tests diff --git a/mysql-test/main/alter_table_combinations.combinations b/mysql-test/main/alter_table_combinations.combinations new file mode 100644 index 00000000000..01c0e4b31b2 --- /dev/null +++ 
b/mysql-test/main/alter_table_combinations.combinations @@ -0,0 +1,6 @@ +[innodb] +innodb + +[aria] + +[heap] diff --git a/mysql-test/main/alter_table_combinations.result b/mysql-test/main/alter_table_combinations.result new file mode 100644 index 00000000000..459447f343e --- /dev/null +++ b/mysql-test/main/alter_table_combinations.result @@ -0,0 +1,324 @@ +set @save_default_engine= @@default_storage_engine; +# +# MDEV-25803 Inplace ALTER breaks MyISAM/Aria tables when order of keys is changed +# +create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)) engine myisam; +alter table t1 change x xx int, algorithm=inplace; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)); +alter table t1 change x xx int, algorithm=inplace; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +# +# End of 10.3 tests +# +# +# MDEV-16290 ALTER TABLE ... 
RENAME COLUMN syntax +# +CREATE TABLE t1(a INT, b VARCHAR(30), c FLOAT); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` varchar(30) DEFAULT NULL, + `c` float DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES(1,'abcd',1.234); +CREATE TABLE t2(a INT, b VARCHAR(30), c FLOAT) ENGINE=MyIsam; +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` varchar(30) DEFAULT NULL, + `c` float DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +INSERT INTO t2 VALUES(1,'abcd',1.234); +ALTER TABLE t1 RENAME COLUMN a TO a; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` varchar(30) DEFAULT NULL, + `c` float DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t1 RENAME COLUMN a TO m; +ALTER TABLE t1 RENAME COLUMN a TO m; +ERROR 42S22: Unknown column 'a' in 't1' +ALTER TABLE t1 RENAME COLUMN IF EXISTS a TO m; +Warnings: +Note 1054 Unknown column 'a' in 't1' +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `m` int(11) DEFAULT NULL, + `b` varchar(30) DEFAULT NULL, + `c` float DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +SELECT * FROM t1; +m b c +1 abcd 1.234 +ALTER TABLE t1 RENAME COLUMN m TO x, +RENAME COLUMN b TO y, +RENAME COLUMN c TO z; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `x` int(11) DEFAULT NULL, + `y` varchar(30) DEFAULT NULL, + `z` float DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +SELECT * FROM t1; +x y z +1 abcd 1.234 +ALTER TABLE t2 RENAME COLUMN a TO d, RENAME COLUMN b TO e, RENAME COLUMN c to f; +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `d` int(11) DEFAULT NULL, + `e` varchar(30) DEFAULT NULL, + `f` float DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +SELECT * FROM t2; +d e f +1 abcd 1.234 +ALTER TABLE t1 CHANGE COLUMN x a INT, RENAME COLUMN y TO b; 
+SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` varchar(30) DEFAULT NULL, + `z` float DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t1 CHANGE COLUMN z c DOUBLE, RENAME COLUMN b to b; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` varchar(30) DEFAULT NULL, + `c` double DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t1 CHANGE COLUMN a b int, RENAME COLUMN b TO c, CHANGE COLUMN c d FLOAT; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `b` int(11) DEFAULT NULL, + `c` varchar(30) DEFAULT NULL, + `d` float DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t1 ADD COLUMN zz INT, RENAME COLUMN d TO f; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `b` int(11) DEFAULT NULL, + `c` varchar(30) DEFAULT NULL, + `f` float DEFAULT NULL, + `zz` int(11) DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t1 DROP COLUMN zz, RENAME COLUMN c TO zz; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `b` int(11) DEFAULT NULL, + `zz` varchar(30) DEFAULT NULL, + `f` float DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t1 RENAME COLUMN zz to c, DROP COLUMN f; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `b` int(11) DEFAULT NULL, + `c` varchar(30) DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t1 ADD COLUMN d INT DEFAULT 5, RENAME COLUMN c TO b, DROP COLUMN b; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `b` varchar(30) DEFAULT NULL, + `d` int(11) DEFAULT 5 +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t1 RENAME COLUMN b TO d, RENAME COLUMN d TO b; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `d` varchar(30) DEFAULT NULL, + `b` int(11) DEFAULT 5 +) ENGINE=DEFAULT_ENGINE DEFAULT 
CHARSET=latin1 +ALTER TABLE t1 ADD KEY(b); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `d` varchar(30) DEFAULT NULL, + `b` int(11) DEFAULT 5, + KEY `b` (`b`) +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t1 RENAME COLUMN b TO bb; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `d` varchar(30) DEFAULT NULL, + `bb` int(11) DEFAULT 5, + KEY `b` (`bb`) +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +SELECT * FROM t1; +d bb +abcd 5 +CREATE TABLE t3(a int, b int, KEY(b)); +ALTER TABLE t3 ADD CONSTRAINT FOREIGN KEY(b) REFERENCES t1(bb); +SHOW CREATE TABLE t3; +Table Create Table +t3 CREATE TABLE `t3` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `b` (`b`), + CONSTRAINT `t3_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`bb`) +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t1 RENAME COLUMN bb TO b; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `d` varchar(30) DEFAULT NULL, + `b` int(11) DEFAULT 5, + KEY `b` (`b`) +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t3 RENAME COLUMN b TO c; +SHOW CREATE TABLE t3; +Table Create Table +t3 CREATE TABLE `t3` ( + `a` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `b` (`c`), + CONSTRAINT `t3_ibfk_1` FOREIGN KEY (`c`) REFERENCES `t1` (`b`) +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +CREATE TABLE t4(a int); +ALTER TABLE t4 RENAME COLUMN a TO aa, ALGORITHM = INPLACE; +SHOW CREATE TABLE t4; +Table Create Table +t4 CREATE TABLE `t4` ( + `aa` int(11) DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t4 RENAME COLUMN aa TO a, ALGORITHM = COPY; +SHOW CREATE TABLE t4; +Table Create Table +t4 CREATE TABLE `t4` ( + `a` int(11) DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +DROP TABLE t4; +CREATE VIEW v1 AS SELECT d,e,f FROM t2; +CREATE TRIGGER trg1 BEFORE UPDATE on t2 FOR EACH ROW SET NEW.d=OLD.d + 10; +CREATE PROCEDURE sp1() INSERT INTO t2(d) VALUES(10); +ALTER TABLE t2 
RENAME COLUMN d TO g; +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `g` int(11) DEFAULT NULL, + `e` varchar(30) DEFAULT NULL, + `f` float DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +SHOW CREATE VIEW v1; +View Create View character_set_client collation_connection +v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select `test`.`t2`.`d` AS `d`,`test`.`t2`.`e` AS `e`,`test`.`t2`.`f` AS `f` from `t2` latin1 latin1_swedish_ci +Warnings: +Warning 1356 View 'test.v1' references invalid table(s) or column(s) or function(s) or definer/invoker of view lack rights to use them +SELECT * FROM v1; +ERROR HY000: View 'test.v1' references invalid table(s) or column(s) or function(s) or definer/invoker of view lack rights to use them +UPDATE t2 SET f = f + 10; +ERROR 42S22: Unknown column 'd' in 'OLD' +CALL sp1(); +ERROR 42S22: Unknown column 'd' in 'field list' +DROP TRIGGER trg1; +DROP PROCEDURE sp1; +CREATE TABLE t_gen(a INT, b DOUBLE GENERATED ALWAYS AS (SQRT(a))); +INSERT INTO t_gen(a) VALUES(4); +SELECT * FROM t_gen; +a b +4 2 +SHOW CREATE TABLE t_gen; +Table Create Table +t_gen CREATE TABLE `t_gen` ( + `a` int(11) DEFAULT NULL, + `b` double GENERATED ALWAYS AS (sqrt(`a`)) VIRTUAL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t_gen RENAME COLUMN a TO c, CHANGE COLUMN b b DOUBLE GENERATED ALWAYS AS (SQRT(c)); +SELECT * FROM t_gen; +c b +4 2 +SHOW CREATE TABLE t_gen; +Table Create Table +t_gen CREATE TABLE `t_gen` ( + `c` int(11) DEFAULT NULL, + `b` double GENERATED ALWAYS AS (sqrt(`c`)) VIRTUAL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t_gen CHANGE COLUMN c x INT; +show create table t_gen; +Table Create Table +t_gen CREATE TABLE `t_gen` ( + `x` int(11) DEFAULT NULL, + `b` double GENERATED ALWAYS AS (sqrt(`x`)) VIRTUAL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t_gen RENAME COLUMN x TO a; +DROP TABLE t_gen; +SHOW CREATE TABLE t1; +Table Create Table +t1 
CREATE TABLE `t1` ( + `d` varchar(30) DEFAULT NULL, + `b` int(11) DEFAULT 5, + KEY `b` (`b`) +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +ALTER TABLE t1 RENAME COLUMN b z; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'z' at line 1 +ALTER TABLE t1 RENAME COLUMN FROM b TO z; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'FROM b TO z' at line 1 +ALTER TABLE t1 RENAME COLUMN b TO 1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '1' at line 1 +ALTER TABLE t1 RENAME COLUMN b TO e, RENAME COLUMN c TO e; +ERROR 42S22: Unknown column 'c' in 't1' +ALTER TABLE t1 ADD COLUMN z INT, RENAME COLUMN b TO z; +ERROR 42S21: Duplicate column name 'z' +ALTER TABLE t1 DROP COLUMN b, RENAME COLUMN b TO z; +ERROR 42S22: Unknown column 'b' in 't1' +ALTER TABLE t1 RENAME COLUMN b TO b, RENAME COLUMN b TO b; +ERROR 42S22: Unknown column 'b' in 't1' +ALTER TABLE t1 RENAME COLUMN b TO c3, DROP COLUMN c3; +ERROR 42000: Can't DROP COLUMN `c3`; check that it exists +ALTER TABLE t1 ADD COLUMN z INT, CHANGE COLUMN z y INT, DROP COLUMN y; +ERROR 42S22: Unknown column 'z' in 't1' +ALTER TABLE t1 ADD COLUMN z INT, RENAME COLUMN z TO y, DROP COLUMN y; +ERROR 42S22: Unknown column 'z' in 't1' +ALTER TABLE t1 RENAME COLUMN b TO `nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn`; +ERROR 42000: Incorrect column name 'nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn' +ALTER TABLE t1 CHANGE b `nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn` int; +ERROR 42000: Identifier name 'nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn' is too long +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `d` varchar(30) 
DEFAULT NULL, + `b` int(11) DEFAULT 5, + KEY `b` (`b`) +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 +SELECT * FROM t1; +d b +abcd 5 +DROP VIEW v1; +DROP TABLE t3,t1,t2; +# +# MDEV-25803 Inplace ALTER breaks MyISAM/Aria tables when order of keys is changed +# +create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)) engine myisam; +alter table t1 change x xx int, algorithm=inplace; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)); +alter table t1 change x xx int, algorithm=inplace; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +# +# End of 10.5 tests +# +set @@default_storage_engine= @save_default_engine; diff --git a/mysql-test/main/alter_table_combinations.test b/mysql-test/main/alter_table_combinations.test new file mode 100644 index 00000000000..7c8d7f42096 --- /dev/null +++ b/mysql-test/main/alter_table_combinations.test @@ -0,0 +1,263 @@ +set @save_default_engine= @@default_storage_engine; +--disable_query_log +if ($MTR_COMBINATION_INNODB) +{ +set default_storage_engine= innodb; +} +if ($MTR_COMBINATION_ARIA) +{ +set default_storage_engine= aria; +} +if ($MTR_COMBINATION_HEAP) +{ +set default_storage_engine= memory; +} +--enable_query_log +let $default_engine= `select @@default_storage_engine`; + +--echo # +--echo # MDEV-25803 Inplace ALTER breaks MyISAM/Aria tables when order of keys is changed +--echo # + +if (!$MTR_COMBINATION_INNODB) +{ + --disable_query_log + --disable_result_log + # There is no inplace ADD INDEX for MyISAM/Aria: + create or replace table t1 (x int); + --error ER_ALTER_OPERATION_NOT_SUPPORTED + alter table t1 add unique (x), algorithm=inplace; + --error ER_ALTER_OPERATION_NOT_SUPPORTED + alter table t1 add primary key(x), algorithm=inplace; + --error ER_ALTER_OPERATION_NOT_SUPPORTED + alter table t1 add index(x), algorithm=inplace; + --enable_query_log + 
--enable_result_log +} + +create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)) engine myisam; +alter table t1 change x xx int, algorithm=inplace; +check table t1; +create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)); +alter table t1 change x xx int, algorithm=inplace; +check table t1; +drop table t1; + +--echo # +--echo # End of 10.3 tests +--echo # + +--echo # +--echo # MDEV-16290 ALTER TABLE ... RENAME COLUMN syntax +--echo # + +CREATE TABLE t1(a INT, b VARCHAR(30), c FLOAT); +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES(1,'abcd',1.234); +CREATE TABLE t2(a INT, b VARCHAR(30), c FLOAT) ENGINE=MyIsam; +SHOW CREATE TABLE t2; +INSERT INTO t2 VALUES(1,'abcd',1.234); + +# Rename one column +ALTER TABLE t1 RENAME COLUMN a TO a; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +ALTER TABLE t1 RENAME COLUMN a TO m; +--error ER_BAD_FIELD_ERROR +ALTER TABLE t1 RENAME COLUMN a TO m; +ALTER TABLE t1 RENAME COLUMN IF EXISTS a TO m; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +SELECT * FROM t1; + +# Rename multiple column +ALTER TABLE t1 RENAME COLUMN m TO x, + RENAME COLUMN b TO y, + RENAME COLUMN c TO z; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +SELECT * FROM t1; + +# Rename multiple columns with MyIsam Engine +ALTER TABLE t2 RENAME COLUMN a TO d, RENAME COLUMN b TO e, RENAME COLUMN c to f; +SHOW CREATE TABLE t2; +SELECT * FROM t2; + +# Mix different ALTER operations with RENAME COLUMN +ALTER TABLE t1 CHANGE COLUMN x a INT, RENAME COLUMN y TO b; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +ALTER TABLE t1 CHANGE COLUMN z c DOUBLE, RENAME COLUMN b to b; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +ALTER 
TABLE t1 CHANGE COLUMN a b int, RENAME COLUMN b TO c, CHANGE COLUMN c d FLOAT; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +ALTER TABLE t1 ADD COLUMN zz INT, RENAME COLUMN d TO f; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +ALTER TABLE t1 DROP COLUMN zz, RENAME COLUMN c TO zz; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +ALTER TABLE t1 RENAME COLUMN zz to c, DROP COLUMN f; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +ALTER TABLE t1 ADD COLUMN d INT DEFAULT 5, RENAME COLUMN c TO b, DROP COLUMN b; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; + +#Cyclic Rename +ALTER TABLE t1 RENAME COLUMN b TO d, RENAME COLUMN d TO b; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; + +# Rename with Indexes +ALTER TABLE t1 ADD KEY(b); +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +ALTER TABLE t1 RENAME COLUMN b TO bb; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +SELECT * FROM t1; + +# Rename with Foreign keys. 
+CREATE TABLE t3(a int, b int, KEY(b)); +ALTER TABLE t3 ADD CONSTRAINT FOREIGN KEY(b) REFERENCES t1(bb); +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t3; +ALTER TABLE t1 RENAME COLUMN bb TO b; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +ALTER TABLE t3 RENAME COLUMN b TO c; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t3; + +# Different Algorithm +CREATE TABLE t4(a int); +ALTER TABLE t4 RENAME COLUMN a TO aa, ALGORITHM = INPLACE; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t4; +ALTER TABLE t4 RENAME COLUMN aa TO a, ALGORITHM = COPY; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t4; +DROP TABLE t4; + +# View, Trigger and SP +CREATE VIEW v1 AS SELECT d,e,f FROM t2; +CREATE TRIGGER trg1 BEFORE UPDATE on t2 FOR EACH ROW SET NEW.d=OLD.d + 10; +CREATE PROCEDURE sp1() INSERT INTO t2(d) VALUES(10); +ALTER TABLE t2 RENAME COLUMN d TO g; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t2; +SHOW CREATE VIEW v1; +--error ER_VIEW_INVALID +SELECT * FROM v1; +--error ER_BAD_FIELD_ERROR +UPDATE t2 SET f = f + 10; +--error ER_BAD_FIELD_ERROR +CALL sp1(); +DROP TRIGGER trg1; +DROP PROCEDURE sp1; + +# Generated Columns +if (!$MTR_COMBINATION_HEAP) +{ +CREATE TABLE t_gen(a INT, b DOUBLE GENERATED ALWAYS AS (SQRT(a))); +INSERT INTO t_gen(a) VALUES(4); +SELECT * FROM t_gen; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t_gen; +ALTER TABLE t_gen RENAME COLUMN a TO c, CHANGE COLUMN b b DOUBLE GENERATED ALWAYS AS (SQRT(c)); +SELECT * FROM t_gen; +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t_gen; +#--error ER_DEPENDENT_BY_GENERATED_COLUMN +ALTER TABLE t_gen CHANGE COLUMN c x INT; +--replace_result $default_engine DEFAULT_ENGINE " 
PAGE_CHECKSUM=1" "" +show create table t_gen; +#--error ER_DEPENDENT_BY_GENERATED_COLUMN +ALTER TABLE t_gen RENAME COLUMN x TO a; +DROP TABLE t_gen; +} + +# +# Negative tests +# +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; + +# Invalid Syntax +--error ER_PARSE_ERROR +ALTER TABLE t1 RENAME COLUMN b z; +--error ER_PARSE_ERROR +ALTER TABLE t1 RENAME COLUMN FROM b TO z; +--error ER_PARSE_ERROR +ALTER TABLE t1 RENAME COLUMN b TO 1; + +# Duplicate column name +--error ER_BAD_FIELD_ERROR +ALTER TABLE t1 RENAME COLUMN b TO e, RENAME COLUMN c TO e; +--error ER_DUP_FIELDNAME +ALTER TABLE t1 ADD COLUMN z INT, RENAME COLUMN b TO z; + +# Multiple operation on same column +--error ER_BAD_FIELD_ERROR +ALTER TABLE t1 DROP COLUMN b, RENAME COLUMN b TO z; +--error ER_BAD_FIELD_ERROR +ALTER TABLE t1 RENAME COLUMN b TO b, RENAME COLUMN b TO b; +--error ER_CANT_DROP_FIELD_OR_KEY +ALTER TABLE t1 RENAME COLUMN b TO c3, DROP COLUMN c3; +--error ER_BAD_FIELD_ERROR +ALTER TABLE t1 ADD COLUMN z INT, CHANGE COLUMN z y INT, DROP COLUMN y; +--error ER_BAD_FIELD_ERROR +ALTER TABLE t1 ADD COLUMN z INT, RENAME COLUMN z TO y, DROP COLUMN y; + +# Invalid column name while renaming +--error ER_WRONG_COLUMN_NAME +ALTER TABLE t1 RENAME COLUMN b TO `nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn`; +# This error is different compared to ALTER TABLE ... 
CHANGE command +--error ER_TOO_LONG_IDENT +ALTER TABLE t1 CHANGE b `nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn` int; + +--replace_result $default_engine DEFAULT_ENGINE " PAGE_CHECKSUM=1" "" +SHOW CREATE TABLE t1; +SELECT * FROM t1; + +# Cleanup +DROP VIEW v1; +DROP TABLE t3,t1,t2; + +--echo # +--echo # MDEV-25803 Inplace ALTER breaks MyISAM/Aria tables when order of keys is changed +--echo # + +if (!$MTR_COMBINATION_INNODB) +{ + --disable_query_log + --disable_result_log + # There is no inplace ADD INDEX for MyISAM/Aria: + create or replace table t1 (x int); + --error ER_ALTER_OPERATION_NOT_SUPPORTED + alter table t1 add unique (x), algorithm=inplace; + --error ER_ALTER_OPERATION_NOT_SUPPORTED + alter table t1 add primary key(x), algorithm=inplace; + --error ER_ALTER_OPERATION_NOT_SUPPORTED + alter table t1 add index(x), algorithm=inplace; + --enable_query_log + --enable_result_log +} + +create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)) engine myisam; +alter table t1 change x xx int, algorithm=inplace; +check table t1; +create or replace table t1 (x int, y int, unique (y), unique (x), primary key(x)); +alter table t1 change x xx int, algorithm=inplace; +check table t1; +drop table t1; + +--echo # +--echo # End of 10.5 tests +--echo # + +set @@default_storage_engine= @save_default_engine; -- cgit v1.2.1 From 2840d7750db11a8d2ab3f212a05f5afefaef6d4d Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 24 May 2022 17:51:20 +0200 Subject: fix not_valgrind.inc not to error out in embedded --- mysql-test/include/not_valgrind.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysql-test/include/not_valgrind.inc b/mysql-test/include/not_valgrind.inc index 8bda3497878..a2372db4c4f 100644 --- a/mysql-test/include/not_valgrind.inc +++ b/mysql-test/include/not_valgrind.inc @@ -1,4 +1,4 @@ --require include/not_valgrind.require --disable_query_log -eval select $VALGRIND_TEST as using_valgrind; 
+eval select $VALGRIND_TEST+0 as using_valgrind; --enable_query_log -- cgit v1.2.1 From c8fabbed425140332038537c8baeaa33c96a9db5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Mon, 30 May 2022 11:11:48 +0300 Subject: MDEV-20627 : Galera 4 not able to report proper wsrep_incoming_addresses wsrep_server_incoming_address function always returned value of the wsrep_node_incoming_address even when actual incoming address was resolved to inc_addr variable. Fixed by returning inc_addr if it does contain incoming address. --- sql/wsrep_mysqld.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index d53fb7320e4..3a5db89d02b 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -593,7 +593,7 @@ static std::string wsrep_server_incoming_address() bool is_ipv6= false; unsigned int my_bind_ip= INADDR_ANY; // default if not set - if (my_bind_addr_str && strlen(my_bind_addr_str) && + if (my_bind_addr_str && strlen(my_bind_addr_str) && strcmp(my_bind_addr_str, "*") != 0) { my_bind_ip= wsrep_check_ip(my_bind_addr_str, &is_ipv6); @@ -658,9 +658,13 @@ static std::string wsrep_server_incoming_address() snprintf(inc_addr, inc_addr_max, fmt, addr.get_address(), port); } - + done: - ret= wsrep_node_incoming_address; + if (!strlen(inc_addr)) + ret= wsrep_node_incoming_address; + else + ret= inc_addr; + WSREP_DEBUG("wsrep_incoming_address = %s", ret.c_str()); return ret; } -- cgit v1.2.1 From ebbd5ef6e2902a51a46e47dbb8a8667593cb25e7 Mon Sep 17 00:00:00 2001 From: mkaruza Date: Tue, 10 May 2022 11:15:32 +0200 Subject: MDEV-27862 Galera should replicate nextval()-related changes in sequences with INCREMENT <> 0, at least NOCACHE ones with engine=InnoDB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sequence storage engine is not transactional so cache will be written in stmt_cache that is not replicated in cluster. 
To fix this replicate what is available in both trans_cache and stmt_cache. Sequences will only work when NOCACHE keyword is used when sequence is created. If WSREP is enabled and we don't have this keyword report error indicating that sequence will not work correctly in cluster. When binlog is enabled statement cache will be cleared in transaction before COMMIT so cache generated from sequence will not be replicated. We need to keep cache until replication. Tests are re-recorded because of replication changes that were introduced with this PR. Reviewed-by: Jan Lindström --- mysql-test/suite/galera/r/MDEV-18832.result | 3 + mysql-test/suite/galera/r/MDEV-27862.result | 54 ++++++++++++++++++ mysql-test/suite/galera/r/galera_sequences.result | 5 +- mysql-test/suite/galera/t/MDEV-18832.test | 8 +++ mysql-test/suite/galera/t/MDEV-27862.combinations | 4 ++ mysql-test/suite/galera/t/MDEV-27862.test | 67 +++++++++++++++++++++++ mysql-test/suite/galera/t/galera_sequences.test | 6 ++ sql/ha_sequence.cc | 3 + sql/log.cc | 10 +++- sql/log.h | 2 +- sql/sql_sequence.cc | 11 ++++ sql/wsrep_binlog.cc | 8 +-- sql/wsrep_client_service.cc | 29 ++++++++-- 13 files changed, 197 insertions(+), 13 deletions(-) create mode 100644 mysql-test/suite/galera/r/MDEV-27862.result create mode 100644 mysql-test/suite/galera/t/MDEV-27862.combinations create mode 100644 mysql-test/suite/galera/t/MDEV-27862.test diff --git a/mysql-test/suite/galera/r/MDEV-18832.result b/mysql-test/suite/galera/r/MDEV-18832.result index 700a0bbefb1..2e0872b9f2e 100644 --- a/mysql-test/suite/galera/r/MDEV-18832.result +++ b/mysql-test/suite/galera/r/MDEV-18832.result @@ -12,3 +12,6 @@ INSERT INTO t1 VALUES (NEXT VALUE FOR Seq1_1); ERROR 23000: Duplicate entry '1' for key 'PRIMARY' DROP SEQUENCE Seq1_1; DROP TABLE t1; +CALL mtr.add_suppression("SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); +connection node_2; +CALL mtr.add_suppression("SEQUENCES declared without `NOCACHE` will not 
behave correctly in galera cluster."); diff --git a/mysql-test/suite/galera/r/MDEV-27862.result b/mysql-test/suite/galera/r/MDEV-27862.result new file mode 100644 index 00000000000..25b7bc6cfd2 --- /dev/null +++ b/mysql-test/suite/galera/r/MDEV-27862.result @@ -0,0 +1,54 @@ +connection node_2; +connection node_1; +CREATE SEQUENCE seq_nocache ENGINE=InnoDB; +DROP SEQUENCE seq_nocache; +CALL mtr.add_suppression("SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); +connection node_2; +CALL mtr.add_suppression("SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); +connection node_1; +CREATE SEQUENCE seq NOCACHE ENGINE=InnoDB; +SELECT NEXTVAL(seq) = 1; +NEXTVAL(seq) = 1 +1 +connection node_2; +SELECT NEXTVAL(seq) = 2; +NEXTVAL(seq) = 2 +1 +connection node_1; +SELECT NEXTVAL(seq) = 3; +NEXTVAL(seq) = 3 +1 +SELECT SETVAL(seq, 100); +SETVAL(seq, 100) +100 +connection node_2; +SELECT NEXTVAL(seq) = 101; +NEXTVAL(seq) = 101 +1 +connection node_1; +SELECT NEXTVAL(seq) = 102; +NEXTVAL(seq) = 102 +1 +DROP SEQUENCE seq; +CREATE TABLE t1(f1 INT); +CREATE SEQUENCE seq_transaction NOCACHE ENGINE=InnoDB; +START TRANSACTION; +INSERT INTO t1 VALUES (0); +SELECT NEXTVAL(seq_transaction); +NEXTVAL(seq_transaction) +1 +INSERT INTO t1 VALUES (NEXTVAL(seq_transaction)); +COMMIT; +connection node_2; +SELECT COUNT(*) = 2 FROM t1; +COUNT(*) = 2 +1 +SELECT NEXTVAL(seq_transaction) = 3; +NEXTVAL(seq_transaction) = 3 +1 +connection node_1; +SELECT NEXTVAL(seq_transaction) = 4; +NEXTVAL(seq_transaction) = 4 +1 +DROP SEQUENCE seq_transaction; +DROP TABLE t1; diff --git a/mysql-test/suite/galera/r/galera_sequences.result b/mysql-test/suite/galera/r/galera_sequences.result index 48593d2a258..7276cb8dbde 100644 --- a/mysql-test/suite/galera/r/galera_sequences.result +++ b/mysql-test/suite/galera/r/galera_sequences.result @@ -44,6 +44,9 @@ Table Create Table Seq1_1 CREATE SEQUENCE `Seq1_1` start with 1 minvalue 1 maxvalue 
9223372036854775806 increment by 1 cache 1000 nocycle ENGINE=InnoDB select NEXT VALUE FOR Seq1_1; NEXT VALUE FOR Seq1_1 -1 +3001 connection node_1; DROP SEQUENCE Seq1_1; +CALL mtr.add_suppression("SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); +connection node_2; +CALL mtr.add_suppression("SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); diff --git a/mysql-test/suite/galera/t/MDEV-18832.test b/mysql-test/suite/galera/t/MDEV-18832.test index e2f1b2afd45..ba93761435a 100644 --- a/mysql-test/suite/galera/t/MDEV-18832.test +++ b/mysql-test/suite/galera/t/MDEV-18832.test @@ -13,3 +13,11 @@ CREATE SEQUENCE Seq1_1 START WITH 1 INCREMENT BY 1; INSERT INTO t1 VALUES (NEXT VALUE FOR Seq1_1); DROP SEQUENCE Seq1_1; DROP TABLE t1; + +# Supress warning for SEQUENCES that are declared without `NOCACHE` introduced with MDEV-27862 + +CALL mtr.add_suppression("SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); + +--connection node_2 + +CALL mtr.add_suppression("SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); diff --git a/mysql-test/suite/galera/t/MDEV-27862.combinations b/mysql-test/suite/galera/t/MDEV-27862.combinations new file mode 100644 index 00000000000..1eeb8fb4614 --- /dev/null +++ b/mysql-test/suite/galera/t/MDEV-27862.combinations @@ -0,0 +1,4 @@ +[binlogoff] + +[binlogon] +log-bin diff --git a/mysql-test/suite/galera/t/MDEV-27862.test b/mysql-test/suite/galera/t/MDEV-27862.test new file mode 100644 index 00000000000..89d3465b91f --- /dev/null +++ b/mysql-test/suite/galera/t/MDEV-27862.test @@ -0,0 +1,67 @@ +--source include/galera_cluster.inc +--source include/have_innodb.inc + +# Report WARNING when SEQUENCE is created without `NOCACHE` + +CREATE SEQUENCE seq_nocache ENGINE=InnoDB; +DROP SEQUENCE seq_nocache; + +CALL mtr.add_suppression("SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); + 
+--connection node_2 + +CALL mtr.add_suppression("SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); + +# NEXTVAL + +--connection node_1 + +CREATE SEQUENCE seq NOCACHE ENGINE=InnoDB; + +SELECT NEXTVAL(seq) = 1; + +--connection node_2 + +SELECT NEXTVAL(seq) = 2; + +--connection node_1 + +SELECT NEXTVAL(seq) = 3; + + +# SETVAL + +SELECT SETVAL(seq, 100); + +--connection node_2 + +SELECT NEXTVAL(seq) = 101; + +--connection node_1 + +SELECT NEXTVAL(seq) = 102; + +DROP SEQUENCE seq; + +# TRANSACTIONS + +CREATE TABLE t1(f1 INT); +CREATE SEQUENCE seq_transaction NOCACHE ENGINE=InnoDB; + +START TRANSACTION; +INSERT INTO t1 VALUES (0); +SELECT NEXTVAL(seq_transaction); +INSERT INTO t1 VALUES (NEXTVAL(seq_transaction)); +COMMIT; + +--connection node_2 + +SELECT COUNT(*) = 2 FROM t1; +SELECT NEXTVAL(seq_transaction) = 3; + +--connection node_1 +SELECT NEXTVAL(seq_transaction) = 4; + +DROP SEQUENCE seq_transaction; +DROP TABLE t1; + diff --git a/mysql-test/suite/galera/t/galera_sequences.test b/mysql-test/suite/galera/t/galera_sequences.test index 480366f6a6f..d469cc73516 100644 --- a/mysql-test/suite/galera/t/galera_sequences.test +++ b/mysql-test/suite/galera/t/galera_sequences.test @@ -44,3 +44,9 @@ select NEXT VALUE FOR Seq1_1; --connection node_1 DROP SEQUENCE Seq1_1; + +CALL mtr.add_suppression("SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); + +--connection node_2 + +CALL mtr.add_suppression("SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); diff --git a/sql/ha_sequence.cc b/sql/ha_sequence.cc index b0611c1505a..1331fea74d1 100644 --- a/sql/ha_sequence.cc +++ b/sql/ha_sequence.cc @@ -434,6 +434,9 @@ static int sequence_initialize(void *p) HTON_HIDDEN | HTON_TEMPORARY_NOT_SUPPORTED | HTON_ALTER_NOT_SUPPORTED | +#ifdef WITH_WSREP + HTON_WSREP_REPLICATION | +#endif HTON_NO_PARTITION); DBUG_RETURN(0); } diff --git a/sql/log.cc b/sql/log.cc index 25cf0831469..ec96a2f9b23 
100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -2013,7 +2013,13 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all) thd->backup_stage(&org_stage); THD_STAGE_INFO(thd, stage_binlog_write); +#ifdef WITH_WSREP + // DON'T clear stmt cache in case we are in transaction + if (!cache_mngr->stmt_cache.empty() && + (!wsrep_on(thd) || ending_trans(thd, all))) +#else if (!cache_mngr->stmt_cache.empty()) +#endif { error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr); } @@ -10777,13 +10783,13 @@ maria_declare_plugin_end; #ifdef WITH_WSREP #include "wsrep_mysqld.h" -IO_CACHE *wsrep_get_trans_cache(THD * thd) +IO_CACHE *wsrep_get_cache(THD * thd, bool is_transactional) { DBUG_ASSERT(binlog_hton->slot != HA_SLOT_UNDEF); binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); if (cache_mngr) - return cache_mngr->get_binlog_cache_log(true); + return cache_mngr->get_binlog_cache_log(is_transactional); WSREP_DEBUG("binlog cache not initialized, conn: %llu", thd->thread_id); diff --git a/sql/log.h b/sql/log.h index 5cb75c97b5e..2520c8fede8 100644 --- a/sql/log.h +++ b/sql/log.h @@ -1239,7 +1239,7 @@ static inline TC_LOG *get_tc_log_implementation() } #ifdef WITH_WSREP -IO_CACHE* wsrep_get_trans_cache(THD *); +IO_CACHE* wsrep_get_cache(THD *, bool); void wsrep_thd_binlog_trx_reset(THD * thd); void wsrep_thd_binlog_stmt_rollback(THD * thd); #endif /* WITH_WSREP */ diff --git a/sql/sql_sequence.cc b/sql/sql_sequence.cc index 367fbad144c..8488fa67ecb 100644 --- a/sql/sql_sequence.cc +++ b/sql/sql_sequence.cc @@ -308,6 +308,11 @@ bool sequence_insert(THD *thd, LEX *lex, TABLE_LIST *org_table_list) DBUG_RETURN(TRUE); } +#ifdef WITH_WSREP + if (WSREP_ON && seq->cache != 0) + WSREP_WARN("CREATE SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); +#endif + /* If not temporary table */ if (!temporary_table) { @@ -904,12 +909,18 @@ bool Sql_cmd_alter_sequence::execute(THD *thd) No_such_table_error_handler 
no_such_table_handler; DBUG_ENTER("Sql_cmd_alter_sequence::execute"); + if (check_access(thd, ALTER_ACL, first_table->db.str, &first_table->grant.privilege, &first_table->grant.m_internal, 0, 0)) DBUG_RETURN(TRUE); /* purecov: inspected */ +#ifdef WITH_WSREP + if (WSREP_ON && new_seq->cache != 0) + WSREP_WARN("ALTER SEQUENCES declared without `NOCACHE` will not behave correctly in galera cluster."); +#endif + if (check_grant(thd, ALTER_ACL, first_table, FALSE, 1, FALSE)) DBUG_RETURN(TRUE); /* purecov: inspected */ diff --git a/sql/wsrep_binlog.cc b/sql/wsrep_binlog.cc index d04d9989e99..5c1c899fcde 100644 --- a/sql/wsrep_binlog.cc +++ b/sql/wsrep_binlog.cc @@ -155,10 +155,10 @@ static int wsrep_write_cache_inc(THD* const thd, goto cleanup; cache->read_pos= cache->read_end; } while ((cache->file >= 0) && (length= my_b_fill(cache))); - } - if (ret == 0) - { - assert(total_length + thd->wsrep_sr().log_position() == saved_pos); + if (ret == 0) + { + assert(total_length + thd->wsrep_sr().log_position() == saved_pos); + } } cleanup: diff --git a/sql/wsrep_client_service.cc b/sql/wsrep_client_service.cc index 464296ea6cf..59ba8c30e43 100644 --- a/sql/wsrep_client_service.cc +++ b/sql/wsrep_client_service.cc @@ -86,18 +86,37 @@ int Wsrep_client_service::prepare_data_for_replication() DBUG_ASSERT(m_thd == current_thd); DBUG_ENTER("Wsrep_client_service::prepare_data_for_replication"); size_t data_len= 0; - IO_CACHE* cache= wsrep_get_trans_cache(m_thd); + IO_CACHE* transactional_cache= wsrep_get_cache(m_thd, true); + IO_CACHE* stmt_cache= wsrep_get_cache(m_thd, false); - if (cache) + if (transactional_cache || stmt_cache) { m_thd->binlog_flush_pending_rows_event(true); - if (wsrep_write_cache(m_thd, cache, &data_len)) + + size_t transactional_data_len= 0; + size_t stmt_data_len= 0; + + // Write transactional cache + if (transactional_cache && + wsrep_write_cache(m_thd, transactional_cache, &transactional_data_len)) { WSREP_ERROR("rbr write fail, data_len: %zu", data_len); // 
wsrep_override_error(m_thd, ER_ERROR_DURING_COMMIT); DBUG_RETURN(1); } + + // Write stmt cache + if (stmt_cache && wsrep_write_cache(m_thd, stmt_cache, &stmt_data_len)) + { + WSREP_ERROR("rbr write fail, data_len: %zu", + data_len); + // wsrep_override_error(m_thd, ER_ERROR_DURING_COMMIT); + DBUG_RETURN(1); + } + + // Complete data written from both caches + data_len = transactional_data_len + stmt_data_len; } if (data_len == 0) @@ -139,7 +158,7 @@ int Wsrep_client_service::prepare_fragment_for_replication( DBUG_ASSERT(m_thd == current_thd); THD* thd= m_thd; DBUG_ENTER("Wsrep_client_service::prepare_fragment_for_replication"); - IO_CACHE* cache= wsrep_get_trans_cache(thd); + IO_CACHE* cache= wsrep_get_cache(thd, true); thd->binlog_flush_pending_rows_event(true); if (!cache) @@ -221,7 +240,7 @@ bool Wsrep_client_service::statement_allowed_for_streaming() const size_t Wsrep_client_service::bytes_generated() const { - IO_CACHE* cache= wsrep_get_trans_cache(m_thd); + IO_CACHE* cache= wsrep_get_cache(m_thd, true); if (cache) { size_t pending_rows_event_length= 0; -- cgit v1.2.1 From f7137a619f8518edb7eab096e59e3c825046eaeb Mon Sep 17 00:00:00 2001 From: Masashi Tomooka Date: Mon, 30 May 2022 19:28:44 +0900 Subject: MDEV-28599 EXCHANGE PARTITION on view causes ER_CHECK_NO_SUCH_TABLE instead of ER_WRONG_OBJECT ER_CHECK_NO_SUCH_TABLE was raised because a view does not have the corresponding TABLE instance connected to TABLE_LIST and the server interprets the absence as the absence of the table itself. To fix the problem, we add a check to ensure that the target table to be swapped with a partition is not a view. 
Reviewed by: Nayuta Yanagisawa --- mysql-test/main/partition_error.result | 13 ++++++++++++- mysql-test/main/partition_error.test | 14 +++++++++++++- sql/sql_partition_admin.cc | 8 ++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/mysql-test/main/partition_error.result b/mysql-test/main/partition_error.result index d99473d0718..fdedb5a4633 100644 --- a/mysql-test/main/partition_error.result +++ b/mysql-test/main/partition_error.result @@ -6,7 +6,18 @@ drop table if exists t1, t2; CREATE TABLE t1 (a int); CREATE OR REPLACE VIEW v1 AS SELECT * FROM t1; ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE v1; -ERROR 42000: Can't open table +ERROR HY000: 'test.v1' is not of type 'BASE TABLE' +DROP VIEW v1; +DROP TABLE t1; +# +# MDEV-28599 EXCHANGE PARTITION on view causes ER_CHECK_NO_SUCH_TABLE instead of ER_WRONG_OBJECT +# +CREATE TABLE t1 (a int) +PARTITION BY HASH (a) +PARTITIONS 2; +CREATE OR REPLACE VIEW v1 AS SELECT * FROM t1; +ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE v1; +ERROR HY000: 'test.v1' is not of type 'BASE TABLE' DROP VIEW v1; DROP TABLE t1; # diff --git a/mysql-test/main/partition_error.test b/mysql-test/main/partition_error.test index 8739c93fe92..7d8e76dabf7 100644 --- a/mysql-test/main/partition_error.test +++ b/mysql-test/main/partition_error.test @@ -16,7 +16,19 @@ let $MYSQLD_DATADIR= `SELECT @@datadir`; --echo # CREATE TABLE t1 (a int); CREATE OR REPLACE VIEW v1 AS SELECT * FROM t1; ---error ER_CHECK_NO_SUCH_TABLE +--error ER_WRONG_OBJECT +ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE v1; +DROP VIEW v1; +DROP TABLE t1; + +--echo # +--echo # MDEV-28599 EXCHANGE PARTITION on view causes ER_CHECK_NO_SUCH_TABLE instead of ER_WRONG_OBJECT +--echo # +CREATE TABLE t1 (a int) +PARTITION BY HASH (a) +PARTITIONS 2; +CREATE OR REPLACE VIEW v1 AS SELECT * FROM t1; +--error ER_WRONG_OBJECT ALTER TABLE t1 EXCHANGE PARTITION p0 WITH TABLE v1; DROP VIEW v1; DROP TABLE t1; diff --git a/sql/sql_partition_admin.cc 
b/sql/sql_partition_admin.cc index cec7ecc6806..90b114b792c 100644 --- a/sql/sql_partition_admin.cc +++ b/sql/sql_partition_admin.cc @@ -539,6 +539,14 @@ bool Sql_cmd_alter_table_exchange_partition:: part_table= table_list->table; swap_table= swap_table_list->table; + /* Don't allow to exchange with a VIEW */ + if (unlikely(swap_table_list->view)) + { + my_error(ER_WRONG_OBJECT, MYF(0), table_list->db.str, + swap_table_list->table_name.str, "BASE TABLE"); + DBUG_RETURN(TRUE); + } + if (unlikely(check_exchange_partition(swap_table, part_table))) DBUG_RETURN(TRUE); -- cgit v1.2.1 From 863c3eda872b19f70ce6045119bf621584e1312d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 30 May 2022 15:49:45 +0300 Subject: MDEV-28689, MDEV-28690: Incorrect error handling for ctrl_mutex comp_thread_ctxt_t: Remove ctrl_mutex, ctrl_cond, started. We do not actually need them for anything. destroy_worker_thread(): Split from destroy_worker_threads(). create_worker_threads(): We already initialize thd->data_avail=FALSE and thd->cancelled=FALSE before invoking pthread_create(). If any thread creation fails, clean up by destroy_worker_thread(). compress_worker_thread_func(): Assume that thd->started and thd->data_avail are already initialized. Reviewed by: Vladislav Vaintroub --- extra/mariabackup/ds_compress.cc | 77 ++++++++++++---------------------------- 1 file changed, 22 insertions(+), 55 deletions(-) diff --git a/extra/mariabackup/ds_compress.cc b/extra/mariabackup/ds_compress.cc index 514c86f5a17..dcfb3d5a82f 100644 --- a/extra/mariabackup/ds_compress.cc +++ b/extra/mariabackup/ds_compress.cc @@ -1,5 +1,6 @@ /****************************************************** Copyright (c) 2011-2013 Percona LLC and/or its affiliates. +Copyright (c) 2022, MariaDB Corporation. Compressing datasink implementation for XtraBackup. 
@@ -32,11 +33,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA typedef struct { pthread_t id; uint num; - pthread_mutex_t ctrl_mutex; - pthread_cond_t ctrl_cond; pthread_mutex_t data_mutex; pthread_cond_t data_cond; - my_bool started; my_bool data_avail; my_bool cancelled; const char *from; @@ -208,14 +206,13 @@ compress_write(ds_file_t *file, const uchar *buf, size_t len) thd = threads + i; - pthread_mutex_lock(&thd->ctrl_mutex); + pthread_mutex_lock(&thd->data_mutex); chunk_len = (len > COMPRESS_CHUNK_SIZE) ? COMPRESS_CHUNK_SIZE : len; thd->from = ptr; thd->from_len = chunk_len; - pthread_mutex_lock(&thd->data_mutex); thd->data_avail = TRUE; pthread_cond_signal(&thd->data_cond); pthread_mutex_unlock(&thd->data_mutex); @@ -260,7 +257,6 @@ compress_write(ds_file_t *file, const uchar *buf, size_t len) } pthread_mutex_unlock(&threads[i].data_mutex); - pthread_mutex_unlock(&threads[i].ctrl_mutex); } } @@ -330,6 +326,23 @@ write_uint64_le(ds_file_t *file, ulonglong n) return ds_write(file, tmp, sizeof(tmp)); } +static +void +destroy_worker_thread(comp_thread_ctxt_t *thd) +{ + pthread_mutex_lock(&thd->data_mutex); + thd->cancelled = TRUE; + pthread_cond_signal(&thd->data_cond); + pthread_mutex_unlock(&thd->data_mutex); + + pthread_join(thd->id, NULL); + + pthread_cond_destroy(&thd->data_cond); + pthread_mutex_destroy(&thd->data_mutex); + + my_free(thd->to); +} + static comp_thread_ctxt_t * create_worker_threads(uint n) @@ -344,7 +357,6 @@ create_worker_threads(uint n) comp_thread_ctxt_t *thd = threads + i; thd->num = i + 1; - thd->started = FALSE; thd->cancelled = FALSE; thd->data_avail = FALSE; @@ -352,46 +364,25 @@ create_worker_threads(uint n) MY_QLZ_COMPRESS_OVERHEAD, MYF(MY_FAE)); - /* Initialize the control mutex and condition var */ - if (pthread_mutex_init(&thd->ctrl_mutex, NULL) || - pthread_cond_init(&thd->ctrl_cond, NULL)) { - goto err; - } - /* Initialize and data mutex and condition var */ if 
(pthread_mutex_init(&thd->data_mutex, NULL) || pthread_cond_init(&thd->data_cond, NULL)) { goto err; } - pthread_mutex_lock(&thd->ctrl_mutex); - if (pthread_create(&thd->id, NULL, compress_worker_thread_func, thd)) { msg("compress: pthread_create() failed: " "errno = %d", errno); - pthread_mutex_unlock(&thd->ctrl_mutex); goto err; } } - /* Wait for the threads to start */ - for (i = 0; i < n; i++) { - comp_thread_ctxt_t *thd = threads + i; - - while (thd->started == FALSE) - pthread_cond_wait(&thd->ctrl_cond, &thd->ctrl_mutex); - pthread_mutex_unlock(&thd->ctrl_mutex); - } - return threads; err: - while (i > 0) { - comp_thread_ctxt_t *thd; - i--; - thd = threads + i; - pthread_mutex_unlock(&thd->ctrl_mutex); + for (; i; i--) { + destroy_worker_thread(threads + i); } my_free(threads); @@ -405,21 +396,7 @@ destroy_worker_threads(comp_thread_ctxt_t *threads, uint n) uint i; for (i = 0; i < n; i++) { - comp_thread_ctxt_t *thd = threads + i; - - pthread_mutex_lock(&thd->data_mutex); - threads[i].cancelled = TRUE; - pthread_cond_signal(&thd->data_cond); - pthread_mutex_unlock(&thd->data_mutex); - - pthread_join(thd->id, NULL); - - pthread_cond_destroy(&thd->data_cond); - pthread_mutex_destroy(&thd->data_mutex); - pthread_cond_destroy(&thd->ctrl_cond); - pthread_mutex_destroy(&thd->ctrl_mutex); - - my_free(thd->to); + destroy_worker_thread(threads + i); } my_free(threads); @@ -431,19 +408,9 @@ compress_worker_thread_func(void *arg) { comp_thread_ctxt_t *thd = (comp_thread_ctxt_t *) arg; - pthread_mutex_lock(&thd->ctrl_mutex); - pthread_mutex_lock(&thd->data_mutex); - thd->started = TRUE; - pthread_cond_signal(&thd->ctrl_cond); - - pthread_mutex_unlock(&thd->ctrl_mutex); - while (1) { - thd->data_avail = FALSE; - pthread_cond_signal(&thd->data_cond); - while (!thd->data_avail && !thd->cancelled) { pthread_cond_wait(&thd->data_cond, &thd->data_mutex); } -- cgit v1.2.1 From 131c318b165cfee1af4494c5581dd57990fe1a5d Mon Sep 17 00:00:00 2001 From: Monty Date: Mon, 30 May 2022 
13:04:14 +0300 Subject: Remove compiler warning about unused variables --- sql/partition_info.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/partition_info.cc b/sql/partition_info.cc index fb55091f05e..cf2536f3969 100644 --- a/sql/partition_info.cc +++ b/sql/partition_info.cc @@ -900,10 +900,12 @@ void partition_info::vers_check_limit(THD *thd) bitmap_set_all(), but this is not optimal since there can be quite a number of partitions. */ +#ifndef DBUG_OFF const uint32 sub_factor= num_subparts ? num_subparts : 1; uint32 part_id= vers_info->hist_part->id * sub_factor; const uint32 part_id_end= part_id + sub_factor; DBUG_ASSERT(part_id_end <= num_parts * sub_factor); +#endif ha_partition *hp= (ha_partition*)(table->file); ha_rows hist_rows= hp->part_records(vers_info->hist_part); -- cgit v1.2.1 From 9d10b7107cf022b939dc61cedf8fc8985443c880 Mon Sep 17 00:00:00 2001 From: Monty Date: Mon, 30 May 2022 13:07:21 +0300 Subject: Fixed bug in ma_loghandler.cc that could cause an assert The assert happens in 10.6 with the following command: ./mtr --no-reorder --verbose-restart main.update_ignore_216 main.upgrade_MDEV-19650 main.upgrade_MDEV-23102-1 main.upgrade_MDEV-23102-2 main.upgrade_geometrycolumn_procedure_definer main.upgrade_mdev_24363 main.varbinary sys_vars.aria_log_file_size_basic Reviewer: Oleksandr Byelkin --- storage/maria/ma_loghandler.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index b601a25966a..7e4fb5a8263 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -7993,22 +7993,14 @@ void translog_flush_buffers(TRANSLOG_ADDRESS *lsn, } else { - if (log_descriptor.bc.buffer->last_lsn == LSN_IMPOSSIBLE) + if (log_descriptor.bc.buffer->last_lsn == LSN_IMPOSSIBLE && + log_descriptor.bc.buffer->prev_last_lsn == LSN_IMPOSSIBLE) { - /* - In this case both last_lsn & prev_last_lsn are LSN_IMPOSSIBLE - otherwise it will go in the first 
IF because LSN_IMPOSSIBLE less - then any real LSN and cmp_translog_addr(*lsn, - log_descriptor.bc.buffer->prev_last_lsn) will be TRUE - */ - DBUG_ASSERT(log_descriptor.bc.buffer->prev_last_lsn == - LSN_IMPOSSIBLE); DBUG_PRINT("info", ("There is no LSNs yet generated => do nothing")); translog_unlock(); DBUG_VOID_RETURN; } - DBUG_ASSERT(log_descriptor.bc.buffer->prev_last_lsn != LSN_IMPOSSIBLE); /* fix lsn if it was horizon */ *lsn= log_descriptor.bc.buffer->prev_last_lsn; DBUG_PRINT("info", ("LSN to flush fixed to prev last lsn: " LSN_FMT, -- cgit v1.2.1 From e7de50a82187cbaaa192c2065d64c0041cd9a6a1 Mon Sep 17 00:00:00 2001 From: Monty Date: Mon, 30 May 2022 17:32:41 +0300 Subject: Bug fixes for S3 - Fixed wrong DBUG_ASSERT when waiting for big-block-read - Update S3_pagecache_reads counter when reading a block from S3. Before this patch the variable value was always 0 Reviewer: Oleksandr Byelkin --- storage/maria/ma_pagecache.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index a2e9a5cc172..00ae13b948a 100644 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -2824,8 +2824,14 @@ static void read_big_block(PAGECACHE *pagecache, have read our block for us */ struct st_my_thread_var *thread; - DBUG_ASSERT(page_st == PAGE_WAIT_TO_BE_READ); - DBUG_ASSERT(page_st != PAGE_TO_BE_READ); + /* + Either the page was not yet read and there is another thread + doing the read (page_st == PAGE_WAIT_TO_BE_READ) or the page + was just read and there are other threads waiting for the page + but they have not yet unmarked the PCLBOCK_BIG_READ flag + (page_st == PAGE_READ) + */ + DBUG_ASSERT(page_st == PAGE_READ || page_st == PAGE_WAIT_TO_BE_READ); block->status|= PCBLOCK_BIG_READ; // will be read by other thread /* Block read failed because somebody else is reading the first block @@ -2844,12 +2850,12 @@ static void read_big_block(PAGECACHE *pagecache, 
&pagecache->cache_lock); } while (thread->next); - // page should be read by other thread + // page should be read by other thread DBUG_ASSERT(block->status & PCBLOCK_READ || block->status & PCBLOCK_ERROR); /* It is possible that other thread already removed the flag (in - case of two threads waiting) but it will not make harm to try to + case of two threads waiting) but it will not harm to try to remove it even in that case. */ block->status&= ~PCBLOCK_BIG_READ; @@ -2883,6 +2889,7 @@ static void read_big_block(PAGECACHE *pagecache, args.pageno= page_to_read; args.data= block->hash_link->file.callback_data; + pagecache->global_cache_read++; if (pagecache->big_block_read(pagecache, &args, &block->hash_link->file, &data)) { -- cgit v1.2.1 From 6b6d745b9eab64c6c1a3c13c11afe38c6761df69 Mon Sep 17 00:00:00 2001 From: Alexey Botchkov Date: Sun, 30 May 2021 05:42:35 +0400 Subject: let numeric in the DEFAULT. --- mysql-test/suite/json/r/json_table.result | 4 ++-- mysql-test/suite/json/t/json_table.test | 4 ++-- sql/sql_yacc.yy | 23 ++++++++++++++++++++++- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/mysql-test/suite/json/r/json_table.result b/mysql-test/suite/json/r/json_table.result index 4619240ba00..155b8c53b29 100644 --- a/mysql-test/suite/json/r/json_table.result +++ b/mysql-test/suite/json/r/json_table.result @@ -57,14 +57,14 @@ Jeans {"color": "blue", "price": 50} blue select * from t1 right join json_table(t1.item_props,'$' columns( color varchar(100) path '$.color')) as T on 1; ERROR 42S22: Unknown column 't1.item_props' in 'JSON_TABLE argument' DROP TABLE t1; -select * from JSON_TABLE( '[ {"xa": 1, "b": [11,111]}, {"a": 2, "b": [22,222]}, {"a":3}]', '$[*]' COLUMNS( a INT PATH '$.a' default '101' on empty, NESTED PATH '$.b[*]' COLUMNS (b INT PATH 
'$'))) as jt; a b 101 11 101 111 2 22 2 222 3 NULL -select * from JSON_TABLE( '[ {"xa": 1, "b": [11,111]}, {"a": 2, "b": [22,222]}, {"a":3}]', '$[*]' COLUMNS( a INT PATH '$.a' default '202' on error, NESTED PATH '$.b[*]' COLUMNS (b INT PATH '$'))) as jt; +select * from JSON_TABLE( '[ {"xa": 1, "b": [11,111]}, {"a": 2, "b": [22,222]}, {"a":3}]', '$[*]' COLUMNS( a INT PATH '$.a' default 202 on error, NESTED PATH '$.b[*]' COLUMNS (b INT PATH '$'))) as jt; a b NULL 11 NULL 111 diff --git a/mysql-test/suite/json/t/json_table.test b/mysql-test/suite/json/t/json_table.test index dbb9f43ab17..f6e8c93e443 100644 --- a/mysql-test/suite/json/t/json_table.test +++ b/mysql-test/suite/json/t/json_table.test @@ -30,9 +30,9 @@ select * from t1 right join json_table(t1.item_props,'$' columns( color varchar( DROP TABLE t1; -select * from JSON_TABLE( '[ {"xa": 1, "b": [11,111]}, {"a": 2, "b": [22,222]}, {"a":3}]', '$[*]' COLUMNS( a INT PATH '$.a' default '101' on empty, NESTED PATH '$.b[*]' COLUMNS (b INT PATH '$'))) as jt; +select * from JSON_TABLE( '[ {"xa": 1, "b": [11,111]}, {"a": 2, "b": [22,222]}, {"a":3}]', '$[*]' COLUMNS( a INT PATH '$.a' default 101 on empty, NESTED PATH '$.b[*]' COLUMNS (b INT PATH '$'))) as jt; -select * from JSON_TABLE( '[ {"xa": 1, "b": [11,111]}, {"a": 2, "b": [22,222]}, {"a":3}]', '$[*]' COLUMNS( a INT PATH '$.a' default '202' on error, NESTED PATH '$.b[*]' COLUMNS (b INT PATH '$'))) as jt; +select * from JSON_TABLE( '[ {"xa": 1, "b": [11,111]}, {"a": 2, "b": [22,222]}, {"a":3}]', '$[*]' COLUMNS( a INT PATH '$.a' default 202 on error, NESTED PATH '$.b[*]' COLUMNS (b INT PATH '$'))) as jt; select * from JSON_TABLE( '[ {"a": [1, 2], "b": [11,111]}, {"a": 2, "b": [22,222]}, {"a":3}]', '$[*]' COLUMNS( a INT PATH '$.a' default '101' on empty, NESTED PATH '$.b[*]' COLUMNS (b INT PATH '$'))) as jt; diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 28ab93362c8..172ea82b1a6 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1313,6 +1313,7 @@ bool 
my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); TEXT_STRING NCHAR_STRING json_text_literal + json_text_literal_or_num %type opt_table_alias_clause @@ -11570,6 +11571,26 @@ json_text_literal: } ; +json_text_literal_or_num: + json_text_literal + | NUM + { + Lex->json_table->m_text_literal_cs= NULL; + } + | LONG_NUM + { + Lex->json_table->m_text_literal_cs= NULL; + } + | DECIMAL_NUM + { + Lex->json_table->m_text_literal_cs= NULL; + } + | FLOAT_NUM + { + Lex->json_table->m_text_literal_cs= NULL; + } + ; + join_table_list: derived_table_list { MYSQL_YYABORT_UNLESS($$=$1); } ; @@ -11684,7 +11705,7 @@ json_on_response: { $$.m_response= Json_table_column::RESPONSE_NULL; } - | DEFAULT json_text_literal + | DEFAULT json_text_literal_or_num { $$.m_response= Json_table_column::RESPONSE_DEFAULT; $$.m_default= $2; -- cgit v1.2.1 From a9f6abeddecdca22afae841fbd39101d80f406d9 Mon Sep 17 00:00:00 2001 From: Alexey Botchkov Date: Mon, 31 May 2021 13:48:09 +0400 Subject: MDEV-25875: JSON_TABLE: extract document fragment into JSON column Accept JSON values for the JSON fields. 
--- mysql-test/suite/json/r/json_table.result | 14 ++++++++++ mysql-test/suite/json/r/json_table_mysql.result | 34 +++++++++++++------------ mysql-test/suite/json/t/json_table.test | 10 ++++++++ mysql-test/suite/json/t/json_table_mysql.test | 4 +-- sql/json_table.cc | 31 +++++++++++++++++++++- sql/json_table.h | 1 + 6 files changed, 74 insertions(+), 20 deletions(-) diff --git a/mysql-test/suite/json/r/json_table.result b/mysql-test/suite/json/r/json_table.result index 155b8c53b29..19cffb6c94e 100644 --- a/mysql-test/suite/json/r/json_table.result +++ b/mysql-test/suite/json/r/json_table.result @@ -953,6 +953,20 @@ converted original Warnings: Warning 1264 Out of range value for column 'converted' at row 2 Warning 1366 Incorrect integer value: 'foo' for column ``.`(temporary)`.`converted` at row 3 +select * from +json_table('[{"color": "blue", "price": { "high": 10, "low": 5}}, + {"color": "white", "price": "pretty low"}, + {"color": "yellow", "price": 256.20}, + {"color": "red", "price": { "high": 20, "low": 8}}]', +'$[*]' columns(color varchar(100) path '$.color', +price json path '$.price' + ) +) as T; +color price +blue { "high": 10, "low": 5} +white "pretty low" +yellow 256.20 +red { "high": 20, "low": 8} # # MDEV-27696 Json table columns accept redundant COLLATE syntax # diff --git a/mysql-test/suite/json/r/json_table_mysql.result b/mysql-test/suite/json/r/json_table_mysql.result index ec21f18523f..da7aa70be2b 100644 --- a/mysql-test/suite/json/r/json_table_mysql.result +++ b/mysql-test/suite/json/r/json_table_mysql.result @@ -40,7 +40,7 @@ id jpath jsn_path jexst 2 2 2 0 3 33 {"x":33} 1 4 0 0 0 -5 66 NULL 0 +5 66 [1,2] 0 select * from json_table( '[{"a":"3"},{"a":2},{"b":1},{"a":0.33},{"a":"asd"}]', @@ -55,11 +55,11 @@ jsn_path json path '$.a' default '{"x":33}' on empty, jexst int exists path '$.b') ) as tt; id jpath_i jpath_r jsn_path jexst -1 3 3 3 0 +1 3 3 "3" 0 2 2 2 2 0 3 33 33.3 {"x":33} 1 4 0 0.33 0.33 0 -5 0 0 asd 0 +5 0 0 "asd" 0 Warnings: Warning 
1366 Incorrect integer value: 'asd' for column ``.`(temporary)`.`jpath_i` at row 5 Warning 1366 Incorrect double value: 'asd' for column ``.`(temporary)`.`jpath_r` at row 5 @@ -78,7 +78,7 @@ id jpath jsn_path jexst 2 2 2 0 3 33 {"x":33} 1 4 0 0 0 -5 66 NULL 0 +5 66 [1,2] 0 select * from json_table( '[{"a":"3"},{"a":2},{"b":1},{"a":0}]', @@ -88,7 +88,7 @@ json_path json path '$.a', jexst int exists path '$.b') ) as tt; id jpath json_path jexst -1 3 3 0 +1 3 "3" 0 2 2 2 0 3 NULL NULL 1 4 0 0 0 @@ -315,24 +315,24 @@ id1 jpath jexst id2 id3 jpath_3 id4 jpath_4 1 3 0 2 1 a1 NULL NULL 1 3 0 2 2 a2 NULL NULL 1 3 0 3 1 c NULL NULL -1 3 0 NULL NULL NULL 1 NULL -1 3 0 NULL NULL NULL 2 NULL -1 3 0 NULL NULL NULL 3 NULL +1 3 0 NULL NULL NULL 1 {"ll":["b1","b2","b3"]} +1 3 0 NULL NULL NULL 2 {"ll": ["a1","a2"]} +1 3 0 NULL NULL NULL 3 {"ll":["c"]} 2 2 0 1 1 1 NULL NULL 2 2 0 1 2 11 NULL NULL 2 2 0 1 3 111 NULL NULL 2 2 0 2 1 2 NULL NULL -2 2 0 NULL NULL NULL 1 NULL -2 2 0 NULL NULL NULL 2 NULL +2 2 0 NULL NULL NULL 1 {"ll":[1,11,111]} +2 2 0 NULL NULL NULL 2 {"ll":[2]} 3 NULL 1 1 1 zzz NULL NULL -3 NULL 1 NULL NULL NULL 1 NULL +3 NULL 1 NULL NULL NULL 1 {"ll":["zzz"]} 4 0 0 1 1 0.1 NULL NULL 4 0 0 1 2 0.01 NULL NULL 4 0 0 2 1 0.02 NULL NULL 4 0 0 2 2 0.002 NULL NULL 4 0 0 2 3 0.0002 NULL NULL -4 0 0 NULL NULL NULL 1 NULL -4 0 0 NULL NULL NULL 2 NULL +4 0 0 NULL NULL NULL 1 {"ll":[0.1,0.01]} +4 0 0 NULL NULL NULL 2 {"ll":[0.02,0.002,0.0002]} ord should be 1,1,1,2, which tells that first two values of 'l' are from the same object, and next two are from different objects SELECT * @@ -551,10 +551,12 @@ JSON_TABLE('{}', '$' COLUMNS (x INT PATH '$.x' DEFAULT NULL ON ERROR)) jt; ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'NULL ON ERROR)) jt' at line 2 SELECT * FROM JSON_TABLE('{}', '$' COLUMNS (x INT PATH '$.x' DEFAULT 0 ON EMPTY)) jt; -ERROR 42000: You have an error in your SQL 
syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '0 ON EMPTY)) jt' at line 2 +x +0 SELECT * FROM JSON_TABLE('{}', '$' COLUMNS (x INT PATH '$.x' DEFAULT 0 ON ERROR)) jt; -ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '0 ON ERROR)) jt' at line 2 +x +NULL SELECT * FROM JSON_TABLE('{}', '$' COLUMNS (x DATE PATH '$.x' @@ -604,7 +606,7 @@ SELECT * FROM JSON_TABLE('{"a":"1"}', o FOR ORDINALITY)) AS jt WHERE o = 1; jpath o -1 1 +"1" 1 # # Bug#25427982: ASSERTION `DERIVED' FAILED IN SQL/TABLE.H # diff --git a/mysql-test/suite/json/t/json_table.test b/mysql-test/suite/json/t/json_table.test index f6e8c93e443..a6392b7bfff 100644 --- a/mysql-test/suite/json/t/json_table.test +++ b/mysql-test/suite/json/t/json_table.test @@ -814,6 +814,16 @@ select * from json_table('{"a":"foo", "b":1, "c":1000}', '$.*' columns(converted select * from json_table('{"a":"foo", "b":1, "c":1000}', '$.*' columns(converted tinyint path '$', original text path '$')) as jt order by original; +select * from + json_table('[{"color": "blue", "price": { "high": 10, "low": 5}}, + {"color": "white", "price": "pretty low"}, + {"color": "yellow", "price": 256.20}, + {"color": "red", "price": { "high": 20, "low": 8}}]', + '$[*]' columns(color varchar(100) path '$.color', + price json path '$.price' + ) + ) as T; + --echo # --echo # MDEV-27696 Json table columns accept redundant COLLATE syntax --echo # diff --git a/mysql-test/suite/json/t/json_table_mysql.test b/mysql-test/suite/json/t/json_table_mysql.test index aaf123c6f7c..9f77ad964f3 100644 --- a/mysql-test/suite/json/t/json_table_mysql.test +++ b/mysql-test/suite/json/t/json_table_mysql.test @@ -453,13 +453,11 @@ SELECT * FROM SELECT * FROM JSON_TABLE('{}', '$' COLUMNS (x INT PATH '$.x' DEFAULT NULL ON ERROR)) jt; -# The DEFAULT value must be a string on JSON format for now. 
---error 1064 SELECT * FROM JSON_TABLE('{}', '$' COLUMNS (x INT PATH '$.x' DEFAULT 0 ON EMPTY)) jt; ---error 1064 SELECT * FROM JSON_TABLE('{}', '$' COLUMNS (x INT PATH '$.x' DEFAULT 0 ON ERROR)) jt; +# We don't accept dates in DEFAULT --error 1064 SELECT * FROM JSON_TABLE('{}', '$' COLUMNS (x DATE diff --git a/sql/json_table.cc b/sql/json_table.cc index 1eda21112e2..39288a73631 100644 --- a/sql/json_table.cc +++ b/sql/json_table.cc @@ -19,6 +19,7 @@ #include "sql_priv.h" #include "sql_class.h" /* TMP_TABLE_PARAM */ #include "table.h" +#include "sql_type_json.h" #include "item_jsonfunc.h" #include "json_table.h" #include "sql_show.h" @@ -377,6 +378,25 @@ static void store_json_in_field(Field *f, const json_engine_t *je) } +static int store_json_in_json(Field *f, json_engine_t *je) +{ + const uchar *from= je->value_begin; + const uchar *to; + + if (json_value_scalar(je)) + to= je->value_end; + else + { + int error; + if ((error= json_skip_level(je))) + return error; + to= je->s.c_str; + } + f->store((const char *) from, (uint32) (to - from), je->s.cs); + return 0; +} + + bool Json_table_nested_path::check_error(const char *str) { if (m_engine.s.error) @@ -541,7 +561,12 @@ int ha_json_table::fill_column_values(THD *thd, uchar * buf, uchar *pos) } else { - if (!(error= !json_value_scalar(&je))) + if (jc->m_format_json) + { + if (!(error= store_json_in_json(*f, &je))) + error= er_handler.errors; + } + else if (!(error= !json_value_scalar(&je))) { store_json_in_field(*f, &je); error= er_handler.errors; @@ -870,6 +895,10 @@ int Json_table_column::set(THD *thd, enum_type ctype, const LEX_CSTRING &path, anctual content. Not sure though if we should. 
*/ m_path.s.c_str= (const uchar *) path.str; + + if (ctype == PATH) + m_format_json= m_field->type_handler() == &type_handler_json_longtext; + return 0; } diff --git a/sql/json_table.h b/sql/json_table.h index 52cdae13e9b..7316edd4ee6 100644 --- a/sql/json_table.h +++ b/sql/json_table.h @@ -147,6 +147,7 @@ public: }; enum_type m_column_type; + bool m_format_json; json_path_t m_path; On_response m_on_error; On_response m_on_empty; -- cgit v1.2.1 From a61603562e09f230e5ab6540b16e1c623520f9b2 Mon Sep 17 00:00:00 2001 From: Rucha Deodhar Date: Wed, 18 May 2022 17:10:57 +0530 Subject: MDEV-25875: JSON_TABLE: extract document fragment into JSON column Fixup --- sql/json_table.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/json_table.cc b/sql/json_table.cc index 39288a73631..5fa8434fd67 100644 --- a/sql/json_table.cc +++ b/sql/json_table.cc @@ -897,7 +897,7 @@ int Json_table_column::set(THD *thd, enum_type ctype, const LEX_CSTRING &path, m_path.s.c_str= (const uchar *) path.str; if (ctype == PATH) - m_format_json= m_field->type_handler() == &type_handler_json_longtext; + m_format_json= m_field->type_handler() == &type_handler_long_blob_json; return 0; } -- cgit v1.2.1 From 52be05be15c679f2316c65ff7151f54f3957acef Mon Sep 17 00:00:00 2001 From: Hirokazu Hata Date: Wed, 1 Jun 2022 00:22:06 +0900 Subject: MDEV-27926 Deprecate spider_init_sql_alloc_size Reviewed by: Nayuta Yanagisawa --- .../mysql-test/spider/r/variable_deprecation.result | 15 +++++++++++++++ .../spider/mysql-test/spider/t/variable_deprecation.test | 9 +++++++++ storage/spider/spd_param.cc | 2 +- storage/spider/spd_table.cc | 2 ++ 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/storage/spider/mysql-test/spider/r/variable_deprecation.result b/storage/spider/mysql-test/spider/r/variable_deprecation.result index 6aba1e6f181..8840f1da892 100644 --- a/storage/spider/mysql-test/spider/r/variable_deprecation.result +++ 
b/storage/spider/mysql-test/spider/r/variable_deprecation.result @@ -196,6 +196,21 @@ Warnings: Warning 1287 The table parameter 'buffer_size' is deprecated and will be removed in a future release DROP TABLE tbl_a; DROP TABLE tbl_b; +# MDEV-27926 Deprecate spider_init_sql_alloc_size +SET spider_init_sql_alloc_size = 1; +Warnings: +Warning 1287 '@@spider_init_sql_alloc_size' is deprecated and will be removed in a future release +SHOW VARIABLES LIKE "spider_init_sql_alloc_size"; +Variable_name Value +spider_init_sql_alloc_size 1 +CREATE TABLE tbl_a (a INT) ENGINE=Spider COMMENT='isa "1"'; +Warnings: +Warning 1287 The table parameter 'isa' is deprecated and will be removed in a future release +CREATE TABLE tbl_b (a INT) ENGINE=Spider COMMENT='init_sql_alloc_size "1"'; +Warnings: +Warning 1287 The table parameter 'init_sql_alloc_size' is deprecated and will be removed in a future release +DROP TABLE tbl_a; +DROP TABLE tbl_b; DROP DATABASE auto_test_local; for master_1 for child2 diff --git a/storage/spider/mysql-test/spider/t/variable_deprecation.test b/storage/spider/mysql-test/spider/t/variable_deprecation.test index 1d2d6bfd77a..6c59deba5d0 100644 --- a/storage/spider/mysql-test/spider/t/variable_deprecation.test +++ b/storage/spider/mysql-test/spider/t/variable_deprecation.test @@ -117,6 +117,15 @@ eval CREATE TABLE tbl_b (a INT) $MASTER_1_ENGINE COMMENT='buffer_size "1"'; DROP TABLE tbl_a; DROP TABLE tbl_b; +--echo # MDEV-27926 Deprecate spider_init_sql_alloc_size +SET spider_init_sql_alloc_size = 1; +SHOW VARIABLES LIKE "spider_init_sql_alloc_size"; +eval CREATE TABLE tbl_a (a INT) $MASTER_1_ENGINE COMMENT='isa "1"'; +eval CREATE TABLE tbl_b (a INT) $MASTER_1_ENGINE COMMENT='init_sql_alloc_size "1"'; + +DROP TABLE tbl_a; +DROP TABLE tbl_b; + DROP DATABASE auto_test_local; --disable_query_log diff --git a/storage/spider/spd_param.cc b/storage/spider/spd_param.cc index f4844c61653..80db478f2d3 100644 --- a/storage/spider/spd_param.cc +++ 
b/storage/spider/spd_param.cc @@ -596,7 +596,7 @@ longlong spider_param_semi_split_read_limit( */ static MYSQL_THDVAR_INT( init_sql_alloc_size, /* name */ - PLUGIN_VAR_RQCMDARG, /* opt */ + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED, /* opt */ "Initial sql string alloc size", /* comment */ NULL, /* check */ spider_use_table_value_deprecated, /* update */ diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc index f1039358dbd..06cffe2a9a8 100644 --- a/storage/spider/spd_table.cc +++ b/storage/spider/spd_table.cc @@ -2377,6 +2377,7 @@ int spider_parse_connect_info( "hwr", hs_write_to_reads, 0, 1); SPIDER_PARAM_STR_LIST("hws", hs_write_socks); #endif + SPIDER_PARAM_DEPRECATED_WARNING("isa"); SPIDER_PARAM_INT("isa", init_sql_alloc_size, 0); SPIDER_PARAM_INT_WITH_MAX("idl", internal_delayed, 0, 1); SPIDER_PARAM_DEPRECATED_WARNING("ilm"); @@ -2682,6 +2683,7 @@ int spider_parse_connect_info( error_num = connect_string_parse.print_param_error(); goto error; case 19: + SPIDER_PARAM_DEPRECATED_WARNING("init_sql_alloc_size"); SPIDER_PARAM_INT("init_sql_alloc_size", init_sql_alloc_size, 0); SPIDER_PARAM_INT_WITH_MAX( "auto_increment_mode", auto_increment_mode, 0, 3); -- cgit v1.2.1 From fde99e006d0947977f196854dd1f5a2eaa030c20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 1 Jun 2022 11:13:15 +0300 Subject: MDEV-28716: Portability: unlink() can return EPERM instead of EISDIR --- storage/innobase/os/os0file.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 8c76e86ef0f..2c94ea6e393 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2479,6 +2479,7 @@ os_file_get_last_error_low( case EXDEV: case ENOTDIR: case EISDIR: + case EPERM: return(OS_FILE_PATH_ERROR); case EAGAIN: if (srv_use_native_aio) { -- cgit v1.2.1 From 91d5fffa0796b8208c3d6633c8f296da8914af4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= 
Date: Wed, 1 Jun 2022 11:20:47 +0300 Subject: MDEV-28719: compress_write() leaks data_mutex on error --- extra/mariabackup/ds_compress.cc | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/extra/mariabackup/ds_compress.cc b/extra/mariabackup/ds_compress.cc index dcfb3d5a82f..39a72cdca34 100644 --- a/extra/mariabackup/ds_compress.cc +++ b/extra/mariabackup/ds_compress.cc @@ -238,25 +238,24 @@ compress_write(ds_file_t *file, const uchar *buf, size_t len) xb_a(threads[i].to_len > 0); - if (ds_write(dest_file, "NEWBNEWB", 8) || - write_uint64_le(dest_file, - comp_file->bytes_processed)) { - msg("compress: write to the destination stream " - "failed."); - return 1; + bool fail = ds_write(dest_file, "NEWBNEWB", 8) || + write_uint64_le(dest_file, + comp_file->bytes_processed); + comp_file->bytes_processed += threads[i].from_len; + + if (!fail) { + fail = write_uint32_le(dest_file, threads[i].adler) || + ds_write(dest_file, threads[i].to, + threads[i].to_len); } - comp_file->bytes_processed += threads[i].from_len; + pthread_mutex_unlock(&threads[i].data_mutex); - if (write_uint32_le(dest_file, threads[i].adler) || - ds_write(dest_file, threads[i].to, - threads[i].to_len)) { + if (fail) { msg("compress: write to the destination stream " "failed."); return 1; } - - pthread_mutex_unlock(&threads[i].data_mutex); } } -- cgit v1.2.1 From e097abfa662751cc337febad9d8584159f23cc92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 2 Jun 2022 11:09:56 +0300 Subject: MDEV-27926 After-merge fix --- storage/spider/mysql-test/spider/bugfix/r/index.result | 2 ++ 1 file changed, 2 insertions(+) diff --git a/storage/spider/mysql-test/spider/bugfix/r/index.result b/storage/spider/mysql-test/spider/bugfix/r/index.result index d37258ffae1..2aa0b56ef06 100644 --- a/storage/spider/mysql-test/spider/bugfix/r/index.result +++ b/storage/spider/mysql-test/spider/bugfix/r/index.result @@ -14,6 +14,8 @@ SOCKET '$MASTER_1_MYSOCK' SET 
spider_internal_sql_log_off= 0; SET spider_direct_order_limit= 10000; SET spider_init_sql_alloc_size= 1; +Warnings: +Warning 1287 '@@spider_init_sql_alloc_size' is deprecated and will be removed in a future release for child2 for child3 create database auto_test_local; -- cgit v1.2.1 From 5294695ebd7f0331d69e197b221ab8eecc5ee265 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 2 Jun 2022 17:18:00 +0300 Subject: Clean up mtr_t mtr_t::is_empty(): Replaces mtr_t::get_log() and mtr_t::get_memo(). mtr_t::get_log_size(): Replaces mtr_t::get_log(). mtr_t::print(): Remove, unused function. ReleaseBlocks::ReleaseBlocks(): Remove an unused parameter. --- storage/innobase/fil/fil0crypt.cc | 3 +-- storage/innobase/fil/fil0fil.cc | 2 +- storage/innobase/include/mtr0mtr.h | 29 +++++------------------------ storage/innobase/mtr/mtr0mtr.cc | 34 +++++----------------------------- 4 files changed, 12 insertions(+), 56 deletions(-) diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 5830634692b..395dfc8590e 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -1961,8 +1961,7 @@ fil_crypt_rotate_page( /* If block read failed mtr memo and log should be empty. 
*/ ut_ad(!mtr.has_modifications()); ut_ad(!mtr.is_dirty()); - ut_ad(mtr.get_memo()->size() == 0); - ut_ad(mtr.get_log()->size() == 0); + ut_ad(mtr.is_empty()); mtr.commit(); } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 7d4938b1cd6..fcb0b06c1c2 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -3676,7 +3676,7 @@ fil_names_clear( for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.named_spaces); space != NULL; ) { - if (mtr.get_log()->size() + if (mtr.get_log_size() + strlen(space->chain.start->name) >= RECV_SCAN_SIZE - (3 + 5 + 1)) { /* Prevent log parse buffer overflow */ diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 0c7051ed31a..903b3f4699f 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -54,13 +54,6 @@ savepoint. */ #define mtr_memo_release(m, o, t) \ (m)->memo_release((o), (t)) -/** Print info of an mtr handle. */ -#define mtr_print(m) (m)->print() - -/** Return the log object of a mini-transaction buffer. -@return log */ -#define mtr_get_log(m) (m)->get_log() - /** Push an object to an mtr memo stack. */ #define mtr_memo_push(m, o, t) (m)->memo_push(o, t) @@ -360,30 +353,13 @@ public: const byte* ptr, ulint flags) const; - /** Print info of an mtr handle. */ - void print() const; - /** @return true if mini-transaction contains modifications. */ bool has_modifications() const { return m_modifications; } - - /** @return the memo stack */ - const mtr_buf_t* get_memo() const { return &m_memo; } - - /** @return the memo stack */ - mtr_buf_t* get_memo() { return &m_memo; } #endif /* UNIV_DEBUG */ /** @return true if a record was added to the mini-transaction */ bool is_dirty() const { return m_made_dirty; } - /** Get the buffered redo log of this mini-transaction. - @return redo log */ - const mtr_buf_t* get_log() const { return &m_log; } - - /** Get the buffered redo log of this mini-transaction. 
- @return redo log */ - mtr_buf_t* get_log() { return &m_log; } - /** Push an object to an mtr memo stack. @param object object @param type object type: MTR_MEMO_S_LOCK, ... */ @@ -395,6 +371,11 @@ public: static inline bool is_block_dirtied(const buf_block_t* block) MY_ATTRIBUTE((warn_unused_result)); + /** @return the size of the log is empty */ + size_t get_log_size() const { return m_log.size(); } + /** @return whether the log and memo are empty */ + bool is_empty() const { return m_memo.size() == 0 && m_log.size() == 0; } + /** Write request types */ enum write_type { diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 2daada16a91..2feb5a0583f 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -360,22 +360,10 @@ struct DebugCheck { struct ReleaseBlocks { const lsn_t start, end; -#ifdef UNIV_DEBUG - const mtr_buf_t &memo; - - ReleaseBlocks(lsn_t start, lsn_t end, const mtr_buf_t &memo) : - start(start), end(end), memo(memo) -#else /* UNIV_DEBUG */ - ReleaseBlocks(lsn_t start, lsn_t end, const mtr_buf_t&) : - start(start), end(end) -#endif /* UNIV_DEBUG */ - { - ut_ad(start); - ut_ad(end); - } + ReleaseBlocks(lsn_t start, lsn_t end) : start(start), end(end) {} /** @return true always */ - bool operator()(mtr_memo_slot_t* slot) const + bool operator()(mtr_memo_slot_t *slot) const { if (!slot->object) return true; @@ -492,9 +480,8 @@ void mtr_t::commit() else ut_ad(!m_freed_space); - m_memo.for_each_block_in_reverse(CIterate - (ReleaseBlocks(lsns.first, m_commit_lsn, - m_memo))); + m_memo.for_each_block_in_reverse + (CIterate(ReleaseBlocks(lsns.first, m_commit_lsn))); if (m_made_dirty) mysql_mutex_unlock(&log_sys.flush_order_mutex); @@ -615,8 +602,7 @@ void mtr_t::commit_shrink(fil_space_t &space) m_memo.for_each_block_in_reverse(CIterate{space}); m_memo.for_each_block_in_reverse(CIterate - (ReleaseBlocks(start_lsn, m_commit_lsn, - m_memo))); + (ReleaseBlocks(start_lsn, m_commit_lsn))); 
mysql_mutex_unlock(&log_sys.flush_order_mutex); mutex_enter(&fil_system.mutex); @@ -1238,16 +1224,6 @@ mtr_t::memo_contains_page_flagged( return m_memo.for_each_block_in_reverse(iteration) ? NULL : iteration.functor.get_block(); } - -/** Print info of an mtr handle. */ -void -mtr_t::print() const -{ - ib::info() << "Mini-transaction handle: memo size " - << m_memo.size() << " bytes log size " - << get_log()->size() << " bytes"; -} - #endif /* UNIV_DEBUG */ -- cgit v1.2.1 From 5909e0ec31f089e3677f7177e991dffbfb96a4e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 2 Jun 2022 17:22:16 +0300 Subject: Cleanup: btr_store_big_rec_extern_fields() does not really modify pcur --- storage/innobase/btr/btr0bulk.cc | 8 -------- storage/innobase/btr/btr0cur.cc | 4 +--- storage/innobase/include/btr0cur.h | 6 ++---- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc index 9004064a1b9..4e5d41477e5 100644 --- a/storage/innobase/btr/btr0bulk.cc +++ b/storage/innobase/btr/btr0bulk.cc @@ -820,14 +820,6 @@ PageBulk::storeExt( dberr_t err = btr_store_big_rec_extern_fields( &btr_pcur, offsets, big_rec, &m_mtr, BTR_STORE_INSERT_BULK); - /* Reset m_block and m_cur_rec from page cursor, because - block may be changed during blob insert. (FIXME: Can it really?) */ - ut_ad(m_block == btr_pcur.btr_cur.page_cur.block); - - m_block = btr_pcur.btr_cur.page_cur.block; - m_cur_rec = btr_pcur.btr_cur.page_cur.rec; - m_page = buf_block_get_frame(m_block); - return(err); } diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 437c7508a0b..a33596509c1 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -7221,9 +7221,7 @@ the file, in case the file was somehow truncated in the crash. dberr_t btr_store_big_rec_extern_fields( /*============================*/ - btr_pcur_t* pcur, /*!< in/out: a persistent cursor. 
if - btr_mtr is restarted, then this can - be repositioned. */ + btr_pcur_t* pcur, /*!< in: a persistent cursor */ rec_offs* offsets, /*!< in/out: rec_get_offsets() on pcur. the "external storage" flags in offsets will correctly correspond diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 3f9cad12004..923eea0daa5 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -677,9 +677,7 @@ file segment of the index tree. dberr_t btr_store_big_rec_extern_fields( /*============================*/ - btr_pcur_t* pcur, /*!< in/out: a persistent cursor. if - btr_mtr is restarted, then this can - be repositioned. */ + btr_pcur_t* pcur, /*!< in: a persistent cursor */ rec_offs* offsets, /*!< in/out: rec_get_offsets() on pcur. the "external storage" flags in offsets will correctly correspond @@ -690,7 +688,7 @@ btr_store_big_rec_extern_fields( latches to the clustered index. can be committed and restarted. */ enum blob_op op) /*! in: operation code */ - MY_ATTRIBUTE((warn_unused_result)); + MY_ATTRIBUTE((nonnull, warn_unused_result)); /*******************************************************************//** Frees the space in an externally stored field to the file space -- cgit v1.2.1 From 22f935d6daa70a21a3f640261fcf8c9dfb259250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 2 Jun 2022 17:33:03 +0300 Subject: MDEV-28731 Race condition on log checkpoint mtr_t::modify(): Set the m_made_dirty flag if needed, so that buf_pool_t::insert_into_flush_list() will be invoked while holding log_sys.flush_order_mutex. This is something that was should have been part of commit b212f1dac284cc9b7a060f1eed2cd4604c326966 (MDEV-22107). 
--- storage/innobase/mtr/mtr0mtr.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 2feb5a0583f..f13b677742b 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -1263,4 +1263,6 @@ void mtr_t::modify(const buf_block_t &block) } iteration.functor.found->type= static_cast (iteration.functor.found->type | MTR_MEMO_MODIFY); + if (is_block_dirtied(&block)) + m_made_dirty= true; } -- cgit v1.2.1 From 477776bfed892f3f7cadcef16e860a1232f59952 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Fri, 3 Jun 2022 17:34:22 +0400 Subject: MDEV-28491 Uuid. "UPDATE/DELETE" not working "WHERE id IN (SELECT id FROM ..)" This is a 10.7 version of the patch. --- .../type_inet/mysql-test/type_inet/type_inet6.result | 19 +++++++++++++++++++ plugin/type_inet/mysql-test/type_inet/type_inet6.test | 17 +++++++++++++++++ .../type_uuid/mysql-test/type_uuid/type_uuid.result | 19 +++++++++++++++++++ plugin/type_uuid/mysql-test/type_uuid/type_uuid.test | 18 ++++++++++++++++++ sql/sql_type_fixedbin.h | 5 +++-- 5 files changed, 76 insertions(+), 2 deletions(-) diff --git a/plugin/type_inet/mysql-test/type_inet/type_inet6.result b/plugin/type_inet/mysql-test/type_inet/type_inet6.result index 0ac5f666b86..9f3b8a9715e 100644 --- a/plugin/type_inet/mysql-test/type_inet/type_inet6.result +++ b/plugin/type_inet/mysql-test/type_inet/type_inet6.result @@ -2194,3 +2194,22 @@ SELECT IF(1, '::', a) AS f FROM t1 GROUP BY 'foo' HAVING f != '::1'; f :: DROP TABLE t1; +# +# MDEV-28491 Uuid. 
"UPDATE/DELETE" not working "WHERE id IN (SELECT id FROM ..)" +# +CREATE TABLE companies (id INET6, name varchar(10)); +INSERT INTO companies (id) values ('00::01'); +CREATE TABLE divisions (company_id INET6); +INSERT INTO divisions (company_id) values ('00::01'); +SELECT * FROM companies WHERE id IN (SELECT company_id FROM divisions); +id name +::1 NULL +UPDATE companies SET name = 'value' WHERE id IN (SELECT company_id FROM divisions); +SELECT * FROM companies; +id name +::1 value +DELETE FROM companies WHERE id IN (SELECT company_id FROM divisions); +SELECT * FROM companies; +id name +DROP TABLE divisions; +DROP TABLE companies; diff --git a/plugin/type_inet/mysql-test/type_inet/type_inet6.test b/plugin/type_inet/mysql-test/type_inet/type_inet6.test index 6a5db1ad43f..f3a57217f49 100644 --- a/plugin/type_inet/mysql-test/type_inet/type_inet6.test +++ b/plugin/type_inet/mysql-test/type_inet/type_inet6.test @@ -1612,3 +1612,20 @@ SELECT IF(1, '::', a) AS f FROM t1 GROUP BY 'foo' HAVING f != ''; SELECT IF(1, '::', a) AS f FROM t1 GROUP BY 'foo' HAVING f != '::'; SELECT IF(1, '::', a) AS f FROM t1 GROUP BY 'foo' HAVING f != '::1'; DROP TABLE t1; + +--echo # +--echo # MDEV-28491 Uuid. 
"UPDATE/DELETE" not working "WHERE id IN (SELECT id FROM ..)" +--echo # + +CREATE TABLE companies (id INET6, name varchar(10)); +INSERT INTO companies (id) values ('00::01'); + +CREATE TABLE divisions (company_id INET6); +INSERT INTO divisions (company_id) values ('00::01'); +SELECT * FROM companies WHERE id IN (SELECT company_id FROM divisions); +UPDATE companies SET name = 'value' WHERE id IN (SELECT company_id FROM divisions); +SELECT * FROM companies; +DELETE FROM companies WHERE id IN (SELECT company_id FROM divisions); +SELECT * FROM companies; +DROP TABLE divisions; +DROP TABLE companies; diff --git a/plugin/type_uuid/mysql-test/type_uuid/type_uuid.result b/plugin/type_uuid/mysql-test/type_uuid/type_uuid.result index 43ef4e911dc..7ce3b878a1a 100644 --- a/plugin/type_uuid/mysql-test/type_uuid/type_uuid.result +++ b/plugin/type_uuid/mysql-test/type_uuid/type_uuid.result @@ -3152,3 +3152,22 @@ f var_pop('x') Warnings: Warning 1292 Truncated incorrect DOUBLE value: 'x' Warning 1292 Incorrect uuid value: '' +# +# MDEV-28491 Uuid. 
"UPDATE/DELETE" not working "WHERE id IN (SELECT id FROM ..)" +# +CREATE TABLE companies (id uuid, name varchar(10)); +INSERT INTO companies (id) values ('7bc95b06-cc6c-11ec-96c5-0242ac130002'); +CREATE TABLE divisions (company_id uuid); +INSERT INTO divisions (company_id) values ('7bc95b06-cc6c-11ec-96c5-0242ac130002'); +SELECT * FROM companies WHERE id IN (SELECT company_id FROM divisions); +id name +7bc95b06-cc6c-11ec-96c5-0242ac130002 NULL +UPDATE companies SET name = 'value' WHERE id IN (SELECT company_id FROM divisions); +SELECT * FROM companies; +id name +7bc95b06-cc6c-11ec-96c5-0242ac130002 value +DELETE FROM companies WHERE id IN (SELECT company_id FROM divisions); +SELECT * FROM companies; +id name +DROP TABLE divisions; +DROP TABLE companies; diff --git a/plugin/type_uuid/mysql-test/type_uuid/type_uuid.test b/plugin/type_uuid/mysql-test/type_uuid/type_uuid.test index 65574ac11d3..4a3525a783c 100644 --- a/plugin/type_uuid/mysql-test/type_uuid/type_uuid.test +++ b/plugin/type_uuid/mysql-test/type_uuid/type_uuid.test @@ -1652,3 +1652,21 @@ DROP TABLE t1; --echo # SELECT uuid() AS f, var_pop('x') FROM dual HAVING f > ''; + + +--echo # +--echo # MDEV-28491 Uuid. 
"UPDATE/DELETE" not working "WHERE id IN (SELECT id FROM ..)" +--echo # + +CREATE TABLE companies (id uuid, name varchar(10)); +INSERT INTO companies (id) values ('7bc95b06-cc6c-11ec-96c5-0242ac130002'); + +CREATE TABLE divisions (company_id uuid); +INSERT INTO divisions (company_id) values ('7bc95b06-cc6c-11ec-96c5-0242ac130002'); +SELECT * FROM companies WHERE id IN (SELECT company_id FROM divisions); +UPDATE companies SET name = 'value' WHERE id IN (SELECT company_id FROM divisions); +SELECT * FROM companies; +DELETE FROM companies WHERE id IN (SELECT company_id FROM divisions); +SELECT * FROM companies; +DROP TABLE divisions; +DROP TABLE companies; diff --git a/sql/sql_type_fixedbin.h b/sql/sql_type_fixedbin.h index 236241f67b7..bf771f1a878 100644 --- a/sql/sql_type_fixedbin.h +++ b/sql/sql_type_fixedbin.h @@ -1589,8 +1589,9 @@ public: if (!example) return false; value_cached= true; - null_value= example->val_native_with_conversion_result(current_thd, - &m_value, type_handler()); + null_value_inside= null_value= + example->val_native_with_conversion_result(current_thd, + &m_value, type_handler()); return true; } String* val_str(String *to) -- cgit v1.2.1 From 6d99fdce184c2a5d1a0eb0b0cb3360df7cbfb7f8 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Fri, 3 Jun 2022 19:58:17 +0400 Subject: MDEV-28491 Uuid. "UPDATE/DELETE" not working "WHERE id IN (SELECT id FROM ..)" This is a 10.5 version of the patch. 
--- .../type_inet/mysql-test/type_inet/type_inet6.result | 19 +++++++++++++++++++ plugin/type_inet/mysql-test/type_inet/type_inet6.test | 18 ++++++++++++++++++ plugin/type_inet/sql_type_inet.cc | 11 ++++++++--- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/plugin/type_inet/mysql-test/type_inet/type_inet6.result b/plugin/type_inet/mysql-test/type_inet/type_inet6.result index ac16f5c06ce..2cc6942aa52 100644 --- a/plugin/type_inet/mysql-test/type_inet/type_inet6.result +++ b/plugin/type_inet/mysql-test/type_inet/type_inet6.result @@ -2194,3 +2194,22 @@ SELECT IF(1, '::', a) AS f FROM t1 GROUP BY 'foo' HAVING f != '::1'; f :: DROP TABLE t1; +# +# MDEV-28491 Uuid. "UPDATE/DELETE" not working "WHERE id IN (SELECT id FROM ..)" +# +CREATE TABLE companies (id INET6, name varchar(10)); +INSERT INTO companies (id) values ('00::01'); +CREATE TABLE divisions (company_id INET6); +INSERT INTO divisions (company_id) values ('00::01'); +SELECT * FROM companies WHERE id IN (SELECT company_id FROM divisions); +id name +::1 NULL +UPDATE companies SET name = 'value' WHERE id IN (SELECT company_id FROM divisions); +SELECT * FROM companies; +id name +::1 value +DELETE FROM companies WHERE id IN (SELECT company_id FROM divisions); +SELECT * FROM companies; +id name +DROP TABLE divisions; +DROP TABLE companies; diff --git a/plugin/type_inet/mysql-test/type_inet/type_inet6.test b/plugin/type_inet/mysql-test/type_inet/type_inet6.test index 6a5db1ad43f..ef8399d981f 100644 --- a/plugin/type_inet/mysql-test/type_inet/type_inet6.test +++ b/plugin/type_inet/mysql-test/type_inet/type_inet6.test @@ -1612,3 +1612,21 @@ SELECT IF(1, '::', a) AS f FROM t1 GROUP BY 'foo' HAVING f != ''; SELECT IF(1, '::', a) AS f FROM t1 GROUP BY 'foo' HAVING f != '::'; SELECT IF(1, '::', a) AS f FROM t1 GROUP BY 'foo' HAVING f != '::1'; DROP TABLE t1; + + +--echo # +--echo # MDEV-28491 Uuid. 
"UPDATE/DELETE" not working "WHERE id IN (SELECT id FROM ..)" +--echo # + +CREATE TABLE companies (id INET6, name varchar(10)); +INSERT INTO companies (id) values ('00::01'); + +CREATE TABLE divisions (company_id INET6); +INSERT INTO divisions (company_id) values ('00::01'); +SELECT * FROM companies WHERE id IN (SELECT company_id FROM divisions); +UPDATE companies SET name = 'value' WHERE id IN (SELECT company_id FROM divisions); +SELECT * FROM companies; +DELETE FROM companies WHERE id IN (SELECT company_id FROM divisions); +SELECT * FROM companies; +DROP TABLE divisions; +DROP TABLE companies; diff --git a/plugin/type_inet/sql_type_inet.cc b/plugin/type_inet/sql_type_inet.cc index 26be505ac65..1c6e0e02e73 100644 --- a/plugin/type_inet/sql_type_inet.cc +++ b/plugin/type_inet/sql_type_inet.cc @@ -1109,9 +1109,14 @@ public: if (!example) return false; value_cached= true; - null_value= example->val_native_with_conversion_result(current_thd, - &m_value, - type_handler()); + /* + Merge comments: in 10.7 this code migrated to + Item_cache_fbt in to sql/sql_type_fixedbin.h + */ + null_value_inside= null_value= + example->val_native_with_conversion_result(current_thd, + &m_value, + type_handler()); return true; } String* val_str(String *to) -- cgit v1.2.1 From 1b03db11d2658b77ce31ffa9261652e626674b42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 6 Jun 2022 09:52:11 +0300 Subject: MDEV-15528 fixup: Remove some dead code btr_page_split_and_insert(): Declare all parameters nonnull. btr_pessimistic_scrub() was removed in commit a5584b13d1e04f38b843602413669591aa65c359 (MDEV-15528). 
--- storage/innobase/btr/btr0btr.cc | 36 ++++++++---------------------------- storage/innobase/include/btr0btr.h | 2 +- 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 4436cf0159d..37ffe8a74e8 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -2672,9 +2672,6 @@ released within this function! NOTE that the operation of this function must always succeed, we cannot reverse it: therefore enough free disk space (2 pages) must be guaranteed to be available before this function is called. -NOTE: jonaso added support for calling function with tuple == NULL -which cause it to only split a page. - @return inserted record or NULL if run out of space */ rec_t* btr_page_split_and_insert( @@ -2746,7 +2743,7 @@ func_start: uint32_t hint_page_no = block->page.id().page_no() + 1; byte direction = FSP_UP; - if (tuple && n_iterations > 0) { + if (n_iterations > 0) { split_rec = btr_page_get_split_rec(cursor, tuple, n_ext); if (split_rec == NULL) { @@ -2816,8 +2813,7 @@ func_start: ? cursor->index->n_core_fields : 0, n_uniq, heap); - insert_left = !tuple - || cmp_dtuple_rec(tuple, split_rec, *offsets) < 0; + insert_left = cmp_dtuple_rec(tuple, split_rec, *offsets) < 0; if (!insert_left && new_page_zip && n_iterations > 0) { /* If a compressed page has already been split, @@ -2853,22 +2849,12 @@ insert_empty: on the appropriate half-page, we may release the tree x-latch. We can then move the records after releasing the tree latch, thus reducing the tree latch contention. 
*/ - bool insert_will_fit; - if (tuple == NULL) { - insert_will_fit = true; - } else if (split_rec) { - insert_will_fit = !new_page_zip - && btr_page_insert_fits(cursor, split_rec, - offsets, tuple, n_ext, heap); - } else { - if (!insert_left) { - UT_DELETE_ARRAY(buf); - buf = NULL; - } - - insert_will_fit = !new_page_zip - && btr_page_insert_fits(cursor, NULL, - offsets, tuple, n_ext, heap); + const bool insert_will_fit = !new_page_zip + && btr_page_insert_fits(cursor, split_rec, offsets, tuple, + n_ext, heap); + if (!split_rec && !insert_left) { + UT_DELETE_ARRAY(buf); + buf = NULL; } if (!srv_read_only_mode @@ -2992,11 +2978,6 @@ insert_empty: buf_block_t* const insert_block = insert_left ? left_block : right_block; - if (UNIV_UNLIKELY(!tuple)) { - rec = NULL; - goto func_exit; - } - /* 7. Reposition the cursor for insert and try insertion */ page_cursor = btr_cur_get_page_cur(cursor); @@ -3073,7 +3054,6 @@ func_exit: ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index)); ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index)); - ut_ad(tuple || !rec); ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets)); return(rec); } diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index c6e90c8cbef..1b5c7e377d9 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -438,7 +438,7 @@ btr_page_split_and_insert( const dtuple_t* tuple, /*!< in: tuple to insert */ ulint n_ext, /*!< in: number of externally stored columns */ mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((warn_unused_result)); + MY_ATTRIBUTE((nonnull, warn_unused_result)); /*******************************************************//** Inserts a data tuple to a tree on a non-leaf level. It is assumed that mtr holds an x-latch on the tree. 
*/ -- cgit v1.2.1 From a98ac43649f287a3114ce685cf02c8aba799b93d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 6 Jun 2022 09:58:42 +0300 Subject: MDEV-28752 Rollback of RENAME is broken if innodb_file_per_table=0 This was broken in 54e2e70194b9374543fdfc81a47d583e34771fac (MDEV-25524). --- storage/innobase/row/row0uins.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 446d56f8daa..825e7442746 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -428,7 +428,7 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked) table, name, !dict_table_t::is_temporary_name(name), true); - } else if (table->space) { + } else if (table->space && table->space->id) { const auto s = table->space->name(); if (len != s.size() || memcmp(name, s.data(), len)) { table->rename_tablespace(name, true); -- cgit v1.2.1 From c86d1daa620a7cc6f6f5189831f329217fc3e695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 6 Jun 2022 10:01:32 +0300 Subject: Cleanup: Remove some redundant reads --- storage/innobase/trx/trx0undo.cc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index ed332ad02af..90372f490c5 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -168,9 +168,10 @@ trx_undo_get_prev_rec_from_prev_page(buf_block_t *&block, uint16_t rec, uint32_t page_no, uint16_t offset, bool shared, mtr_t *mtr) { - uint32_t prev_page_no= flst_get_prev_addr(TRX_UNDO_PAGE_HDR + - TRX_UNDO_PAGE_NODE + - block->page.frame).page; + uint32_t prev_page_no= mach_read_from_4(TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_NODE + + FLST_PREV + FIL_ADDR_PAGE + + block->page.frame); if (prev_page_no == FIL_NULL) return nullptr; @@ -237,12 +238,13 @@ 
trx_undo_get_next_rec_from_next_page(buf_block_t *&block, uint32_t page_no, { if (page_no == block->page.id().page_no() && mach_read_from_2(block->page.frame + offset + TRX_UNDO_NEXT_LOG)) - return NULL; + return nullptr; - uint32_t next= flst_get_next_addr(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + - block->page.frame).page; + uint32_t next= mach_read_from_4(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE + + FLST_NEXT + FIL_ADDR_PAGE + + block->page.frame); if (next == FIL_NULL) - return NULL; + return nullptr; block= buf_page_get(page_id_t(block->page.id().space(), next), 0, mode, mtr); -- cgit v1.2.1 From b29a8118dd00c2269284793b63e2c5c6a34336d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 6 Jun 2022 11:54:17 +0300 Subject: Cleanup: Remove fil_space_t::magic_n --- storage/innobase/fil/fil0fil.cc | 7 +------ storage/innobase/include/fil0fil.h | 5 ----- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index fa35f219d2c..b261fdce8b7 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -236,9 +236,7 @@ fil_space_get_by_id( mysql_mutex_assert_owner(&fil_system.mutex); HASH_SEARCH(hash, &fil_system.spaces, id, - fil_space_t*, space, - ut_ad(space->magic_n == FIL_SPACE_MAGIC_N), - space->id == id); + fil_space_t*, space,, space->id == id); return(space); } @@ -803,8 +801,6 @@ pfs_os_file_t fil_system_t::detach(fil_space_t *space, bool detach_handle) else if (space == temp_space) temp_space= nullptr; - ut_a(space->magic_n == FIL_SPACE_MAGIC_N); - for (fil_node_t* node= UT_LIST_GET_FIRST(space->chain); node; node= UT_LIST_GET_NEXT(chain, node)) if (node->is_open()) @@ -943,7 +939,6 @@ fil_space_t *fil_space_t::create(ulint id, ulint flags, space->purpose = purpose; space->flags = flags; - space->magic_n = FIL_SPACE_MAGIC_N; space->crypt_data = crypt_data; space->n_pending.store(CLOSING, std::memory_order_relaxed); diff --git 
a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 9df3a260152..73bb1ad32e9 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -434,8 +434,6 @@ public: /** Stores last page freed lsn. Protected by freed_mutex */ lsn_t last_freed_lsn; - ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ - /** @return whether doublewrite buffering is needed */ inline bool use_doublewrite() const; @@ -1085,9 +1083,6 @@ private: }; #ifndef UNIV_INNOCHECKSUM -/** Value of fil_space_t::magic_n */ -#define FIL_SPACE_MAGIC_N 89472 - /** File node of a tablespace or the log data space */ struct fil_node_t final { -- cgit v1.2.1 From aa45850687409ba857eb3a0eb19e7ccc28dc02f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 6 Jun 2022 11:55:29 +0300 Subject: Cleanup: Make fil_space_t::freed_ranges private fil_space_t::is_freed(): Check if a page is in freed_ranges. fil_space_t::flush_freed(): Replaces buf_flush_freed_pages(). --- storage/innobase/buf/buf0flu.cc | 49 +++++++++++++++++--------------------- storage/innobase/buf/buf0rea.cc | 9 ++++++- storage/innobase/include/fil0fil.h | 36 +++++++++++++++++----------- 3 files changed, 52 insertions(+), 42 deletions(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 23d01daf5b2..cc2f72c9a62 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1002,63 +1002,58 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, return i; } -MY_ATTRIBUTE((nonnull, warn_unused_result)) -/** Write punch-hole or zeroes of the freed ranges when -innodb_immediate_scrub_data_uncompressed from the freed ranges. -@param space tablespace which may contain ranges of freed pages -@param writable whether the tablespace is writable +MY_ATTRIBUTE((warn_unused_result)) +/** Apply freed_ranges to the file. 
+@param writable whether the file is writable @return number of pages written or hole-punched */ -static uint32_t buf_flush_freed_pages(fil_space_t *space, bool writable) +uint32_t fil_space_t::flush_freed(bool writable) { - const bool punch_hole= space->chain.start->punch_hole == 1; + const bool punch_hole= chain.start->punch_hole == 1; if (!punch_hole && !srv_immediate_scrub_data_uncompressed) return 0; mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); mysql_mutex_assert_not_owner(&buf_pool.mutex); - space->freed_range_mutex.lock(); - if (space->freed_ranges.empty() || - log_sys.get_flushed_lsn() < space->get_last_freed_lsn()) + freed_range_mutex.lock(); + if (freed_ranges.empty() || log_sys.get_flushed_lsn() < get_last_freed_lsn()) { - space->freed_range_mutex.unlock(); + freed_range_mutex.unlock(); return 0; } - const unsigned physical_size{space->physical_size()}; + const unsigned physical{physical_size()}; - range_set freed_ranges= std::move(space->freed_ranges); + range_set freed= std::move(freed_ranges); uint32_t written= 0; if (!writable); else if (punch_hole) { - for (const auto &range : freed_ranges) + for (const auto &range : freed) { written+= range.last - range.first + 1; - space->reacquire(); - space->io(IORequest(IORequest::PUNCH_RANGE), - os_offset_t{range.first} * physical_size, - (range.last - range.first + 1) * physical_size, - nullptr); + reacquire(); + io(IORequest(IORequest::PUNCH_RANGE), + os_offset_t{range.first} * physical, + (range.last - range.first + 1) * physical, nullptr); } } else { - for (const auto &range : freed_ranges) + for (const auto &range : freed) { written+= range.last - range.first + 1; for (os_offset_t i= range.first; i <= range.last; i++) { - space->reacquire(); - space->io(IORequest(IORequest::WRITE_ASYNC), - i * physical_size, physical_size, - const_cast<byte*>(field_ref_zero)); + reacquire(); + io(IORequest(IORequest::WRITE_ASYNC), i * physical, physical, + const_cast<byte*>(field_ref_zero)); } } } -
space->freed_range_mutex.unlock(); + freed_range_mutex.unlock(); return written; } @@ -1186,7 +1181,7 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max) static std::pair buf_flush_space(const uint32_t id) { if (fil_space_t *space= fil_space_t::get(id)) - return {space, buf_flush_freed_pages(space, true)}; + return {space, space->flush_freed(true)}; return {nullptr, 0}; } @@ -1568,7 +1563,7 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed) bool acquired= space->acquire(); { - const uint32_t written{buf_flush_freed_pages(space, acquired)}; + const uint32_t written{space->flush_freed(acquired)}; mysql_mutex_lock(&buf_pool.mutex); if (written) buf_pool.stat.n_pages_written+= written; diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 436d2bd0aa3..6a5f973ee22 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -685,6 +685,13 @@ failed: return count; } +/** @return whether a page has been freed */ +inline bool fil_space_t::is_freed(uint32_t page) +{ + std::lock_guard freed_lock(freed_range_mutex); + return freed_ranges.contains(page); +} + /** Issues read requests for pages which recovery wants to read in. @param[in] space_id tablespace id @param[in] page_nos array of page numbers to read, with the @@ -704,7 +711,7 @@ void buf_read_recv_pages(ulint space_id, const uint32_t* page_nos, ulint n) for (ulint i = 0; i < n; i++) { /* Ignore if the page already present in freed ranges. 
*/ - if (space->freed_ranges.contains(page_nos[i])) { + if (space->is_freed(page_nos[i])) { continue; } diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 73bb1ad32e9..3bcb3be563d 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -415,28 +415,36 @@ private: pthread_t latch_owner; ut_d(Atomic_relaxed latch_count;) public: - /** MariaDB encryption data */ - fil_space_crypt_t* crypt_data; + /** MariaDB encryption data */ + fil_space_crypt_t *crypt_data; - /** Checks that this tablespace in a list of unflushed tablespaces. */ - bool is_in_unflushed_spaces; + /** Checks that this tablespace in a list of unflushed tablespaces. */ + bool is_in_unflushed_spaces; + /** Checks that this tablespace needs key rotation. */ + bool is_in_default_encrypt; - /** Checks that this tablespace needs key rotation. */ - bool is_in_default_encrypt; - - /** mutex to protect freed ranges */ - std::mutex freed_range_mutex; +private: + /** mutex to protect freed_ranges and last_freed_lsn */ + std::mutex freed_range_mutex; - /** Variables to store freed ranges. This can be used to write - zeroes/punch the hole in files. Protected by freed_mutex */ - range_set freed_ranges; + /** Ranges of freed page numbers; protected by freed_range_mutex */ + range_set freed_ranges; - /** Stores last page freed lsn. Protected by freed_mutex */ - lsn_t last_freed_lsn; + /** LSN of freeing last page; protected by freed_range_mutex */ + lsn_t last_freed_lsn; +public: /** @return whether doublewrite buffering is needed */ inline bool use_doublewrite() const; + /** @return whether a page has been freed */ + inline bool is_freed(uint32_t page); + + /** Apply freed_ranges to the file. + @param writable whether the file is writable + @return number of pages written or hole-punched */ + uint32_t flush_freed(bool writable); + /** Append a file to the chain of files of a space. 
@param[in] name file name of a file that is not open @param[in] handle file handle, or OS_FILE_CLOSED -- cgit v1.2.1 From 75096c84b44875b5d226a734fffb08578bc21e96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 6 Jun 2022 11:56:29 +0300 Subject: MDEV-28525 Some conditions around btr_latch_mode could be eliminated The types btr_latch_mode and mtr_memo_type_t are partly derived from rw_lock_type_t. Despite that, some code for converting between them is using conditions instead of bitwise arithmetics. Let us define btr_latch_mode in such a way that more conversions to rw_lock_type_t are possible by bitwise and. Some SPATIAL INDEX code that assumed !(BTR_MODIFY_TREE & BTR_MODIFY_LEAF) was adjusted. --- storage/innobase/btr/btr0cur.cc | 47 ++++++++++++++++++++------------------ storage/innobase/btr/btr0pcur.cc | 25 ++++++-------------- storage/innobase/btr/btr0sea.cc | 17 +++++++------- storage/innobase/gis/gis0sea.cc | 46 ++++++++++++++++--------------------- storage/innobase/include/btr0btr.h | 34 +++++++++++++-------------- storage/innobase/row/row0ins.cc | 14 +++++------- storage/innobase/row/row0row.cc | 2 +- storage/innobase/row/row0umod.cc | 5 ++-- 8 files changed, 87 insertions(+), 103 deletions(-) diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 2f033d1736b..c01d3f8134f 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -210,9 +210,6 @@ btr_cur_latch_leaves( btr_cur_t* cursor, mtr_t* mtr) { - rw_lock_type_t mode; - uint32_t left_page_no; - uint32_t right_page_no; buf_block_t* get_block; bool spatial; btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}}; @@ -230,7 +227,15 @@ btr_cur_latch_leaves( | MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); + const rw_lock_type_t mode = rw_lock_type_t( + latch_mode & (RW_X_LATCH | RW_S_LATCH)); + static_assert(ulint{RW_S_LATCH} == ulint{BTR_SEARCH_LEAF}, ""); + static_assert(ulint{RW_X_LATCH} == ulint{BTR_MODIFY_LEAF}, 
""); + static_assert(BTR_SEARCH_LEAF & BTR_SEARCH_TREE, ""); + switch (latch_mode) { + uint32_t left_page_no; + uint32_t right_page_no; case BTR_SEARCH_LEAF: case BTR_MODIFY_LEAF: case BTR_SEARCH_TREE: @@ -239,7 +244,6 @@ btr_cur_latch_leaves( = mtr_set_savepoint(mtr); } - mode = latch_mode == BTR_MODIFY_LEAF ? RW_X_LATCH : RW_S_LATCH; latch_leaves.savepoints[1] = mtr_set_savepoint(mtr); get_block = btr_block_get(*cursor->index, block->page.id().page_no(), mode, @@ -335,7 +339,6 @@ btr_cur_latch_leaves( case BTR_SEARCH_PREV: case BTR_MODIFY_PREV: - mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH; /* Because we are holding index->lock, no page splits or merges may run concurrently, and we may read FIL_PAGE_PREV from a buffer-fixed, unlatched page. */ @@ -787,8 +790,13 @@ btr_cur_optimistic_latch_leaves( left_page_no = btr_page_get_prev(block->page.frame); } - const rw_lock_type_t mode = *latch_mode == BTR_SEARCH_PREV - ? RW_S_LATCH : RW_X_LATCH; + static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); + static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); + static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) + == (RW_S_LATCH ^ RW_X_LATCH), ""); + + const rw_lock_type_t mode = rw_lock_type_t( + *latch_mode & (RW_X_LATCH | RW_S_LATCH)); if (left_page_no != FIL_NULL) { dberr_t err = DB_SUCCESS; @@ -1328,10 +1336,8 @@ btr_cur_search_to_nth_level_func( /* These flags are mutually exclusive, they are lumped together with the latch mode for historical reasons. It's possible for none of the flags to be set. 
*/ - switch (UNIV_EXPECT(latch_mode - & (BTR_INSERT | BTR_DELETE | BTR_DELETE_MARK), - 0)) { - case 0: + switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) { + default: btr_op = BTR_NO_OP; break; case BTR_INSERT: @@ -1346,10 +1352,6 @@ btr_cur_search_to_nth_level_func( case BTR_DELETE_MARK: btr_op = BTR_DELMARK_OP; break; - default: - /* only one of BTR_INSERT, BTR_DELETE, BTR_DELETE_MARK - should be specified at a time */ - ut_error; } /* Operations on the insert buffer tree cannot be buffered. */ @@ -1941,17 +1943,18 @@ retry_page_get: /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */ if (search_mode == PAGE_CUR_RTREE_INSERT && cursor->rtr_info->mbr_adj) { - if (latch_mode & BTR_MODIFY_LEAF) { + static_assert(BTR_MODIFY_TREE + == (8 | BTR_MODIFY_LEAF), ""); + + if (!(latch_mode & 8)) { /* Parent MBR needs updated, should retry with BTR_MODIFY_TREE */ goto func_exit; - } else if (latch_mode & BTR_MODIFY_TREE) { - rtree_parent_modified = true; - cursor->rtr_info->mbr_adj = false; - mbr_adj = true; - } else { - ut_ad(0); } + + rtree_parent_modified = true; + cursor->rtr_info->mbr_adj = false; + mbr_adj = true; } if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) { diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc index 9c63cfbd568..a6b9e59204e 100644 --- a/storage/innobase/btr/btr0pcur.cc +++ b/storage/innobase/btr/btr0pcur.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2021, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -547,26 +547,12 @@ btr_pcur_move_backward_from_page( ulint prev_page_no; page_t* page; buf_block_t* prev_block; - ulint latch_mode; - ulint latch_mode2; - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); ut_ad(btr_pcur_is_before_first_on_page(cursor)); ut_ad(!btr_pcur_is_before_first_in_tree(cursor)); - latch_mode = cursor->latch_mode; - - if (latch_mode == BTR_SEARCH_LEAF) { - - latch_mode2 = BTR_SEARCH_PREV; - - } else if (latch_mode == BTR_MODIFY_LEAF) { - - latch_mode2 = BTR_MODIFY_PREV; - } else { - latch_mode2 = 0; /* To eliminate compiler warning */ - ut_error; - } + const ulint latch_mode = cursor->latch_mode; + ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); btr_pcur_store_position(cursor, mtr); @@ -574,7 +560,10 @@ btr_pcur_move_backward_from_page( mtr_start(mtr); - cursor->restore_position(latch_mode2, mtr); + static_assert(BTR_SEARCH_PREV == (4 | BTR_SEARCH_LEAF), ""); + static_assert(BTR_MODIFY_PREV == (4 | BTR_MODIFY_LEAF), ""); + + cursor->restore_position(4 | latch_mode, mtr); page = btr_pcur_get_page(cursor); diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index 96bbd8af3c1..19c0d94b280 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -1124,15 +1124,14 @@ fail: block->page.fix(); block->page.set_accessed(); buf_page_make_young_if_needed(&block->page); - mtr_memo_type_t fix_type; - if (latch_mode == BTR_SEARCH_LEAF) { - fix_type = MTR_MEMO_PAGE_S_FIX; - ut_ad(!block->page.is_read_fixed()); - } else { - fix_type = MTR_MEMO_PAGE_X_FIX; - ut_ad(!block->page.is_io_fixed()); - } - mtr->memo_push(block, fix_type); + ut_ad(!block->page.is_read_fixed()); + ut_ad(latch_mode == BTR_SEARCH_LEAF + || !block->page.is_io_fixed()); + static_assert(ulint{MTR_MEMO_PAGE_S_FIX} == + ulint{BTR_SEARCH_LEAF}, ""); + 
static_assert(ulint{MTR_MEMO_PAGE_X_FIX} == + ulint{BTR_MODIFY_LEAF}, ""); + mtr->memo_push(block, mtr_memo_type_t(latch_mode)); ++buf_pool.stat.n_page_gets; diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc index 1e212524032..86c9d06be21 100644 --- a/storage/innobase/gis/gis0sea.cc +++ b/storage/innobase/gis/gis0sea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2016, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -105,7 +105,6 @@ rtr_pcur_getnext_from_path( ulint my_latch_mode; ulint skip_parent = false; bool new_split = false; - bool need_parent; bool for_delete = false; bool for_undo_ins = false; @@ -131,13 +130,12 @@ rtr_pcur_getnext_from_path( /* Whether need to track parent information. 
Only need so when we do tree altering operations (such as index page merge) */ - need_parent = ((my_latch_mode == BTR_MODIFY_TREE - || my_latch_mode == BTR_CONT_MODIFY_TREE) - && mode == PAGE_CUR_RTREE_LOCATE); + static_assert(BTR_CONT_MODIFY_TREE == (4 | BTR_MODIFY_TREE), ""); + + const bool need_parent = mode == PAGE_CUR_RTREE_LOCATE + && (my_latch_mode | 4) == BTR_CONT_MODIFY_TREE; if (!index_locked) { - ut_ad(latch_mode & BTR_SEARCH_LEAF - || latch_mode & BTR_MODIFY_LEAF); mtr_s_lock_index(index, mtr); } else { ut_ad(mtr->memo_contains_flagged(&index->lock, @@ -156,7 +154,7 @@ rtr_pcur_getnext_from_path( buf_block_t* block; node_seq_t path_ssn; const page_t* page; - ulint rw_latch = RW_X_LATCH; + rw_lock_type_t rw_latch; ulint tree_idx; mysql_mutex_lock(&rtr_info->rtr_path_mutex); @@ -215,18 +213,14 @@ rtr_pcur_getnext_from_path( One reason for pre-latch is that we might need to position some parent position (requires latch) during search */ if (level == 0) { - /* S latched for SEARCH_LEAF, and X latched - for MODIFY_LEAF */ - if (my_latch_mode <= BTR_MODIFY_LEAF) { - rw_latch = my_latch_mode; - } - - if (my_latch_mode == BTR_CONT_MODIFY_TREE - || my_latch_mode == BTR_MODIFY_TREE) { - rw_latch = RW_NO_LATCH; - } - - } else if (level == target_level) { + static_assert(ulint{BTR_SEARCH_LEAF} == + ulint{RW_S_LATCH}, ""); + static_assert(ulint{BTR_MODIFY_LEAF} == + ulint{RW_X_LATCH}, ""); + rw_latch = (my_latch_mode | 4) == BTR_CONT_MODIFY_TREE + ? 
RW_NO_LATCH + : rw_lock_type_t(my_latch_mode); + } else { rw_latch = RW_X_LATCH; } @@ -257,8 +251,7 @@ rtr_pcur_getnext_from_path( /* set up savepoint to record any locks to be taken */ rtr_info->tree_savepoints[tree_idx] = mtr_set_savepoint(mtr); - ut_ad(my_latch_mode == BTR_MODIFY_TREE - || my_latch_mode == BTR_CONT_MODIFY_TREE + ut_ad((my_latch_mode | 4) == BTR_CONT_MODIFY_TREE || !page_is_leaf(btr_cur_get_page(btr_cur)) || !btr_cur->page_cur.block->page.lock.have_any()); @@ -543,7 +536,8 @@ rtr_pcur_open( ulint low_match; rec_t* rec; - ut_ad(latch_mode & BTR_MODIFY_LEAF || latch_mode & BTR_MODIFY_TREE); + static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), ""); + ut_ad(latch_mode & BTR_MODIFY_LEAF); ut_ad(mode == PAGE_CUR_RTREE_LOCATE); /* Initialize the cursor */ @@ -566,7 +560,7 @@ rtr_pcur_open( btr_cursor->rtr_info->thr = btr_cursor->thr; } - if ((latch_mode & BTR_MODIFY_TREE) && index->lock.have_u_not_x()) { + if ((latch_mode & 8) && index->lock.have_u_not_x()) { index->lock.u_x_upgrade(SRW_LOCK_CALL); mtr->lock_upgrade(index->lock); } @@ -595,7 +589,7 @@ rtr_pcur_open( } /* Did not find matched row in first dive. Release latched block if any before search more pages */ - if (latch_mode & BTR_MODIFY_LEAF) { + if (!(latch_mode & 8)) { ulint tree_idx = btr_cursor->tree_height - 1; rtr_info_t* rtr_info = btr_cursor->rtr_info; @@ -610,7 +604,7 @@ rtr_pcur_open( bool ret = rtr_pcur_getnext_from_path( tuple, mode, btr_cursor, 0, latch_mode, - latch_mode & (BTR_MODIFY_TREE | BTR_ALREADY_S_LATCHED), + latch_mode & (8 | BTR_ALREADY_S_LATCHED), mtr); if (ret) { diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index 1b5c7e377d9..e4cfc42c88c 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -63,37 +63,37 @@ enum btr_latch_mode { BTR_MODIFY_LEAF = RW_X_LATCH, /** Obtain no latches. */ BTR_NO_LATCHES = RW_NO_LATCH, - /** Start modifying the entire B-tree. 
*/ - BTR_MODIFY_TREE = 33, - /** Continue modifying the entire B-tree. */ - BTR_CONT_MODIFY_TREE = 34, /** Search the previous record. */ - BTR_SEARCH_PREV = 35, + BTR_SEARCH_PREV = 4 | BTR_SEARCH_LEAF, /** Modify the previous record. */ - BTR_MODIFY_PREV = 36, + BTR_MODIFY_PREV = 4 | BTR_MODIFY_LEAF, /** Start searching the entire B-tree. */ - BTR_SEARCH_TREE = 37, + BTR_SEARCH_TREE = 8 | BTR_SEARCH_LEAF, + /** Start modifying the entire B-tree. */ + BTR_MODIFY_TREE = 8 | BTR_MODIFY_LEAF, /** Continue searching the entire B-tree. */ - BTR_CONT_SEARCH_TREE = 38, + BTR_CONT_SEARCH_TREE = 4 | BTR_SEARCH_TREE, + /** Continue modifying the entire B-tree. */ + BTR_CONT_MODIFY_TREE = 4 | BTR_MODIFY_TREE, /* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */ /** The search tuple will be inserted to the secondary index at the searched position. When the leaf page is not in the buffer pool, try to use the change buffer. */ - BTR_INSERT = 512, + BTR_INSERT = 64, /** Try to delete mark a secondary index leaf page record at the searched position using the change buffer when the page is not in the buffer pool. */ - BTR_DELETE_MARK = 4096, + BTR_DELETE_MARK = 128, /** Try to purge the record using the change buffer when the secondary index leaf page is not in the buffer pool. */ - BTR_DELETE = 8192, + BTR_DELETE = BTR_INSERT | BTR_DELETE_MARK, /** The caller is already holding dict_index_t::lock S-latch. */ - BTR_ALREADY_S_LATCHED = 16384, + BTR_ALREADY_S_LATCHED = 256, /** Search and S-latch a leaf page, assuming that the dict_index_t::lock S-latch is being held. */ BTR_SEARCH_LEAF_ALREADY_S_LATCHED = BTR_SEARCH_LEAF @@ -123,7 +123,7 @@ enum btr_latch_mode { /** In the case of BTR_MODIFY_TREE, the caller specifies the intention to delete record only. It is used to optimize block->lock range.*/ - BTR_LATCH_FOR_DELETE = 65536, + BTR_LATCH_FOR_DELETE = 512, /** Attempt to purge a secondary index record in the tree.
*/ BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE @@ -140,19 +140,19 @@ the insert buffer to speed up inserts */ /** In the case of BTR_MODIFY_TREE, the caller specifies the intention to insert record only. It is used to optimize block->lock range.*/ -#define BTR_LATCH_FOR_INSERT 32768U +#define BTR_LATCH_FOR_INSERT 4096U /** This flag is for undo insert of rtree. For rtree, we need this flag to find proper rec to undo insert.*/ -#define BTR_RTREE_UNDO_INS 131072U +#define BTR_RTREE_UNDO_INS 8192U /** In the case of BTR_MODIFY_LEAF, the caller intends to allocate or free the pages of externally stored fields. */ -#define BTR_MODIFY_EXTERNAL 262144U +#define BTR_MODIFY_EXTERNAL 16384U /** Try to delete mark the record at the searched position when the record is in spatial index */ -#define BTR_RTREE_DELETE_MARK 524288U +#define BTR_RTREE_DELETE_MARK 32768U #define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \ ((latch_mode) & ulint(~(BTR_INSERT \ diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index a002f104516..4fc40dca3f9 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -2860,10 +2860,6 @@ row_ins_sec_index_entry_low( the function will return in both low_match and up_match of the cursor sensible values */ - if (!thr_get_trx(thr)->check_unique_secondary) { - search_mode |= BTR_IGNORE_SEC_UNIQUE; - } - if (dict_index_is_spatial(index)) { cursor.index = index; rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); @@ -2874,8 +2870,9 @@ row_ins_sec_index_entry_low( search_mode, &cursor, 0, &mtr); - if (mode == BTR_MODIFY_LEAF && rtr_info.mbr_adj) { + if (search_mode == BTR_MODIFY_LEAF && rtr_info.mbr_adj) { mtr_commit(&mtr); + search_mode = mode = BTR_MODIFY_TREE; rtr_clean_rtr_info(&rtr_info, true); rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); @@ -2886,13 +2883,10 @@ row_ins_sec_index_entry_low( } else { index->set_modified(mtr); } - search_mode &= ulint(~BTR_MODIFY_LEAF); - 
search_mode |= BTR_MODIFY_TREE; err = btr_cur_search_to_nth_level( index, 0, entry, PAGE_CUR_RTREE_INSERT, search_mode, &cursor, 0, &mtr); - mode = BTR_MODIFY_TREE; } DBUG_EXECUTE_IF( @@ -2900,6 +2894,10 @@ row_ins_sec_index_entry_low( goto func_exit;}); } else { + if (!thr_get_trx(thr)->check_unique_secondary) { + search_mode |= BTR_IGNORE_SEC_UNIQUE; + } + err = btr_cur_search_to_nth_level( index, 0, entry, PAGE_CUR_LE, search_mode, diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc index b83b0ab1fef..9f9c81a4b5b 100644 --- a/storage/innobase/row/row0row.cc +++ b/storage/innobase/row/row0row.cc @@ -1306,7 +1306,7 @@ row_search_index_entry( switch (btr_pcur_get_btr_cur(pcur)->flag) { case BTR_CUR_DELETE_REF: - ut_ad(mode & BTR_DELETE); + ut_ad(!(~mode & BTR_DELETE)); ut_ad(!index->is_spatial()); return(ROW_NOT_DELETED_REF); diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index f18d7ab1be1..0f96e021d9f 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -693,12 +693,13 @@ row_undo_mod_del_unmark_sec_and_undo_update( due to avoid undel-mark a wrong rec in rolling back in partial update. Later, we could log some info in secondary index updates to avoid this. */ - ut_ad(mode & BTR_MODIFY_LEAF); + static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), ""); + ut_ad(!(mode & 8)); mode |= BTR_RTREE_DELETE_MARK; } try_again: - row_mtr_start(&mtr, index, !(mode & BTR_MODIFY_LEAF)); + row_mtr_start(&mtr, index, mode & 8); btr_cur->thr = thr; -- cgit v1.2.1 From 0b47c126e31cddda1e94588799599e138400bcf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 6 Jun 2022 14:03:22 +0300 Subject: MDEV-13542: Crashing on corrupted page is unhelpful The approach to handling corruption that was chosen by Oracle in commit 177d8b0c125b841c0650d27d735e3b87509dc286 is not really useful. 
Not only did it actually fail to prevent InnoDB from crashing, but it is making things worse by blocking attempts to rescue data from or rebuild a partially readable table. We will try to prevent crashes in a different way: by propagating errors up the call stack. We will never mark the clustered index persistently corrupted, so that data recovery may be attempted by reading from the table, or by rebuilding the table. This should also fix MDEV-13680 (crash on btr_page_alloc() failure); it was extensively tested with innodb_file_per_table=0 and a non-autoextend system tablespace. We should now avoid crashes in many cases, such as when a page cannot be read or allocated, or an inconsistency is detected when attempting to update multiple pages. We will not crash on double-free, such as on the recovery of DDL in system tablespace in case something was corrupted. Crashes on corrupted data are still possible. The fault injection mechanism that is introduced in the subsequent commit may help catch more of them. buf_page_import_corrupt_failure: Remove the fault injection, and instead corrupt some pages using Perl code in the tests. btr_cur_pessimistic_insert(): Always reserve extents (except for the change buffer), in order to prevent a subsequent allocation failure. btr_pcur_open_at_rnd_pos(): Merged to the only caller ibuf_merge_pages(). btr_assert_not_corrupted(), btr_corruption_report(): Remove. Similar checks are already part of btr_block_get(). FSEG_MAGIC_N_BYTES: Replaces FSEG_MAGIC_N_VALUE. dict_hdr_get(), trx_rsegf_get_new(), trx_undo_page_get(), trx_undo_page_get_s_latched(): Replaced with error-checking calls. trx_rseg_t::get(mtr_t*): Replaces trx_rsegf_get(). trx_rseg_header_create(): Let the caller update the TRX_SYS page if needed. trx_sys_create_sys_pages(): Merged with trx_sysf_create(). dict_check_tablespaces_and_store_max_id(): Do not access DICT_HDR_MAX_SPACE_ID, because it was already recovered in dict_boot(). 
Merge dict_check_sys_tables() with this function. dir_pathname(): Replaces os_file_make_new_pathname(). row_undo_ins_remove_sec(): Do not modify the undo page by adding a terminating NUL byte to the record. btr_decryption_failed(): Report decryption failures. dict_set_corrupted_by_space(), dict_set_encrypted_by_space(), dict_set_corrupted_index_cache_only(): Remove. dict_set_corrupted(): Remove the constant parameter dict_locked=false. Never flag the clustered index corrupted in SYS_INDEXES, because that would deny further access to the table. It might be possible to repair the table by executing ALTER TABLE or OPTIMIZE TABLE, in case no B-tree leaf page is corrupted. dict_table_skip_corrupt_index(), dict_table_next_uncorrupted_index(), row_purge_skip_uncommitted_virtual_index(): Remove, and refactor the callers to read dict_index_t::type only once. dict_table_is_corrupted(): Remove. dict_index_t::is_btree(): Determine if the index is a valid B-tree. BUF_GET_NO_LATCH, BUF_EVICT_IF_IN_POOL: Remove. UNIV_BTR_DEBUG: Remove. Inconsistencies will no longer trigger assertion failures; instead, error codes will be returned. buf_corrupt_page_release(): Replaced with a direct call to buf_pool.corrupted_evict(). fil_invalid_page_access_msg(): Never crash on an invalid read; let the caller of buf_page_get_gen() decide. btr_pcur_t::restore_position(): Propagate failure status to the caller by returning CORRUPTED. opt_search_plan_for_table(): Simplify the code. row_purge_del_mark(), row_purge_upd_exist_or_extern_func(), row_undo_ins_remove_sec_rec(), row_undo_mod_upd_del_sec(), row_undo_mod_del_mark_sec(): Avoid mem_heap_create()/mem_heap_free() when no secondary indexes exist. row_undo_mod_upd_exist_sec(): Simplify the code. row_upd_clust_step(), dict_load_table_one(): Return DB_TABLE_CORRUPT if the clustered index (and therefore the table) is corrupted, similar to what we do in row_insert_for_mysql(). fut_get_ptr(): Replace with buf_page_get_gen() calls.
buf_page_get_gen(): Return nullptr and *err=DB_CORRUPTION if the page is marked as freed. For modes other than BUF_GET_POSSIBLY_FREED or BUF_PEEK_IF_IN_POOL this will trigger a debug assertion failure. For BUF_GET_POSSIBLY_FREED, we will return nullptr for freed pages, so that the callers can be simplified. The purge of transaction history will be a new user of BUF_GET_POSSIBLY_FREED, to avoid crashes on corrupted data. buf_page_get_low(): Never crash on a corrupted page, but simply return nullptr. fseg_page_is_allocated(): Replaces fseg_page_is_free(). fts_drop_common_tables(): Return an error if the transaction was rolled back. fil_space_t::set_corrupted(): Report a tablespace as corrupted if it was not reported already. fil_space_t::io(): Invoke fil_space_t::set_corrupted() to report out-of-bounds page access or other errors. Clean up mtr_t::page_lock(). buf_page_get_low(): Validate the page identifier (to check for recently read corrupted pages) after acquiring the page latch. buf_page_t::read_complete(): Flag uninitialized (all-zero) pages with DB_FAIL. Return DB_PAGE_CORRUPTED on page number mismatch. mtr_t::defer_drop_ahi(): Renamed from mtr_defer_drop_ahi(). recv_sys_t::free_corrupted_page(): Only set_corrupt_fs() if any log records exist for the page. We do not mind if read-ahead produces corrupted (or all-zero) pages that were not actually needed during recovery. recv_recover_page(): Return whether the operation succeeded. recv_sys_t::recover_low(): Simplify the logic. Check for recovery error. Thanks to Matthias Leich for testing this extensively and to the authors of https://rr-project.org for making it easy to diagnose and fix any failures that were found during the testing.
--- extra/mariabackup/xtrabackup.cc | 4 +- .../encryption/r/innodb-bad-key-change.result | 4 +- .../encryption/r/innodb-bad-key-change2.result | 2 +- .../encryption/r/innodb-bad-key-change4.result | 2 +- .../encryption/r/innodb-compressed-blob.result | 2 +- .../suite/encryption/r/innodb-redo-badkey.result | 3 +- .../suite/encryption/t/innodb-bad-key-change.test | 4 +- .../suite/encryption/t/innodb-bad-key-change2.test | 2 +- .../suite/encryption/t/innodb-bad-key-change4.test | 2 +- .../suite/encryption/t/innodb-compressed-blob.test | 2 +- .../suite/encryption/t/innodb-redo-badkey.test | 3 +- mysql-test/suite/innodb/disabled.def | 13 - .../suite/innodb/r/create-index-debug.result | 23 - mysql-test/suite/innodb/r/import_corrupted.result | 1 + .../suite/innodb/r/innodb-wl5522-debug.result | 6 +- .../suite/innodb/r/page_id_innochecksum.result | 3 + .../suite/innodb/t/corrupted_during_recovery.test | 2 +- mysql-test/suite/innodb/t/create-index-debug.test | 34 - mysql-test/suite/innodb/t/import_corrupted.test | 1 + mysql-test/suite/innodb/t/innodb-wl5522-debug.test | 21 +- mysql-test/suite/innodb/t/innodb_bug14147491.test | 7 +- .../t/leaf_page_corrupted_during_recovery.test | 2 +- .../suite/innodb/t/page_id_innochecksum.test | 6 +- .../suite/innodb_zip/r/wl5522_debug_zip.result | 6 +- .../suite/innodb_zip/t/wl5522_debug_zip.test | 22 +- storage/innobase/CMakeLists.txt | 2 - storage/innobase/btr/btr0btr.cc | 1505 ++++++++------- storage/innobase/btr/btr0bulk.cc | 82 +- storage/innobase/btr/btr0cur.cc | 835 ++++----- storage/innobase/btr/btr0defragment.cc | 85 +- storage/innobase/btr/btr0pcur.cc | 129 +- storage/innobase/btr/btr0sea.cc | 20 +- storage/innobase/buf/buf0buf.cc | 411 ++-- storage/innobase/buf/buf0dblwr.cc | 24 +- storage/innobase/buf/buf0lru.cc | 13 +- storage/innobase/buf/buf0rea.cc | 45 +- storage/innobase/dict/dict0boot.cc | 86 +- storage/innobase/dict/dict0crea.cc | 64 +- storage/innobase/dict/dict0defrag_bg.cc | 5 +- storage/innobase/dict/dict0dict.cc | 
230 +-- storage/innobase/dict/dict0load.cc | 514 +++-- storage/innobase/dict/dict0stats.cc | 206 +- storage/innobase/dict/drop.cc | 7 +- storage/innobase/fil/fil0crypt.cc | 87 +- storage/innobase/fil/fil0fil.cc | 64 +- storage/innobase/fsp/fsp0fsp.cc | 1978 ++++++++++---------- storage/innobase/fts/fts0fts.cc | 91 +- storage/innobase/fut/fut0lst.cc | 175 +- storage/innobase/gis/gis0rtree.cc | 116 +- storage/innobase/gis/gis0sea.cc | 254 +-- storage/innobase/handler/ha_innodb.cc | 110 +- storage/innobase/handler/handler0alter.cc | 79 +- storage/innobase/handler/i_s.cc | 1 - storage/innobase/ibuf/ibuf0ibuf.cc | 707 ++++--- storage/innobase/include/btr0btr.h | 117 +- storage/innobase/include/btr0bulk.h | 4 +- storage/innobase/include/btr0cur.h | 39 +- storage/innobase/include/btr0pcur.h | 98 +- storage/innobase/include/btr0pcur.inl | 107 +- storage/innobase/include/buf0buf.h | 39 +- storage/innobase/include/dict0boot.h | 5 +- storage/innobase/include/dict0crea.h | 2 +- storage/innobase/include/dict0dict.h | 55 +- storage/innobase/include/dict0dict.inl | 15 +- storage/innobase/include/dict0mem.h | 19 +- storage/innobase/include/fil0fil.h | 11 +- storage/innobase/include/fsp0fsp.h | 97 +- storage/innobase/include/fsp0types.h | 4 +- storage/innobase/include/fts0types.h | 3 +- storage/innobase/include/fut0fut.h | 77 - storage/innobase/include/fut0lst.h | 58 +- storage/innobase/include/gis0rtree.h | 33 +- storage/innobase/include/ibuf0ibuf.h | 10 +- storage/innobase/include/lock0lock.inl | 1 - storage/innobase/include/log0recv.h | 19 +- storage/innobase/include/mtr0mtr.h | 14 +- storage/innobase/include/os0file.h | 19 +- storage/innobase/include/page0page.h | 69 +- storage/innobase/include/page0zip.h | 10 +- storage/innobase/include/trx0rec.h | 23 +- storage/innobase/include/trx0rseg.h | 95 +- storage/innobase/include/trx0rseg.inl | 64 - storage/innobase/include/trx0sys.h | 8 +- storage/innobase/include/trx0undo.h | 62 +- storage/innobase/include/trx0undo.inl | 24 +- 
storage/innobase/include/univ.i | 3 - storage/innobase/lock/lock0lock.cc | 8 +- storage/innobase/log/log0recv.cc | 130 +- storage/innobase/mtr/mtr0mtr.cc | 22 +- storage/innobase/os/os0file.cc | 40 - storage/innobase/page/page0cur.cc | 40 +- storage/innobase/page/page0page.cc | 184 +- storage/innobase/page/page0zip.cc | 29 +- storage/innobase/pars/pars0opt.cc | 61 +- storage/innobase/row/row0import.cc | 186 +- storage/innobase/row/row0ins.cc | 81 +- storage/innobase/row/row0log.cc | 119 +- storage/innobase/row/row0merge.cc | 92 +- storage/innobase/row/row0mysql.cc | 108 +- storage/innobase/row/row0purge.cc | 173 +- storage/innobase/row/row0row.cc | 13 +- storage/innobase/row/row0sel.cc | 162 +- storage/innobase/row/row0uins.cc | 45 +- storage/innobase/row/row0umod.cc | 161 +- storage/innobase/row/row0undo.cc | 27 +- storage/innobase/row/row0upd.cc | 13 +- storage/innobase/srv/srv0start.cc | 51 +- storage/innobase/trx/trx0purge.cc | 409 ++-- storage/innobase/trx/trx0rec.cc | 138 +- storage/innobase/trx/trx0rseg.cc | 414 ++-- storage/innobase/trx/trx0sys.cc | 126 +- storage/innobase/trx/trx0trx.cc | 173 +- storage/innobase/trx/trx0undo.cc | 496 +++-- 113 files changed, 6055 insertions(+), 6752 deletions(-) delete mode 100644 mysql-test/suite/innodb/disabled.def delete mode 100644 mysql-test/suite/innodb/r/create-index-debug.result delete mode 100644 mysql-test/suite/innodb/t/create-index-debug.test delete mode 100644 storage/innobase/include/fut0fut.h delete mode 100644 storage/innobase/include/trx0rseg.inl diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 512c8d21267..adb13a7c119 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -584,8 +584,8 @@ void CorruptedPages::zero_out_free_pages() space_it->second.pages.begin(); page_it != space_it->second.pages.end(); ++page_it) { - bool is_free= fseg_page_is_free(space, *page_it); - if (!is_free) { + if (fseg_page_is_allocated(space, *page_it)) + { 
space_info_t &space_info = non_free_pages[space_id]; space_info.pages.insert(*page_it); if (space_info.space_name.empty()) diff --git a/mysql-test/suite/encryption/r/innodb-bad-key-change.result b/mysql-test/suite/encryption/r/innodb-bad-key-change.result index 255dde207ed..1ca9d784d57 100644 --- a/mysql-test/suite/encryption/r/innodb-bad-key-change.result +++ b/mysql-test/suite/encryption/r/innodb-bad-key-change.result @@ -1,8 +1,8 @@ call mtr.add_suppression("Plugin 'file_key_management' init function returned error"); call mtr.add_suppression("Plugin 'file_key_management' registration.*failed"); call mtr.add_suppression("InnoDB: Table `test`\\.`t[12]` (has an unreadable root page|is corrupted)"); -call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t[12]\\.ibd' cannot be decrypted\\."); -call mtr.add_suppression("failed to read or decrypt \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\]"); +call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t[12]\\.ibd' cannot be decrypted; key_version=1"); +call mtr.add_suppression("failed to read \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\]"); call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=3\\] in file .*test.t1.ibd looks corrupted; key_version=1"); call mtr.add_suppression("InnoDB: Table `test`\\.`t[12]` is corrupted"); call mtr.add_suppression("File '.*mysql-test.std_data.keysbad3\\.txt' not found"); diff --git a/mysql-test/suite/encryption/r/innodb-bad-key-change2.result b/mysql-test/suite/encryption/r/innodb-bad-key-change2.result index af1028f1331..1c19525638f 100644 --- a/mysql-test/suite/encryption/r/innodb-bad-key-change2.result +++ b/mysql-test/suite/encryption/r/innodb-bad-key-change2.result @@ -1,5 +1,5 @@ call mtr.add_suppression("InnoDB: Table `test`\\.`t1` (has an unreadable root page|is corrupted|does not exist.*is 
trying to rename)"); -call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t1(new)?\\.ibd' cannot be decrypted\\."); +call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t1(new)?\\.ibd' cannot be decrypted; key_version=1"); call mtr.add_suppression("failed to read or decrypt \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\]"); call mtr.add_suppression("Couldn't load plugins from 'file_key_management"); call mtr.add_suppression("InnoDB: Tablespace for table \`test\`.\`t1\` is set as discarded\\."); diff --git a/mysql-test/suite/encryption/r/innodb-bad-key-change4.result b/mysql-test/suite/encryption/r/innodb-bad-key-change4.result index ad218457068..7d90b3b42ae 100644 --- a/mysql-test/suite/encryption/r/innodb-bad-key-change4.result +++ b/mysql-test/suite/encryption/r/innodb-bad-key-change4.result @@ -1,5 +1,5 @@ call mtr.add_suppression("InnoDB: Table `test`\\.`t1` (has an unreadable root page|is corrupted)"); -call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t1\\.ibd' cannot be decrypted\\."); +call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t1\\.ibd' cannot be decrypted; key_version=1"); call mtr.add_suppression("failed to read or decrypt \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\]"); call mtr.add_suppression("Couldn't load plugins from 'file_key_management"); call mtr.add_suppression("InnoDB: Table `test`\\.`t1` is corrupted"); diff --git a/mysql-test/suite/encryption/r/innodb-compressed-blob.result b/mysql-test/suite/encryption/r/innodb-compressed-blob.result index de20b554a67..39f482b2422 100644 --- a/mysql-test/suite/encryption/r/innodb-compressed-blob.result +++ b/mysql-test/suite/encryption/r/innodb-compressed-blob.result @@ -1,4 +1,4 @@ -call mtr.add_suppression("InnoDB: 
The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t[123]\\.ibd' cannot be decrypted\\."); +call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t[123]\\.ibd' cannot be decrypted; key_version=1"); call mtr.add_suppression("failed to read or decrypt \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\]"); call mtr.add_suppression("InnoDB: Unable to decompress ..test.t[1-3]\\.ibd\\[page id: space=[1-9][0-9]*, page number=[0-9]+\\]"); call mtr.add_suppression("InnoDB: Table `test`\\.`t[12]` is corrupted"); diff --git a/mysql-test/suite/encryption/r/innodb-redo-badkey.result b/mysql-test/suite/encryption/r/innodb-redo-badkey.result index 87377a01479..b1c4ae913fa 100644 --- a/mysql-test/suite/encryption/r/innodb-redo-badkey.result +++ b/mysql-test/suite/encryption/r/innodb-redo-badkey.result @@ -1,6 +1,7 @@ call mtr.add_suppression("Plugin 'file_key_management'"); call mtr.add_suppression("Plugin 'InnoDB' init function returned error."); -call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t[1-4]\\.ibd' cannot be decrypted"); +call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t[1-4]\\.ibd' cannot be decrypted; key_version=1"); +call mtr.add_suppression("InnoDB: File '.*test/t[1234]\\.ibd' is corrupted"); call mtr.add_suppression("failed to read or decrypt \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\]"); call mtr.add_suppression("InnoDB: Unable to decompress .*.test.t[12]\\.ibd\\[page id: space=[1-9][0-9]*, page number=[0-9]+\\]"); call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed read of file '.*test.t[12]\\.ibd'"); diff --git a/mysql-test/suite/encryption/t/innodb-bad-key-change.test b/mysql-test/suite/encryption/t/innodb-bad-key-change.test index a9a32a3d6fc..30beff4bd85 100644 --- 
a/mysql-test/suite/encryption/t/innodb-bad-key-change.test +++ b/mysql-test/suite/encryption/t/innodb-bad-key-change.test @@ -11,8 +11,8 @@ call mtr.add_suppression("Plugin 'file_key_management' init function returned error"); call mtr.add_suppression("Plugin 'file_key_management' registration.*failed"); call mtr.add_suppression("InnoDB: Table `test`\\.`t[12]` (has an unreadable root page|is corrupted)"); -call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t[12]\\.ibd' cannot be decrypted\\."); -call mtr.add_suppression("failed to read or decrypt \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\]"); +call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t[12]\\.ibd' cannot be decrypted; key_version=1"); +call mtr.add_suppression("failed to read \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\]"); call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=3\\] in file .*test.t1.ibd looks corrupted; key_version=1"); call mtr.add_suppression("InnoDB: Table `test`\\.`t[12]` is corrupted"); call mtr.add_suppression("File '.*mysql-test.std_data.keysbad3\\.txt' not found"); diff --git a/mysql-test/suite/encryption/t/innodb-bad-key-change2.test b/mysql-test/suite/encryption/t/innodb-bad-key-change2.test index 19399b1e891..58787c65ef1 100644 --- a/mysql-test/suite/encryption/t/innodb-bad-key-change2.test +++ b/mysql-test/suite/encryption/t/innodb-bad-key-change2.test @@ -9,7 +9,7 @@ # MDEV-8727: Server/InnoDB hangs on shutdown after trying to read an encrypted table with a wrong key # call mtr.add_suppression("InnoDB: Table `test`\\.`t1` (has an unreadable root page|is corrupted|does not exist.*is trying to rename)"); -call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t1(new)?\\.ibd' cannot be decrypted\\."); +call 
mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t1(new)?\\.ibd' cannot be decrypted; key_version=1"); call mtr.add_suppression("failed to read or decrypt \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\]"); # Suppression for builds where file_key_management plugin is linked statically call mtr.add_suppression("Couldn't load plugins from 'file_key_management"); diff --git a/mysql-test/suite/encryption/t/innodb-bad-key-change4.test b/mysql-test/suite/encryption/t/innodb-bad-key-change4.test index 58517f14978..fdcffa25428 100644 --- a/mysql-test/suite/encryption/t/innodb-bad-key-change4.test +++ b/mysql-test/suite/encryption/t/innodb-bad-key-change4.test @@ -8,7 +8,7 @@ # call mtr.add_suppression("InnoDB: Table `test`\\.`t1` (has an unreadable root page|is corrupted)"); -call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t1\\.ibd' cannot be decrypted\\."); +call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t1\\.ibd' cannot be decrypted; key_version=1"); call mtr.add_suppression("failed to read or decrypt \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\]"); # Suppression for builds where file_key_management plugin is linked statically call mtr.add_suppression("Couldn't load plugins from 'file_key_management"); diff --git a/mysql-test/suite/encryption/t/innodb-compressed-blob.test b/mysql-test/suite/encryption/t/innodb-compressed-blob.test index 261fdd73aa1..1bc9e2a879a 100644 --- a/mysql-test/suite/encryption/t/innodb-compressed-blob.test +++ b/mysql-test/suite/encryption/t/innodb-compressed-blob.test @@ -4,7 +4,7 @@ # embedded does not support restart -- source include/not_embedded.inc -call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t[123]\\.ibd' cannot be decrypted\\."); +call 
mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t[123]\\.ibd' cannot be decrypted; key_version=1"); call mtr.add_suppression("failed to read or decrypt \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\]"); call mtr.add_suppression("InnoDB: Unable to decompress ..test.t[1-3]\\.ibd\\[page id: space=[1-9][0-9]*, page number=[0-9]+\\]"); call mtr.add_suppression("InnoDB: Table `test`\\.`t[12]` is corrupted"); diff --git a/mysql-test/suite/encryption/t/innodb-redo-badkey.test b/mysql-test/suite/encryption/t/innodb-redo-badkey.test index 09ad7a7d5a3..81febb92c5b 100644 --- a/mysql-test/suite/encryption/t/innodb-redo-badkey.test +++ b/mysql-test/suite/encryption/t/innodb-redo-badkey.test @@ -9,7 +9,8 @@ call mtr.add_suppression("Plugin 'file_key_management'"); call mtr.add_suppression("Plugin 'InnoDB' init function returned error."); -call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t[1-4]\\.ibd' cannot be decrypted"); +call mtr.add_suppression("InnoDB: The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*test.t[1-4]\\.ibd' cannot be decrypted; key_version=1"); +call mtr.add_suppression("InnoDB: File '.*test/t[1234]\\.ibd' is corrupted"); call mtr.add_suppression("failed to read or decrypt \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\]"); call mtr.add_suppression("InnoDB: Unable to decompress .*.test.t[12]\\.ibd\\[page id: space=[1-9][0-9]*, page number=[0-9]+\\]"); call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed read of file '.*test.t[12]\\.ibd'"); diff --git a/mysql-test/suite/innodb/disabled.def b/mysql-test/suite/innodb/disabled.def deleted file mode 100644 index 35c941f8af7..00000000000 --- a/mysql-test/suite/innodb/disabled.def +++ /dev/null @@ -1,13 +0,0 @@ -############################################################################## -# -# List the test cases 
that are to be disabled temporarily. -# -# Separate the test case name and the comment with ':'. -# -# : BUG# -# -# Do not use any TAB characters for whitespace. -# -############################################################################## - -create-index-debug : MDEV-13680 InnoDB may crash when btr_page_alloc() fails diff --git a/mysql-test/suite/innodb/r/create-index-debug.result b/mysql-test/suite/innodb/r/create-index-debug.result deleted file mode 100644 index 9d266c68af3..00000000000 --- a/mysql-test/suite/innodb/r/create-index-debug.result +++ /dev/null @@ -1,23 +0,0 @@ -SET @saved_debug_dbug = @@SESSION.debug_dbug; -# -#BUG#21326304 INNODB ONLINE ALTER TABLE ENDS IN CRASH ON DISK FULL -# -CREATE TABLE t1(f1 CHAR(255) NOT NULL, f2 CHAR(255) NOT NULL, f3 -CHAR(255) NOT NULL, f4 CHAR(255) NOT NULL, f5 CHAR(255) NOT NULL,f6 -CHAR(255) NOT NULL, f7 CHAR(255) NOT NULL, f8 CHAR(255) NOT NULL,f9 -CHAR(255) NOT NULL, f10 CHAR(255) NOT NULL, f11 CHAR(255) NOT NULL,f12 -CHAR(255) NOT NULL, f13 CHAR(255) NOT NULL, f14 CHAR(255) NOT NULL,f15 -CHAR(255) NOT NULL, f16 CHAR(255) NOT NULL, f17 CHAR(255) NOT NULL,f18 -CHAR(255) NOT NULL) -ENGINE=INNODB ROW_FORMAT=DYNAMIC; -INSERT INTO t1 -VALUES('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r'); -INSERT INTO t1 SELECT * FROM t1; -INSERT INTO t1 SELECT * FROM t1; -INSERT INTO t1 SELECT * FROM t1; -INSERT INTO t1 SELECT * FROM t1; -SET debug_dbug = '+d,disk_is_full'; -ALTER TABLE t1 FORCE, ALGORITHM=INPLACE; -ERROR HY000: The table 't1' is full -SET debug_dbug= @saved_debug_dbug; -DROP TABLE t1; diff --git a/mysql-test/suite/innodb/r/import_corrupted.result b/mysql-test/suite/innodb/r/import_corrupted.result index 149a48dccfe..bc24a28f22f 100644 --- a/mysql-test/suite/innodb/r/import_corrupted.result +++ b/mysql-test/suite/innodb/r/import_corrupted.result @@ -1,6 +1,7 @@ call mtr.add_suppression("Table `test`.`t2` should have 2 indexes but the tablespace has 1 indexes"); call 
mtr.add_suppression("Index for table 't2' is corrupt; try to repair it"); call mtr.add_suppression("Trying to read .* bytes at .* outside the bounds of the file: \\..test.t2\\.ibd"); +call mtr.add_suppression("InnoDB: File '.*test/t2\\.ibd' is corrupted"); CREATE TABLE t1 ( id INT AUTO_INCREMENT PRIMARY KEY, not_id INT, diff --git a/mysql-test/suite/innodb/r/innodb-wl5522-debug.result b/mysql-test/suite/innodb/r/innodb-wl5522-debug.result index 2973e5de550..08f4bb93899 100644 --- a/mysql-test/suite/innodb/r/innodb-wl5522-debug.result +++ b/mysql-test/suite/innodb/r/innodb-wl5522-debug.result @@ -9,6 +9,8 @@ call mtr.add_suppression("InnoDB: Page for tablespace "); call mtr.add_suppression("InnoDB: Invalid FSP_SPACE_FLAGS="); call mtr.add_suppression("InnoDB: Unknown index id .* on page"); call mtr.add_suppression("InnoDB: Cannot save statistics for table `test`\\.`t1` because the \\.ibd file is missing"); +call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed read of file '.*ibdata1' page"); +call mtr.add_suppression("InnoDB: File '.*ibdata1' is corrupted"); FLUSH TABLES; SET GLOBAL innodb_file_per_table = 1; CREATE TABLE t1 (c1 INT) ENGINE = InnoDB; @@ -862,10 +864,8 @@ ALTER TABLE t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM t1; ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files -SET SESSION debug_dbug="+d,buf_page_import_corrupt_failure"; ALTER TABLE t1 IMPORT TABLESPACE; -ERROR HY000: Internal error: Cannot reset LSNs in table `test`.`t1` : Data structure corruption -SET SESSION debug_dbug=@saved_debug_dbug; +ERROR HY000: Index for table 't1' is corrupt; try to repair it DROP TABLE t1; unlink: t1.ibd unlink: t1.cfg diff --git a/mysql-test/suite/innodb/r/page_id_innochecksum.result b/mysql-test/suite/innodb/r/page_id_innochecksum.result index e2c13442fe6..7a5f44b21e6 100644 --- a/mysql-test/suite/innodb/r/page_id_innochecksum.result +++ b/mysql-test/suite/innodb/r/page_id_innochecksum.result @@ -3,4 
+3,7 @@ create table t1(f1 int not null)engine=innodb; insert into t1 values(1), (2), (3); # Change the page offset FOUND 1 /page id mismatch/ in result.log +SET GLOBAL innodb_purge_rseg_truncate_frequency=1; +InnoDB 0 transactions not purged drop table t1; +call mtr.add_suppression("InnoDB: Failed to read page 3 from file '.*test/t1\\.ibd': Page read from tablespace is corrupted\\."); diff --git a/mysql-test/suite/innodb/t/corrupted_during_recovery.test b/mysql-test/suite/innodb/t/corrupted_during_recovery.test index 31fd1f18b8a..345c86badb2 100644 --- a/mysql-test/suite/innodb/t/corrupted_during_recovery.test +++ b/mysql-test/suite/innodb/t/corrupted_during_recovery.test @@ -6,8 +6,8 @@ call mtr.add_suppression("Plugin 'InnoDB' init function returned error"); call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed"); call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed read of file '.*test.t1\\.ibd' page"); call mtr.add_suppression("InnoDB: Failed to read page 3 from file '.*test.t1\\.ibd': Page read from tablespace is corrupted."); -call mtr.add_suppression("InnoDB: Background Page read failed to read or decrypt \\[page id: space=\\d+, page number=3\\]"); call mtr.add_suppression("InnoDB: Table `test`.`t1` is corrupted. 
Please drop the table and recreate."); +call mtr.add_suppression("InnoDB: File '.*test/t1\\.ibd' is corrupted"); --enable_query_log let INNODB_PAGE_SIZE=`select @@innodb_page_size`; diff --git a/mysql-test/suite/innodb/t/create-index-debug.test b/mysql-test/suite/innodb/t/create-index-debug.test deleted file mode 100644 index 9ea416fbe1e..00000000000 --- a/mysql-test/suite/innodb/t/create-index-debug.test +++ /dev/null @@ -1,34 +0,0 @@ ---source include/have_innodb.inc ---source include/have_innodb_16k.inc ---source include/have_debug.inc - -SET @saved_debug_dbug = @@SESSION.debug_dbug; - ---echo # ---echo #BUG#21326304 INNODB ONLINE ALTER TABLE ENDS IN CRASH ON DISK FULL ---echo # -CREATE TABLE t1(f1 CHAR(255) NOT NULL, f2 CHAR(255) NOT NULL, f3 -CHAR(255) NOT NULL, f4 CHAR(255) NOT NULL, f5 CHAR(255) NOT NULL,f6 -CHAR(255) NOT NULL, f7 CHAR(255) NOT NULL, f8 CHAR(255) NOT NULL,f9 -CHAR(255) NOT NULL, f10 CHAR(255) NOT NULL, f11 CHAR(255) NOT NULL,f12 -CHAR(255) NOT NULL, f13 CHAR(255) NOT NULL, f14 CHAR(255) NOT NULL,f15 -CHAR(255) NOT NULL, f16 CHAR(255) NOT NULL, f17 CHAR(255) NOT NULL,f18 -CHAR(255) NOT NULL) -ENGINE=INNODB ROW_FORMAT=DYNAMIC; - -INSERT INTO t1 -VALUES('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r'); - -INSERT INTO t1 SELECT * FROM t1; -INSERT INTO t1 SELECT * FROM t1; -INSERT INTO t1 SELECT * FROM t1; -INSERT INTO t1 SELECT * FROM t1; - -SET debug_dbug = '+d,disk_is_full'; - ---error ER_RECORD_FILE_FULL -ALTER TABLE t1 FORCE, ALGORITHM=INPLACE; - -SET debug_dbug= @saved_debug_dbug; - -DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/import_corrupted.test b/mysql-test/suite/innodb/t/import_corrupted.test index 3a9b9a40493..57c8c6dd9be 100644 --- a/mysql-test/suite/innodb/t/import_corrupted.test +++ b/mysql-test/suite/innodb/t/import_corrupted.test @@ -3,6 +3,7 @@ call mtr.add_suppression("Table `test`.`t2` should have 2 indexes but the tablespace has 1 indexes"); call mtr.add_suppression("Index for table 't2' is 
corrupt; try to repair it"); call mtr.add_suppression("Trying to read .* bytes at .* outside the bounds of the file: \\..test.t2\\.ibd"); +call mtr.add_suppression("InnoDB: File '.*test/t2\\.ibd' is corrupted"); let MYSQLD_DATADIR = `SELECT @@datadir`; diff --git a/mysql-test/suite/innodb/t/innodb-wl5522-debug.test b/mysql-test/suite/innodb/t/innodb-wl5522-debug.test index b460cba9322..76c7346b521 100644 --- a/mysql-test/suite/innodb/t/innodb-wl5522-debug.test +++ b/mysql-test/suite/innodb/t/innodb-wl5522-debug.test @@ -29,6 +29,8 @@ call mtr.add_suppression("InnoDB: Page for tablespace "); call mtr.add_suppression("InnoDB: Invalid FSP_SPACE_FLAGS="); call mtr.add_suppression("InnoDB: Unknown index id .* on page"); call mtr.add_suppression("InnoDB: Cannot save statistics for table `test`\\.`t1` because the \\.ibd file is missing"); +call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed read of file '.*ibdata1' page"); +call mtr.add_suppression("InnoDB: File '.*ibdata1' is corrupted"); FLUSH TABLES; let MYSQLD_DATADIR =`SELECT @@datadir`; @@ -1173,20 +1175,29 @@ ALTER TABLE t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM t1; # Restore files +let INNODB_PAGE_SIZE=`select @@innodb_page_size`; + perl; do "$ENV{MTR_SUITE_DIR}/include/innodb-util.pl"; ib_restore_tablespaces("test", "t1"); +my $page; +my $ps = $ENV{INNODB_PAGE_SIZE}; +my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd"; +open(FILE, "+<$file") || die "Unable to open $file"; +binmode FILE; +sysseek(FILE, 3*$ps, 0) || die "Unable to seek $file\n"; +die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps; +substr($page,24,2)='42'; +sysseek(FILE, 3*$ps, 0) || die "Unable to rewind $file\n"; +syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n"; +close(FILE) || die "Unable to close $file"; EOF -SET SESSION debug_dbug="+d,buf_page_import_corrupt_failure"; - --replace_regex /'.*t1.cfg'/'t1.cfg'/ ---error ER_INTERNAL_ERROR +--error ER_NOT_KEYFILE ALTER TABLE t1 IMPORT 
TABLESPACE; -SET SESSION debug_dbug=@saved_debug_dbug; - DROP TABLE t1; perl; diff --git a/mysql-test/suite/innodb/t/innodb_bug14147491.test b/mysql-test/suite/innodb/t/innodb_bug14147491.test index c6e4f01a642..3c37f1b7cce 100644 --- a/mysql-test/suite/innodb/t/innodb_bug14147491.test +++ b/mysql-test/suite/innodb/t/innodb_bug14147491.test @@ -12,6 +12,7 @@ call mtr.add_suppression("InnoDB: Table `test`\\.`t1` is corrupted\\. Please dro call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed read of file '.*test.t1\\.ibd' page"); call mtr.add_suppression("InnoDB: We detected index corruption in an InnoDB type table"); call mtr.add_suppression("Index for table 't1' is corrupt; try to repair it"); +call mtr.add_suppression("InnoDB: File '.*test/t1\\.ibd' is corrupted"); --enable_query_log --echo # Ensure that purge will not crash on the table after we corrupt it. @@ -70,13 +71,13 @@ EOF --echo # Now t1 is corrupted but we should not crash ---error 1030,1712,1932 +--error ER_GET_ERRNO,ER_NOT_KEYFILE,ER_INDEX_CORRUPT,ER_NO_SUCH_TABLE_IN_ENGINE SELECT * FROM t1; ---error 126,1030,1034,1712,1932 +--error 126,ER_GET_ERRNO,ER_NOT_KEYFILE,ER_INDEX_CORRUPT,ER_NO_SUCH_TABLE_IN_ENGINE INSERT INTO t1(b) VALUES('abcdef'); ---error 1030,1712,1932 +--error ER_GET_ERRNO,ER_NOT_KEYFILE,ER_INDEX_CORRUPT,ER_NO_SUCH_TABLE_IN_ENGINE UPDATE t1 set b = 'deadbeef' where a = 1; --echo # Cleanup, this must be possible diff --git a/mysql-test/suite/innodb/t/leaf_page_corrupted_during_recovery.test b/mysql-test/suite/innodb/t/leaf_page_corrupted_during_recovery.test index ac5f14ba9ad..d4391f89ec1 100644 --- a/mysql-test/suite/innodb/t/leaf_page_corrupted_during_recovery.test +++ b/mysql-test/suite/innodb/t/leaf_page_corrupted_during_recovery.test @@ -3,13 +3,13 @@ --disable_query_log call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed read of file '.*test.t1\\.ibd' page"); -call mtr.add_suppression("InnoDB: Background Page read failed to read 
or decrypt \\[page id: space=\\d+, page number=19\\]"); call mtr.add_suppression("\\[ERROR\\] InnoDB: Failed to read page 19 from file '.*test.t1\\.ibd': Page read from tablespace is corrupted\\."); call mtr.add_suppression("\\[ERROR\\] InnoDB: Plugin initialization aborted at srv0start\\.cc.* with error Data structure corruption"); call mtr.add_suppression("\\[ERROR\\] Plugin 'InnoDB' (init function|registration)"); call mtr.add_suppression("\\[ERROR\\] InnoDB: We detected index corruption"); call mtr.add_suppression("\\[ERROR\\] (mysqld|mariadbd).*: Index for table 't1' is corrupt; try to repair it"); call mtr.add_suppression("InnoDB: btr_pcur_open_low level: 0 table: `test`\\.`t1` index: `PRIMARY`"); +call mtr.add_suppression("InnoDB: File '.*test/t1\\.ibd' is corrupted"); --enable_query_log CREATE TABLE t1 (pk INT PRIMARY KEY, c CHAR(255))ENGINE=InnoDB STATS_PERSISTENT=0; diff --git a/mysql-test/suite/innodb/t/page_id_innochecksum.test b/mysql-test/suite/innodb/t/page_id_innochecksum.test index f5166018dd1..2a2c14844fa 100644 --- a/mysql-test/suite/innodb/t/page_id_innochecksum.test +++ b/mysql-test/suite/innodb/t/page_id_innochecksum.test @@ -61,9 +61,11 @@ let SEARCH_PATTERN=page id mismatch; --source include/search_pattern_in_file.inc --remove_file $resultlog -# prevent purge from crashing on page ID mismatch -let $restart_parameters=--innodb-force-recovery=2; +let $restart_parameters=--innodb-force-recovery=1; --source include/start_mysqld.inc +SET GLOBAL innodb_purge_rseg_truncate_frequency=1; +--source include/wait_all_purged.inc drop table t1; +call mtr.add_suppression("InnoDB: Failed to read page 3 from file '.*test/t1\\.ibd': Page read from tablespace is corrupted\\."); let $restart_parameters=; --source include/restart_mysqld.inc diff --git a/mysql-test/suite/innodb_zip/r/wl5522_debug_zip.result b/mysql-test/suite/innodb_zip/r/wl5522_debug_zip.result index c69c30c5b25..63369c9b41f 100644 --- a/mysql-test/suite/innodb_zip/r/wl5522_debug_zip.result +++ 
b/mysql-test/suite/innodb_zip/r/wl5522_debug_zip.result @@ -9,6 +9,8 @@ call mtr.add_suppression("InnoDB: The error means"); call mtr.add_suppression("InnoDB: Cannot open datafile .*t1\\.ibd"); call mtr.add_suppression("InnoDB: Ignoring tablespace for test/t1 "); call mtr.add_suppression("InnoDB: Cannot save statistics for table `test`\\.`t1` because the \\.ibd file is missing"); +call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed read of file '.*ibdata1' page"); +call mtr.add_suppression("InnoDB: File '.*ibdata1' is corrupted"); FLUSH TABLES; SET SESSION innodb_strict_mode=1; CREATE TABLE t1 (c1 INT) ENGINE = Innodb @@ -325,10 +327,8 @@ ALTER TABLE t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM t1; ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files -SET SESSION debug_dbug="+d,buf_page_import_corrupt_failure"; ALTER TABLE t1 IMPORT TABLESPACE; -ERROR HY000: Internal error: Cannot reset LSNs in table `test`.`t1` : Data structure corruption -SET SESSION debug_dbug=@saved_debug_dbug; +Got one of the listed errors DROP TABLE t1; unlink: t1.ibd unlink: t1.cfg diff --git a/mysql-test/suite/innodb_zip/t/wl5522_debug_zip.test b/mysql-test/suite/innodb_zip/t/wl5522_debug_zip.test index 8d328dea576..c30a4f8f000 100644 --- a/mysql-test/suite/innodb_zip/t/wl5522_debug_zip.test +++ b/mysql-test/suite/innodb_zip/t/wl5522_debug_zip.test @@ -25,6 +25,8 @@ call mtr.add_suppression("InnoDB: The error means"); call mtr.add_suppression("InnoDB: Cannot open datafile .*t1\\.ibd"); call mtr.add_suppression("InnoDB: Ignoring tablespace for test/t1 "); call mtr.add_suppression("InnoDB: Cannot save statistics for table `test`\\.`t1` because the \\.ibd file is missing"); +call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed read of file '.*ibdata1' page"); +call mtr.add_suppression("InnoDB: File '.*ibdata1' is corrupted"); FLUSH TABLES; let MYSQLD_DATADIR =`SELECT @@datadir`; @@ -435,19 +437,29 @@ ALTER 
TABLE t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM t1; # Restore files +let INNODB_PAGE_SIZE=`select @@innodb_page_size`; +let MYSQLD_DATADIR=`select @@datadir`; + perl; do "$ENV{MTR_SUITE_DIR}/../innodb/include/innodb-util.pl"; ib_restore_tablespaces("test", "t1"); +my $page; +my $ps = $ENV{INNODB_PAGE_SIZE}; +my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd"; +open(FILE, "+<$file") || die "Unable to open $file"; +binmode FILE; +sysseek(FILE, 4*$ps, 0) || die "Unable to seek $file\n"; +die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps; +substr($page,24,2)='42'; +sysseek(FILE, 4*$ps, 0) || die "Unable to rewind $file\n"; +syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n"; +close(FILE) || die "Unable to close $file"; EOF -SET SESSION debug_dbug="+d,buf_page_import_corrupt_failure"; - # Following alter is failing ---error ER_INTERNAL_ERROR +--error ER_INTERNAL_ERROR,ER_NOT_KEYFILE ALTER TABLE t1 IMPORT TABLESPACE; -SET SESSION debug_dbug=@saved_debug_dbug; - DROP TABLE t1; perl; diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index 25ab30fe5bc..cc31b3c5dcc 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -148,7 +148,6 @@ SET(INNOBASE_SOURCES include/fts0types.h include/fts0types.inl include/fts0vlc.h - include/fut0fut.h include/fut0lst.h include/gis0geo.h include/gis0rtree.h @@ -241,7 +240,6 @@ SET(INNOBASE_SOURCES include/trx0rec.inl include/trx0roll.h include/trx0rseg.h - include/trx0rseg.inl include/trx0sys.h include/trx0trx.h include/trx0trx.inl diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 37ffe8a74e8..c3fa33717f7 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -43,6 +43,7 @@ Created 6/2/1994 Heikki Tuuri #include "gis0geo.h" #include "dict0boot.h" #include "row0sel.h" /* row_search_max_autoinc() */ +#include "log.h" /**************************************************************//** Checks if 
the page in the cursor can be merged with given page. @@ -57,17 +58,6 @@ btr_can_merge_with_page( buf_block_t** merge_block, /*!< out: the merge block */ mtr_t* mtr); /*!< in: mini-transaction */ -/** Report that an index page is corrupted. -@param[in] buffer block -@param[in] index tree */ -void btr_corruption_report(const buf_block_t* block, const dict_index_t* index) -{ - ib::fatal() - << "Flag mismatch in page " << block->page.id() - << " index " << index->name - << " of table " << index->table->name; -} - /* Latching strategy of the InnoDB B-tree -------------------------------------- @@ -186,25 +176,75 @@ make them consecutive on disk if possible. From the other file segment we allocate pages for the non-leaf levels of the tree. */ -#ifdef UNIV_BTR_DEBUG -/**************************************************************//** -Checks a file segment header within a B-tree root page. -@return TRUE if valid */ -static -ibool -btr_root_fseg_validate( -/*===================*/ - const fseg_header_t* seg_header, /*!< in: segment header */ - ulint space) /*!< in: tablespace identifier */ +/** Check a file segment header within a B-tree root page. 
+@param offset file segment header offset +@param block B-tree root page +@param space tablespace +@return whether the segment header is valid */ +static bool btr_root_fseg_validate(ulint offset, + const buf_block_t &block, + const fil_space_t &space) { - ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET); + ut_ad(block.page.id().space() == space.id); + const uint16_t hdr= mach_read_from_2(offset + FSEG_HDR_OFFSET + + block.page.frame); + if (FIL_PAGE_DATA <= hdr && hdr <= srv_page_size - FIL_PAGE_DATA_END && + mach_read_from_4(block.page.frame + offset + FSEG_HDR_SPACE) == space.id) + return true; + sql_print_error("InnoDB: Index root page " UINT32PF " in %s is corrupted " + "at " ULINTPF, + block.page.id().page_no(), + UT_LIST_GET_FIRST(space.chain)->name); + return false; +} - ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space); - ut_a(offset >= FIL_PAGE_DATA); - ut_a(offset <= srv_page_size - FIL_PAGE_DATA_END); - return(TRUE); +/** Report a decryption failure. */ +ATTRIBUTE_COLD void btr_decryption_failed(const dict_index_t &index) +{ + ib_push_warning(static_cast(nullptr), DB_DECRYPTION_FAILED, + "Table %s is encrypted but encryption service or" + " used key_id is not available. " + " Can't continue reading table.", + index.table->name.m_name); + index.table->file_unreadable= true; +} + +/** Get an index page and declare its latching order level. 
+@param[in] index index tree +@param[in] page page number +@param[in] mode latch mode +@param[in] merge whether change buffer merge should be attempted +@param[in,out] mtr mini-transaction +@param[out] err error code +@return block */ +buf_block_t *btr_block_get(const dict_index_t &index, + uint32_t page, ulint mode, bool merge, + mtr_t *mtr, dberr_t *err) +{ + dberr_t local_err; + if (!err) + err= &local_err; + buf_block_t *block= + buf_page_get_gen(page_id_t{index.table->space->id, page}, + index.table->space->zip_size(), mode, nullptr, BUF_GET, + mtr, err, merge && !index.is_clust()); + ut_ad(!block == (*err != DB_SUCCESS)); + + if (UNIV_LIKELY(block != nullptr)) + { + if (!!page_is_comp(block->page.frame) != index.table->not_redundant() || + btr_page_get_index_id(block->page.frame) != index.id || + !fil_page_index_page_check(block->page.frame)) + { + *err= DB_PAGE_CORRUPTED; + block= nullptr; + } + } + else if (*err == DB_DECRYPTION_FAILED) + btr_decryption_failed(index); + + return block; } -#endif /* UNIV_BTR_DEBUG */ /**************************************************************//** Gets the root node of a tree and x- or s-latches it. @@ -215,43 +255,30 @@ btr_root_block_get( const dict_index_t* index, /*!< in: index tree */ rw_lock_type_t mode, /*!< in: either RW_S_LATCH or RW_X_LATCH */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in: mtr */ + dberr_t* err) /*!< out: error code */ { - if (!index->table || !index->table->space || index->page == FIL_NULL) { - return NULL; - } - - buf_block_t* block = btr_block_get(*index, index->page, mode, false, - mtr); - - if (!block) { - index->table->file_unreadable = true; - - ib_push_warning( - static_cast(NULL), DB_DECRYPTION_FAILED, - "Table %s in file %s is encrypted but encryption service or" - " used key_id is not available. 
" - " Can't continue reading table.", - index->table->name.m_name, - UT_LIST_GET_FIRST(index->table->space->chain)->name); - - return NULL; - } - - btr_assert_not_corrupted(block, index); - -#ifdef UNIV_BTR_DEBUG - if (!dict_index_is_ibuf(index)) { - const page_t* root = buf_block_get_frame(block); - - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF - + root, index->table->space_id)); - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + root, index->table->space_id)); - } -#endif /* UNIV_BTR_DEBUG */ + if (!index->table || !index->table->space) + { + *err= DB_TABLESPACE_NOT_FOUND; + return nullptr; + } - return(block); + buf_block_t *block = btr_block_get(*index, index->page, mode, false, mtr, + err); + if (block) + { + if (index->is_ibuf()); + else if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF, + *block, *index->table->space) || + !btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP, + *block, *index->table->space)) + { + *err= DB_CORRUPTION; + block= nullptr; + } + } + return block; } /**************************************************************//** @@ -262,13 +289,14 @@ page_t* btr_root_get( /*=========*/ const dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in: mtr */ + dberr_t* err) /*!< out: error code */ { - /* Intended to be used for segment list access. - SX lock doesn't block reading user data by other threads. - And block the segment list access by others.*/ - buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, mtr); - return(root ? buf_block_get_frame(root) : NULL); + /* Intended to be used for accessing file segment lists. + Concurrent read of other data is allowed. 
*/ + if (buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, err)) + return root->page.frame; + return nullptr; } /**************************************************************//** @@ -439,28 +467,27 @@ buf_block_t* btr_page_alloc_for_ibuf( /*====================*/ dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in: mtr */ + dberr_t* err) /*!< out: error code */ { - buf_block_t* new_block; - - buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, mtr); - - fil_addr_t node_addr = flst_get_first(PAGE_HEADER - + PAGE_BTR_IBUF_FREE_LIST - + root->page.frame); - ut_a(node_addr.page != FIL_NULL); - - new_block = buf_page_get( - page_id_t(index->table->space_id, node_addr.page), - index->table->space->zip_size(), - RW_X_LATCH, mtr); - - flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - new_block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, - mtr); - ut_d(flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); - - return(new_block); + buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, err); + if (UNIV_UNLIKELY(!root)) + return root; + + buf_block_t *new_block= + buf_page_get_gen(page_id_t(index->table->space_id, + mach_read_from_4(PAGE_HEADER + + PAGE_BTR_IBUF_FREE_LIST + + FLST_FIRST + FIL_ADDR_PAGE + + root->page.frame)), + index->table->space->zip_size(), RW_X_LATCH, nullptr, + BUF_GET, mtr, err); + if (new_block) + *err= flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, new_block, + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); + ut_d(if (*err == DB_SUCCESS) + flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); + return new_block; } /**************************************************************//** @@ -479,26 +506,18 @@ btr_page_alloc_low( in the tree */ mtr_t* mtr, /*!< in/out: mini-transaction for the allocation */ - mtr_t* init_mtr) /*!< in/out: mtr or another + mtr_t* init_mtr, /*!< in/out: mtr or another mini-transaction in which the page should 
be initialized. */ + dberr_t* err) /*!< out: error code */ { - page_t* root = btr_root_get(index, mtr); - - fseg_header_t* seg_header = (level - ? PAGE_HEADER + PAGE_BTR_SEG_TOP - : PAGE_HEADER + PAGE_BTR_SEG_LEAF) - + root; - - /* Parameter TRUE below states that the caller has made the - reservation for free extents, and thus we know that a page can - be allocated: */ - - buf_block_t* block = fseg_alloc_free_page_general( - seg_header, hint_page_no, file_direction, - true, mtr, init_mtr); - - return block; + buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, err); + if (UNIV_UNLIKELY(!root)) + return root; + fseg_header_t *seg_header= root->page.frame + + (level ? PAGE_HEADER + PAGE_BTR_SEG_TOP : PAGE_HEADER + PAGE_BTR_SEG_LEAF); + return fseg_alloc_free_page_general(seg_header, hint_page_no, file_direction, + true, mtr, init_mtr, err); } /**************************************************************//** @@ -516,38 +535,40 @@ btr_page_alloc( in the tree */ mtr_t* mtr, /*!< in/out: mini-transaction for the allocation */ - mtr_t* init_mtr) /*!< in/out: mini-transaction + mtr_t* init_mtr, /*!< in/out: mini-transaction for x-latching and initializing the page */ + dberr_t* err) /*!< out: error code */ { - if (dict_index_is_ibuf(index)) { - - return(btr_page_alloc_for_ibuf(index, mtr)); - } - - return btr_page_alloc_low( - index, hint_page_no, file_direction, level, mtr, init_mtr); + ut_ad(level < BTR_MAX_NODE_LEVEL); + return index->is_ibuf() + ? btr_page_alloc_for_ibuf(index, mtr, err) + : btr_page_alloc_low(index, hint_page_no, file_direction, level, + mtr, init_mtr, err); } /**************************************************************//** Frees a page used in an ibuf tree. Puts the page to the free list of the ibuf tree. 
*/ static -void +dberr_t btr_page_free_for_ibuf( /*===================*/ dict_index_t* index, /*!< in: index tree */ buf_block_t* block, /*!< in: block to be freed, x-latched */ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); - - buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, mtr); - - flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); + ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); - ut_d(flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); + dberr_t err; + if (buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, &err)) + { + err= flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, + block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); + ut_d(if (err == DB_SUCCESS) + flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); + } + return err; } /** Free an index page. @@ -555,51 +576,53 @@ btr_page_free_for_ibuf( @param[in,out] block block to be freed @param[in,out] mtr mini-transaction @param[in] blob whether this is freeing a BLOB page -@param[in] latched whether index->table->space->x_lock() was called */ -void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, - bool blob, bool space_latched) +@param[in] latched whether index->table->space->x_lock() was called +@return error code */ +dberr_t btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, + bool blob, bool space_latched) { - ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); #ifdef BTR_CUR_HASH_ADAPT - if (block->index && !block->index->freed()) { - ut_ad(!blob); - ut_ad(page_is_leaf(block->page.frame)); - } + if (block->index && !block->index->freed()) + { + ut_ad(!blob); + ut_ad(page_is_leaf(block->page.frame)); + } #endif - const page_id_t id(block->page.id()); - ut_ad(index->table->space_id == id.space()); - /* 
The root page is freed by btr_free_root(). */ - ut_ad(id.page_no() != index->page); - ut_ad(mtr->is_named_space(index->table->space)); - - /* The page gets invalid for optimistic searches: increment the frame - modify clock */ - - buf_block_modify_clock_inc(block); - - if (dict_index_is_ibuf(index)) { - btr_page_free_for_ibuf(index, block, mtr); - return; - } - - /* TODO: Discard any operations for block from mtr->log. - The page will be freed, so previous changes to it by this - mini-transaction should not matter. */ - page_t* root = btr_root_get(index, mtr); - fseg_header_t* seg_header = &root[blob - || page_is_leaf(block->page.frame) - ? PAGE_HEADER + PAGE_BTR_SEG_LEAF - : PAGE_HEADER + PAGE_BTR_SEG_TOP]; - fil_space_t* space= index->table->space; - const uint32_t page= id.page_no(); - - fseg_free_page(seg_header, space, page, mtr, space_latched); - buf_page_free(space, page, mtr); + const uint32_t page{block->page.id().page_no()}; + ut_ad(index->table->space_id == block->page.id().space()); + /* The root page is freed by btr_free_root(). */ + ut_ad(page != index->page); + ut_ad(mtr->is_named_space(index->table->space)); + + /* The page gets invalid for optimistic searches: increment the frame + modify clock */ + buf_block_modify_clock_inc(block); + + /* TODO: Discard any operations for block from mtr->m_log. + The page will be freed, so previous changes to it by this + mini-transaction should not matter. */ + + if (index->is_ibuf()) + return btr_page_free_for_ibuf(index, block, mtr); + + fil_space_t *space= index->table->space; + dberr_t err; + if (page_t* root = btr_root_get(index, mtr, &err)) + { + err= fseg_free_page(&root[blob || page_is_leaf(block->page.frame) + ? 
PAGE_HEADER + PAGE_BTR_SEG_LEAF + : PAGE_HEADER + PAGE_BTR_SEG_TOP], + space, page, mtr, space_latched); + } + if (err == DB_SUCCESS) + buf_page_free(space, page, mtr); - /* The page was marked free in the allocation bitmap, but it - should remain exclusively latched until mtr_t::commit() or until it - is explicitly freed from the mini-transaction. */ - ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); + /* The page was marked free in the allocation bitmap, but it + should remain exclusively latched until mtr_t::commit() or until it + is explicitly freed from the mini-transaction. */ + ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); + return err; } /** Set the child page number in a node pointer record. @@ -627,6 +650,7 @@ inline void btr_node_ptr_set_child_page_no(buf_block_t *block, mtr->write<4>(*block, rec + offs - REC_NODE_PTR_SIZE, page_no); } +MY_ATTRIBUTE((nonnull(1,2,3,4),warn_unused_result)) /************************************************************//** Returns the child page of a node pointer and sx-latches it. @return child page, sx-latched */ @@ -637,7 +661,8 @@ btr_node_ptr_get_child( const rec_t* node_ptr,/*!< in: node pointer */ dict_index_t* index, /*!< in: index */ const rec_offs* offsets,/*!< in: array returned by rec_get_offsets() */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in: mtr */ + dberr_t* err = nullptr) /*!< out: error code */ { ut_ad(rec_offs_validate(node_ptr, index, offsets)); ut_ad(index->table->space_id @@ -646,9 +671,10 @@ btr_node_ptr_get_child( return btr_block_get( *index, btr_node_ptr_get_child_page_no(node_ptr, offsets), RW_SX_LATCH, btr_page_get_level(page_align(node_ptr)) == 1, - mtr); + mtr, err); } +MY_ATTRIBUTE((nonnull(2,3,5), warn_unused_result)) /************************************************************//** Returns the upper level node pointer to a page. It is assumed that mtr holds an sx-latch on the tree. 
@@ -666,18 +692,11 @@ btr_page_get_father_node_ptr_func( or BTR_CONT_SEARCH_TREE */ mtr_t* mtr) /*!< in: mtr */ { - dtuple_t* tuple; - rec_t* user_rec; - rec_t* node_ptr; - ulint level; - ulint page_no; - dict_index_t* index; - ut_ad(latch_mode == BTR_CONT_MODIFY_TREE || latch_mode == BTR_CONT_SEARCH_TREE); - page_no = btr_cur_get_block(cursor)->page.id().page_no(); - index = btr_cur_get_index(cursor); + const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no(); + dict_index_t* index = btr_cur_get_index(cursor); ut_ad(!dict_index_is_spatial(index)); ut_ad(srv_read_only_mode @@ -686,58 +705,27 @@ btr_page_get_father_node_ptr_func( ut_ad(dict_index_get_page(index) != page_no); - level = btr_page_get_level(btr_cur_get_page(cursor)); + const auto level = btr_page_get_level(btr_cur_get_page(cursor)); - user_rec = btr_cur_get_rec(cursor); + const rec_t* user_rec = btr_cur_get_rec(cursor); ut_a(page_rec_is_user_rec(user_rec)); - tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level); - dberr_t err = DB_SUCCESS; - - err = btr_cur_search_to_nth_level( - index, level + 1, tuple, - PAGE_CUR_LE, latch_mode, cursor, 0, mtr); - - if (err != DB_SUCCESS) { - ib::warn() << " Error code: " << err - << " btr_page_get_father_node_ptr_func " - << " level: " << level + 1 - << " table: " << index->table->name - << " index: " << index->name(); + if (btr_cur_search_to_nth_level(index, level + 1, + dict_index_build_node_ptr(index, + user_rec, 0, + heap, level), + PAGE_CUR_LE, latch_mode, + cursor, 0, mtr) != DB_SUCCESS) { + return nullptr; } - node_ptr = btr_cur_get_rec(cursor); + const rec_t* node_ptr = btr_cur_get_rec(cursor); offsets = rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED, &heap); if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) { - rec_t* print_rec; - - ib::error() - << "Corruption of an index tree: table " - << index->table->name - << " index " << index->name - << ", father ptr page no " - << 
btr_node_ptr_get_child_page_no(node_ptr, offsets) - << ", child page no " << page_no; - - print_rec = page_rec_get_next( - page_get_infimum_rec(page_align(user_rec))); - offsets = rec_get_offsets(print_rec, index, offsets, - page_rec_is_leaf(user_rec) - ? index->n_core_fields : 0, - ULINT_UNDEFINED, &heap); - page_rec_print(print_rec, offsets); - offsets = rec_get_offsets(node_ptr, index, offsets, 0, - ULINT_UNDEFINED, &heap); - page_rec_print(node_ptr, offsets); - - ib::fatal() - << "You should dump + drop + reimport the table to" - << " fix the corruption. If the crash happens at" - << " database startup. " << FORCE_RECOVERY_MSG - << " Then dump + drop + reimport."; + offsets = nullptr; } return(offsets); @@ -778,8 +766,9 @@ btr_page_get_father_block( @param[in,out] index b-tree @param[in] block child page @param[in,out] mtr mini-transaction -@param[out] cursor cursor pointing to the x-latched parent page */ -void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr, +@param[out] cursor cursor pointing to the x-latched parent page +@return whether the cursor was successfully positioned */ +bool btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr, btr_cur_t* cursor) { mem_heap_t* heap; @@ -789,8 +778,9 @@ void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr, btr_cur_position(index, rec, block, cursor); heap = mem_heap_create(100); - btr_page_get_father_node_ptr(NULL, heap, cursor, mtr); + const bool got = btr_page_get_father_node_ptr(NULL, heap, cursor, mtr); mem_heap_free(heap); + return got; } #ifdef UNIV_DEBUG @@ -800,28 +790,25 @@ constexpr index_id_t BTR_FREED_INDEX_ID = 0; /** Free a B-tree root page. btr_free_but_not_root() must already have been called. -In a persistent tablespace, the caller must invoke fsp_init_file_page() -before mtr.commit(). 
-@param[in,out] block index root page -@param[in,out] mtr mini-transaction */ -static void btr_free_root(buf_block_t *block, mtr_t *mtr) +@param block index root page +@param space tablespace +@param mtr mini-transaction */ +static void btr_free_root(buf_block_t *block, const fil_space_t &space, + mtr_t *mtr) { ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - ut_ad(mtr->is_named_space(block->page.id().space())); + ut_ad(mtr->is_named_space(&space)); btr_search_drop_page_hash_index(block); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_root_fseg_validate(PAGE_HEADER + PAGE_BTR_SEG_TOP + - block->page.frame, - block->page.id().space())); -#endif /* UNIV_BTR_DEBUG */ - - /* Free the entire segment in small steps. */ - ut_d(mtr->freeing_tree()); - while (!fseg_free_step(PAGE_HEADER + PAGE_BTR_SEG_TOP + - block->page.frame, mtr)); + if (btr_root_fseg_validate(PAGE_HEADER + PAGE_BTR_SEG_TOP, *block, space)) + { + /* Free the entire segment in small steps. */ + ut_d(mtr->freeing_tree()); + while (!fseg_free_step(PAGE_HEADER + PAGE_BTR_SEG_TOP + + block->page.frame, mtr)); + } } MY_ATTRIBUTE((warn_unused_result)) @@ -843,8 +830,6 @@ buf_block_t *btr_free_root_check(const page_id_t page_id, ulint zip_size, nullptr, BUF_GET_POSSIBLY_FREED, mtr); if (!block); - else if (block->page.is_freed()) - block= nullptr; else if (fil_page_index_page_check(block->page.frame) && index_id == btr_page_get_index_id(block->page.frame)) /* This should be a root page. 
It should not be possible to @@ -900,6 +885,7 @@ static void btr_root_page_init(buf_block_t *block, index_id_t index_id, @param[in,out] space tablespace where created @param[in] index index, or NULL to create a system table @param[in,out] mtr mini-transaction +@param[out] err error code @return page number of the created root @retval FIL_NULL if did not succeed */ uint32_t @@ -908,12 +894,14 @@ btr_create( fil_space_t* space, index_id_t index_id, dict_index_t* index, - mtr_t* mtr) + mtr_t* mtr, + dberr_t* err) { buf_block_t* block; ut_ad(mtr->is_named_space(space)); ut_ad(index_id != BTR_FREED_INDEX_ID); + ut_ad(index || space == fil_system.sys_space); /* Create the two new segments (one, in the case of an ibuf tree) for the index tree; the segment headers are put on the allocated root page @@ -923,7 +911,7 @@ btr_create( if (UNIV_UNLIKELY(type & DICT_IBUF)) { /* Allocate first the ibuf header page */ buf_block_t* ibuf_hdr_block = fseg_create( - space, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr); + space, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr, err); if (ibuf_hdr_block == NULL) { return(FIL_NULL); @@ -934,11 +922,11 @@ btr_create( /* Allocate then the next page to the segment: it will be the tree root page */ - block = fseg_alloc_free_page( + block = fseg_alloc_free_page_general( buf_block_get_frame(ibuf_hdr_block) + IBUF_HEADER + IBUF_TREE_SEG_HEADER, IBUF_TREE_ROOT_PAGE_NO, - FSP_UP, mtr); + FSP_UP, false, mtr, mtr, err); if (block == NULL) { return(FIL_NULL); @@ -949,17 +937,17 @@ btr_create( flst_init(block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr); } else { block = fseg_create(space, PAGE_HEADER + PAGE_BTR_SEG_TOP, - mtr); + mtr, err); if (block == NULL) { return(FIL_NULL); } if (!fseg_create(space, PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr, - false, block)) { + err, false, block)) { /* Not enough space for new segment, free root segment before return. 
*/ - btr_free_root(block, mtr); + btr_free_root(block, *space, mtr); return(FIL_NULL); } } @@ -1012,27 +1000,21 @@ leaf_loop: mtr_start(&mtr); ut_d(mtr.freeing_tree()); mtr_set_log_mode(&mtr, log_mode); - mtr.set_named_space_id(block->page.id().space()); + fil_space_t *space = mtr.set_named_space_id(block->page.id().space()); - page_t* root = block->page.frame; - - if (!root) { + if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF, + *block, *space) + || !btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP, + *block, *space)) { mtr_commit(&mtr); return; } -#ifdef UNIV_BTR_DEBUG - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF - + root, block->page.id().space())); - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + root, block->page.id().space())); -#endif /* UNIV_BTR_DEBUG */ - /* NOTE: page hash indexes are dropped when a page is freed inside fsp0fsp. */ - bool finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF, - &mtr + bool finished = fseg_free_step(PAGE_HEADER + PAGE_BTR_SEG_LEAF + + block->page.frame, &mtr #ifdef BTR_CUR_HASH_ADAPT , ahi #endif /* BTR_CUR_HASH_ADAPT */ @@ -1046,21 +1028,16 @@ leaf_loop: top_loop: mtr_start(&mtr); mtr_set_log_mode(&mtr, log_mode); - mtr.set_named_space_id(block->page.id().space()); - - root = block->page.frame; + space = mtr.set_named_space_id(block->page.id().space()); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + root, block->page.id().space())); -#endif /* UNIV_BTR_DEBUG */ - - finished = fseg_free_step_not_header( - root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr + finished = !btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP, + *block, *space) + || fseg_free_step_not_header(PAGE_HEADER + PAGE_BTR_SEG_TOP + + block->page.frame, &mtr #ifdef BTR_CUR_HASH_ADAPT - ,ahi + ,ahi #endif /* BTR_CUR_HASH_ADAPT */ - ); + ); mtr_commit(&mtr); if (!finished) { @@ -1070,9 +1047,10 @@ top_loop: /** Clear the index tree and reinitialize 
the root page, in the rollback of TRX_UNDO_EMPTY. The BTR_SEG_LEAF is freed and reinitialized. -@param thr query thread */ +@param thr query thread +@return error code */ TRANSACTIONAL_TARGET -void dict_index_t::clear(que_thr_t *thr) +dberr_t dict_index_t::clear(que_thr_t *thr) { mtr_t mtr; mtr.start(); @@ -1081,9 +1059,11 @@ void dict_index_t::clear(que_thr_t *thr) else set_modified(mtr); - if (buf_block_t *root_block= buf_page_get(page_id_t(table->space->id, page), - table->space->zip_size(), - RW_X_LATCH, &mtr)) + dberr_t err; + if (buf_block_t *root_block= + buf_page_get_gen(page_id_t(table->space->id, page), + table->space->zip_size(), + RW_X_LATCH, nullptr, BUF_GET, &mtr, &err)) { btr_free_but_not_root(root_block, mtr.get_log_mode() #ifdef BTR_CUR_HASH_ADAPT @@ -1098,12 +1078,13 @@ void dict_index_t::clear(que_thr_t *thr) #endif mtr.memset(root_block, PAGE_HEADER + PAGE_BTR_SEG_LEAF, FSEG_HEADER_SIZE, 0); - if (fseg_create(table->space, PAGE_HEADER + PAGE_BTR_SEG_LEAF, &mtr, false, - root_block)) + if (fseg_create(table->space, PAGE_HEADER + PAGE_BTR_SEG_LEAF, &mtr, + &err, false, root_block)) btr_root_page_init(root_block, id, this, &mtr); } mtr.commit(); + return err; } /** Free a persistent index tree if it exists. 
@@ -1120,7 +1101,7 @@ void btr_free_if_exists(fil_space_t *space, uint32_t page, { btr_free_but_not_root(root, mtr->get_log_mode()); mtr->set_named_space(space); - btr_free_root(root, mtr); + btr_free_root(root, *space, mtr); } } @@ -1141,7 +1122,7 @@ void btr_drop_temporary_table(const dict_table_t &table) { btr_free_but_not_root(block, MTR_LOG_NO_REDO); mtr.set_log_mode(MTR_LOG_NO_REDO); - btr_free_root(block, &mtr); + btr_free_root(block, *fil_system.temp_space, &mtr); mtr.commit(); mtr.start(); } @@ -1233,27 +1214,29 @@ btr_read_autoinc_with_fallback(const dict_table_t* table, unsigned col_no) void btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset) { - ut_ad(index->is_primary()); - ut_ad(index->table->persistent_autoinc); - ut_ad(!index->table->is_temporary()); + ut_ad(index->is_primary()); + ut_ad(index->table->persistent_autoinc); + ut_ad(!index->table->is_temporary()); - mtr_t mtr; - mtr.start(); - fil_space_t* space = index->table->space; - mtr.set_named_space(space); - page_set_autoinc(buf_page_get(page_id_t(space->id, index->page), - space->zip_size(), - RW_SX_LATCH, &mtr), - autoinc, &mtr, reset); - mtr.commit(); + mtr_t mtr; + mtr.start(); + fil_space_t *space= index->table->space; + if (buf_block_t *root= buf_page_get(page_id_t(space->id, index->page), + space->zip_size(), RW_SX_LATCH, &mtr)) + { + mtr.set_named_space(space); + page_set_autoinc(root, autoinc, &mtr, reset); + } + + mtr.commit(); } /** Reorganize an index page. 
@param cursor index page cursor @param index the index that the cursor belongs to @param mtr mini-transaction */ -static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, - mtr_t *mtr) +static dberr_t btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, + mtr_t *mtr) { const mtr_log_t log_mode= mtr->set_log_mode(MTR_LOG_NO_REDO); @@ -1261,7 +1244,6 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); ut_ad(!is_buf_block_get_page_zip(block)); - btr_assert_not_corrupted(block, index); ut_ad(fil_page_index_page_check(block->page.frame)); ut_ad(index->is_dummy || block->page.id().space() == index->table->space->id); @@ -1288,9 +1270,11 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, /* Copy the records from the temporary space to the recreated page; do not copy the lock bits yet */ - page_copy_rec_list_end_no_locks(block, old, - page_get_infimum_rec(old->page.frame), - index, mtr); + if (dberr_t err= + page_copy_rec_list_end_no_locks(block, old, + page_get_infimum_rec(old->page.frame), + index, mtr)) + return err; /* Copy the PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC. */ ut_ad(!page_get_max_trx_id(block->page.frame)); @@ -1504,6 +1488,7 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, MONITOR_INC(MONITOR_INDEX_REORG_ATTEMPTS); MONITOR_INC(MONITOR_INDEX_REORG_SUCCESSFUL); + return DB_SUCCESS; } /*************************************************************//** @@ -1515,9 +1500,9 @@ be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages, IBUF_BITMAP_FREE is unaffected by reorganization. 
-@retval true if the operation was successful -@retval false if it is a compressed page, and recompression failed */ -bool +@return error code +@retval DB_FAIL if reorganizing a ROW_FORMAT=COMPRESSED page failed */ +dberr_t btr_page_reorganize_block( ulint z_level,/*!< in: compression level to be used if dealing with compressed page */ @@ -1525,15 +1510,11 @@ btr_page_reorganize_block( dict_index_t* index, /*!< in: the index tree of the page */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - if (buf_block_get_page_zip(block)) { - return page_zip_reorganize(block, index, z_level, mtr, true); - } - - page_cur_t cur; - page_cur_set_before_first(block, &cur); - - btr_page_reorganize_low(&cur, index, mtr); - return true; + if (buf_block_get_page_zip(block)) + return page_zip_reorganize(block, index, z_level, mtr, true); + page_cur_t cur; + page_cur_set_before_first(block, &cur); + return btr_page_reorganize_low(&cur, index, mtr); } /*************************************************************//** @@ -1545,33 +1526,28 @@ be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages, IBUF_BITMAP_FREE is unaffected by reorganization. 
-@retval true if the operation was successful -@retval false if it is a compressed page, and recompression failed */ -bool +@return error code +@retval DB_FAIL if reorganizing a ROW_FORMAT=COMPRESSED page failed */ +dberr_t btr_page_reorganize( /*================*/ page_cur_t* cursor, /*!< in/out: page cursor */ dict_index_t* index, /*!< in: the index tree of the page */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - if (!buf_block_get_page_zip(cursor->block)) { - btr_page_reorganize_low(cursor, index, mtr); - return true; - } - - ulint pos = page_rec_get_n_recs_before(cursor->rec); - if (!page_zip_reorganize(cursor->block, index, page_zip_level, mtr, - true)) { - return false; - } - if (pos) { - cursor->rec = page_rec_get_nth(cursor->block->page.frame, pos); - } else { - ut_ad(cursor->rec == page_get_infimum_rec( - cursor->block->page.frame)); - } + if (!buf_block_get_page_zip(cursor->block)) + return btr_page_reorganize_low(cursor, index, mtr); + + ulint pos= page_rec_get_n_recs_before(cursor->rec); + dberr_t err= page_zip_reorganize(cursor->block, index, page_zip_level, mtr, + true); + if (err == DB_FAIL); + else if (pos) + cursor->rec= page_rec_get_nth(cursor->block->page.frame, pos); + else + ut_ad(cursor->rec == page_get_infimum_rec(cursor->block->page.frame)); - return true; + return err; } /** Empty an index page (possibly the root page). @see btr_page_create(). @@ -1698,13 +1674,15 @@ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr) /** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE. 
@param[in] index clustered index with instant ALTER TABLE @param[in] all whether to reset FIL_PAGE_TYPE as well -@param[in,out] mtr mini-transaction */ +@param[in,out] mtr mini-transaction +@return error code */ ATTRIBUTE_COLD -void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) +dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) { ut_ad(!index.table->is_temporary()); ut_ad(index.is_primary()); - if (buf_block_t *root = btr_root_block_get(&index, RW_SX_LATCH, mtr)) + dberr_t err; + if (buf_block_t *root= btr_root_block_get(&index, RW_SX_LATCH, mtr, &err)) { byte *page_type= root->page.frame + FIL_PAGE_TYPE; if (all) @@ -1737,6 +1715,7 @@ void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) mtr->memcpy(*root, &root->page.frame[supremum], supremuminfimum, 8); } + return err; } /*************************************************************//** @@ -1758,7 +1737,8 @@ btr_root_raise_and_insert( mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ const dtuple_t* tuple, /*!< in: tuple to insert */ ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in: mtr */ + dberr_t* err) /*!< out: error code */ { dict_index_t* index; ulint new_page_no; @@ -1783,32 +1763,33 @@ btr_root_raise_and_insert( #endif /* UNIV_ZIP_DEBUG */ const page_id_t root_id{root->page.id()}; -#ifdef UNIV_BTR_DEBUG - if (!dict_index_is_ibuf(index)) { - ulint space = index->table->space_id; - - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF - + root->page.frame, space)); - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + root->page.frame, space)); - } - - ut_a(dict_index_get_page(index) == root_id.page_no()); -#endif /* UNIV_BTR_DEBUG */ ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); ut_ad(mtr->memo_contains_flagged(root, MTR_MEMO_PAGE_X_FIX)); + if (index->page != root_id.page_no()) { + ut_ad("corrupted 
root page number" == 0); + return nullptr; + } + + if (index->is_ibuf()) { + } else if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF, + *root, *index->table->space) + || !btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP, + *root, *index->table->space)) { + return nullptr; + } + /* Allocate a new page to the tree. Root splitting is done by first moving the root records to the new page, emptying the root, putting a node pointer to the new page, and then splitting the new page. */ level = btr_page_get_level(root->page.frame); - new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr); + new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr, err); - if (new_block == NULL && os_has_said_disk_full) { - return(NULL); + if (!new_block) { + return nullptr; } new_page_zip = buf_block_get_page_zip(new_block); @@ -1832,6 +1813,10 @@ btr_root_raise_and_insert( } /* Copy the records from root to the new page one by one. */ + dberr_t e; + if (!err) { + err = &e; + } if (0 #ifdef UNIV_ZIP_COPY @@ -1839,7 +1824,11 @@ btr_root_raise_and_insert( #endif /* UNIV_ZIP_COPY */ || !page_copy_rec_list_end(new_block, root, page_get_infimum_rec(root->page.frame), - index, mtr)) { + index, mtr, err)) { + if (*err != DB_SUCCESS && *err != DB_FAIL) { + return nullptr; + } + ut_a(new_page_zip); /* Copy the page byte for byte. */ @@ -1974,10 +1963,10 @@ btr_root_raise_and_insert( if (dict_index_is_spatial(index)) { /* Split rtree page and insert tuple */ return(rtr_page_split_and_insert(flags, cursor, offsets, heap, - tuple, n_ext, mtr)); + tuple, n_ext, mtr, err)); } else { return(btr_page_split_and_insert(flags, cursor, offsets, heap, - tuple, n_ext, mtr)); + tuple, n_ext, mtr, err)); } } @@ -2290,7 +2279,7 @@ btr_page_insert_fits( /*******************************************************//** Inserts a data tuple to a tree on a non-leaf level. It is assumed that mtr holds an x-latch on the tree. 
*/ -void +dberr_t btr_insert_on_non_leaf_level( ulint flags, /*!< in: undo logging and locking flags */ dict_index_t* index, /*!< in: index */ @@ -2300,7 +2289,6 @@ btr_insert_on_non_leaf_level( { big_rec_t* dummy_big_rec; btr_cur_t cursor; - dberr_t err; rec_t* rec; mem_heap_t* heap = NULL; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; @@ -2309,70 +2297,62 @@ btr_insert_on_non_leaf_level( rtr_info_t rtr_info; ut_ad(level > 0); + auto mode = PAGE_CUR_LE; - if (!dict_index_is_spatial(index)) { - dberr_t err = btr_cur_search_to_nth_level( - index, level, tuple, PAGE_CUR_LE, - BTR_CONT_MODIFY_TREE, - &cursor, 0, mtr); - - if (err != DB_SUCCESS) { - ib::warn() << " Error code: " << err - << " btr_page_get_father_node_ptr_func " - << " level: " << level - << " table: " << index->table->name - << " index: " << index->name; - } - } else { + if (index->is_spatial()) { + mode = PAGE_CUR_RTREE_INSERT; /* For spatial index, initialize structures to track its parents etc. */ rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); rtr_info_update_btr(&cursor, &rtr_info); - - btr_cur_search_to_nth_level(index, level, tuple, - PAGE_CUR_RTREE_INSERT, - BTR_CONT_MODIFY_TREE, - &cursor, 0, mtr); } + flags |= BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG + | BTR_NO_UNDO_LOG_FLAG; + + dberr_t err = btr_cur_search_to_nth_level(index, level, tuple, mode, + BTR_CONT_MODIFY_TREE, + &cursor, 0, mtr); ut_ad(cursor.flag == BTR_CUR_BINARY); - err = btr_cur_optimistic_insert( - flags - | BTR_NO_LOCKING_FLAG - | BTR_KEEP_SYS_FLAG - | BTR_NO_UNDO_LOG_FLAG, - &cursor, &offsets, &heap, - tuple, &rec, &dummy_big_rec, 0, NULL, mtr); + if (UNIV_LIKELY(err == DB_SUCCESS)) { + err = btr_cur_optimistic_insert(flags, + &cursor, &offsets, &heap, + tuple, &rec, + &dummy_big_rec, 0, NULL, mtr); + } if (err == DB_FAIL) { - err = btr_cur_pessimistic_insert(flags - | BTR_NO_LOCKING_FLAG - | BTR_KEEP_SYS_FLAG - | BTR_NO_UNDO_LOG_FLAG, + err = btr_cur_pessimistic_insert(flags, &cursor, &offsets, &heap, tuple, &rec, 
&dummy_big_rec, 0, NULL, mtr); - ut_a(err == DB_SUCCESS); } - if (heap != NULL) { + if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } - if (dict_index_is_spatial(index)) { + if (index->is_spatial()) { ut_ad(cursor.rtr_info); rtr_clean_rtr_info(&rtr_info, true); } + + return err; } +static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); +static_assert(FIL_PAGE_PREV % 4 == 0, "alignment"); +static_assert(FIL_PAGE_NEXT % 4 == 0, "alignment"); + +MY_ATTRIBUTE((nonnull,warn_unused_result)) /**************************************************************//** Attaches the halves of an index page on the appropriate level in an index tree. */ -static MY_ATTRIBUTE((nonnull)) -void +static +dberr_t btr_attach_half_pages( /*==================*/ ulint flags, /*!< in: undo logging and @@ -2452,32 +2432,38 @@ btr_attach_half_pages( /* Insert it next to the pointer to the lower half. Note that this may generate recursion leading to a split on the higher level. */ - btr_insert_on_non_leaf_level(flags, index, level + 1, - node_ptr_upper, mtr); + dberr_t err = btr_insert_on_non_leaf_level( + flags, index, level + 1, node_ptr_upper, mtr); /* Free the memory heap */ mem_heap_free(heap); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } + /* Update page links of the level */ if (prev_block) { -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(prev_block->page.frame) - == page_is_comp(block->page.frame)); - ut_a(btr_page_get_next(prev_block->page.frame) - == block->page.id().page_no()); -#endif /* UNIV_BTR_DEBUG */ + if (UNIV_UNLIKELY(memcmp_aligned<4>(prev_block->page.frame + + FIL_PAGE_NEXT, + block->page.frame + + FIL_PAGE_OFFSET, + 4))) { + return DB_CORRUPTION; + } btr_page_set_next(prev_block, lower_block->page.id().page_no(), mtr); } if (next_block) { -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(next_block->page.frame) - == page_is_comp(block->page.frame)); - ut_a(btr_page_get_prev(next_block->page.frame) - == block->page.id().page_no()); -#endif /* UNIV_BTR_DEBUG */ + if 
(UNIV_UNLIKELY(memcmp_aligned<4>(next_block->page.frame + + FIL_PAGE_PREV, + block->page.frame + + FIL_PAGE_OFFSET, + 4))) { + return DB_CORRUPTION; + } btr_page_set_prev(next_block, upper_block->page.id().page_no(), mtr); } @@ -2496,6 +2482,8 @@ btr_attach_half_pages( btr_page_set_prev(upper_block, lower_block->page.id().page_no(), mtr); btr_page_set_next(lower_block, upper_block->page.id().page_no(), mtr); + + return DB_SUCCESS; } /*************************************************************//** @@ -2566,27 +2554,28 @@ btr_insert_into_right_sibling( if (next_page_no == FIL_NULL || !page_rec_is_supremum( page_rec_get_next(btr_cur_get_rec(cursor)))) { - return(NULL); + return nullptr; } page_cur_t next_page_cursor; buf_block_t* next_block; page_t* next_page; btr_cur_t next_father_cursor; - rec_t* rec = NULL; + rec_t* rec = nullptr; ulint max_size; next_block = btr_block_get(*cursor->index, next_page_no, RW_X_LATCH, page_is_leaf(page), mtr); if (UNIV_UNLIKELY(!next_block)) { - return NULL; + return nullptr; } next_page = buf_block_get_frame(next_block); + const bool is_leaf = page_is_leaf(next_page); - bool is_leaf = page_is_leaf(next_page); - - btr_page_get_father( - cursor->index, next_block, mtr, &next_father_cursor); + if (!btr_page_get_father(cursor->index, next_block, mtr, + &next_father_cursor)) { + return nullptr; + } page_cur_search( next_block, cursor->index, tuple, PAGE_CUR_LE, @@ -2603,7 +2592,7 @@ btr_insert_into_right_sibling( &next_page_cursor, tuple, cursor->index, offsets, &heap, n_ext, mtr); - if (rec == NULL) { + if (!rec) { if (is_leaf && next_block->page.zip.ssize && !dict_index_is_clust(cursor->index) @@ -2613,7 +2602,7 @@ btr_insert_into_right_sibling( reorganize before failing. 
*/ ibuf_reset_free_bits(next_block); } - return(NULL); + return nullptr; } ibool compressed; @@ -2632,18 +2621,22 @@ btr_insert_into_right_sibling( &err, TRUE, &next_father_cursor, BTR_CREATE_FLAG, false, mtr); - ut_a(err == DB_SUCCESS); + if (err != DB_SUCCESS) { + return nullptr; + } if (!compressed) { - btr_cur_compress_if_useful(&next_father_cursor, FALSE, mtr); + btr_cur_compress_if_useful(&next_father_cursor, false, mtr); } dtuple_t* node_ptr = dict_index_build_node_ptr( cursor->index, rec, next_block->page.id().page_no(), heap, level); - btr_insert_on_non_leaf_level( - flags, cursor->index, level + 1, node_ptr, mtr); + if (btr_insert_on_non_leaf_level(flags, cursor->index, level + 1, + node_ptr, mtr) != DB_SUCCESS) { + return nullptr; + } ut_ad(rec_offs_validate(rec, cursor->index, *offsets)); @@ -2665,6 +2658,95 @@ btr_insert_into_right_sibling( return(rec); } +/*************************************************************//** +Moves record list end to another page. Moved records include +split_rec. + +IMPORTANT: The caller will have to update IBUF_BITMAP_FREE +if new_block is a compressed leaf page in a secondary index. +This has to be done either within the same mini-transaction, +or by invoking ibuf_reset_free_bits() before mtr_commit(). 
+ +@return error code */ +static +dberr_t +page_move_rec_list_end( +/*===================*/ + buf_block_t* new_block, /*!< in/out: index page where to move */ + buf_block_t* block, /*!< in: index page from where to move */ + rec_t* split_rec, /*!< in: first record to move */ + dict_index_t* index, /*!< in: record descriptor */ + mtr_t* mtr) /*!< in: mtr */ +{ + page_t* new_page = buf_block_get_frame(new_block); + ulint old_data_size; + ulint new_data_size; + ulint old_n_recs; + ulint new_n_recs; + + ut_ad(!dict_index_is_spatial(index)); + + old_data_size = page_get_data_size(new_page); + old_n_recs = page_get_n_recs(new_page); +#ifdef UNIV_ZIP_DEBUG + { + page_zip_des_t* new_page_zip + = buf_block_get_page_zip(new_block); + page_zip_des_t* page_zip + = buf_block_get_page_zip(block); + ut_a(!new_page_zip == !page_zip); + ut_a(!new_page_zip + || page_zip_validate(new_page_zip, new_page, index)); + ut_a(!page_zip + || page_zip_validate(page_zip, page_align(split_rec), + index)); + } +#endif /* UNIV_ZIP_DEBUG */ + + dberr_t err; + if (!page_copy_rec_list_end(new_block, block, + split_rec, index, mtr, &err)) { + return err; + } + + new_data_size = page_get_data_size(new_page); + new_n_recs = page_get_n_recs(new_page); + + ut_ad(new_data_size >= old_data_size); + + page_delete_rec_list_end(split_rec, block, index, + new_n_recs - old_n_recs, + new_data_size - old_data_size, mtr); + + return DB_SUCCESS; +} + +/*************************************************************//** +Moves record list start to another page. Moved records do not include +split_rec. + +IMPORTANT: The caller will have to update IBUF_BITMAP_FREE +if new_block is a compressed leaf page in a secondary index. +This has to be done either within the same mini-transaction, +or by invoking ibuf_reset_free_bits() before mtr_commit(). 
+ +@return error code */ +static +dberr_t +page_move_rec_list_start( +/*=====================*/ + buf_block_t* new_block, /*!< in/out: index page where to move */ + buf_block_t* block, /*!< in/out: page containing split_rec */ + rec_t* split_rec, /*!< in: first record not to move */ + dict_index_t* index, /*!< in: record descriptor */ + mtr_t* mtr) /*!< in: mtr */ +{ + dberr_t err; + if (page_copy_rec_list_start(new_block, block, split_rec, index, mtr, &err)) + page_delete_rec_list_start(split_rec, block, index, mtr); + return err; +} + /*************************************************************//** Splits an index page to halves and inserts the tuple. It is assumed that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is @@ -2684,7 +2766,8 @@ btr_page_split_and_insert( mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ const dtuple_t* tuple, /*!< in: tuple to insert */ ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in: mtr */ + dberr_t* err) /*!< out: error code */ { buf_block_t* block; page_t* page; @@ -2702,10 +2785,12 @@ btr_page_split_and_insert( ulint n_iterations = 0; ulint n_uniq; + ut_ad(*err == DB_SUCCESS); + if (cursor->index->is_spatial()) { /* Split rtree page and update parent */ - return(rtr_page_split_and_insert(flags, cursor, offsets, heap, - tuple, n_ext, mtr)); + return rtr_page_split_and_insert(flags, cursor, offsets, heap, + tuple, n_ext, mtr, err); } if (!*heap) { @@ -2771,17 +2856,13 @@ func_start: } } - DBUG_EXECUTE_IF("disk_is_full", - os_has_said_disk_full = true; - return(NULL);); - /* 2. Allocate a new page to the index */ const uint16_t page_level = btr_page_get_level(page); new_block = btr_page_alloc(cursor->index, hint_page_no, direction, - page_level, mtr, mtr); + page_level, mtr, mtr, err); if (!new_block) { - return(NULL); + return nullptr; } new_page = buf_block_get_frame(new_block); @@ -2842,8 +2923,12 @@ insert_empty: /* 4. 
Do first the modifications in the tree structure */ /* FIXME: write FIL_PAGE_PREV,FIL_PAGE_NEXT in new_block earlier! */ - btr_attach_half_pages(flags, cursor->index, block, - first_rec, new_block, direction, mtr); + *err = btr_attach_half_pages(flags, cursor->index, block, + first_rec, new_block, direction, mtr); + + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return nullptr; + } /* If the split is made on the leaf level and the insert will fit on the appropriate half-page, we may release the tree x-latch. @@ -2877,9 +2962,14 @@ insert_empty: #ifdef UNIV_ZIP_COPY || page_zip #endif /* UNIV_ZIP_COPY */ - || !page_move_rec_list_start(new_block, block, move_limit, - cursor->index, mtr)) { - /* For some reason, compressing new_page failed, + || (*err = page_move_rec_list_start(new_block, block, + move_limit, + cursor->index, mtr))) { + if (*err != DB_FAIL) { + return nullptr; + } + + /* For some reason, compressing new_block failed, even though it should contain fewer records than the original page. Copy the page byte for byte and then delete the records from both pages @@ -2922,8 +3012,13 @@ insert_empty: #ifdef UNIV_ZIP_COPY || page_zip #endif /* UNIV_ZIP_COPY */ - || !page_move_rec_list_end(new_block, block, move_limit, - cursor->index, mtr)) { + || (*err = page_move_rec_list_end(new_block, block, + move_limit, + cursor->index, mtr))) { + if (*err != DB_FAIL) { + return nullptr; + } + /* For some reason, compressing new_page failed, even though it should contain fewer records than the original page. Copy the page byte for byte @@ -3009,12 +3104,16 @@ insert_empty: For compressed pages, page_cur_tuple_insert() will have attempted this already. 
*/ - if (page_cur_get_page_zip(page_cursor) - || !btr_page_reorganize(page_cursor, cursor->index, mtr)) { - + if (page_cur_get_page_zip(page_cursor)) { goto insert_failed; } + *err = btr_page_reorganize(page_cursor, cursor->index, mtr); + + if (*err != DB_SUCCESS) { + return nullptr; + } + rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, offsets, heap, n_ext, mtr); @@ -3075,40 +3174,38 @@ dberr_t btr_level_list_remove(const buf_block_t& block, const uint32_t next_page_no = btr_page_get_next(page); /* Update page links of the level */ + dberr_t err; if (prev_page_no != FIL_NULL) { buf_block_t* prev_block = btr_block_get( index, prev_page_no, RW_X_LATCH, page_is_leaf(page), - mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(prev_block->page.frame) - == page_is_comp(page)); - static_assert(FIL_PAGE_NEXT % 4 == 0, "alignment"); - static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); - ut_a(!memcmp_aligned<4>(prev_block->page.frame + FIL_PAGE_NEXT, - page + FIL_PAGE_OFFSET, 4)); -#endif /* UNIV_BTR_DEBUG */ - + mtr, &err); + if (UNIV_UNLIKELY(!prev_block)) { + return err; + } + if (UNIV_UNLIKELY(memcmp_aligned<4>(prev_block->page.frame + + FIL_PAGE_NEXT, + page + FIL_PAGE_OFFSET, + 4))) { + return DB_CORRUPTION; + } btr_page_set_next(prev_block, next_page_no, mtr); } if (next_page_no != FIL_NULL) { buf_block_t* next_block = btr_block_get( index, next_page_no, RW_X_LATCH, page_is_leaf(page), - mtr); + mtr, &err); - if (!next_block) { - return DB_ERROR; + if (UNIV_UNLIKELY(!next_block)) { + return err; + } + if (UNIV_UNLIKELY(memcmp_aligned<4>(next_block->page.frame + + FIL_PAGE_PREV, + page + FIL_PAGE_OFFSET, + 4))) { + return DB_CORRUPTION; } -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(next_block->page.frame) - == page_is_comp(page)); - static_assert(FIL_PAGE_PREV % 4 == 0, "alignment"); - static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); - ut_a(!memcmp_aligned<4>(next_block->page.frame + FIL_PAGE_PREV, - page + FIL_PAGE_OFFSET, 4)); -#endif /* 
UNIV_BTR_DEBUG */ - btr_page_set_prev(next_block, prev_page_no, mtr); } @@ -3126,7 +3223,8 @@ btr_lift_page_up( must not be empty: use btr_discard_only_page_on_level if the last record from the page should be removed */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + dberr_t* err) /*!< out: error code */ { buf_block_t* father_block; ulint page_level; @@ -3239,7 +3337,11 @@ btr_lift_page_up( #endif /* UNIV_ZIP_COPY */ || !page_copy_rec_list_end(father_block, block, page_get_infimum_rec(page), - index, mtr)) { + index, mtr, err)) { + if (*err != DB_SUCCESS && *err != DB_FAIL) { + return nullptr; + } + const page_zip_des_t* page_zip = buf_block_get_page_zip(block); ut_a(father_page_zip); @@ -3309,16 +3411,16 @@ level lifts the records of the page to the father page, thus reducing the tree height. It is assumed that mtr holds an x-latch on the tree and on the page. If cursor is on the leaf level, mtr must also hold x-latches to the brothers, if they exist. -@return TRUE on success */ -ibool +@return error code */ +dberr_t btr_compress( /*=========*/ btr_cur_t* cursor, /*!< in/out: cursor on the page to merge or lift; the page must not be empty: when deleting records, use btr_discard_page() if the page would become empty */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ + bool adjust, /*!< in: whether the cursor position should be + adjusted even when compression occurs */ mtr_t* mtr) /*!< in/out: mini-transaction */ { dict_index_t* index; @@ -3342,8 +3444,6 @@ btr_compress( page = btr_cur_get_page(cursor); index = btr_cur_get_index(cursor); - btr_assert_not_corrupted(block, index); - ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); @@ -3352,14 +3452,12 @@ btr_compress( const uint32_t left_page_no = btr_page_get_prev(page); const uint32_t right_page_no = btr_page_get_next(page); + dberr_t 
err = DB_SUCCESS; -#ifdef UNIV_DEBUG - if (!page_is_leaf(page) && left_page_no == FIL_NULL) { - ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( - page_rec_get_next(page_get_infimum_rec(page)), - page_is_comp(page))); - } -#endif /* UNIV_DEBUG */ + ut_ad(page_is_leaf(page) || left_page_no != FIL_NULL + || (REC_INFO_MIN_REC_FLAG & rec_get_info_bits( + page_rec_get_next(page_get_infimum_rec(page)), + page_is_comp(page)))); heap = mem_heap_create(100); @@ -3392,7 +3490,7 @@ btr_compress( /* The page is the only one on the level, lift the records to the father */ - merge_block = btr_lift_page_up(index, block, mtr); + merge_block = btr_lift_page_up(index, block, mtr, &err); goto func_exit; } @@ -3417,20 +3515,21 @@ retry: if (!merge_block) { merge_page = NULL; } +cannot_merge: + err = DB_FAIL; goto err_exit; } merge_page = buf_block_get_frame(merge_block); -#ifdef UNIV_BTR_DEBUG - if (is_left) { - ut_a(btr_page_get_next(merge_page) - == block->page.id().page_no()); - } else { - ut_a(btr_page_get_prev(merge_page) - == block->page.id().page_no()); + if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_page + (is_left + ? 
FIL_PAGE_NEXT + : FIL_PAGE_PREV), + block->page.frame + + FIL_PAGE_OFFSET, 4))) { + err = DB_CORRUPTION; + goto err_exit; } -#endif /* UNIV_BTR_DEBUG */ ut_ad(page_validate(merge_page, index)); @@ -3482,7 +3581,7 @@ retry: rec_t* orig_pred = page_copy_rec_list_start( merge_block, block, page_get_supremum_rec(page), - index, mtr); + index, mtr, &err); if (!orig_pred) { goto err_exit; @@ -3491,7 +3590,9 @@ retry: btr_search_drop_page_hash_index(block); /* Remove the page from the level list */ - if (DB_SUCCESS != btr_level_list_remove(*block, *index, mtr)) { + err = btr_level_list_remove(*block, *index, mtr); + + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { goto err_exit; } @@ -3520,7 +3621,10 @@ retry: /* No GAP lock needs to be worrying about */ lock_sys.prdt_page_free_from_discard(id); } else { - btr_cur_node_ptr_delete(&father_cursor, mtr); + err = btr_cur_node_ptr_delete(&father_cursor, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + goto err_exit; + } if (index->has_locking()) { lock_update_merge_left( *merge_block, orig_pred, id); @@ -3537,9 +3641,7 @@ retry: btr_cur_t cursor2; /* father cursor pointing to node ptr of the right sibling */ -#ifdef UNIV_BTR_DEBUG byte fil_page_prev[4]; -#endif /* UNIV_BTR_DEBUG */ if (dict_index_is_spatial(index)) { cursor2.rtr_info = NULL; @@ -3552,15 +3654,16 @@ retry: index, &cursor2, btr_cur_get_block(&father_cursor), merge_block, heap)) { - goto err_exit; + goto cannot_merge; } /* Set rtr_info for cursor2, since it is necessary in recursive page merge. */ cursor2.rtr_info = cursor->rtr_info; cursor2.tree_height = cursor->tree_height; - } else { - btr_page_get_father(index, merge_block, mtr, &cursor2); + } else if (!btr_page_get_father(index, merge_block, mtr, + &cursor2)) { + goto cannot_merge; } if (merge_page_zip && left_page_no == FIL_NULL) { @@ -3570,33 +3673,28 @@ retry: requires that FIL_PAGE_PREV be FIL_NULL. Clear the field, but prepare to restore it. 
*/ static_assert(FIL_PAGE_PREV % 8 == 0, "alignment"); -#ifdef UNIV_BTR_DEBUG memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4); -#endif /* UNIV_BTR_DEBUG */ compile_time_assert(FIL_NULL == 0xffffffffU); memset_aligned<4>(merge_page + FIL_PAGE_PREV, 0xff, 4); } orig_succ = page_copy_rec_list_end(merge_block, block, page_get_infimum_rec(page), - cursor->index, mtr); + cursor->index, mtr, &err); if (!orig_succ) { ut_a(merge_page_zip); -#ifdef UNIV_BTR_DEBUG if (left_page_no == FIL_NULL) { /* FIL_PAGE_PREV was restored from merge_page_zip. */ - ut_a(!memcmp(fil_page_prev, - merge_page + FIL_PAGE_PREV, 4)); + ut_ad(!memcmp(fil_page_prev, + merge_page + FIL_PAGE_PREV, 4)); } -#endif /* UNIV_BTR_DEBUG */ goto err_exit; } btr_search_drop_page_hash_index(block); -#ifdef UNIV_BTR_DEBUG if (merge_page_zip && left_page_no == FIL_NULL) { /* Restore FIL_PAGE_PREV in order to avoid an assertion @@ -3607,10 +3705,11 @@ retry: are X-latched. */ memcpy(merge_page + FIL_PAGE_PREV, fil_page_prev, 4); } -#endif /* UNIV_BTR_DEBUG */ /* Remove the page from the level list */ - if (DB_SUCCESS != btr_level_list_remove(*block, *index, mtr)) { + err = btr_level_list_remove(*block, *index, mtr); + + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { goto err_exit; } @@ -3681,8 +3780,7 @@ retry: ut_a(err == DB_SUCCESS); if (!compressed) { - btr_cur_compress_if_useful(&cursor2, - FALSE, + btr_cur_compress_if_useful(&cursor2, false, mtr); } @@ -3747,39 +3845,33 @@ retry: } /* Free the file page */ - btr_page_free(index, block, mtr); - - /* btr_check_node_ptr() needs parent block latched. 
- If the merge_block's parent block is not same, - we cannot use btr_check_node_ptr() */ - ut_ad(leftmost_child - || btr_check_node_ptr(index, merge_block, mtr)); + err = btr_page_free(index, block, mtr); + if (err == DB_SUCCESS) { + ut_ad(leftmost_child + || btr_check_node_ptr(index, merge_block, mtr)); func_exit: - mem_heap_free(heap); - - if (adjust) { - ut_ad(nth_rec > 0); - btr_cur_position( - index, - page_rec_get_nth(merge_block->page.frame, nth_rec), - merge_block, cursor); - } - - MONITOR_INC(MONITOR_INDEX_MERGE_SUCCESSFUL); - - DBUG_RETURN(TRUE); + MONITOR_INC(MONITOR_INDEX_MERGE_SUCCESSFUL); + if (adjust) { + ut_ad(nth_rec > 0); + btr_cur_position( + index, + page_rec_get_nth(merge_block->page.frame, + nth_rec), + merge_block, cursor); + } + } else { err_exit: - /* We play it safe and reset the free bits. */ - if (merge_block && merge_block->zip_size() - && page_is_leaf(merge_block->page.frame) - && !dict_index_is_clust(index)) { - - ibuf_reset_free_bits(merge_block); - } + /* We play it safe and reset the free bits. 
*/ + if (merge_block && merge_block->zip_size() + && page_is_leaf(merge_block->page.frame) + && !index->is_clust()) { + ibuf_reset_free_bits(merge_block); + } + } mem_heap_free(heap); - DBUG_RETURN(FALSE); + DBUG_RETURN(err); } /*************************************************************//** @@ -3819,12 +3911,17 @@ btr_discard_only_page_on_level( ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); btr_search_drop_page_hash_index(block); - if (dict_index_is_spatial(index)) { + if (index->is_spatial()) { /* Check any concurrent search having this page */ rtr_check_discard_page(index, NULL, block); - rtr_page_get_father(index, block, mtr, NULL, &cursor); + if (!rtr_page_get_father(index, block, mtr, nullptr, + &cursor)) { + return; + } } else { - btr_page_get_father(index, block, mtr, &cursor); + if (!btr_page_get_father(index, block, mtr, &cursor)) { + return; + } } father = btr_cur_get_block(&cursor); @@ -3834,7 +3931,9 @@ btr_discard_only_page_on_level( } /* Free the file page */ - btr_page_free(index, block, mtr); + if (btr_page_free(index, block, mtr) != DB_SUCCESS) { + return; + } block = father; page_level++; @@ -3844,17 +3943,6 @@ btr_discard_only_page_on_level( for the node pointer to the (now discarded) block(s). */ ut_ad(!page_has_siblings(block->page.frame)); -#ifdef UNIV_BTR_DEBUG - if (!dict_index_is_ibuf(index)) { - const page_t* root = buf_block_get_frame(block); - const ulint space = index->table->space_id; - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF - + root, space)); - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + root, space)); - } -#endif /* UNIV_BTR_DEBUG */ - mem_heap_t* heap = nullptr; const rec_t* rec = nullptr; rec_offs* offsets = nullptr; @@ -3907,7 +3995,7 @@ btr_discard_only_page_on_level( Discards a page from a B-tree. This is used to remove the last record from a B-tree page: the whole page must be removed at the same time. 
This cannot be used for the root page, which is allowed to be empty. */ -void +dberr_t btr_discard_page( /*=============*/ btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on @@ -3930,10 +4018,10 @@ btr_discard_page( MONITOR_INC(MONITOR_INDEX_DISCARD); - if (dict_index_is_spatial(index)) { - rtr_page_get_father(index, block, mtr, cursor, &parent_cursor); - } else { - btr_page_get_father(index, block, mtr, &parent_cursor); + if (index->is_spatial() + ? !rtr_page_get_father(index, block, mtr, cursor, &parent_cursor) + : !btr_page_get_father(index, block, mtr, &parent_cursor)) { + return DB_CORRUPTION; } /* Decide the page which will inherit the locks */ @@ -3943,12 +4031,20 @@ btr_discard_page( ut_d(bool parent_is_different = false); if (left_page_no != FIL_NULL) { + dberr_t err; merge_block = btr_block_get(*index, left_page_no, RW_X_LATCH, - true, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(merge_block->page.frame) - == block->page.id().page_no()); -#endif /* UNIV_BTR_DEBUG */ + true, mtr, &err); + if (UNIV_UNLIKELY(!merge_block)) { + return err; + } + + if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame + + FIL_PAGE_NEXT, + block->page.frame + + FIL_PAGE_OFFSET, 4))) { + return DB_CORRUPTION; + } + ut_d(parent_is_different = (page_rec_get_next( page_get_infimum_rec( @@ -3956,12 +4052,19 @@ btr_discard_page( &parent_cursor))) == btr_cur_get_rec(&parent_cursor))); } else if (right_page_no != FIL_NULL) { + dberr_t err; merge_block = btr_block_get(*index, right_page_no, RW_X_LATCH, - true, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(merge_block->page.frame) - == block->page.id().page_no()); -#endif /* UNIV_BTR_DEBUG */ + true, mtr, &err); + if (UNIV_UNLIKELY(!merge_block)) { + return err; + } + if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame + + FIL_PAGE_PREV, + block->page.frame + + FIL_PAGE_OFFSET, 4))) { + return DB_CORRUPTION; + } + ut_d(parent_is_different = page_rec_is_supremum( 
page_rec_get_next(btr_cur_get_rec(&parent_cursor)))); if (!page_is_leaf(merge_block->page.frame)) { @@ -3975,26 +4078,29 @@ btr_discard_page( } } else { btr_discard_only_page_on_level(index, block, mtr); + return DB_SUCCESS; + } - return; + if (UNIV_UNLIKELY(memcmp_aligned<2>(&merge_block->page.frame + [PAGE_HEADER + PAGE_LEVEL], + &block->page.frame + [PAGE_HEADER + PAGE_LEVEL], 2))) { + return DB_CORRUPTION; } - ut_a(page_is_comp(merge_block->page.frame) - == page_is_comp(block->page.frame)); - ut_ad(!memcmp_aligned<2>(&merge_block->page.frame - [PAGE_HEADER + PAGE_LEVEL], - &block->page.frame - [PAGE_HEADER + PAGE_LEVEL], 2)); btr_search_drop_page_hash_index(block); if (dict_index_is_spatial(index)) { rtr_node_ptr_delete(&parent_cursor, mtr); - } else { - btr_cur_node_ptr_delete(&parent_cursor, mtr); + } else if (dberr_t err = + btr_cur_node_ptr_delete(&parent_cursor, mtr)) { + return err; } /* Remove the page from the level list */ - ut_a(DB_SUCCESS == btr_level_list_remove(*block, *index, mtr)); + if (dberr_t err = btr_level_list_remove(*block, *index, mtr)) { + return err; + } #ifdef UNIV_ZIP_DEBUG { @@ -4022,20 +4128,25 @@ btr_discard_page( } /* Free the file page */ - btr_page_free(index, block, mtr); - - /* btr_check_node_ptr() needs parent block latched. - If the merge_block's parent block is not same, - we cannot use btr_check_node_ptr() */ - ut_ad(parent_is_different - || btr_check_node_ptr(index, merge_block, mtr)); - - if (btr_cur_get_block(&parent_cursor)->page.id().page_no() - == index->page - && !page_has_siblings(btr_cur_get_page(&parent_cursor)) - && page_get_n_recs(btr_cur_get_page(&parent_cursor)) == 1) { - btr_lift_page_up(index, merge_block, mtr); + dberr_t err = btr_page_free(index, block, mtr); + + if (err == DB_SUCCESS) { + /* btr_check_node_ptr() needs parent block latched. 
+ If the merge_block's parent block is not same, + we cannot use btr_check_node_ptr() */ + ut_ad(parent_is_different + || btr_check_node_ptr(index, merge_block, mtr)); + + if (btr_cur_get_block(&parent_cursor)->page.id().page_no() + == index->page + && !page_has_siblings(btr_cur_get_page(&parent_cursor)) + && page_get_n_recs(btr_cur_get_page(&parent_cursor)) + == 1) { + btr_lift_page_up(index, merge_block, mtr, &err); + } } + + return err; } #ifdef UNIV_BTR_PRINT @@ -4126,12 +4237,12 @@ btr_print_recursive( *offsets = rec_get_offsets( node_ptr, index, *offsets, 0, ULINT_UNDEFINED, heap); - btr_print_recursive(index, - btr_node_ptr_get_child(node_ptr, - index, - *offsets, - &mtr2), - width, heap, offsets, &mtr2); + if (buf_block_t *child = + btr_node_ptr_get_child(node_ptr, index, *offsets, + &mtr2)) { + btr_print_recursive(index, child, width, heap, + offsets, &mtr2); + } mtr_commit(&mtr2); } @@ -4518,11 +4629,9 @@ btr_validate_report2( error << ", index tree level " << level; } -/************************************************************//** -Validates index tree level. -@return TRUE if ok */ +/** Validate an index tree level. 
*/ static -bool +dberr_t btr_validate_level( /*===============*/ dict_index_t* index, /*!< in: index tree */ @@ -4540,7 +4649,6 @@ btr_validate_level( rec_t* rec; page_cur_t cursor; dtuple_t* node_ptr_tuple; - bool ret = true; mtr_t mtr; mem_heap_t* heap = mem_heap_create(256); rec_offs* offsets = NULL; @@ -4564,23 +4672,31 @@ btr_validate_level( } } - block = btr_root_block_get(index, RW_SX_LATCH, &mtr); + dberr_t err; + block = btr_root_block_get(index, RW_SX_LATCH, &mtr, &err); + if (!block) { + mtr.commit(); + return err; + } page = buf_block_get_frame(block); fil_space_t* space = index->table->space; while (level != btr_page_get_level(page)) { const rec_t* node_ptr; - - if (fseg_page_is_free(space, block->page.id().page_no())) { - + switch (dberr_t e = + fseg_page_is_allocated(space, + block->page.id().page_no())) { + case DB_SUCCESS_LOCKED_REC: + break; + case DB_SUCCESS: btr_validate_report1(index, level, block); - ib::warn() << "Page is free"; - - ret = false; + e = DB_CORRUPTION; + /* fall through */ + default: + err = e; } - ut_a(index->table->space_id == block->page.id().space()); ut_a(block->page.id().space() == page_get_space_id(page)); #ifdef UNIV_ZIP_DEBUG @@ -4597,7 +4713,11 @@ btr_validate_level( ULINT_UNDEFINED, &heap); savepoint2 = mtr_set_savepoint(&mtr); - block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr); + block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr, + &err); + if (!block) { + break; + } page = buf_block_get_frame(block); /* For R-Tree, since record order might not be the same as @@ -4619,7 +4739,10 @@ btr_validate_level( savepoint2 = mtr_set_savepoint(&mtr); block = btr_block_get(*index, left_page_no, RW_SX_LATCH, false, - &mtr); + &mtr, &err); + if (!block) { + goto invalid_page; + } page = buf_block_get_frame(block); left_page_no = btr_page_get_prev(page); } @@ -4630,8 +4753,16 @@ btr_validate_level( level. 
*/ loop: + if (!block) { +invalid_page: +func_exit: + mem_heap_free(heap); + return err; + } + mem_heap_empty(heap); offsets = offsets2 = NULL; + if (!srv_read_only_mode) { if (lockout) { mtr_x_lock_index(index, &mtr); @@ -4640,72 +4771,64 @@ loop: } } + page = block->page.frame; + #ifdef UNIV_ZIP_DEBUG page_zip = buf_block_get_page_zip(block); ut_a(!page_zip || page_zip_validate(page_zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ - ut_a(block->page.id().space() == index->table->space_id); - - if (fseg_page_is_free(space, block->page.id().page_no())) { - + if (DB_SUCCESS_LOCKED_REC + != fseg_page_is_allocated(space, block->page.id().page_no())) { btr_validate_report1(index, level, block); ib::warn() << "Page is marked as free"; - ret = false; - + err = DB_CORRUPTION; } else if (btr_page_get_index_id(page) != index->id) { - ib::error() << "Page index id " << btr_page_get_index_id(page) << " != data dictionary index id " << index->id; - - ret = false; - + err = DB_CORRUPTION; } else if (!page_validate(page, index)) { - btr_validate_report1(index, level, block); - ret = false; - + err = DB_CORRUPTION; + } else if (btr_page_get_level(page) != level) { + btr_validate_report1(index, level, block); + ib::error() << "Page level is not " << level; + err = DB_CORRUPTION; } else if (level == 0 && !btr_index_page_validate(block, index)) { - /* We are on level 0. Check that the records have the right number of fields, and field lengths are right. 
*/ - - ret = false; + err = DB_CORRUPTION; + } else if (!page_is_empty(page)) { + } else if (level) { + btr_validate_report1(index, level, block); + ib::error() << "Non-leaf page is empty"; + } else if (block->page.id().page_no() != index->page) { + btr_validate_report1(index, level, block); + ib::error() << "Empty leaf page is not index root"; } - ut_a(btr_page_get_level(page) == level); - uint32_t right_page_no = btr_page_get_next(page); uint32_t left_page_no = btr_page_get_prev(page); - ut_a(!page_is_empty(page) - || (level == 0 - && page_get_page_no(page) == dict_index_get_page(index))); - if (right_page_no != FIL_NULL) { const rec_t* right_rec; savepoint = mtr_set_savepoint(&mtr); right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH, - !level, &mtr); + !level, &mtr, &err); + if (!right_block) { + btr_validate_report1(index, level, block); + fputs("InnoDB: broken FIL_PAGE_NEXT link\n", stderr); + goto invalid_page; + } right_page = buf_block_get_frame(right_block); if (btr_page_get_prev(right_page) != page_get_page_no(page)) { btr_validate_report2(index, level, block, right_block); fputs("InnoDB: broken FIL_PAGE_NEXT" " or FIL_PAGE_PREV links\n", stderr); - - ret = false; - } - - if (page_is_comp(right_page) != page_is_comp(page)) { - btr_validate_report2(index, level, block, right_block); - fputs("InnoDB: 'compact' flag mismatch\n", stderr); - - ret = false; - - goto node_ptr_fails; + err = DB_CORRUPTION; } rec = page_rec_get_prev(page_get_supremum_rec(page)); @@ -4741,15 +4864,17 @@ loop: page_get_infimum_rec(right_page)); rec_print(stderr, rec, index); putc('\n', stderr); - - ret = false; + err = DB_CORRUPTION; } } - if (level > 0 && left_page_no == FIL_NULL) { - ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( - page_rec_get_next(page_get_infimum_rec(page)), - page_is_comp(page))); + if (level > 0 && left_page_no == FIL_NULL + && !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( + page_rec_get_next(page_get_infimum_rec(page)), + page_is_comp(page)))) { 
+ btr_validate_report1(index, level, block); + ib::error() << "Missing REC_INFO_MIN_REC_FLAG"; + err = DB_CORRUPTION; } /* Similarly skip the father node check for spatial index for now, @@ -4806,8 +4931,7 @@ loop: fputs("InnoDB: record on page ", stderr); rec_print_new(stderr, rec, offsets); putc('\n', stderr); - ret = false; - + err = DB_CORRUPTION; goto node_ptr_fails; } @@ -4831,8 +4955,7 @@ loop: fputs("InnoDB: first rec ", stderr); rec_print(stderr, first_rec, index); putc('\n', stderr); - ret = false; - + err = DB_CORRUPTION; goto node_ptr_fails; } } @@ -4870,7 +4993,15 @@ loop: right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH, - !level, &mtr); + !level, &mtr, + &err); + if (!right_block) { + btr_validate_report1(index, level, + block); + fputs("InnoDB: broken FIL_PAGE_NEXT" + " link\n", stderr); + goto invalid_page; + } } btr_cur_position( @@ -4888,7 +5019,7 @@ loop: if (btr_cur_get_rec(&right_node_cur) != right_node_ptr) { - ret = false; + err = DB_CORRUPTION; fputs("InnoDB: node pointer to" " the right page is wrong\n", stderr); @@ -4904,7 +5035,7 @@ loop: != page_rec_get_next( page_get_infimum_rec( right_father_page))) { - ret = false; + err = DB_CORRUPTION; fputs("InnoDB: node pointer 2 to" " the right page is wrong\n", stderr); @@ -4916,7 +5047,7 @@ loop: if (page_get_page_no(right_father_page) != btr_page_get_next(father_page)) { - ret = false; + err = DB_CORRUPTION; fputs("InnoDB: node pointer 3 to" " the right page is wrong\n", stderr); @@ -4955,15 +5086,11 @@ node_ptr_fails: } block = btr_block_get(*index, right_page_no, RW_SX_LATCH, - !level, &mtr); - page = buf_block_get_frame(block); - + !level, &mtr, &err); goto loop; } - mem_heap_free(heap); - - return(ret); + goto func_exit; } /**************************************************************//** @@ -4975,45 +5102,33 @@ btr_validate_index( dict_index_t* index, /*!< in: index */ const trx_t* trx) /*!< in: transaction or NULL */ { - dberr_t err = DB_SUCCESS; - bool lockout = 
dict_index_is_spatial(index); + /* Full Text index are implemented by auxiliary tables, not the B-tree */ + if (index->online_status != ONLINE_INDEX_COMPLETE || + (index->type & (DICT_FTS | DICT_CORRUPT))) + return DB_SUCCESS; - /* Full Text index are implemented by auxiliary tables, - not the B-tree */ - if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) { - return(err); - } + const bool lockout= index->is_spatial(); - mtr_t mtr; - - mtr_start(&mtr); - - if (!srv_read_only_mode) { - if (lockout) { - mtr_x_lock_index(index, &mtr); - } else { - mtr_sx_lock_index(index, &mtr); - } - } - - page_t* root = btr_root_get(index, &mtr); - - if (!root) { - mtr_commit(&mtr); - return DB_CORRUPTION; - } - - ulint n = btr_page_get_level(root); + mtr_t mtr; + mtr.start(); - for (ulint i = 0; i <= n; ++i) { + if (lockout) + mtr_x_lock_index(index, &mtr); + else + mtr_sx_lock_index(index, &mtr); - if (!btr_validate_level(index, trx, n - i, lockout)) { - err = DB_CORRUPTION; - } - } + dberr_t err; + if (page_t *root= btr_root_get(index, &mtr, &err)) + for (auto level= btr_page_get_level(root);; level--) + { + if (dberr_t err_level= btr_validate_level(index, trx, level, lockout)) + err= err_level; + if (!level) + break; + } - mtr_commit(&mtr); - return(err); + mtr.commit(); + return err; } /**************************************************************//** @@ -5040,6 +5155,7 @@ btr_can_merge_with_page( DBUG_ENTER("btr_can_merge_with_page"); if (page_no == FIL_NULL) { +error: *merge_block = NULL; DBUG_RETURN(false); } @@ -5049,6 +5165,9 @@ btr_can_merge_with_page( mblock = btr_block_get(*index, page_no, RW_X_LATCH, page_is_leaf(page), mtr); + if (!mblock) { + goto error; + } mpage = buf_block_get_frame(mblock); n_recs = page_get_n_recs(page); @@ -5075,8 +5194,8 @@ btr_can_merge_with_page( if (data_size > max_ins_size) { /* We have to reorganize mpage */ - if (!btr_page_reorganize_block(page_zip_level, mblock, index, - mtr)) { + if 
(btr_page_reorganize_block(page_zip_level, mblock, index, + mtr) != DB_SUCCESS) { goto error; } @@ -5096,8 +5215,4 @@ btr_can_merge_with_page( *merge_block = mblock; DBUG_RETURN(true); - -error: - *merge_block = NULL; - DBUG_RETURN(false); } diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc index c05f91e7e5e..538cb06e654 100644 --- a/storage/innobase/btr/btr0bulk.cc +++ b/storage/innobase/btr/btr0bulk.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -61,17 +61,22 @@ PageBulk::init() m_index->set_modified(alloc_mtr); uint32_t n_reserved; - if (!fsp_reserve_free_extents(&n_reserved, - m_index->table->space, - 1, FSP_NORMAL, &alloc_mtr)) { + dberr_t err = fsp_reserve_free_extents( + &n_reserved, m_index->table->space, 1, FSP_NORMAL, + &alloc_mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { +oom: alloc_mtr.commit(); m_mtr.commit(); - return(DB_OUT_OF_FILE_SPACE); + return err; } /* Allocate a new page. 
*/ new_block = btr_page_alloc(m_index, 0, FSP_UP, m_level, - &alloc_mtr, &m_mtr); + &alloc_mtr, &m_mtr, &err); + if (!new_block) { + goto oom; + } m_index->table->space->release_free_extents(n_reserved); @@ -103,9 +108,12 @@ PageBulk::init() } else { new_block = btr_block_get(*m_index, m_page_no, RW_X_LATCH, false, &m_mtr); + if (!new_block) { + m_mtr.commit(); + return(DB_CORRUPTION); + } new_page = buf_block_get_frame(new_block); - ut_ad(new_block->page.id().page_no() == m_page_no); ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW); @@ -840,38 +848,19 @@ PageBulk::release() } /** Start mtr and latch the block */ -dberr_t -PageBulk::latch() +void PageBulk::latch() { - m_mtr.start(); - m_index->set_modified(m_mtr); - - ut_ad(m_block->page.buf_fix_count()); - - /* In case the block is U-latched by page_cleaner. */ - if (!buf_page_optimistic_get(RW_X_LATCH, m_block, m_modify_clock, - &m_mtr)) { - /* FIXME: avoid another lookup */ - m_block = buf_page_get_gen(page_id_t(m_index->table->space_id, - m_page_no), - 0, RW_X_LATCH, - m_block, BUF_GET_IF_IN_POOL, - &m_mtr, &m_err); - - if (m_err != DB_SUCCESS) { - return (m_err); - } - - ut_ad(m_block != NULL); - } - - ut_d(const auto buf_fix_count =) m_block->page.unfix(); - - ut_ad(buf_fix_count); - ut_ad(m_cur_rec > m_page); - ut_ad(m_cur_rec < m_heap_top); - - return (m_err); + m_mtr.start(); + m_index->set_modified(m_mtr); +#ifdef BTR_CUR_HASH_ADAPT + ut_ad(!m_block->index); +#endif + m_block->page.lock.x_lock(); + ut_ad(m_block->page.buf_fix_count()); + m_mtr.memo_push(m_block, MTR_MEMO_PAGE_X_FIX); + + ut_ad(m_cur_rec > m_page); + ut_ad(m_cur_rec < m_heap_top); } /** Split a page @@ -1199,6 +1188,13 @@ BtrBulk::finish(dberr_t err) ut_ad(last_page_no != FIL_NULL); last_block = btr_block_get(*m_index, last_page_no, RW_X_LATCH, false, &mtr); + if (!last_block) { + err = DB_CORRUPTION; +err_exit: + mtr.commit(); + return err; + } + first_rec = page_rec_get_next( page_get_infimum_rec(last_block->page.frame)); 
ut_ad(page_rec_is_user_rec(first_rec)); @@ -1206,18 +1202,18 @@ BtrBulk::finish(dberr_t err) /* Copy last page to root page. */ err = root_page_bulk.init(); if (err != DB_SUCCESS) { - mtr.commit(); - return(err); + goto err_exit; } root_page_bulk.copyIn(first_rec); root_page_bulk.finish(); /* Remove last page. */ - btr_page_free(m_index, last_block, &mtr); - + err = btr_page_free(m_index, last_block, &mtr); mtr.commit(); - err = pageCommit(&root_page_bulk, NULL, false); + if (dberr_t e = pageCommit(&root_page_bulk, NULL, false)) { + err = e; + } ut_ad(err == DB_SUCCESS); } diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index c01d3f8134f..abab6ab876b 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -202,30 +202,29 @@ btr_rec_free_externally_stored_fields( @param[in] latch_mode BTR_SEARCH_LEAF, ... @param[in] cursor cursor @param[in] mtr mini-transaction -@return blocks and savepoints which actually latched. */ -btr_latch_leaves_t +@param[out] latch_leaves latched blocks and savepoints */ +void btr_cur_latch_leaves( buf_block_t* block, ulint latch_mode, btr_cur_t* cursor, - mtr_t* mtr) + mtr_t* mtr, + btr_latch_leaves_t* latch_leaves) { - buf_block_t* get_block; - bool spatial; - btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}}; - compile_time_assert(int(MTR_MEMO_PAGE_S_FIX) == int(RW_S_LATCH)); compile_time_assert(int(MTR_MEMO_PAGE_X_FIX) == int(RW_X_LATCH)); compile_time_assert(int(MTR_MEMO_PAGE_SX_FIX) == int(RW_SX_LATCH)); ut_ad(block->page.id().space() == cursor->index->table->space->id); - - spatial = dict_index_is_spatial(cursor->index) && cursor->rtr_info; ut_ad(block->page.in_file()); ut_ad(srv_read_only_mode || mtr->memo_contains_flagged(&cursor->index->lock, MTR_MEMO_S_LOCK | MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); + auto rtr_info = cursor->rtr_info; + if (UNIV_LIKELY_NULL(rtr_info) && !cursor->index->is_spatial()) { + rtr_info = nullptr; + } const rw_lock_type_t mode = 
rw_lock_type_t( latch_mode & (RW_X_LATCH | RW_S_LATCH)); @@ -236,146 +235,124 @@ btr_cur_latch_leaves( switch (latch_mode) { uint32_t left_page_no; uint32_t right_page_no; + ulint save; case BTR_SEARCH_LEAF: case BTR_MODIFY_LEAF: case BTR_SEARCH_TREE: - if (spatial) { - cursor->rtr_info->tree_savepoints[RTR_MAX_LEVELS] - = mtr_set_savepoint(mtr); + if (UNIV_LIKELY_NULL(rtr_info)) { + rtr_info->tree_savepoints[RTR_MAX_LEVELS] + = mtr->get_savepoint(); } - - latch_leaves.savepoints[1] = mtr_set_savepoint(mtr); - get_block = btr_block_get(*cursor->index, - block->page.id().page_no(), mode, - true, mtr); - latch_leaves.blocks[1] = get_block; -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(get_block->page.frame) - == page_is_comp(block->page.frame)); -#endif /* UNIV_BTR_DEBUG */ - if (spatial) { - cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS] - = get_block; +latch_block: + if (latch_leaves) { + latch_leaves->savepoints[1] = mtr->get_savepoint(); + latch_leaves->blocks[1] = block; } - - return(latch_leaves); + block->page.fix(); + mtr->page_lock(block, mode); + if (UNIV_LIKELY_NULL(rtr_info)) { + rtr_info->tree_blocks[RTR_MAX_LEVELS] = block; + } + return; case BTR_MODIFY_TREE: /* It is exclusive for other operations which calls btr_page_set_prev() */ ut_ad(mtr->memo_contains_flagged(&cursor->index->lock, MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); + save = mtr->get_savepoint(); /* x-latch also siblings from left to right */ left_page_no = btr_page_get_prev(block->page.frame); if (left_page_no != FIL_NULL) { - - if (spatial) { - cursor->rtr_info->tree_savepoints[ - RTR_MAX_LEVELS] = mtr_set_savepoint(mtr); - } - - latch_leaves.savepoints[0] = mtr_set_savepoint(mtr); - get_block = btr_block_get( + buf_block_t *b = btr_block_get( *cursor->index, left_page_no, RW_X_LATCH, true, mtr); - latch_leaves.blocks[0] = get_block; - if (spatial) { - cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS] - = get_block; + if (latch_leaves) { + latch_leaves->savepoints[0] = save; + latch_leaves->blocks[0] 
= b; } + + if (UNIV_LIKELY_NULL(rtr_info)) { + rtr_info->tree_savepoints[RTR_MAX_LEVELS] + = save; + rtr_info->tree_blocks[RTR_MAX_LEVELS] = b; + } + + save = mtr->get_savepoint(); } - if (spatial) { - cursor->rtr_info->tree_savepoints[RTR_MAX_LEVELS + 1] - = mtr_set_savepoint(mtr); + if (latch_leaves) { + latch_leaves->savepoints[1] = mtr->get_savepoint(); + latch_leaves->blocks[1] = block; } - latch_leaves.savepoints[1] = mtr_set_savepoint(mtr); - get_block = btr_block_get( - *cursor->index, block->page.id().page_no(), - RW_X_LATCH, true, mtr); - latch_leaves.blocks[1] = get_block; + mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(get_block->page.frame) - == page_is_comp(block->page.frame)); -#endif /* UNIV_BTR_DEBUG */ + block->page.fix(); + block->page.lock.x_lock(); +#ifdef BTR_CUR_HASH_ADAPT + ut_ad(!block->index || !block->index->freed()); +#endif - if (spatial) { - cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS + 1] - = get_block; + if (UNIV_LIKELY_NULL(rtr_info)) { + rtr_info->tree_savepoints[RTR_MAX_LEVELS + 1] = save; + rtr_info->tree_blocks[RTR_MAX_LEVELS + 1] = block; } right_page_no = btr_page_get_next(block->page.frame); if (right_page_no != FIL_NULL) { - if (spatial) { - cursor->rtr_info->tree_savepoints[ - RTR_MAX_LEVELS + 2] = mtr_set_savepoint( - mtr); - } - latch_leaves.savepoints[2] = mtr_set_savepoint(mtr); - get_block = btr_block_get(*cursor->index, - right_page_no, RW_X_LATCH, - true, mtr); - latch_leaves.blocks[2] = get_block; -#ifdef UNIV_BTR_DEBUG - if (get_block) { - ut_a(page_is_comp(get_block->page.frame) - == page_is_comp(block->page.frame)); - ut_a(btr_page_get_prev(get_block->page.frame) - == block->page.id().page_no()); + save = mtr->get_savepoint(); + + buf_block_t* b = btr_block_get( + *cursor->index, right_page_no, RW_X_LATCH, + true, mtr); + if (latch_leaves) { + latch_leaves->savepoints[2] = save; + latch_leaves->blocks[2] = b; } -#endif /* UNIV_BTR_DEBUG */ - if (spatial) { - 
cursor->rtr_info->tree_blocks[ - RTR_MAX_LEVELS + 2] = get_block; + + if (UNIV_LIKELY_NULL(rtr_info)) { + rtr_info->tree_savepoints[RTR_MAX_LEVELS + 2] + = save; + rtr_info->tree_blocks[RTR_MAX_LEVELS + 2] = b; } } - return(latch_leaves); + return; case BTR_SEARCH_PREV: case BTR_MODIFY_PREV: + ut_ad(!rtr_info); + static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); + static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); + static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) + == (RW_S_LATCH ^ RW_X_LATCH), ""); + /* Because we are holding index->lock, no page splits or merges may run concurrently, and we may read FIL_PAGE_PREV from a buffer-fixed, unlatched page. */ left_page_no = btr_page_get_prev(block->page.frame); if (left_page_no != FIL_NULL) { - latch_leaves.savepoints[0] = mtr_set_savepoint(mtr); - get_block = btr_block_get( - *cursor->index, left_page_no, mode, - true, mtr); - latch_leaves.blocks[0] = get_block; - cursor->left_block = get_block; -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(get_block->page.frame) - == page_is_comp(block->page.frame)); - ut_a(btr_page_get_next(get_block->page.frame) - == block->page.id().page_no()); -#endif /* UNIV_BTR_DEBUG */ - } - - latch_leaves.savepoints[1] = mtr_set_savepoint(mtr); - get_block = btr_block_get(*cursor->index, - block->page.id().page_no(), mode, - true, mtr); - latch_leaves.blocks[1] = get_block; -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(get_block->page.frame) - == page_is_comp(block->page.frame)); -#endif /* UNIV_BTR_DEBUG */ - return(latch_leaves); + save = mtr->get_savepoint(); + cursor->left_block = btr_block_get( + *cursor->index, left_page_no, mode, true, mtr); + if (latch_leaves) { + latch_leaves->savepoints[0] = save; + latch_leaves->blocks[0] = cursor->left_block; + } + } + + goto latch_block; case BTR_CONT_MODIFY_TREE: - ut_ad(dict_index_is_spatial(cursor->index)); - return(latch_leaves); + ut_ad(cursor->index->is_spatial()); + return; } - ut_error; - return(latch_leaves); + MY_ASSERT_UNREACHABLE(); 
} /** Load the instant ALTER TABLE metadata from the clustered index @@ -392,21 +369,28 @@ static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr) ut_ad(index->table->supports_instant()); ut_ad(index->table->is_readable()); + dberr_t err; const fil_space_t* space = index->table->space; if (!space) { +corrupted: + err = DB_CORRUPTION; unreadable: ib::error() << "Table " << index->table->name << " has an unreadable root page"; index->table->corrupted = true; - return DB_CORRUPTION; + index->table->file_unreadable = true; + return err; } - buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, mtr); - - if (!root || btr_cur_instant_root_init(index, root->page.frame)) { + buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, mtr, &err); + if (!root) { goto unreadable; } + if (btr_cur_instant_root_init(index, root->page.frame)) { + goto corrupted; + } + ut_ad(index->n_core_null_bytes != dict_index_t::NO_CORE_NULL_BYTES); if (fil_page_get_type(root->page.frame) == FIL_PAGE_INDEX) { @@ -418,8 +402,8 @@ unreadable: /* Relax the assertion in rec_init_offsets(). */ ut_ad(!index->in_instant_init); ut_d(index->in_instant_init = true); - dberr_t err = btr_cur_open_at_index_side(true, index, BTR_SEARCH_LEAF, - &cur, 0, mtr); + err = btr_cur_open_at_index_side(true, index, BTR_SEARCH_LEAF, + &cur, 0, mtr); ut_d(index->in_instant_init = false); if (err != DB_SUCCESS) { index->table->corrupted = true; @@ -552,6 +536,10 @@ incompatible: page_id_t(space->id, mach_read_from_4(ptr + BTR_EXTERN_PAGE_NO)), 0, RW_S_LATCH, mtr); + if (!block) { + goto incompatible; + } + if (fil_page_get_type(block->page.frame) != FIL_PAGE_TYPE_BLOB || mach_read_from_4(&block->page.frame [FIL_PAGE_DATA @@ -670,14 +658,14 @@ index root page. 
bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page) { ut_ad(!index->is_dummy); - ut_ad(fil_page_index_page_check(page)); - ut_ad(!page_has_siblings(page)); - ut_ad(page_get_space_id(page) == index->table->space_id); - ut_ad(page_get_page_no(page) == index->page); - ut_ad(!page_is_comp(page) == !dict_table_is_comp(index->table)); ut_ad(index->is_primary()); ut_ad(!index->is_instant()); ut_ad(index->table->supports_instant()); + + if (page_has_siblings(page)) { + return true; + } + /* This is normally executed as part of btr_cur_instant_init() when dict_load_table_one() is loading a table definition. Other threads should not access or modify the n_core_null_bytes, @@ -688,13 +676,14 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page) switch (fil_page_get_type(page)) { default: - ut_ad("wrong page type" == 0); return true; case FIL_PAGE_INDEX: /* The field PAGE_INSTANT is guaranteed 0 on clustered index root pages of ROW_FORMAT=COMPACT or ROW_FORMAT=DYNAMIC when instant ADD COLUMN is not used. 
*/ - ut_ad(!page_is_comp(page) || !page_get_instant(page)); + if (page_is_comp(page) && page_get_instant(page)) { + return true; + } index->n_core_null_bytes = static_cast( UT_BITS_IN_BYTES(unsigned(index->n_nullable))); return false; @@ -771,7 +760,7 @@ btr_cur_optimistic_latch_leaves( switch (*latch_mode) { default: - ut_error; + MY_ASSERT_UNREACHABLE(); return(false); case BTR_SEARCH_LEAF: case BTR_MODIFY_LEAF: @@ -799,27 +788,23 @@ btr_cur_optimistic_latch_leaves( *latch_mode & (RW_X_LATCH | RW_S_LATCH)); if (left_page_no != FIL_NULL) { - dberr_t err = DB_SUCCESS; cursor->left_block = buf_page_get_gen( page_id_t(cursor->index->table->space_id, left_page_no), cursor->index->table->space->zip_size(), - mode, nullptr, BUF_GET_POSSIBLY_FREED, - mtr, &err); + mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr); - if (!cursor->left_block) { - cursor->index->table->file_unreadable = true; - } else if (cursor->left_block->page.is_freed() - || btr_page_get_next( - cursor->left_block->page.frame) - != curr_page_no) { + if (cursor->left_block + && btr_page_get_next( + cursor->left_block->page.frame) + != curr_page_no) { /* release the left block */ btr_leaf_page_release( cursor->left_block, mode, mtr); return false; } } else { - cursor->left_block = NULL; + cursor->left_block = nullptr; } if (buf_page_optimistic_get(mode, block, modify_clock, mtr)) { @@ -841,10 +826,8 @@ btr_cur_optimistic_latch_leaves( } ut_ad(block->page.buf_fix_count()); - /* release the left block */ - if (cursor->left_block != NULL) { - btr_leaf_page_release(cursor->left_block, - mode, mtr); + if (cursor->left_block) { + btr_leaf_page_release(cursor->left_block, mode, mtr); } } @@ -907,7 +890,7 @@ btr_cur_latch_for_root_leaf( return(RW_NO_LATCH); } - ut_error; + MY_ASSERT_UNREACHABLE(); return(RW_NO_LATCH); /* avoid compiler warnings */ } @@ -1070,7 +1053,7 @@ btr_cur_need_opposite_intention( return(false); } - ut_error; + MY_ASSERT_UNREACHABLE(); return(false); } @@ -1272,7 +1255,6 @@ 
btr_cur_search_to_nth_level_func( page_cur_t* page_cursor; btr_op_t btr_op; ulint root_height = 0; /* remove warning */ - dberr_t err = DB_SUCCESS; btr_intention_t lock_intention; bool modify_external; @@ -1428,7 +1410,7 @@ btr_cur_search_to_nth_level_func( || mode != PAGE_CUR_LE); ++btr_cur_n_sea; - DBUG_RETURN(err); + DBUG_RETURN(DB_SUCCESS); } else { ++btr_cur_n_non_sea; } @@ -1601,30 +1583,24 @@ search_loop: retry_page_get: ut_ad(n_blocks < BTR_MAX_LEVELS); tree_savepoints[n_blocks] = mtr_set_savepoint(mtr); + dberr_t err; block = buf_page_get_gen(page_id, zip_size, rw_latch, guess, buf_mode, mtr, &err, height == 0 && !index->is_clust()); tree_blocks[n_blocks] = block; - /* Note that block==NULL signifies either an error or change - buffering. */ - - if (err != DB_SUCCESS) { - ut_ad(block == NULL); - if (err == DB_DECRYPTION_FAILED) { - ib_push_warning((void *)NULL, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. " - " Can't continue reading table.", - index->table->name.m_name); - index->table->file_unreadable = true; + if (!block) { + switch (err) { + case DB_SUCCESS: + /* change buffering */ + break; + case DB_DECRYPTION_FAILED: + btr_decryption_failed(*index); + /* fall through */ + default: + goto func_exit; } - goto func_exit; - } - - if (block == NULL) { /* This must be a search to perform an insert/delete mark/ delete; try using the insert/delete buffer */ @@ -1633,7 +1609,7 @@ retry_page_get: switch (btr_op) { default: - ut_error; + MY_ASSERT_UNREACHABLE(); break; case BTR_INSERT_OP: case BTR_INSERT_IGNORE_UNIQUE_OP: @@ -1724,15 +1700,9 @@ retry_page_get: prev_tree_blocks[prev_n_blocks] = get_block; prev_n_blocks++; - if (err != DB_SUCCESS) { + if (!get_block) { if (err == DB_DECRYPTION_FAILED) { - ib_push_warning((void *)NULL, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. 
" - " Can't continue reading table.", - index->table->name.m_name); - index->table->file_unreadable = true; + btr_decryption_failed(*index); } goto func_exit; @@ -1753,15 +1723,9 @@ retry_page_get: rw_latch, NULL, buf_mode, mtr, &err); tree_blocks[n_blocks] = block; - if (err != DB_SUCCESS) { + if (!block) { if (err == DB_DECRYPTION_FAILED) { - ib_push_warning((void *)NULL, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. " - " Can't continue reading table.", - index->table->name.m_name); - index->table->file_unreadable = true; + btr_decryption_failed(*index); } goto func_exit; @@ -1831,8 +1795,8 @@ retry_page_get: if (height == 0) { if (rw_latch == RW_NO_LATCH) { - latch_leaves = btr_cur_latch_leaves( - block, latch_mode, cursor, mtr); + btr_cur_latch_leaves(block, latch_mode, cursor, mtr, + &latch_leaves); } switch (latch_mode) { @@ -2389,15 +2353,13 @@ need_opposite_intention: if (upper_rw_latch == RW_NO_LATCH) { ut_ad(latch_mode == BTR_CONT_MODIFY_TREE || latch_mode == BTR_CONT_SEARCH_TREE); - buf_block_t* child_block = btr_block_get( + btr_block_get( *index, page_id.page_no(), latch_mode == BTR_CONT_MODIFY_TREE - ? RW_X_LATCH : RW_SX_LATCH, false, mtr); - btr_assert_not_corrupted(child_block, index); + ? 
RW_X_LATCH : RW_SX_LATCH, false, mtr, &err); } else { ut_ad(mtr->memo_contains_flagged(block, upper_rw_latch)); - btr_assert_not_corrupted(block, index); if (s_latch_by_caller) { ut_ad(latch_mode == BTR_SEARCH_TREE); @@ -2525,7 +2487,7 @@ btr_cur_open_at_index_side( mem_heap_t* heap = NULL; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; rec_offs* offsets = offsets_; - dberr_t err = DB_SUCCESS; + dberr_t err; rec_offs_init(offsets_); @@ -2625,15 +2587,9 @@ btr_cur_open_at_index_side( ut_ad((block != NULL) == (err == DB_SUCCESS)); tree_blocks[n_blocks] = block; - if (err != DB_SUCCESS) { + if (!block) { if (err == DB_DECRYPTION_FAILED) { - ib_push_warning((void *)NULL, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. " - " Can't continue reading table.", - index->table->name.m_name); - index->table->file_unreadable = true; + btr_decryption_failed(*index); } goto exit_loop; @@ -2939,7 +2895,7 @@ btr_cur_open_at_rnd_pos( page_id_t page_id(index->table->space_id, index->page); const ulint zip_size = index->table->space->zip_size(); - dberr_t err = DB_SUCCESS; + dberr_t err; if (root_leaf_rw_latch == RW_X_LATCH) { node_ptr_max_size = btr_node_ptr_max_size(index); @@ -2965,15 +2921,9 @@ btr_cur_open_at_rnd_pos( ut_ad((block != NULL) == (err == DB_SUCCESS)); - if (err != DB_SUCCESS) { + if (!block) { if (err == DB_DECRYPTION_FAILED) { - ib_push_warning((void *)NULL, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. 
" - " Can't continue reading table.", - index->table->name.m_name); - index->table->file_unreadable = true; + btr_decryption_failed(*index); } break; @@ -3015,7 +2965,7 @@ btr_cur_open_at_rnd_pos( mtr); } - /* btr_cur_open_at_index_side_func() and + /* btr_cur_open_at_index_side() and btr_cur_search_to_nth_level() release tree s-latch here.*/ switch (latch_mode) { @@ -3183,7 +3133,8 @@ btr_cur_insert_if_possible( For compressed pages, page_cur_tuple_insert() attempted this already. */ if (!rec && !page_cur_get_page_zip(page_cursor) - && btr_page_reorganize(page_cursor, cursor->index, mtr)) { + && btr_page_reorganize(page_cursor, cursor->index, mtr) + == DB_SUCCESS) { rec = page_cur_tuple_insert( page_cursor, tuple, cursor->index, offsets, heap, n_ext, mtr); @@ -3508,7 +3459,8 @@ fail_err: << ' ' << rec_printer(entry).str()); DBUG_EXECUTE_IF("do_page_reorganize", if (n_recs) - btr_page_reorganize(page_cursor, index, mtr);); + ut_a(btr_page_reorganize(page_cursor, index, mtr) + == DB_SUCCESS);); /* Now, try the insert */ { @@ -3571,25 +3523,17 @@ fail_err: goto fail; } else { ut_ad(!reorg); + reorg = true; /* If the record did not fit, reorganize */ - if (!btr_page_reorganize(page_cursor, index, mtr)) { - ut_ad(0); - goto fail; - } - - ut_ad(page_get_max_insert_size(page, 1) == max_size); - - reorg = TRUE; - - *rec = page_cur_tuple_insert(page_cursor, entry, index, - offsets, heap, n_ext, mtr); - - if (UNIV_UNLIKELY(!*rec)) { - ib::fatal() << "Cannot insert tuple " << *entry - << "into index " << index->name - << " of table " << index->table->name - << ". 
Max size: " << max_size; + err = btr_page_reorganize(page_cursor, index, mtr); + if (err != DB_SUCCESS + || page_get_max_insert_size(page, 1) != max_size + || !(*rec = page_cur_tuple_insert(page_cursor, entry, index, + offsets, heap, n_ext, + mtr))) { + err = DB_CORRUPTION; + goto fail_err; } } @@ -3683,9 +3627,7 @@ btr_cur_pessimistic_insert( { dict_index_t* index = cursor->index; big_rec_t* big_rec_vec = NULL; - dberr_t err; bool inherit = false; - bool success; uint32_t n_reserved = 0; ut_ad(dtuple_check_typed(entry)); @@ -3705,27 +3647,24 @@ btr_cur_pessimistic_insert( /* Check locks and write to undo log, if specified */ - err = btr_cur_ins_lock_and_undo(flags, cursor, entry, - thr, mtr, &inherit); + dberr_t err = btr_cur_ins_lock_and_undo(flags, cursor, entry, + thr, mtr, &inherit); if (err != DB_SUCCESS) { - return(err); } - if (!(flags & BTR_NO_UNDO_LOG_FLAG)) { - /* First reserve enough free space for the file segments - of the index tree, so that the insert will not fail because - of lack of space */ - - uint32_t n_extents = uint32_t(cursor->tree_height / 16 + 3); + /* First reserve enough free space for the file segments of + the index tree, so that the insert will not fail because of + lack of space */ - success = fsp_reserve_free_extents(&n_reserved, - index->table->space, - n_extents, FSP_NORMAL, mtr); - if (!success) { - return(DB_OUT_OF_FILE_SPACE); - } + if (!index->is_ibuf() + && (err = fsp_reserve_free_extents(&n_reserved, index->table->space, + uint32_t(cursor->tree_height / 16 + + 3), + FSP_NORMAL, mtr)) + != DB_SUCCESS) { + return err; } if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext), @@ -3756,19 +3695,14 @@ btr_cur_pessimistic_insert( } } - if (dict_index_get_page(index) - == btr_cur_get_block(cursor)->page.id().page_no()) { - - /* The page is the root page */ - *rec = btr_root_raise_and_insert( - flags, cursor, offsets, heap, entry, n_ext, mtr); - } else { - *rec = btr_page_split_and_insert( - flags, cursor, offsets, 
heap, entry, n_ext, mtr); - } + *rec = index->page == btr_cur_get_block(cursor)->page.id().page_no() + ? btr_root_raise_and_insert(flags, cursor, offsets, heap, + entry, n_ext, mtr, &err) + : btr_page_split_and_insert(flags, cursor, offsets, heap, + entry, n_ext, mtr, &err); - if (*rec == NULL && os_has_said_disk_full) { - return(DB_OUT_OF_FILE_SPACE); + if (!*rec) { + goto func_exit; } ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec @@ -3819,10 +3753,12 @@ btr_cur_pessimistic_insert( } } + err = DB_SUCCESS; +func_exit: index->table->space->release_free_extents(n_reserved); *big_rec = big_rec_vec; - return(DB_SUCCESS); + return err; } /*==================== B-TREE UPDATE =========================*/ @@ -4051,32 +3987,26 @@ btr_cur_update_alloc_zip_func( return(false); } - if (!btr_page_reorganize(cursor, index, mtr)) { - goto out_of_space; - } - - rec_offs_make_valid(page_cur_get_rec(cursor), index, - page_is_leaf(page), offsets); + if (btr_page_reorganize(cursor, index, mtr) == DB_SUCCESS) { + rec_offs_make_valid(page_cur_get_rec(cursor), index, + page_is_leaf(page), offsets); - /* After recompressing a page, we must make sure that the free - bits in the insert buffer bitmap will not exceed the free - space on the page. Because this function will not attempt - recompression unless page_zip_available() fails above, it is - safe to reset the free bits if page_zip_available() fails - again, below. The free bits can safely be reset in a separate - mini-transaction. If page_zip_available() succeeds below, we - can be sure that the btr_page_reorganize() above did not reduce - the free space available on the page. */ + /* After recompressing a page, we must make sure that the free + bits in the insert buffer bitmap will not exceed the free + space on the page. Because this function will not attempt + recompression unless page_zip_available() fails above, it is + safe to reset the free bits if page_zip_available() fails + again, below. 
The free bits can safely be reset in a separate + mini-transaction. If page_zip_available() succeeds below, we + can be sure that the btr_page_reorganize() above did not reduce + the free space available on the page. */ - if (page_zip_available(page_zip, dict_index_is_clust(index), - length, create)) { - return(true); + if (page_zip_available(page_zip, dict_index_is_clust(index), + length, create)) { + return true; + } } -out_of_space: - ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets)); - - /* Out of space: reset the free bits. */ if (!dict_index_is_clust(index) && !index->table->is_temporary() && page_is_leaf(page)) { @@ -4425,6 +4355,11 @@ static void btr_cur_trim_alter_metadata(dtuple_t* entry, page_id_t(index->table->space->id, mach_read_from_4(ptr + BTR_EXTERN_PAGE_NO)), 0, RW_S_LATCH, &mtr); + if (!block) { + ut_ad("corruption" == 0); + mtr.commit(); + return; + } ut_ad(fil_page_get_type(block->page.frame) == FIL_PAGE_TYPE_BLOB); ut_ad(mach_read_from_4(&block->page.frame [FIL_PAGE_DATA + BTR_BLOB_HDR_NEXT_PAGE_NO]) @@ -4674,7 +4609,6 @@ any_extern: (!dict_table_is_comp(index->table) && new_rec_size >= REDUNDANT_REC_MAX_DATA_SIZE)) { err = DB_OVERFLOW; - goto func_exit; } @@ -4765,17 +4699,22 @@ any_extern: btr_cur_write_sys(new_entry, index, trx_id, roll_ptr); } - /* There are no externally stored columns in new_entry */ - rec = btr_cur_insert_if_possible( - cursor, new_entry, offsets, heap, 0/*n_ext*/, mtr); - ut_a(rec); /* <- We calculated above the insert would fit */ + rec = btr_cur_insert_if_possible(cursor, new_entry, offsets, heap, + 0/*n_ext*/, mtr); + if (UNIV_UNLIKELY(!rec)) { + err = DB_CORRUPTION; + goto func_exit; + } if (UNIV_UNLIKELY(update->is_metadata())) { /* We must empty the PAGE_FREE list, because if this was a rollback, the shortened metadata record would have too many fields, and we would be unable to know the size of the freed record. 
*/ - btr_page_reorganize(page_cursor, index, mtr); + err = btr_page_reorganize(page_cursor, index, mtr); + if (err != DB_SUCCESS) { + goto func_exit; + } } else { /* Restore the old explicit lock state on the record */ lock_rec_restore_from_page_infimum(*block, rec, @@ -4813,7 +4752,7 @@ updated record. In the split it may have inherited locks from the successor of the updated record, which is not correct. This function restores the right locks for the new supremum. */ static -void +dberr_t btr_cur_pess_upd_restore_supremum( /*==============================*/ buf_block_t* block, /*!< in: buffer block of rec */ @@ -4821,28 +4760,30 @@ btr_cur_pess_upd_restore_supremum( mtr_t* mtr) /*!< in: mtr */ { page_t* page; - buf_block_t* prev_block; page = buf_block_get_frame(block); if (page_rec_get_next(page_get_infimum_rec(page)) != rec) { /* Updated record is not the first user record on its page */ - - return; + return DB_SUCCESS; } const uint32_t prev_page_no = btr_page_get_prev(page); const page_id_t block_id{block->page.id()}; const page_id_t prev_id(block_id.space(), prev_page_no); - - ut_ad(prev_page_no != FIL_NULL); - prev_block = buf_page_get_with_no_latch(prev_id, block->zip_size(), - mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(prev_block->page.frame) - == block->page.id().page_no()); -#endif /* UNIV_BTR_DEBUG */ + dberr_t err; + buf_block_t* prev_block + = buf_page_get_gen(prev_id, 0, RW_NO_LATCH, nullptr, + BUF_PEEK_IF_IN_POOL, mtr, &err); + /* Since we already held an x-latch on prev_block, it must + be available and not be corrupted unless the buffer pool got + corrupted somehow. */ + if (UNIV_UNLIKELY(!prev_block)) { + return err; + } + ut_ad(!memcmp_aligned<4>(prev_block->page.frame + FIL_PAGE_NEXT, + block->page.frame + FIL_PAGE_OFFSET, 4)); /* We must already have an x-latch on prev_block! 
*/ ut_ad(mtr->memo_contains_flagged(prev_block, MTR_MEMO_PAGE_X_FIX)); @@ -4850,6 +4791,7 @@ btr_cur_pess_upd_restore_supremum( lock_rec_reset_and_inherit_gap_locks(*prev_block, block_id, PAGE_HEAP_NO_SUPREMUM, page_rec_get_heap_no(rec)); + return DB_SUCCESS; } /*************************************************************//** @@ -5057,18 +4999,17 @@ btr_cur_pessimistic_update( } if (optim_err == DB_OVERFLOW) { - /* First reserve enough free space for the file segments of the index tree, so that the update will not fail because of lack of space */ - uint32_t n_extents = uint32_t(cursor->tree_height / 16 + 3); - - if (!fsp_reserve_free_extents( - &n_reserved, index->table->space, n_extents, - flags & BTR_NO_UNDO_LOG_FLAG - ? FSP_CLEANING : FSP_NORMAL, - mtr)) { + err = fsp_reserve_free_extents( + &n_reserved, index->table->space, + uint32_t(cursor->tree_height / 16 + 3), + flags & BTR_NO_UNDO_LOG_FLAG + ? FSP_CLEANING : FSP_NORMAL, + mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { err = DB_OUT_OF_FILE_SPACE; goto err_exit; } @@ -5127,7 +5068,10 @@ btr_cur_pessimistic_update( was a rollback, the shortened metadata record would have too many fields, and we would be unable to know the size of the freed record. */ - btr_page_reorganize(page_cursor, index, mtr); + err = btr_page_reorganize(page_cursor, index, mtr); + if (err != DB_SUCCESS) { + goto return_after_reservations; + } rec = page_cursor->rec; rec_offs_make_valid(rec, index, true, *offsets); if (page_cursor->block->page.id().page_no() @@ -5239,8 +5183,8 @@ btr_cur_pessimistic_update( cursor, offsets, offsets_heap, new_entry, &rec, &dummy_big_rec, n_ext, NULL, mtr); - ut_a(rec); ut_a(err == DB_SUCCESS); + ut_a(rec); ut_a(dummy_big_rec == NULL); ut_ad(rec_offs_validate(rec, cursor->index, *offsets)); page_cursor->rec = rec; @@ -5279,7 +5223,10 @@ btr_cur_pessimistic_update( was a rollback, the shortened metadata record would have too many fields, and we would be unable to know the size of the freed record. 
*/ - btr_page_reorganize(page_cursor, index, mtr); + err = btr_page_reorganize(page_cursor, index, mtr); + if (err != DB_SUCCESS) { + goto return_after_reservations; + } rec = page_cursor->rec; } else { lock_rec_restore_from_page_infimum( @@ -5292,13 +5239,14 @@ btr_cur_pessimistic_update( from a wrong record. */ if (!was_first) { - btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor), - rec, mtr); + err = btr_cur_pess_upd_restore_supremum( + btr_cur_get_block(cursor), rec, mtr); } return_after_reservations: #ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(btr_cur_get_page_zip(cursor), + ut_a(err || + !page_zip || page_zip_validate(btr_cur_get_page_zip(cursor), btr_cur_get_page(cursor), index)); #endif /* UNIV_ZIP_DEBUG */ @@ -5417,15 +5365,15 @@ that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. NOTE: it is assumed that the caller has reserved enough free extents so that the compression will always succeed if done! 
-@return TRUE if compression occurred */ -ibool +@return whether compression occurred */ +bool btr_cur_compress_if_useful( /*=======================*/ btr_cur_t* cursor, /*!< in/out: cursor on the page to compress; cursor does not stay valid if !adjust and compression occurs */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ + bool adjust, /*!< in: whether the cursor position should be + adjusted even when compression occurs */ mtr_t* mtr) /*!< in/out: mini-transaction */ { ut_ad(mtr->memo_contains_flagged(&cursor->index->lock, @@ -5445,16 +5393,17 @@ btr_cur_compress_if_useful( } } - return(btr_cur_compress_recommendation(cursor, mtr) - && btr_compress(cursor, adjust, mtr)); + return btr_cur_compress_recommendation(cursor, mtr) + && btr_compress(cursor, adjust, mtr) == DB_SUCCESS; } /*******************************************************//** Removes the record on which the tree cursor is positioned on a leaf page. It is assumed that the mtr has an x-latch on the page where the cursor is positioned, but no latch on the whole tree. -@return TRUE if success, i.e., the page did not become too empty */ -ibool +@return error code +@retval DB_FAIL if the page would become too empty */ +dberr_t btr_cur_optimistic_delete( /*======================*/ btr_cur_t* cursor, /*!< in: cursor on leaf page, on the record to @@ -5496,14 +5445,15 @@ btr_cur_optimistic_delete( cursor->index->n_core_fields, ULINT_UNDEFINED, &heap); - const ibool no_compress_needed = !rec_offs_any_extern(offsets) - && btr_cur_can_delete_without_compress( - cursor, rec_offs_size(offsets), mtr); - - if (!no_compress_needed) { + dberr_t err = DB_SUCCESS; + if (rec_offs_any_extern(offsets) + || !btr_cur_can_delete_without_compress(cursor, + rec_offs_size(offsets), + mtr)) { /* prefetch siblings of the leaf for the pessimistic operation. 
*/ btr_cur_prefetch_siblings(block, cursor->index); + err = DB_FAIL; goto func_exit; } @@ -5568,8 +5518,8 @@ btr_cur_optimistic_delete( after rollback, this deleted metadata record would have too many fields, and we would be unable to know the size of the freed record. */ - btr_page_reorganize(btr_cur_get_page_cur(cursor), - cursor->index, mtr); + err = btr_page_reorganize(btr_cur_get_page_cur(cursor), + cursor->index, mtr); goto func_exit; } else { if (!flags) { @@ -5618,7 +5568,7 @@ func_exit: mem_heap_free(heap); } - return(no_compress_needed); + return err; } /*************************************************************//** @@ -5656,7 +5606,6 @@ btr_cur_pessimistic_delete( dict_index_t* index; rec_t* rec; uint32_t n_reserved = 0; - bool success; ibool ret = FALSE; mem_heap_t* heap; rec_offs* offsets; @@ -5686,13 +5635,11 @@ btr_cur_pessimistic_delete( uint32_t n_extents = uint32_t(cursor->tree_height / 32 + 1); - success = fsp_reserve_free_extents(&n_reserved, - index->table->space, - n_extents, - FSP_CLEANING, mtr); - if (!success) { - *err = DB_OUT_OF_FILE_SPACE; - + *err = fsp_reserve_free_extents(&n_reserved, + index->table->space, + n_extents, + FSP_CLEANING, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { return(FALSE); } } @@ -5781,10 +5728,10 @@ btr_cur_pessimistic_delete( after rollback, this deleted metadata record would carry too many fields, and we would be unable to know the size of the freed record. 
*/ - btr_page_reorganize(btr_cur_get_page_cur(cursor), - index, mtr); + *err = btr_page_reorganize(btr_cur_get_page_cur(cursor), + index, mtr); ut_ad(!ret); - goto return_after_reservations; + goto err_exit; } } else if (UNIV_UNLIKELY(page_rec_is_first(rec, page))) { if (page_rec_is_last(rec, page)) { @@ -5836,22 +5783,36 @@ discard_page: so that it is equal to the new leftmost node pointer on the page */ btr_cur_t cursor; - btr_page_get_father(index, block, mtr, &cursor); - btr_cur_node_ptr_delete(&cursor, mtr); + ret = btr_page_get_father(index, block, mtr, &cursor); + if (!ret) { + *err = DB_CORRUPTION; + goto err_exit; + } + *err = btr_cur_node_ptr_delete(&cursor, mtr); + if (*err != DB_SUCCESS) { +got_err: + ret = FALSE; + goto err_exit; + } + const ulint level = btr_page_get_level(page); // FIXME: reuse the node_ptr from above dtuple_t* node_ptr = dict_index_build_node_ptr( index, next_rec, block->page.id().page_no(), heap, level); - btr_insert_on_non_leaf_level( + *err = btr_insert_on_non_leaf_level( flags, index, level + 1, node_ptr, mtr); + if (*err != DB_SUCCESS) { + ret = FALSE; + goto got_err; + } ut_d(parent_latched = true); } } - /* SPATIAL INDEX never use SX locks; we can allow page merges + /* SPATIAL INDEX never use U locks; we can allow page merges while holding X lock on the spatial index tree. Do not allow merges of non-leaf B-tree pages unless it is safe to do so. */ @@ -5892,7 +5853,7 @@ discard_page: return_after_reservations: *err = DB_SUCCESS; - +err_exit: mem_heap_free(heap); if (!srv_read_only_mode @@ -5913,7 +5874,7 @@ return_after_reservations: /** Delete the node pointer in a parent page. 
@param[in,out] parent cursor pointing to parent record @param[in,out] mtr mini-transaction */ -void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr) +dberr_t btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr) { ut_ad(mtr->memo_contains_flagged(btr_cur_get_block(parent), MTR_MEMO_PAGE_X_FIX)); @@ -5921,10 +5882,11 @@ void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr) ibool compressed = btr_cur_pessimistic_delete(&err, TRUE, parent, BTR_CREATE_FLAG, false, mtr); - ut_a(err == DB_SUCCESS); - if (!compressed) { + if (err == DB_SUCCESS && !compressed) { btr_cur_compress_if_useful(parent, FALSE, mtr); } + + return err; } /*******************************************************************//** @@ -6037,7 +5999,7 @@ btr_estimate_n_rows_in_range_on_level( mtr_t mtr; page_t* page; buf_block_t* block; - dberr_t err=DB_SUCCESS; + dberr_t err; mtr_start(&mtr); @@ -6054,13 +6016,7 @@ btr_estimate_n_rows_in_range_on_level( if (!block) { if (err == DB_DECRYPTION_FAILED) { - ib_push_warning((void *)NULL, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. " - " Can't continue reading table.", - index->table->name.m_name); - index->table->file_unreadable = true; + btr_decryption_failed(*index); } mtr_commit(&mtr); @@ -6193,18 +6149,23 @@ btr_estimate_n_rows_in_range_low( example if "5 < x AND x <= 10" then we should not include the left boundary, but should include the right one. 
*/ - mtr_start(&mtr); + mtr.start(); cursor.path_arr = path1; - bool should_count_the_left_border; + bool should_count_the_left_border = + dtuple_get_n_fields(tuple1->tuple) > 0; - if (dtuple_get_n_fields(tuple1->tuple) > 0) { - - btr_cur_search_to_nth_level(index, 0, tuple1->tuple, - tuple1->mode, - BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, &mtr); + if (should_count_the_left_border) { + if (btr_cur_search_to_nth_level(index, 0, tuple1->tuple, + tuple1->mode, + BTR_SEARCH_LEAF | BTR_ESTIMATE, + &cursor, 0, &mtr) + != DB_SUCCESS) { +corrupted: + mtr.commit(); + return 0; + } ut_ad(!page_rec_is_infimum(btr_cur_get_rec(&cursor))); @@ -6217,19 +6178,11 @@ btr_estimate_n_rows_in_range_low( should_count_the_left_border = !page_rec_is_supremum(btr_cur_get_rec(&cursor)); } else { - dberr_t err = DB_SUCCESS; - - err = btr_cur_open_at_index_side(true, index, - BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, &mtr); - - if (err != DB_SUCCESS) { - ib::warn() << " Error code: " << err - << " btr_estimate_n_rows_in_range_low " - << " called from file: " - << __FILE__ << " line: " << __LINE__ - << " table: " << index->table->name - << " index: " << index->name; + if (btr_cur_open_at_index_side(true, index, + BTR_SEARCH_LEAF | BTR_ESTIMATE, + &cursor, 0, &mtr) + != DB_SUCCESS) { + goto corrupted; } ut_ad(page_rec_is_infimum(btr_cur_get_rec(&cursor))); @@ -6238,29 +6191,27 @@ btr_estimate_n_rows_in_range_low( 'x < 123' or 'x <= 123' and btr_cur_open_at_index_side() positioned the cursor on the infimum record on the leftmost page, which must not be counted. 
*/ - should_count_the_left_border = false; } tuple1->page_id= cursor.page_cur.block->page.id(); - mtr_commit(&mtr); - - if (!index->is_readable()) { - return 0; - } + mtr.commit(); - mtr_start(&mtr); + mtr.start(); cursor.path_arr = path2; - bool should_count_the_right_border; + bool should_count_the_right_border = + dtuple_get_n_fields(tuple2->tuple) > 0; - if (dtuple_get_n_fields(tuple2->tuple) > 0) { - - btr_cur_search_to_nth_level(index, 0, tuple2->tuple, - mode2, - BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, &mtr); + if (should_count_the_right_border) { + if (btr_cur_search_to_nth_level(index, 0, tuple2->tuple, + mode2, + BTR_SEARCH_LEAF | BTR_ESTIMATE, + &cursor, 0, &mtr) + != DB_SUCCESS) { + goto corrupted; + } const rec_t* rec = btr_cur_get_rec(&cursor); @@ -6289,19 +6240,11 @@ btr_estimate_n_rows_in_range_low( the requested one (can also be positioned on the 'sup') and we should not count the right border. */ } else { - dberr_t err = DB_SUCCESS; - - err = btr_cur_open_at_index_side(false, index, - BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, &mtr); - - if (err != DB_SUCCESS) { - ib::warn() << " Error code: " << err - << " btr_estimate_n_rows_in_range_low " - << " called from file: " - << __FILE__ << " line: " << __LINE__ - << " table: " << index->table->name - << " index: " << index->name; + if (btr_cur_open_at_index_side(false, index, + BTR_SEARCH_LEAF | BTR_ESTIMATE, + &cursor, 0, &mtr) + != DB_SUCCESS) { + goto corrupted; } ut_ad(page_rec_is_supremum(btr_cur_get_rec(&cursor))); @@ -6310,12 +6253,11 @@ btr_estimate_n_rows_in_range_low( 'x > 123' or 'x >= 123' and btr_cur_open_at_index_side() positioned the cursor on the supremum record on the rightmost page, which must not be counted. 
*/ - should_count_the_right_border = false; } tuple2->page_id= cursor.page_cur.block->page.id(); - mtr_commit(&mtr); + mtr.commit(); /* We have the path information for the range in path1 and path2 */ @@ -6821,11 +6763,12 @@ struct btr_blob_log_check_t { if (UNIV_UNLIKELY(page_no != FIL_NULL)) { m_pcur->btr_cur.page_cur.block = btr_block_get( *index, page_no, RW_X_LATCH, false, m_mtr); + /* The page should not be evicted or corrupted while + we are holding a buffer-fix on it. */ + m_pcur->btr_cur.page_cur.block->page.unfix(); m_pcur->btr_cur.page_cur.rec = m_pcur->btr_cur.page_cur.block->page.frame + offs; - - m_pcur->btr_cur.page_cur.block->page.unfix(); } else { ut_ad(m_pcur->rel_pos == BTR_PCUR_ON); ut_a(m_pcur->restore_position( @@ -6900,12 +6843,16 @@ btr_store_big_rec_extern_fields( ut_ad(buf_block_get_frame(rec_block) == page_align(rec)); ut_a(dict_index_is_clust(index)); + if (!fil_page_index_page_check(page_align(rec))) { + if (op != BTR_STORE_INSERT_BULK) { + return DB_PAGE_CORRUPTED; + } + } + btr_blob_log_check_t redo_log(pcur, btr_mtr, offsets, &rec_block, &rec, op); page_zip = buf_block_get_page_zip(rec_block); space_id = rec_block->page.id().space(); - ut_a(fil_page_index_page_check(page_align(rec)) - || op == BTR_STORE_INSERT_BULK); if (page_zip) { int err; @@ -6997,40 +6944,46 @@ btr_store_big_rec_extern_fields( mtr.start(); index->set_modified(mtr); mtr.set_log_mode(btr_mtr->get_log_mode()); + mtr.memo_push(rec_block, MTR_MEMO_PAGE_X_FIX); - buf_page_get(rec_block->page.id(), - rec_block->zip_size(), RW_X_LATCH, &mtr); + rec_block->page.fix(); + rec_block->page.lock.x_lock(); +#ifdef BTR_CUR_HASH_ADAPT + ut_ad(!rec_block->index || !rec_block->index->freed()); +#endif uint32_t hint_prev = prev_page_no; if (hint_prev == FIL_NULL) { hint_prev = rec_block->page.id().page_no(); } - if (!fsp_reserve_free_extents(&r_extents, - index->table->space, 1, - FSP_BLOB, &mtr, 1)) { + error = fsp_reserve_free_extents( + &r_extents, index->table->space, 1, + 
FSP_BLOB, &mtr, 1); + if (UNIV_UNLIKELY(error != DB_SUCCESS)) { +alloc_fail: mtr.commit(); - error = DB_OUT_OF_FILE_SPACE; goto func_exit; } block = btr_page_alloc(index, hint_prev + 1, - FSP_NO_DIR, 0, &mtr, &mtr); + FSP_NO_DIR, 0, &mtr, &mtr, + &error); index->table->space->release_free_extents(r_extents); - - ut_a(block != NULL); + if (!block) { + goto alloc_fail; + } const uint32_t page_no = block->page.id().page_no(); - if (prev_page_no != FIL_NULL) { - buf_block_t* prev_block; - - prev_block = buf_page_get( - page_id_t(space_id, prev_page_no), - rec_block->zip_size(), - RW_X_LATCH, &mtr); - + if (prev_page_no == FIL_NULL) { + } else if (buf_block_t* prev_block = + buf_page_get_gen(page_id_t(space_id, + prev_page_no), + rec_block->zip_size(), + RW_X_LATCH, nullptr, + BUF_GET, &mtr, &error)) { if (page_zip) { mtr.write<4>(*prev_block, prev_block->page.frame @@ -7049,6 +7002,8 @@ btr_store_big_rec_extern_fields( + prev_block->page.frame, page_no); } + } else { + goto alloc_fail; } ut_ad(!page_has_siblings(block->page.frame)); @@ -7295,14 +7250,11 @@ btr_free_externally_stored_field( containing the latch to data an an X-latch to the index tree */ { - page_t* page; const uint32_t space_id = mach_read_from_4( field_ref + BTR_EXTERN_SPACE_ID); - uint32_t page_no; - uint32_t next_page_no; - mtr_t mtr; ut_ad(index->is_primary()); + ut_ad(block->page.lock.have_x()); ut_ad(local_mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); ut_ad(local_mtr->memo_contains_page_flagged(field_ref, @@ -7327,29 +7279,22 @@ btr_free_externally_stored_field( ut_ad(space_id == index->table->space_id); const ulint ext_zip_size = index->table->space->zip_size(); - const ulint rec_zip_size = rec ? 
ext_zip_size : 0; - /* !rec holds in a call from purge when field_ref is in an undo page */ ut_ad(rec || !block->page.zip.data); for (;;) { - buf_block_t* ext_block; + mtr_t mtr; - mtr_start(&mtr); + mtr.start(); mtr.set_spaces(*local_mtr); mtr.set_log_mode(local_mtr->get_log_mode()); ut_ad(!index->table->is_temporary() || local_mtr->get_log_mode() == MTR_LOG_NO_REDO); - const page_t* p = page_align(field_ref); - - const page_id_t page_id(page_get_space_id(p), - page_get_page_no(p)); - - buf_page_get(page_id, rec_zip_size, RW_X_LATCH, &mtr); - - page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO); + const uint32_t page_no = mach_read_from_4( + field_ref + BTR_EXTERN_PAGE_NO); + buf_block_t* ext_block; if (/* There is no external storage data */ page_no == FIL_NULL @@ -7360,18 +7305,31 @@ btr_free_externally_stored_field( || (rollback && (mach_read_from_1(field_ref + BTR_EXTERN_LEN) & BTR_EXTERN_INHERITED_FLAG))) { - +skip_free: /* Do not free */ - mtr_commit(&mtr); + mtr.commit(); return; } - ext_block = buf_page_get( - page_id_t(space_id, page_no), ext_zip_size, - RW_X_LATCH, &mtr); + ext_block = buf_page_get(page_id_t(space_id, page_no), + ext_zip_size, RW_X_LATCH, &mtr); + + if (!ext_block) { + goto skip_free; + } + + /* The buffer pool block containing the BLOB pointer is + exclusively latched by local_mtr. To satisfy some design + constraints, we must recursively latch it in mtr as well. 
*/ + mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX); + block->fix(); + block->page.lock.x_lock(); +#ifdef BTR_CUR_HASH_ADAPT + ut_ad(!block->index || !block->index->freed()); +#endif - page = buf_block_get_frame(ext_block); + const page_t* page = buf_block_get_frame(ext_block); if (ext_zip_size) { /* Note that page_zip will be NULL @@ -7381,9 +7339,10 @@ btr_free_externally_stored_field( case FIL_PAGE_TYPE_ZBLOB2: break; default: - ut_error; + MY_ASSERT_UNREACHABLE(); } - next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT); + const uint32_t next_page_no = mach_read_from_4( + page + FIL_PAGE_NEXT); btr_page_free(index, ext_block, &mtr, true, local_mtr->memo_contains( @@ -7408,7 +7367,7 @@ btr_free_externally_stored_field( ut_ad(!block->page.zip.data); btr_check_blob_fil_page_type(*ext_block, false); - next_page_no = mach_read_from_4( + const uint32_t next_page_no = mach_read_from_4( page + FIL_PAGE_DATA + BTR_BLOB_HDR_NEXT_PAGE_NO); btr_page_free(index, ext_block, &mtr, true, @@ -7540,6 +7499,10 @@ btr_copy_blob_prefix( mtr_start(&mtr); block = buf_page_get(id, 0, RW_S_LATCH, &mtr); + if (!block) { + mtr.commit(); + return copied_len; + } page = buf_block_get_frame(block); btr_check_blob_fil_page_type(*block, true); diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 8823bab572d..23d93caecf5 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved. -Copyright (C) 2014, 2021, MariaDB Corporation. +Copyright (C) 2014, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -283,7 +283,8 @@ btr_defragment_calc_n_recs_for_size( /*************************************************************//** Merge as many records from the from_block to the to_block. Delete the from_block if all records are successfully merged to to_block. -@return the to_block to target for next merge operation. */ +@return the to_block to target for next merge operation. +@retval nullptr if corruption was noticed */ static buf_block_t* btr_defragment_merge_pages( @@ -330,9 +331,9 @@ btr_defragment_merge_pages( // reorganizing the page, otherwise we need to reorganize the page // first to release more space. if (move_size > max_ins_size) { - if (!btr_page_reorganize_block(page_zip_level, - to_block, index, - mtr)) { + dberr_t err = btr_page_reorganize_block(page_zip_level, + to_block, index, mtr); + if (err != DB_SUCCESS) { if (!dict_index_is_clust(index) && page_is_leaf(to_page)) { ibuf_reset_free_bits(to_block); @@ -341,23 +342,30 @@ btr_defragment_merge_pages( // not compressable. There's no point to try // merging into this page. Continue to the // next page. - return from_block; + return err == DB_FAIL ? from_block : nullptr; } ut_ad(page_validate(to_page, index)); max_ins_size = page_get_max_insert_size(to_page, n_recs); - ut_a(max_ins_size >= move_size); + if (max_ins_size < move_size) { + return nullptr; + } } // Move records to pack to_page more full. orig_pred = NULL; target_n_recs = n_recs_to_move; + dberr_t err; while (n_recs_to_move > 0) { rec = page_rec_get_nth(from_page, n_recs_to_move + 1); orig_pred = page_copy_rec_list_start( - to_block, from_block, rec, index, mtr); + to_block, from_block, rec, index, mtr, &err); if (orig_pred) break; + if (err != DB_FAIL) { + return nullptr; + } + // If we reach here, that means compression failed after packing // n_recs_to_move number of records to to_page. 
We try to reduce // the targeted data size on the to_page by @@ -396,19 +404,20 @@ btr_defragment_merge_pages( } } btr_cur_t parent; - if (n_recs_to_move == n_recs) { + if (!btr_page_get_father(index, from_block, mtr, &parent)) { + to_block = nullptr; + } else if (n_recs_to_move == n_recs) { /* The whole page is merged with the previous page, free it. */ - const page_id_t from{from_block->page.id()}; - lock_update_merge_left(*to_block, orig_pred, from); + lock_update_merge_left(*to_block, orig_pred, + from_block->page.id()); btr_search_drop_page_hash_index(from_block); - ut_a(DB_SUCCESS == btr_level_list_remove(*from_block, *index, - mtr)); - btr_page_get_father(index, from_block, mtr, &parent); - btr_cur_node_ptr_delete(&parent, mtr); - /* btr_blob_dbg_remove(from_page, index, - "btr_defragment_n_pages"); */ - btr_page_free(index, from_block, mtr); + if (btr_level_list_remove(*from_block, *index, mtr) + != DB_SUCCESS + || btr_cur_node_ptr_delete(&parent, mtr) != DB_SUCCESS + || btr_page_free(index, from_block, mtr) != DB_SUCCESS) { + return nullptr; + } } else { // There are still records left on the page, so // increment n_defragmented. Node pointer will be changed @@ -424,15 +433,20 @@ btr_defragment_merge_pages( orig_pred, from_block); // FIXME: reuse the node_ptr! 
- btr_page_get_father(index, from_block, mtr, &parent); - btr_cur_node_ptr_delete(&parent, mtr); + if (btr_cur_node_ptr_delete(&parent, mtr) + != DB_SUCCESS) { + return nullptr; + } rec = page_rec_get_next( page_get_infimum_rec(from_page)); node_ptr = dict_index_build_node_ptr( index, rec, page_get_page_no(from_page), heap, level); - btr_insert_on_non_leaf_level(0, index, level+1, - node_ptr, mtr); + if (btr_insert_on_non_leaf_level(0, index, level+1, + node_ptr, mtr) + != DB_SUCCESS) { + return nullptr; + } } to_block = from_block; } @@ -507,6 +521,9 @@ btr_defragment_n_pages( blocks[i] = btr_block_get(*index, page_no, RW_X_LATCH, true, mtr); + if (!blocks[i]) { + return nullptr; + } } if (n_pages == 1) { @@ -517,7 +534,8 @@ btr_defragment_n_pages( return NULL; /* given page is the last page. Lift the records to father. */ - btr_lift_page_up(index, block, mtr); + dberr_t err; + btr_lift_page_up(index, block, mtr, &err); } return NULL; } @@ -580,6 +598,9 @@ btr_defragment_n_pages( if (new_block != current_block) { n_defragmented ++; current_block = new_block; + if (!new_block) { + break; + } } } mem_heap_free(heap); @@ -667,17 +688,19 @@ processed: mtr_start(&mtr); dict_index_t *index = item->pcur->btr_cur.index; index->set_modified(mtr); - /* To follow the latching order defined in WL#6326, acquire index->lock X-latch. - This entitles us to acquire page latches in any order for the index. */ + /* To follow the latching order defined in WL#6326, + acquire index->lock X-latch. This entitles us to + acquire page latches in any order for the index. */ mtr_x_lock_index(index, &mtr); - /* This will acquire index->lock SX-latch, which per WL#6363 is allowed + /* This will acquire index->lock U latch, which is allowed when we are already holding the X-latch. 
*/ - item->pcur->restore_position(BTR_MODIFY_TREE, &mtr); - buf_block_t* first_block = btr_pcur_get_block(item->pcur); if (buf_block_t *last_block = - btr_defragment_n_pages(first_block, index, - srv_defragment_n_pages, - &mtr)) { + item->pcur->restore_position(BTR_MODIFY_TREE, &mtr) + == btr_pcur_t::CORRUPTED + ? nullptr + : btr_defragment_n_pages(btr_pcur_get_block(item->pcur), + index, srv_defragment_n_pages, + &mtr)) { /* If we haven't reached the end of the index, place the cursor on the last record of last page, store the cursor position, and put back in queue. */ diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc index a6b9e59204e..8c33fee9e61 100644 --- a/storage/innobase/btr/btr0pcur.cc +++ b/storage/innobase/btr/btr0pcur.cc @@ -299,21 +299,14 @@ btr_pcur_t::restore_position(ulint restore_latch_mode, mtr_t *mtr) if (UNIV_UNLIKELY (rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE || rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) { - dberr_t err = DB_SUCCESS; - /* In these cases we do not try an optimistic restoration, but always do a search */ - err = btr_cur_open_at_index_side( + if (btr_cur_open_at_index_side( rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE, index, restore_latch_mode, - &btr_cur, 0, mtr); - - if (err != DB_SUCCESS) { - ib::warn() << " Error code: " << err - << " btr_pcur_t::restore_position " - << " table: " << index->table->name - << " index: " << index->name; + &btr_cur, 0, mtr) != DB_SUCCESS) { + return restore_status::CORRUPTED; } latch_mode = @@ -412,16 +405,20 @@ btr_pcur_t::restore_position(ulint restore_latch_mode, mtr_t *mtr) mode = PAGE_CUR_L; break; default: - ut_error; + MY_ASSERT_UNREACHABLE(); mode = PAGE_CUR_UNSUPP; } - btr_pcur_open_with_no_init_func(index, tuple, mode, restore_latch_mode, - this, + if (btr_pcur_open_with_no_init_func( + index, tuple, mode, restore_latch_mode, + this, #ifdef BTR_CUR_HASH_ADAPT - NULL, + nullptr, #endif /* BTR_CUR_HASH_ADAPT */ - mtr); + mtr) != DB_SUCCESS) { + mem_heap_free(heap); 
+ return restore_status::CORRUPTED; + } /* Restore the old search mode */ search_mode = old_mode; @@ -473,7 +470,7 @@ Moves the persistent cursor to the first record on the next page. Releases the latch on the current page, and bufferunfixes it. Note that there must not be modifications on the current page, as then the x-latch can be released only in mtr_commit. */ -void +dberr_t btr_pcur_move_to_next_page( /*=======================*/ btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the @@ -487,11 +484,6 @@ btr_pcur_move_to_next_page( cursor->old_stored = false; const page_t* page = btr_pcur_get_page(cursor); - - if (UNIV_UNLIKELY(!page)) { - return; - } - const uint32_t next_page_no = btr_page_get_next(page); ut_ad(next_page_no != FIL_NULL); @@ -505,28 +497,31 @@ btr_pcur_move_to_next_page( mode = BTR_MODIFY_LEAF; } + dberr_t err; buf_block_t* next_block = btr_block_get( *btr_pcur_get_btr_cur(cursor)->index, next_page_no, mode, - page_is_leaf(page), mtr); + page_is_leaf(page), mtr, &err); if (UNIV_UNLIKELY(!next_block)) { - return; + return err; } const page_t* next_page = buf_block_get_frame(next_block); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(next_page) == page_is_comp(page)); - ut_a(btr_page_get_prev(next_page) - == btr_pcur_get_block(cursor)->page.id().page_no()); -#endif /* UNIV_BTR_DEBUG */ + + if (UNIV_UNLIKELY(memcmp_aligned<4>(next_page + FIL_PAGE_PREV, + page + FIL_PAGE_OFFSET, 4))) { + return DB_CORRUPTION; + } btr_leaf_page_release(btr_pcur_get_block(cursor), mode, mtr); page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor)); ut_d(page_check_dir(next_page)); + return err; } +MY_ATTRIBUTE((nonnull,warn_unused_result)) /*********************************************************//** Moves the persistent cursor backward if it is on the first record of the page. Commits mtr. 
Note that to prevent a possible deadlock, the operation @@ -537,17 +532,13 @@ return, but it may happen that the cursor is not positioned on the last record of any page, because the structure of the tree may have changed during the time when the cursor had no latches. */ static -void +bool btr_pcur_move_backward_from_page( /*=============================*/ btr_pcur_t* cursor, /*!< in: persistent cursor, must be on the first record of the current page */ mtr_t* mtr) /*!< in: mtr */ { - ulint prev_page_no; - page_t* page; - buf_block_t* prev_block; - ut_ad(btr_pcur_is_before_first_on_page(cursor)); ut_ad(!btr_pcur_is_before_first_in_tree(cursor)); @@ -563,43 +554,39 @@ btr_pcur_move_backward_from_page( static_assert(BTR_SEARCH_PREV == (4 | BTR_SEARCH_LEAF), ""); static_assert(BTR_MODIFY_PREV == (4 | BTR_MODIFY_LEAF), ""); - cursor->restore_position(4 | latch_mode, mtr); - - page = btr_pcur_get_page(cursor); + if (UNIV_UNLIKELY(cursor->restore_position(4 | latch_mode, mtr) + == btr_pcur_t::CORRUPTED)) { + return true; + } - prev_page_no = btr_page_get_prev(page); + buf_block_t* prev_block = btr_pcur_get_btr_cur(cursor)->left_block; - if (prev_page_no == FIL_NULL) { + if (!page_has_prev(btr_pcur_get_page(cursor))) { } else if (btr_pcur_is_before_first_on_page(cursor)) { - - prev_block = btr_pcur_get_btr_cur(cursor)->left_block; - btr_leaf_page_release(btr_pcur_get_block(cursor), latch_mode, mtr); page_cur_set_after_last(prev_block, btr_pcur_get_page_cur(cursor)); } else { - /* The repositioned cursor did not end on an infimum record on a page. Cursor repositioning acquired a latch also on the previous page, but we do not need the latch: release it. */ - prev_block = btr_pcur_get_btr_cur(cursor)->left_block; - btr_leaf_page_release(prev_block, latch_mode, mtr); } cursor->latch_mode = latch_mode; cursor->old_stored = false; + return false; } /*********************************************************//** Moves the persistent cursor to the previous record in the tree. 
If no records are left, the cursor stays 'before first in tree'. @return TRUE if the cursor was not before first in tree */ -ibool +bool btr_pcur_move_to_prev( /*==================*/ btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the @@ -612,53 +599,13 @@ btr_pcur_move_to_prev( cursor->old_stored = false; if (btr_pcur_is_before_first_on_page(cursor)) { - - if (btr_pcur_is_before_first_in_tree(cursor)) { - - return(FALSE); - } - - btr_pcur_move_backward_from_page(cursor, mtr); - - return(TRUE); - } - - btr_pcur_move_to_prev_on_page(cursor); - - return(TRUE); -} - -/**************************************************************//** -If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first -user record satisfying the search condition, in the case PAGE_CUR_L or -PAGE_CUR_LE, on the last user record. If no such user record exists, then -in the first case sets the cursor after last in tree, and in the latter case -before first in tree. The latching mode must be BTR_SEARCH_LEAF or -BTR_MODIFY_LEAF. */ -void -btr_pcur_open_on_user_rec( - dict_index_t* index, /*!< in: index */ - const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... 
*/ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or - BTR_MODIFY_LEAF */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent - cursor */ - mtr_t* mtr) /*!< in: mtr */ -{ - btr_pcur_open_low(index, 0, tuple, mode, latch_mode, cursor, 0, mtr); - - if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) { - - if (btr_pcur_is_after_last_on_page(cursor)) { - - btr_pcur_move_to_next_user_rec(cursor, mtr); + if (btr_pcur_is_before_first_in_tree(cursor) + || btr_pcur_move_backward_from_page(cursor, mtr)) { + return false; } } else { - ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L)); - - /* Not implemented yet */ - - ut_error; + btr_pcur_move_to_prev_on_page(cursor); } + + return true; } diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index 19c0d94b280..a9b4e9ff0d0 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -1436,19 +1436,13 @@ void btr_search_drop_page_hash_when_freed(const page_id_t page_id) block = buf_page_get_gen(page_id, 0, RW_X_LATCH, NULL, BUF_PEEK_IF_IN_POOL, &mtr); - if (block) { - /* If AHI is still valid, page can't be in free state. - AHI is dropped when page is freed. */ - DBUG_ASSERT(!block->page.is_freed()); - - if (block->index) { - /* In all our callers, the table handle should - be open, or we should be in the process of - dropping the table (preventing eviction). */ - DBUG_ASSERT(block->index->table->get_ref_count() - || dict_sys.locked()); - btr_search_drop_page_hash_index(block); - } + if (block && block->index) { + /* In all our callers, the table handle should + be open, or we should be in the process of + dropping the table (preventing eviction). 
*/ + DBUG_ASSERT(block->index->table->get_ref_count() + || dict_sys.locked()); + btr_search_drop_page_hash_index(block); } mtr_commit(&mtr); diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 73492eca216..6eb1b6d463a 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -66,6 +66,7 @@ Created 11/5/1995 Heikki Tuuri #include "buf0dump.h" #include #include +#include "log.h" using st_::span; @@ -585,9 +586,6 @@ buf_page_is_corrupted( const byte* read_buf, ulint fsp_flags) { -#ifndef UNIV_INNOCHECKSUM - DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", return(true); ); -#endif if (fil_space_t::full_crc32(fsp_flags)) { bool compressed = false, corrupted = false; const uint size = buf_page_full_crc32_size( @@ -2208,6 +2206,7 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr) ++buf_pool.stat.n_page_gets; const page_id_t page_id(space->id, page); buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); + uint32_t fix; buf_block_t *block; { transactional_shared_lock_guard g @@ -2220,7 +2219,13 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr) /* To avoid a deadlock with buf_LRU_free_page() of some other page and buf_page_write_complete() of this page, we must not wait for a page latch while holding a page_hash latch. */ - block->page.fix(); + fix= block->page.fix(); + } + + if (UNIV_UNLIKELY(fix < buf_page_t::UNFIXED)) + { + block->page.unfix(); + return; } block->page.lock.x_lock(); @@ -2413,6 +2418,7 @@ buf_zip_decompress( case FIL_PAGE_RTREE: if (page_zip_decompress(&block->page.zip, block->page.frame, TRUE)) { +func_exit: if (space) { space->release(); } @@ -2432,11 +2438,7 @@ buf_zip_decompress( case FIL_PAGE_TYPE_ZBLOB2: /* Copy to uncompressed storage. 
*/ memcpy(block->page.frame, frame, block->zip_size()); - if (space) { - space->release(); - } - - return(TRUE); + goto func_exit; } ib::error() << "Unknown compressed page type " @@ -2451,12 +2453,6 @@ err_exit: } if (space) { - if (encrypted) { - dict_set_encrypted_by_space(space); - } else { - dict_set_corrupted_by_space(space); - } - space->release(); } @@ -2469,7 +2465,7 @@ err_exit: @param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH @param[in] guess guessed block or NULL @param[in] mode BUF_GET, BUF_GET_IF_IN_POOL, -BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH +BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH @param[in] mtr mini-transaction @param[out] err DB_SUCCESS or error code @param[in] allow_ibuf_merge Allow change buffer merge to happen @@ -2492,8 +2488,8 @@ buf_page_get_low( unsigned access_time; ulint retries = 0; - ut_ad((mtr == NULL) == (mode == BUF_EVICT_IF_IN_POOL)); ut_ad(!mtr || mtr->is_active()); + ut_ad(mtr || mode == BUF_PEEK_IF_IN_POOL); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH) || (rw_latch == RW_SX_LATCH) @@ -2510,24 +2506,15 @@ buf_page_get_low( #ifdef UNIV_DEBUG switch (mode) { - case BUF_EVICT_IF_IN_POOL: - /* After DISCARD TABLESPACE, the tablespace would not exist, - but in IMPORT TABLESPACE, PageConverter::operator() must - replace any old pages, which were not evicted during DISCARD. - Skip the assertion on space_page_size. */ - break; case BUF_PEEK_IF_IN_POOL: case BUF_GET_IF_IN_POOL: /* The caller may pass a dummy page size, because it does not really matter. 
*/ break; default: - ut_error; + MY_ASSERT_UNREACHABLE(); case BUF_GET_POSSIBLY_FREED: break; - case BUF_GET_NO_LATCH: - ut_ad(rw_latch == RW_NO_LATCH); - /* fall through */ case BUF_GET: case BUF_GET_IF_IN_POOL_OR_WATCH: ut_ad(!mtr->is_freeing_tree()); @@ -2584,7 +2571,6 @@ loop: switch (mode) { case BUF_GET_IF_IN_POOL: case BUF_PEEK_IF_IN_POOL: - case BUF_EVICT_IF_IN_POOL: return nullptr; case BUF_GET_IF_IN_POOL_OR_WATCH: /* We cannot easily use a memory transaction here. */ @@ -2614,62 +2600,15 @@ loop: checksum cannot be decypted. */ if (dberr_t local_err = buf_read_page(page_id, zip_size)) { - if (mode == BUF_GET_POSSIBLY_FREED) { - if (err) { - *err = local_err; - } - return nullptr; - } else if (retries < BUF_PAGE_READ_MAX_RETRIES) { - ++retries; - DBUG_EXECUTE_IF("innodb_page_corruption_retries", + if (mode != BUF_GET_POSSIBLY_FREED + && retries++ < BUF_PAGE_READ_MAX_RETRIES) { + DBUG_EXECUTE_IF("intermittent_read_failure", retries = BUF_PAGE_READ_MAX_RETRIES;); } else { if (err) { *err = local_err; } - /* Pages whose encryption key is unavailable or the - configured key, encryption algorithm or encryption - method are incorrect are marked as encrypted in - buf_page_check_corrupt(). Unencrypted page could be - corrupted in a way where the key_id field is - nonzero. There is no checksum on field - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION. */ - switch (local_err) { - case DB_PAGE_CORRUPTED: - if (!srv_force_recovery) { - break; - } - /* fall through */ - case DB_DECRYPTION_FAILED: - return nullptr; - default: - break; - } - - /* Try to set table as corrupted instead of - asserting. 
*/ - if (page_id.space() == TRX_SYS_SPACE) { - } else if (page_id.space() == SRV_TMP_SPACE_ID) { - } else if (fil_space_t* space - = fil_space_t::get(page_id.space())) { - bool set = dict_set_corrupted_by_space(space); - space->release(); - if (set) { - return nullptr; - } - } - - if (local_err == DB_IO_ERROR) { - return nullptr; - } - - ib::fatal() << "Unable to read page " << page_id - << " into the buffer pool after " - << BUF_PAGE_READ_MAX_RETRIES - << ". The most probable cause" - " of this error may be that the" - " table has been corrupted." - " See https://mariadb.com/kb/en/library/innodb-recovery-modes/"; + return nullptr; } } else { buf_read_ahead_random(page_id, zip_size, ibuf_inside(mtr)); @@ -2684,10 +2623,14 @@ got_block: ut_ad(state > buf_page_t::FREED); if (state > buf_page_t::READ_FIX && state < buf_page_t::WRITE_FIX) { - if (mode == BUF_PEEK_IF_IN_POOL - || mode == BUF_EVICT_IF_IN_POOL) { + if (mode == BUF_PEEK_IF_IN_POOL) { ignore_block: + ut_ad(mode == BUF_GET_POSSIBLY_FREED + || mode == BUF_PEEK_IF_IN_POOL); block->unfix(); + if (err) { + *err = DB_CORRUPTION; + } return nullptr; } @@ -2713,46 +2656,29 @@ ignore_block: *err = DB_PAGE_CORRUPTED; } - if (page_id.space() == TRX_SYS_SPACE) { - } else if (page_id.space() == SRV_TMP_SPACE_ID) { - } else if (fil_space_t* space = - fil_space_t::get(page_id.space())) { - bool set = dict_set_corrupted_by_space(space); - space->release(); - if (set) { - return nullptr; - } - } - - ib::fatal() << "Unable to read page " << page_id - << " into the buffer pool after " - << BUF_PAGE_READ_MAX_RETRIES - << ". The most probable cause" - " of this error may be that the" - " table has been corrupted." - " See https://mariadb.com/kb/en/library/innodb-recovery-modes/"; - } - } else if (mode == BUF_PEEK_IF_IN_POOL) { - if (UNIV_UNLIKELY(!block->page.frame)) { - /* This mode is only used for dropping an - adaptive hash index. There cannot be an - adaptive hash index for a compressed-only page. 
*/ - goto ignore_block; + return nullptr; } - } else if (mode == BUF_EVICT_IF_IN_POOL) { + } else if (mode != BUF_PEEK_IF_IN_POOL) { + } else if (!mtr) { ut_ad(!block->page.oldest_modification()); mysql_mutex_lock(&buf_pool.mutex); block->unfix(); - if (!buf_LRU_free_page(&block->page, true)) { + if (!buf_LRU_free_page(&block->page, true)) { ut_ad(0); } mysql_mutex_unlock(&buf_pool.mutex); return nullptr; + } else if (UNIV_UNLIKELY(!block->page.frame)) { + /* The BUF_PEEK_IF_IN_POOL mode is mainly used for dropping an + adaptive hash index. There cannot be an + adaptive hash index for a compressed-only page. */ + goto ignore_block; } - ut_ad(mode == BUF_GET_IF_IN_POOL || block->zip_size() == zip_size); + ut_ad(mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL + || block->zip_size() == zip_size); if (UNIV_UNLIKELY(!block->page.frame)) { if (!block->page.lock.x_lock_try()) { @@ -2797,14 +2723,11 @@ wait_for_unfix: if (state < buf_page_t::UNFIXED + 1) { ut_ad(state > buf_page_t::FREED); - ut_ad(mode == BUF_GET_POSSIBLY_FREED - || mode == BUF_PEEK_IF_IN_POOL); - block->page.unfix(); block->page.lock.x_unlock(); hash_lock.unlock(); buf_LRU_block_free_non_file_page(new_block); mysql_mutex_unlock(&buf_pool.mutex); - return nullptr; + goto ignore_block; } mysql_mutex_unlock(&buf_pool.mutex); @@ -2922,60 +2845,135 @@ re_evict: #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ ut_ad(state > buf_page_t::FREED); - ut_ad(state < buf_page_t::UNFIXED || (~buf_page_t::LRU_MASK) & state); - ut_ad(state > buf_page_t::WRITE_FIX || state < buf_page_t::READ_FIX); - - /* While tablespace is reinited the indexes are already freed but the - blocks related to it still resides in buffer pool. Trying to remove - such blocks from buffer pool would invoke removal of AHI entries - associated with these blocks. Logic to remove AHI entry will try to - load the block but block is already in free state. 
Handle the said case - with mode = BUF_PEEK_IF_IN_POOL that is invoked from - "btr_search_drop_page_hash_when_freed". */ - ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL - || state > buf_page_t::UNFIXED); - - const bool not_first_access = block->page.set_accessed(); - - if (mode != BUF_PEEK_IF_IN_POOL) { - buf_page_make_young_if_needed(&block->page); - if (!not_first_access) { - buf_read_ahead_linear(page_id, block->zip_size(), - ibuf_inside(mtr)); - } + if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) { + goto ignore_block; } + ut_ad((~buf_page_t::LRU_MASK) & state); + ut_ad(state > buf_page_t::WRITE_FIX || state < buf_page_t::READ_FIX); #ifdef UNIV_DEBUG if (!(++buf_dbg_counter % 5771)) buf_pool.validate(); #endif /* UNIV_DEBUG */ ut_ad(block->page.frame); - ut_ad(block->page.id() == page_id); - if (state >= buf_page_t::UNFIXED && allow_ibuf_merge && fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX && page_is_leaf(block->page.frame)) { block->page.lock.x_lock(); + ut_ad(block->page.id() == page_id + || (state >= buf_page_t::READ_FIX + && state < buf_page_t::WRITE_FIX)); + +#ifdef BTR_CUR_HASH_ADAPT + if (dict_index_t* index = block->index) { + if (index->freed()) { + btr_search_drop_page_hash_index(block); + } + } +#endif /* BTR_CUR_HASH_ADAPT */ + + dberr_t e; + + if (UNIV_UNLIKELY(block->page.id() != page_id)) { +page_id_mismatch: + state = block->page.state(); + e = DB_CORRUPTION; +ibuf_merge_corrupted: + if (err) { + *err = e; + } + + buf_pool.corrupted_evict(&block->page, state); + return nullptr; + } + state = block->page.state(); ut_ad(state < buf_page_t::READ_FIX); if (state >= buf_page_t::IBUF_EXIST && state < buf_page_t::REINIT) { block->page.clear_ibuf_exist(); - ibuf_merge_or_delete_for_page(block, page_id, - block->zip_size()); + e = ibuf_merge_or_delete_for_page(block, page_id, + block->zip_size()); + if (UNIV_UNLIKELY(e != DB_SUCCESS)) { + goto ibuf_merge_corrupted; + } } if (rw_latch == RW_X_LATCH) { mtr->memo_push(block, 
MTR_MEMO_PAGE_X_FIX); + goto got_latch; } else { block->page.lock.x_unlock(); goto get_latch; } } else { get_latch: - mtr->page_lock(block, rw_latch); + switch (rw_latch) { + mtr_memo_type_t fix_type; + case RW_NO_LATCH: + mtr->memo_push(block, MTR_MEMO_BUF_FIX); + return block; + case RW_S_LATCH: + fix_type = MTR_MEMO_PAGE_S_FIX; + block->page.lock.s_lock(); + ut_ad(!block->page.is_read_fixed()); + if (UNIV_UNLIKELY(block->page.id() != page_id)) { + block->page.lock.s_unlock(); + block->page.lock.x_lock(); + goto page_id_mismatch; + } +get_latch_valid: +#ifdef BTR_CUR_HASH_ADAPT + if (dict_index_t* index = block->index) { + if (index->freed()) { + mtr_t::defer_drop_ahi(block, fix_type); + } + } +#endif /* BTR_CUR_HASH_ADAPT */ + mtr->memo_push(block, fix_type); + break; + case RW_SX_LATCH: + fix_type = MTR_MEMO_PAGE_SX_FIX; + block->page.lock.u_lock(); + ut_ad(!block->page.is_io_fixed()); + if (UNIV_UNLIKELY(block->page.id() != page_id)) { + block->page.lock.u_x_upgrade(); + goto page_id_mismatch; + } + goto get_latch_valid; + default: + ut_ad(rw_latch == RW_X_LATCH); + fix_type = MTR_MEMO_PAGE_X_FIX; + if (block->page.lock.x_lock_upgraded()) { + ut_ad(block->page.id() == page_id); + block->unfix(); + mtr->page_lock_upgrade(*block); + return block; + } + if (UNIV_UNLIKELY(block->page.id() != page_id)) { + goto page_id_mismatch; + } + goto get_latch_valid; + } + +got_latch: + ut_ad(page_id_t(page_get_space_id(block->page.frame), + page_get_page_no(block->page.frame)) + == page_id); + + if (mode == BUF_GET_POSSIBLY_FREED + || mode == BUF_PEEK_IF_IN_POOL) { + return block; + } + + const bool not_first_access{block->page.set_accessed()}; + buf_page_make_young_if_needed(&block->page); + if (!not_first_access) { + buf_read_ahead_linear(page_id, block->zip_size(), + ibuf_inside(mtr)); + } } return block; @@ -2987,8 +2985,8 @@ get_latch: @param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH @param[in] guess guessed block or NULL @param[in] mode BUF_GET, 
BUF_GET_IF_IN_POOL, -BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH -@param[in] mtr mini-transaction +BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH +@param[in,out] mtr mini-transaction, or NULL @param[out] err DB_SUCCESS or error code @param[in] allow_ibuf_merge Allow change buffer merge while reading the pages from file. @@ -3006,6 +3004,13 @@ buf_page_get_gen( { if (buf_block_t *block= recv_sys.recover(page_id)) { + if (UNIV_UNLIKELY(block == reinterpret_cast(-1))) + { + corrupted: + if (err) + *err= DB_CORRUPTION; + return nullptr; + } /* Recovery is a special case; we fix() before acquiring lock. */ auto s= block->page.fix(); ut_ad(s >= buf_page_t::FREED); @@ -3017,7 +3022,12 @@ buf_page_get_gen( const bool must_merge= allow_ibuf_merge && ibuf_page_exists(page_id, block->zip_size()); if (s < buf_page_t::UNFIXED) + { + got_freed_page: ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL); + block->page.unfix(); + goto corrupted; + } else if (must_merge && fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX && page_is_leaf(block->page.frame)) @@ -3027,12 +3037,22 @@ buf_page_get_gen( ut_ad(s > buf_page_t::FREED); ut_ad(s < buf_page_t::READ_FIX); if (s < buf_page_t::UNFIXED) - ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL); + { + block->page.lock.x_unlock(); + goto got_freed_page; + } else { if (block->page.is_ibuf_exist()) block->page.clear_ibuf_exist(); - ibuf_merge_or_delete_for_page(block, page_id, block->zip_size()); + if (dberr_t e= + ibuf_merge_or_delete_for_page(block, page_id, block->zip_size())) + { + if (err) + *err= e; + buf_pool.corrupted_evict(&block->page, s); + return nullptr; + } } if (rw_latch == RW_X_LATCH) @@ -3167,7 +3187,7 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr) block->page.fix(); ut_ad(!block->page.is_read_fixed()); - mtr_memo_push(mtr, block, MTR_MEMO_PAGE_S_FIX); + mtr->memo_push(block, MTR_MEMO_PAGE_S_FIX); #ifdef UNIV_DEBUG if 
(!(++buf_dbg_counter % 5771)) buf_pool.validate(); @@ -3239,7 +3259,7 @@ static buf_block_t *buf_page_create_low(page_id_t page_id, ulint zip_size, { mysql_mutex_unlock(&buf_pool.mutex); buf_block_t *block= reinterpret_cast(bpage); - mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX); + mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); #ifdef BTR_CUR_HASH_ADAPT drop_hash_entry= block->index; #endif @@ -3279,7 +3299,7 @@ static buf_block_t *buf_page_create_low(page_id_t page_id, ulint zip_size, bpage->lock.free(); #endif ut_free(bpage); - mtr_memo_push(mtr, free_block, MTR_MEMO_PAGE_X_FIX); + mtr->memo_push(free_block, MTR_MEMO_PAGE_X_FIX); bpage= &free_block->page; } } @@ -3494,37 +3514,6 @@ ATTRIBUTE_COLD void buf_page_monitor(const buf_page_t &bpage, bool read) MONITOR_INC_NOCHECK(counter); } -/** Mark a table corrupted. -@param[in] bpage corrupted page -@param[in] space tablespace of the corrupted page */ -ATTRIBUTE_COLD -static void buf_mark_space_corrupt(buf_page_t* bpage, const fil_space_t& space) -{ - /* If block is not encrypted find the table with specified - space id, and mark it corrupted. Encrypted tables - are marked unusable later e.g. in ::open(). */ - if (!space.crypt_data - || space.crypt_data->type == CRYPT_SCHEME_UNENCRYPTED) { - dict_set_corrupted_by_space(&space); - } else { - dict_set_encrypted_by_space(&space); - } -} - -/** Mark a table corrupted. -@param[in] bpage Corrupted page -@param[in] node data file -Also remove the bpage from LRU list. */ -ATTRIBUTE_COLD -static void buf_corrupt_page_release(buf_page_t *bpage, const fil_node_t &node) -{ - ut_ad(bpage->id().space() == node.space->id); - buf_pool.corrupted_evict(bpage); - - if (!srv_force_recovery) - buf_mark_space_corrupt(bpage, *node.space); -} - /** Check if the encrypted page is corrupted for the full crc32 format. 
@param[in] space_id page belongs to space id @param[in] d page @@ -3600,21 +3589,8 @@ static dberr_t buf_page_check_corrupt(buf_page_t *bpage, ib::error() << "The page " << bpage->id() << " in file '" << node.name - << "' cannot be decrypted."; - - ib::info() - << "However key management plugin or used key_version " - << key_version - << " is not found or" - " used encryption algorithm or method does not match."; - - if (bpage->id().space() != TRX_SYS_SPACE) { - ib::info() - << "Marking tablespace as missing." - " You may drop this table or" - " install correct key management plugin" - " and key file."; - } + << "' cannot be decrypted; key_version=" + << key_version; } return (err); @@ -3634,6 +3610,10 @@ dberr_t buf_page_t::read_complete(const fil_node_t &node) ut_ad(zip_size() == node.space->zip_size()); ut_ad(!!zip.ssize == !!zip.data); + ut_d(auto n=) buf_pool.n_pend_reads--; + ut_ad(n > 0); + buf_pool.stat.n_pages_read++; + const byte *read_frame= zip.data ? zip.data : frame; ut_ad(read_frame); @@ -3664,7 +3644,11 @@ dberr_t buf_page_t::read_complete(const fil_node_t &node) if (read_id == expected_id); else if (read_id == page_id_t(0, 0)) - /* This is likely an uninitialized page. */; + { + /* This is likely an uninitialized (all-zero) page. 
*/ + err= DB_FAIL; + goto release_page; + } else if (!node.space->full_crc32() && page_id_t(0, read_id.page_no()) == expected_id) /* FIL_PAGE_SPACE_ID was written as garbage in the system tablespace @@ -3680,25 +3664,18 @@ dberr_t buf_page_t::read_complete(const fil_node_t &node) goto release_page; } else + { ib::error() << "Space id and page no stored in the page, read in are " << read_id << ", should be " << expected_id; + err= DB_PAGE_CORRUPTED; + goto release_page; + } } err= buf_page_check_corrupt(this, node); if (UNIV_UNLIKELY(err != DB_SUCCESS)) { database_corrupted: - /* Not a real corruption if it was triggered by error injection */ - DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", - if (!is_predefined_tablespace(id().space())) - { - buf_corrupt_page_release(this, node); - ib::info() << "Simulated IMPORT corruption"; - return err; - } - err= DB_SUCCESS; - goto page_not_corrupt;); - if (belongs_to_unzip_LRU()) memset_aligned(frame, 0, srv_page_size); @@ -3711,38 +3688,28 @@ database_corrupted: buf_page_print(read_frame, zip_size()); + node.space->set_corrupted(); + ib::info() << " You can use CHECK TABLE to scan" " your table for corruption. " << FORCE_RECOVERY_MSG; } if (!srv_force_recovery) - { - /* If the corruption is in the system tablespace, we will - intentionally crash the server. 
*/ - if (expected_id.space() == TRX_SYS_SPACE) - ib::fatal() << "Aborting because of a corrupt database page."; - buf_corrupt_page_release(this, node); - return err; - } + goto release_page; } - DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", - page_not_corrupt: err= err; ); - if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED) { release_page: - buf_corrupt_page_release(this, node); - if (recv_recovery_is_on()) - recv_sys.free_corrupted_page(expected_id); + buf_pool.corrupted_evict(this, buf_page_t::READ_FIX); return err; } const bool recovery= recv_recovery_is_on(); - if (recovery) - recv_recover_page(node.space, this); + if (recovery && !recv_recover_page(node.space, this)) + return DB_PAGE_CORRUPTED; const bool ibuf_may_exist= frame && !recv_no_ibuf_operations && (!expected_id.space() || !is_predefined_tablespace(expected_id.space())) && @@ -3766,10 +3733,6 @@ release_page: lock.x_unlock(true); - ut_d(auto n=) buf_pool.n_pend_reads--; - ut_ad(n > 0); - buf_pool.stat.n_pages_read++; - return DB_SUCCESS; } diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index d3c871ea69a..b9f505db56e 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -83,7 +83,13 @@ bool buf_dblwr_t::create() start_again: mtr.start(); + dberr_t err; buf_block_t *trx_sys_block= buf_dblwr_trx_sys_get(&mtr); + if (!trx_sys_block) + { + mtr.commit(); + return false; + } if (mach_read_from_4(TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_MAGIC + trx_sys_block->page.frame) == @@ -98,10 +104,10 @@ start_again: if (UT_LIST_GET_FIRST(fil_system.sys_space->chain)->size < 3 * size) { -too_small: ib::error() << "Cannot create doublewrite buffer: " "the first file in innodb_data_file_path must be at least " << (3 * (size >> (20U - srv_page_size_shift))) << "M."; +fail: mtr.commit(); return false; } @@ -109,9 +115,13 @@ too_small: { buf_block_t *b= fseg_create(fil_system.sys_space, TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG, - 
&mtr, false, trx_sys_block); + &mtr, &err, false, trx_sys_block); if (!b) - goto too_small; + { + ib::error() << "Cannot create doublewrite buffer: " << err; + goto fail; + } + ib::info() << "Doublewrite buffer not found: creating new"; /* FIXME: After this point, the doublewrite buffer creation @@ -126,8 +136,9 @@ too_small: for (uint32_t prev_page_no= 0, i= 0, extent_size= FSP_EXTENT_SIZE; i < 2 * size + extent_size / 2; i++) { - buf_block_t *new_block= fseg_alloc_free_page(fseg_header, prev_page_no + 1, - FSP_UP, &mtr); + buf_block_t *new_block= + fseg_alloc_free_page_general(fseg_header, prev_page_no + 1, FSP_UP, + false, &mtr, &mtr, &err); if (!new_block) { ib::error() << "Cannot create doublewrite buffer: " @@ -395,9 +406,12 @@ next_page: physical_size, read_buf); if (UNIV_UNLIKELY(fio.err != DB_SUCCESS)) + { ib::warn() << "Double write buffer recovery: " << page_id << " ('" << space->chain.start->name << "') read failed with error: " << fio.err; + continue; + } if (buf_is_zeroes(span(read_buf, physical_size))) { diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index 6f8e975bd03..92f4efb7747 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -1204,8 +1204,10 @@ evict_zip: } /** Release and evict a corrupted page. 
-@param bpage page that was being read */ -ATTRIBUTE_COLD void buf_pool_t::corrupted_evict(buf_page_t *bpage) +@param bpage x-latched page that was found corrupted +@param state expected current state of the page */ +ATTRIBUTE_COLD +void buf_pool_t::corrupted_evict(buf_page_t *bpage, uint32_t state) { const page_id_t id{bpage->id()}; buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold()); @@ -1216,8 +1218,8 @@ ATTRIBUTE_COLD void buf_pool_t::corrupted_evict(buf_page_t *bpage) ut_ad(!bpage->oldest_modification()); bpage->set_corrupt_id(); - constexpr auto read_unfix= buf_page_t::READ_FIX - buf_page_t::UNFIXED; - auto s= bpage->zip.fix.fetch_sub(read_unfix) - read_unfix; + auto unfix= state - buf_page_t::UNFIXED; + auto s= bpage->zip.fix.fetch_sub(unfix) - unfix; bpage->lock.x_unlock(true); while (s != buf_page_t::UNFIXED) @@ -1236,8 +1238,7 @@ ATTRIBUTE_COLD void buf_pool_t::corrupted_evict(buf_page_t *bpage) mysql_mutex_unlock(&mutex); - ut_d(auto n=) n_pend_reads--; - ut_ad(n > 0); + recv_sys.free_corrupted_page(id); } /** Update buf_pool.LRU_old_ratio. diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 6a5f973ee22..05dde6e69e4 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -319,28 +319,17 @@ nothing_read: ? 
IORequest::READ_SYNC : IORequest::READ_ASYNC), page_id.page_no() * len, len, dst, bpage); - *err= fio.err; + *err = fio.err; if (UNIV_UNLIKELY(fio.err != DB_SUCCESS)) { - if (!sync || fio.err == DB_TABLESPACE_DELETED - || fio.err == DB_IO_ERROR) { - buf_pool.corrupted_evict(bpage); - return false; - } - - ut_error; - } - - if (sync) { + ut_d(auto n=) buf_pool.n_pend_reads--; + ut_ad(n > 0); + buf_pool.corrupted_evict(bpage, buf_page_t::READ_FIX); + } else if (sync) { thd_wait_end(NULL); - /* The i/o was already completed in space->io() */ *err = bpage->read_complete(*fio.node); space->release(); - - if (*err != DB_SUCCESS) { - return false; - } } return true; @@ -489,26 +478,6 @@ void buf_read_page_background(fil_space_t *space, const page_id_t page_id, srv_stats.buf_pool_reads.add(1); } - switch (err) { - case DB_SUCCESS: - case DB_ERROR: - break; - case DB_TABLESPACE_DELETED: - ib::info() << "trying to read page " << page_id - << " in the background" - " in a non-existing or being-dropped tablespace"; - break; - case DB_PAGE_CORRUPTED: - case DB_DECRYPTION_FAILED: - ib::error() - << "Background Page read failed to " - "read or decrypt " << page_id; - break; - default: - ib::fatal() << "Error " << err << " in background read of " - << page_id; - } - /* We do not increment number of I/O operations used for LRU policy here (buf_LRU_stat_inc_io()). 
We use this in heuristics to decide about evicting uncompressed version of compressed pages from the @@ -742,8 +711,8 @@ void buf_read_recv_pages(ulint space_id, const uint32_t* page_nos, ulint n) BUF_READ_ANY_PAGE, cur_page_id, zip_size, true); - if (err == DB_DECRYPTION_FAILED || err == DB_PAGE_CORRUPTED) { - ib::error() << "Recovery failed to read or decrypt " + if (err != DB_SUCCESS) { + ib::error() << "Recovery failed to read " << cur_page_id; } } diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc index 40c8df1498e..f1133bae3de 100644 --- a/storage/innobase/dict/dict0boot.cc +++ b/storage/innobase/dict/dict0boot.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2021, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -35,6 +35,16 @@ Created 4/18/1996 Heikki Tuuri #include "log0recv.h" #include "os0file.h" +/** The DICT_HDR page identifier */ +static constexpr page_id_t hdr_page_id{DICT_HDR_SPACE, DICT_HDR_PAGE_NO}; + +/** @return the DICT_HDR block, x-latched */ +static buf_block_t *dict_hdr_get(mtr_t *mtr) +{ + /* We assume that the DICT_HDR page is always readable and available. */ + return buf_page_get_gen(hdr_page_id, 0, RW_X_LATCH, nullptr, BUF_GET, mtr); +} + /**********************************************************************//** Returns a new table, index, or space id. */ void @@ -97,25 +107,25 @@ void dict_hdr_flush_row_id(row_id_t id) } /** Create the DICT_HDR page on database initialization. 
-@return whether the operation failed */ -static bool dict_hdr_create() +@return error code */ +dberr_t dict_create() { - buf_block_t* block; ulint root_page_no; - bool fail = false; + dberr_t err; mtr_t mtr; mtr.start(); compile_time_assert(DICT_HDR_SPACE == 0); /* Create the dictionary header file block in a new, allocated file segment in the system tablespace */ - block = fseg_create(fil_system.sys_space, - DICT_HDR + DICT_HDR_FSEG_HEADER, &mtr); - - ut_a(block->page.id() == page_id_t(DICT_HDR_SPACE, DICT_HDR_PAGE_NO)); - - buf_block_t* d = dict_hdr_get(&mtr); + buf_block_t* d = fseg_create(fil_system.sys_space, + DICT_HDR + DICT_HDR_FSEG_HEADER, &mtr, + &err); + if (!d) { + goto func_exit; + } + ut_a(d->page.id() == hdr_page_id); /* Start counting row, table, index, and tree ids from DICT_HDR_FIRST_ID */ @@ -139,10 +149,8 @@ static bool dict_hdr_create() /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, fil_system.sys_space, DICT_TABLES_ID, - nullptr, &mtr); + nullptr, &mtr, &err); if (root_page_no == FIL_NULL) { -failed: - fail = true; goto func_exit; } @@ -151,9 +159,9 @@ failed: /*--------------------------*/ root_page_no = btr_create(DICT_UNIQUE, fil_system.sys_space, DICT_TABLE_IDS_ID, - nullptr, &mtr); + nullptr, &mtr, &err); if (root_page_no == FIL_NULL) { - goto failed; + goto func_exit; } mtr.write<4>(*d, DICT_HDR + DICT_HDR_TABLE_IDS + d->page.frame, @@ -161,9 +169,9 @@ failed: /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, fil_system.sys_space, DICT_COLUMNS_ID, - nullptr, &mtr); + nullptr, &mtr, &err); if (root_page_no == FIL_NULL) { - goto failed; + goto func_exit; } mtr.write<4>(*d, DICT_HDR + DICT_HDR_COLUMNS + d->page.frame, @@ -171,9 +179,9 @@ failed: /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, fil_system.sys_space, DICT_INDEXES_ID, - nullptr, &mtr); + nullptr, &mtr, &err); if (root_page_no == FIL_NULL) { - goto failed; + goto 
func_exit; } mtr.write<4>(*d, DICT_HDR + DICT_HDR_INDEXES + d->page.frame, @@ -181,25 +189,23 @@ failed: /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, fil_system.sys_space, DICT_FIELDS_ID, - nullptr, &mtr); + nullptr, &mtr, &err); if (root_page_no == FIL_NULL) { - goto failed; + goto func_exit; } mtr.write<4>(*d, DICT_HDR + DICT_HDR_FIELDS + d->page.frame, root_page_no); func_exit: mtr.commit(); - return fail; + return err ? err : dict_boot(); } /*****************************************************************//** Initializes the data dictionary memory structures when the database is started. This function is also called when the data dictionary is created. @return DB_SUCCESS or error code. */ -dberr_t -dict_boot(void) -/*===========*/ +dberr_t dict_boot() { dict_table_t* table; dict_index_t* index; @@ -222,17 +228,23 @@ dict_boot(void) static_assert(DICT_NUM_COLS__SYS_FOREIGN_COLS == 4, "compatibility"); static_assert(DICT_NUM_FIELDS__SYS_FOREIGN_COLS == 6, "compatibility"); - mtr_start(&mtr); - + mtr.start(); /* Create the hash tables etc. 
*/ dict_sys.create(); + dberr_t err; + const buf_block_t *d = buf_page_get_gen(hdr_page_id, 0, RW_X_LATCH, + nullptr, BUF_GET, &mtr, &err); + if (!d) { + mtr.commit(); + return err; + } + heap = mem_heap_create(450); dict_sys.lock(SRW_LOCK_CALL); - /* Get the dictionary header */ - const byte* dict_hdr = &dict_hdr_get(&mtr)->page.frame[DICT_HDR]; + const byte* dict_hdr = &d->page.frame[DICT_HDR]; /* Because we only write new row ids to disk-based data structure (dictionary header) when it is divisible by @@ -406,9 +418,9 @@ dict_boot(void) table->indexes.start->n_core_null_bytes = static_cast( UT_BITS_IN_BYTES(unsigned(table->indexes.start->n_nullable))); - mtr_commit(&mtr); + mtr.commit(); - dberr_t err = ibuf_init_at_db_start(); + err = ibuf_init_at_db_start(); if (err == DB_SUCCESS) { /* Load definitions of other indexes on system tables */ @@ -423,13 +435,5 @@ dict_boot(void) dict_sys.unlock(); } - return(err); -} - -/*****************************************************************//** -Creates and initializes the data dictionary at the server bootstrap. -@return DB_SUCCESS or error code. */ -dberr_t dict_create() -{ - return dict_hdr_create() ? 
DB_ERROR : dict_boot(); + return err; } diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index 4532ccc1a3d..4b175229971 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -751,28 +751,32 @@ dict_create_index_tree_step( mtr.start(); search_tuple = dict_create_search_tuple(node->ind_row, node->heap); + node->page_no = FIL_NULL; - btr_pcur_open(UT_LIST_GET_FIRST(dict_sys.sys_indexes->indexes), - search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, - &pcur, &mtr); + dberr_t err = + btr_pcur_open(UT_LIST_GET_FIRST(dict_sys.sys_indexes->indexes), + search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, + &pcur, &mtr); - btr_pcur_move_to_next_user_rec(&pcur, &mtr); + if (err != DB_SUCCESS) { +func_exit: + mtr.commit(); + return err; + } + btr_pcur_move_to_next_user_rec(&pcur, &mtr); - dberr_t err = DB_SUCCESS; + if (UNIV_UNLIKELY(btr_pcur_is_after_last_on_page(&pcur))) { + err = DB_CORRUPTION; + goto func_exit; + } - if (!index->is_readable()) { - node->page_no = FIL_NULL; - } else { + if (index->is_readable()) { index->set_modified(mtr); node->page_no = btr_create( index->type, index->table->space, - index->id, index, &mtr); - - if (node->page_no == FIL_NULL) { - err = DB_OUT_OF_FILE_SPACE; - } + index->id, index, &mtr, &err); DBUG_EXECUTE_IF("ib_import_create_index_failure_1", node->page_no = FIL_NULL; @@ -786,16 +790,13 @@ dict_create_index_tree_step( ut_ad(len == 4); mtr.write<4,mtr_t::MAYBE_NOP>(*btr_pcur_get_block(&pcur), data, node->page_no); - - mtr.commit(); - - return(err); + goto func_exit; } /***************************************************************//** Creates an index tree for the index if it is not a member of a cluster. Don't update SYSTEM TABLES. 
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +@return error code */ dberr_t dict_create_index_tree_in_mem( /*==========================*/ @@ -815,13 +816,14 @@ dict_create_index_tree_in_mem( ut_ad(index->is_readable()); ut_ad(!(index->table->flags2 & DICT_TF2_DISCARDED)); + dberr_t err; index->page = btr_create(index->type, index->table->space, - index->id, index, &mtr); + index->id, index, &mtr, &err); mtr_commit(&mtr); index->trx_id = trx->id; - return index->page == FIL_NULL ? DB_OUT_OF_FILE_SPACE : DB_SUCCESS; + return err; } /** Drop the index tree associated with a row in SYS_INDEXES table. @@ -1107,15 +1109,7 @@ dict_create_table_step( function_exit: trx->error_state = err; - if (err == DB_SUCCESS) { - /* Ok: do nothing */ - - } else if (err == DB_LOCK_WAIT) { - - return(NULL); - } else { - /* SQL error detected */ - + if (err != DB_SUCCESS) { return(NULL); } @@ -1299,16 +1293,8 @@ dict_create_index_step( function_exit: trx->error_state = err; - if (err == DB_SUCCESS) { - /* Ok: do nothing */ - - } else if (err == DB_LOCK_WAIT) { - - return(NULL); - } else { - /* SQL error detected */ - - return(NULL); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return nullptr; } thr->run_node = que_node_get_parent(node); diff --git a/storage/innobase/dict/dict0defrag_bg.cc b/storage/innobase/dict/dict0defrag_bg.cc index 043ea24ccbf..9abe6a20589 100644 --- a/storage/innobase/dict/dict0defrag_bg.cc +++ b/storage/innobase/dict/dict0defrag_bg.cc @@ -194,7 +194,7 @@ static void dict_stats_process_entry_from_defrag_pool(THD *thd) { if (dict_index_t *index= !table->corrupted ? 
dict_table_find_index_on_id(table, index_id) : nullptr) - if (!index->is_corrupted()) + if (index->is_btree()) dict_stats_save_defrag_stats(index); dict_table_close(table, false, thd, mdl); } @@ -307,7 +307,8 @@ btr_get_size_and_reserved( return(ULINT_UNDEFINED); } - buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, mtr); + dberr_t err; + buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, mtr, &err); *used = 0; if (!root) { return ULINT_UNDEFINED; diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 8c39f7318ab..be2d3517547 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -1435,12 +1435,61 @@ struct dict_foreign_remove_partial } }; +/** This function returns a new path name after replacing the basename +in an old path with a new basename. The old_path is a full path +name including the extension. The tablename is in the normal +form "databasename/tablename". The new base name is found after +the forward slash. Both input strings are null terminated. + +This function allocates memory to be returned. It is the callers +responsibility to free the return value after it is no longer needed. + +@param[in] old_path Pathname +@param[in] tablename Contains new base name +@return own: new full pathname */ +static char *dir_pathname(const char *old_path, span tablename) +{ + /* Split the tablename into its database and table name components. + They are separated by a '/'. */ + const char *base_name= tablename.data(); + for (const char *last= tablename.end(); last > tablename.data(); last--) + { + if (last[-1] == '/') + { + base_name= last; + break; + } + } + const size_t base_name_len= tablename.end() - base_name; + + /* Find the offset of the last slash. We will strip off the + old basename.ibd which starts after that slash. 
*/ + const char *last_slash= strrchr(old_path, '/'); +#ifdef _WIN32 + if (const char *last= strrchr(old_path, '\\')) + if (last > last_slash) + last_slash= last; +#endif + + size_t dir_len= last_slash + ? size_t(last_slash - old_path) + : strlen(old_path); + + /* allocate a new path and move the old directory path to it. */ + size_t new_path_len= dir_len + base_name_len + sizeof "/.ibd"; + char *new_path= static_cast(ut_malloc_nokey(new_path_len)); + memcpy(new_path, old_path, dir_len); + snprintf(new_path + dir_len, new_path_len - dir_len, "/%.*s.ibd", + int(base_name_len), base_name); + return new_path; +} + /** Rename the data file. @param new_name name of the table @param replace whether to replace the file with the new name (as part of rolling back TRUNCATE) */ dberr_t -dict_table_t::rename_tablespace(const char *new_name, bool replace) const +dict_table_t::rename_tablespace(span new_name, bool replace) const { ut_ad(dict_table_is_file_per_table(this)); ut_ad(!is_temporary()); @@ -1449,18 +1498,17 @@ dict_table_t::rename_tablespace(const char *new_name, bool replace) const return DB_SUCCESS; const char *old_path= UT_LIST_GET_FIRST(space->chain)->name; - fil_space_t::name_type space_name{new_name, strlen(new_name)}; const bool data_dir= DICT_TF_HAS_DATA_DIR(flags); char *path= data_dir - ? os_file_make_new_pathname(old_path, new_name) - : fil_make_filepath(nullptr, space_name, IBD, false); + ? 
dir_pathname(old_path, new_name) + : fil_make_filepath(nullptr, new_name, IBD, false); dberr_t err; if (!path) err= DB_OUT_OF_MEMORY; else if (!strcmp(path, old_path)) err= DB_SUCCESS; else if (data_dir && - DB_SUCCESS != RemoteDatafile::create_link_file(space_name, path)) + DB_SUCCESS != RemoteDatafile::create_link_file(new_name, path)) err= DB_TABLESPACE_EXISTS; else { @@ -1468,8 +1516,8 @@ dict_table_t::rename_tablespace(const char *new_name, bool replace) const if (data_dir) { if (err == DB_SUCCESS) - space_name= {name.m_name, strlen(name.m_name)}; - RemoteDatafile::delete_link_file(space_name); + new_name= {name.m_name, strlen(name.m_name)}; + RemoteDatafile::delete_link_file(new_name); } } @@ -1484,18 +1532,13 @@ dberr_t dict_table_rename_in_cache( /*=======================*/ dict_table_t* table, /*!< in/out: table */ - const char* new_name, /*!< in: new name */ - bool rename_also_foreigns, - /*!< in: in ALTER TABLE we want - to preserve the original table name - in constraints which reference it */ + span new_name, /*!< in: new name */ bool replace_new_file) /*!< in: whether to replace the file with the new name (as part of rolling back TRUNCATE) */ { dict_foreign_t* foreign; - ulint fold; char old_name[MAX_FULL_NAME_LEN + 1]; ut_ad(dict_sys.locked()); @@ -1505,23 +1548,8 @@ dict_table_rename_in_cache( ut_a(old_name_len < sizeof old_name); strcpy(old_name, table->name.m_name); - fold = my_crc32c(0, new_name, strlen(new_name)); - - /* Look for a table with the same name: error if such exists */ - dict_table_t* table2; - HASH_SEARCH(name_hash, &dict_sys.table_hash, fold, - dict_table_t*, table2, ut_ad(table2->cached), - (strcmp(table2->name.m_name, new_name) == 0)); - DBUG_EXECUTE_IF("dict_table_rename_in_cache_failure", - if (table2 == NULL) { - table2 = (dict_table_t*) -1; - } ); - if (table2) { - ib::error() << "Cannot rename table '" << table->name - << "' to '" << new_name << "' since the" - " dictionary cache already contains '" << new_name << "'."; - 
return(DB_ERROR); - } + const uint32_t fold= my_crc32c(0, new_name.data(), new_name.size()); + ut_a(!dict_sys.find_table(new_name)); if (!dict_table_is_file_per_table(table)) { } else if (dberr_t err = table->rename_tablespace(new_name, @@ -1533,8 +1561,14 @@ dict_table_rename_in_cache( HASH_DELETE(dict_table_t, name_hash, &dict_sys.table_hash, my_crc32c(0, table->name.m_name, old_name_len), table); - const bool keep_mdl_name = dict_table_t::is_temporary_name(new_name) - && !table->name.is_temporary(); + bool keep_mdl_name = !table->name.is_temporary(); + + if (!keep_mdl_name) { + } else if (const char* s = static_cast + (memchr(new_name.data(), '/', new_name.size()))) { + keep_mdl_name = new_name.end() - s >= 5 + && !memcmp(s, "/#sql", 5); + } if (keep_mdl_name) { /* Preserve the original table name for @@ -1543,18 +1577,17 @@ dict_table_rename_in_cache( table->name.m_name); } - const size_t new_len = strlen(new_name); - - if (new_len > strlen(table->name.m_name)) { + if (new_name.size() > strlen(table->name.m_name)) { /* We allocate MAX_FULL_NAME_LEN + 1 bytes here to avoid memory fragmentation, we assume a repeated calls of ut_realloc() with the same size do not cause fragmentation */ - ut_a(new_len <= MAX_FULL_NAME_LEN); + ut_a(new_name.size() <= MAX_FULL_NAME_LEN); table->name.m_name = static_cast( ut_realloc(table->name.m_name, MAX_FULL_NAME_LEN + 1)); } - strcpy(table->name.m_name, new_name); + memcpy(table->name.m_name, new_name.data(), new_name.size()); + table->name.m_name[new_name.size()] = '\0'; if (!keep_mdl_name) { table->mdl_name.m_name = table->name.m_name; @@ -1564,7 +1597,7 @@ dict_table_rename_in_cache( HASH_INSERT(dict_table_t, name_hash, &dict_sys.table_hash, fold, table); - if (!rename_also_foreigns) { + if (table->name.is_temporary()) { /* In ALTER TABLE we think of the rename table operation in the direction table -> temporary table (#sql...) 
as dropping the table with the old name and creating @@ -4069,80 +4102,10 @@ dict_print_info_on_foreign_keys( return str; } -/** Given a space_id of a file-per-table tablespace, search the -dict_sys.table_LRU list and return the dict_table_t* pointer for it. -@param space tablespace -@return table if found, NULL if not */ -static -dict_table_t* -dict_find_single_table_by_space(const fil_space_t* space) -{ - dict_table_t* table; - ulint num_item; - ulint count = 0; - - ut_ad(space->id > 0); - - if (!dict_sys.is_initialised()) { - /* This could happen when it's in redo processing. */ - return(NULL); - } - - table = UT_LIST_GET_FIRST(dict_sys.table_LRU); - num_item = UT_LIST_GET_LEN(dict_sys.table_LRU); - - /* This function intentionally does not acquire mutex as it is used - by error handling code in deep call stack as last means to avoid - killing the server, so it worth to risk some consequences for - the action. */ - while (table && count < num_item) { - if (table->space == space) { - if (dict_table_is_file_per_table(table)) { - return(table); - } - return(NULL); - } - - table = UT_LIST_GET_NEXT(table_LRU, table); - count++; - } - - return(NULL); -} - -/**********************************************************************//** -Flags a table with specified space_id corrupted in the data dictionary -cache -@return true if successful */ -bool dict_set_corrupted_by_space(const fil_space_t* space) -{ - dict_table_t* table; - - table = dict_find_single_table_by_space(space); - - if (!table) { - return false; - } - - /* mark the table->corrupted bit only, since the caller - could be too deep in the stack for SYS_INDEXES update */ - table->corrupted = true; - table->file_unreadable = true; - return true; -} - -/** Flag a table encrypted in the data dictionary cache. 
*/ -void dict_set_encrypted_by_space(const fil_space_t* space) -{ - if (dict_table_t* table = dict_find_single_table_by_space(space)) { - table->file_unreadable = true; - } -} - /**********************************************************************//** Flags an index corrupted both in the data dictionary cache and in the SYS_INDEXES */ -void dict_set_corrupted(dict_index_t *index, const char *ctx, bool dict_locked) +void dict_set_corrupted(dict_index_t *index, const char *ctx) { mem_heap_t* heap; mtr_t mtr; @@ -4153,11 +4116,8 @@ void dict_set_corrupted(dict_index_t *index, const char *ctx, bool dict_locked) const char* status; btr_cur_t cursor; - if (!dict_locked) { - dict_sys.lock(SRW_LOCK_CALL); - } + dict_sys.lock(SRW_LOCK_CALL); - ut_ad(dict_sys.locked()); ut_ad(!dict_table_is_comp(dict_sys.sys_tables)); ut_ad(!dict_table_is_comp(dict_sys.sys_indexes)); @@ -4165,6 +4125,7 @@ void dict_set_corrupted(dict_index_t *index, const char *ctx, bool dict_locked) is corrupted */ if (dict_index_is_clust(index)) { index->table->corrupted = TRUE; + goto func_exit; } if (index->type & DICT_CORRUPT) { @@ -4203,8 +4164,11 @@ void dict_set_corrupted(dict_index_t *index, const char *ctx, bool dict_locked) dict_index_copy_types(tuple, sys_index, 2); - btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_LE, - BTR_MODIFY_LEAF, &cursor, 0, &mtr); + if (btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_LE, + BTR_MODIFY_LEAF, &cursor, 0, &mtr) + != DB_SUCCESS) { + goto fail; + } if (cursor.low_match == dtuple_get_n_fields(tuple)) { /* UPDATE SYS_INDEXES SET TYPE=index->type @@ -4229,33 +4193,7 @@ fail: << " in table " << index->table->name << " in " << ctx; func_exit: - if (!dict_locked) { - dict_sys.unlock(); - } -} - -/** Flags an index corrupted in the data dictionary cache only. 
This -is used mostly to mark a corrupted index when index's own dictionary -is corrupted, and we force to load such index for repair purpose -@param[in,out] index index which is corrupted */ -void -dict_set_corrupted_index_cache_only( - dict_index_t* index) -{ - ut_ad(index != NULL); - ut_ad(index->table != NULL); - ut_ad(dict_sys.locked()); - ut_ad(!dict_table_is_comp(dict_sys.sys_tables)); - ut_ad(!dict_table_is_comp(dict_sys.sys_indexes)); - - /* Mark the table as corrupted only if the clustered index - is corrupted */ - if (dict_index_is_clust(index)) { - index->table->corrupted = TRUE; - index->table->file_unreadable = true; - } - - index->type |= DICT_CORRUPT; + dict_sys.unlock(); } /** Sets merge_threshold in the SYS_INDEXES @@ -4301,8 +4239,11 @@ dict_index_set_merge_threshold( dict_index_copy_types(tuple, sys_index, 2); - btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_GE, - BTR_MODIFY_LEAF, &cursor, 0, &mtr); + if (btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, &cursor, 0, &mtr) + != DB_SUCCESS) { + goto func_exit; + } if (cursor.up_match == dtuple_get_n_fields(tuple) && rec_get_n_fields_old(btr_cur_get_rec(&cursor)) @@ -4317,6 +4258,7 @@ dict_index_set_merge_threshold( field, merge_threshold); } +func_exit: mtr_commit(&mtr); mem_heap_free(heap); } diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 2c3d48b9573..eb886a5554e 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -210,8 +210,9 @@ dict_startscan_system( mtr_t* mtr, /*!< in: the mini-transaction */ dict_table_t* table) /*!< in: system table */ { - btr_pcur_open_at_index_side(true, table->indexes.start, BTR_SEARCH_LEAF, - pcur, true, 0, mtr); + if (btr_pcur_open_at_index_side(true, table->indexes.start, BTR_SEARCH_LEAF, + pcur, true, 0, mtr) != DB_SUCCESS) + return nullptr; const rec_t *rec; do rec= dict_getnext_system_low(pcur, mtr); @@ -229,17 +230,12 @@ 
dict_getnext_system( to the record */ mtr_t* mtr) /*!< in: the mini-transaction */ { - const rec_t* rec; - - /* Restore the position */ - pcur->restore_position(BTR_SEARCH_LEAF, mtr); - - /* Get the next record */ - do { - rec = dict_getnext_system_low(pcur, mtr); - } while (rec && rec_get_deleted_flag(rec, 0)); - - return(rec); + const rec_t *rec=nullptr; + if (pcur->restore_position(BTR_SEARCH_LEAF, mtr) != btr_pcur_t::CORRUPTED) + do + rec= dict_getnext_system_low(pcur, mtr); + while (rec && rec_get_deleted_flag(rec, 0)); + return rec; } /********************************************************************//** @@ -863,22 +859,28 @@ err_exit: return READ_OK; } -/** Load and check each non-predefined tablespace mentioned in SYS_TABLES. -Search SYS_TABLES and check each tablespace mentioned that has not -already been added to the fil_system. If it is valid, add it to the -file_system list. -@return the highest space ID found. */ -static ulint dict_check_sys_tables() +/** Check each tablespace found in the data dictionary. +Then look at each table defined in SYS_TABLES that has a space_id > 0 +to find all the file-per-table tablespaces. + +In a crash recovery we already have some tablespace objects created from +processing the REDO log. We will compare the +space_id information in the data dictionary to what we find in the +tablespace file. In addition, more validation will be done if recovery +was needed and force_recovery is not set. + +We also scan the biggest space id, and store it to fil_system. 
*/ +void dict_check_tablespaces_and_store_max_id() { ulint max_space_id = 0; btr_pcur_t pcur; mtr_t mtr; - DBUG_ENTER("dict_check_sys_tables"); + DBUG_ENTER("dict_check_tablespaces_and_store_max_id"); - ut_ad(dict_sys.locked()); + mtr.start(); - mtr_start(&mtr); + dict_sys.lock(SRW_LOCK_CALL); for (const rec_t *rec = dict_startscan_system(&pcur, &mtr, dict_sys.sys_tables); @@ -962,44 +964,10 @@ static ulint dict_check_sys_tables() ut_free(filepath); } - mtr_commit(&mtr); - - DBUG_RETURN(max_space_id); -} - -/** Check each tablespace found in the data dictionary. -Then look at each table defined in SYS_TABLES that has a space_id > 0 -to find all the file-per-table tablespaces. - -In a crash recovery we already have some tablespace objects created from -processing the REDO log. We will compare the -space_id information in the data dictionary to what we find in the -tablespace file. In addition, more validation will be done if recovery -was needed and force_recovery is not set. - -We also scan the biggest space id, and store it to fil_system. */ -void dict_check_tablespaces_and_store_max_id() -{ - mtr_t mtr; - - DBUG_ENTER("dict_check_tablespaces_and_store_max_id"); - - dict_sys.lock(SRW_LOCK_CALL); - - /* Initialize the max space_id from sys header */ - mtr.start(); - ulint max_space_id = mach_read_from_4(DICT_HDR_MAX_SPACE_ID - + DICT_HDR - + dict_hdr_get(&mtr) - ->page.frame); mtr.commit(); fil_set_max_space_id_if_bigger(max_space_id); - /* Open all tablespaces referenced in SYS_TABLES. */ - max_space_id = dict_check_sys_tables(); - fil_set_max_space_id_if_bigger(max_space_id); - dict_sys.unlock(); DBUG_VOID_RETURN; @@ -1296,8 +1264,9 @@ err_len: /********************************************************************//** Loads definitions for table columns. 
*/ +MY_ATTRIBUTE((nonnull, warn_unused_result)) static -void +dberr_t dict_load_columns( /*==============*/ dict_table_t* table, /*!< in/out: table */ @@ -1305,11 +1274,6 @@ dict_load_columns( for temporary storage */ { btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - const rec_t* rec; - byte* buf; - ulint i; mtr_t mtr; ulint n_skipped = 0; @@ -1325,29 +1289,34 @@ dict_load_columns( ut_ad(name_of_col_is(dict_sys.sys_columns, sys_index, DICT_FLD__SYS_COLUMNS__PREC, "PREC")); - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - buf = static_cast(mem_heap_alloc(heap, 8)); - mach_write_to_8(buf, table->id); - - dfield_set_data(dfield, buf, 8); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); + dfield_t dfield; + dtuple_t tuple{ + 0,1,1,&dfield,0,nullptr +#ifdef UNIV_DEBUG + , DATA_TUPLE_MAGIC_N +#endif + }; + byte table_id[8]; + mach_write_to_8(table_id, table->id); + dfield_set_data(&dfield, table_id, 8); + dict_index_copy_types(&tuple, sys_index, 1); + + dberr_t err = btr_pcur_open_on_user_rec(sys_index, &tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + if (err != DB_SUCCESS) { + goto func_exit; + } ut_ad(table->n_t_cols == static_cast( table->n_cols) + static_cast(table->n_v_cols)); - for (i = 0; + for (ulint i = 0; i + DATA_N_SYS_COLS < table->n_t_cols + n_skipped; i++) { const char* err_msg; const char* name = NULL; ulint nth_v_col = ULINT_UNDEFINED; - - rec = btr_pcur_get_rec(&pcur); + const rec_t* rec = btr_pcur_get_rec(&pcur); err_msg = btr_pcur_is_on_user_rec(&pcur) ? 
dict_load_column_low(table, heap, NULL, NULL, @@ -1363,7 +1332,9 @@ dict_load_columns( "/" TEMP_FILE_PREFIX_INNODB)) { break; } else { - ib::fatal() << err_msg << " for table " << table->name; + ib::error() << err_msg << " for table " << table->name; + err = DB_CORRUPTION; + goto func_exit; } /* Note: Currently we have one DOC_ID column that is @@ -1405,38 +1376,32 @@ next_rec: btr_pcur_move_to_next_user_rec(&pcur, &mtr); } +func_exit: mtr.commit(); + return err; } /** Loads SYS_VIRTUAL info for one virtual column @param[in,out] table table @param[in] nth_v_col virtual column sequence num -@param[in,out] v_col virtual column -@param[in,out] heap memory heap */ +MY_ATTRIBUTE((nonnull, warn_unused_result)) static -void -dict_load_virtual_one_col( - dict_table_t* table, - ulint nth_v_col, - dict_v_col_t* v_col, - mem_heap_t* heap) +dberr_t +dict_load_virtual_col(dict_table_t *table, ulint nth_v_col) { + const dict_v_col_t* v_col = dict_table_get_nth_v_col(table, nth_v_col); + + if (v_col->num_base == 0) { + return DB_SUCCESS; + } + dict_index_t* sys_virtual_index; btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - byte* buf; - ulint i = 0; mtr_t mtr; - ulint skipped = 0; ut_ad(dict_sys.locked()); - if (v_col->num_base == 0) { - return; - } - mtr.start(); sys_virtual_index = dict_sys.sys_virtual->indexes.start; @@ -1445,31 +1410,31 @@ dict_load_virtual_one_col( ut_ad(name_of_col_is(dict_sys.sys_virtual, sys_virtual_index, DICT_FLD__SYS_VIRTUAL__POS, "POS")); - tuple = dtuple_create(heap, 2); - - /* table ID field */ - dfield = dtuple_get_nth_field(tuple, 0); - - buf = static_cast(mem_heap_alloc(heap, 8)); - mach_write_to_8(buf, table->id); - - dfield_set_data(dfield, buf, 8); - - /* virtual column pos field */ - dfield = dtuple_get_nth_field(tuple, 1); - - buf = static_cast(mem_heap_alloc(heap, 4)); - ulint vcol_pos = dict_create_v_col_pos(nth_v_col, v_col->m_col.ind); - mach_write_to_4(buf, vcol_pos); - - dfield_set_data(dfield, buf, 4); - - 
dict_index_copy_types(tuple, sys_virtual_index, 2); - - btr_pcur_open_on_user_rec(sys_virtual_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); + dfield_t dfield[2]; + dtuple_t tuple{ + 0,2,2,dfield,0,nullptr +#ifdef UNIV_DEBUG + , DATA_TUPLE_MAGIC_N +#endif + }; + byte table_id[8], vcol_pos[4]; + mach_write_to_8(table_id, table->id); + dfield_set_data(&dfield[0], table_id, 8); + mach_write_to_4(vcol_pos, + dict_create_v_col_pos(nth_v_col, v_col->m_col.ind)); + dfield_set_data(&dfield[1], vcol_pos, 4); + + dict_index_copy_types(&tuple, sys_virtual_index, 2); + + dberr_t err = btr_pcur_open_on_user_rec(sys_virtual_index, &tuple, + PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + if (err != DB_SUCCESS) { + goto func_exit; + } - for (i = 0; i < unsigned{v_col->num_base} + skipped; i++) { + for (ulint i = 0, skipped = 0; + i < unsigned{v_col->num_base} + skipped; i++) { ulint pos; const char* err_msg = btr_pcur_is_on_user_rec(&pcur) @@ -1481,7 +1446,7 @@ dict_load_virtual_one_col( : dict_load_virtual_none; if (!err_msg) { - ut_ad(pos == vcol_pos); + ut_ad(pos == mach_read_from_4(vcol_pos)); } else if (err_msg == dict_load_virtual_del) { skipped++; } else if (err_msg == dict_load_virtual_none @@ -1489,30 +1454,28 @@ dict_load_virtual_one_col( "/" TEMP_FILE_PREFIX_INNODB)) { break; } else { - ib::fatal() << err_msg << " for table " << table->name; + ib::error() << err_msg << " for table " << table->name; + err = DB_CORRUPTION; + break; } btr_pcur_move_to_next_user_rec(&pcur, &mtr); } +func_exit: mtr.commit(); + return err; } /** Loads info from SYS_VIRTUAL for virtual columns. 
-@param[in,out] table table -@param[in] heap memory heap -*/ -static -void -dict_load_virtual( - dict_table_t* table, - mem_heap_t* heap) +@param[in,out] table table */ +MY_ATTRIBUTE((nonnull, warn_unused_result)) +static dberr_t dict_load_virtual(dict_table_t *table) { - for (ulint i = 0; i < table->n_v_cols; i++) { - dict_v_col_t* v_col = dict_table_get_nth_v_col(table, i); - - dict_load_virtual_one_col(table, i, v_col, heap); - } + for (ulint i= 0; i < table->n_v_cols; i++) + if (dberr_t err= dict_load_virtual_col(table, i)) + return err; + return DB_SUCCESS; } /** Error message for a delete-marked record in dict_load_field_low() */ @@ -1672,12 +1635,7 @@ dict_load_fields( mem_heap_t* heap) /*!< in: memory heap for temporary storage */ { btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - byte* buf; - ulint i; mtr_t mtr; - dberr_t error; ut_ad(dict_sys.locked()); @@ -1688,20 +1646,29 @@ dict_load_fields( ut_ad(name_of_col_is(dict_sys.sys_fields, sys_index, DICT_FLD__SYS_FIELDS__COL_NAME, "COL_NAME")); - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - buf = static_cast(mem_heap_alloc(heap, 8)); - mach_write_to_8(buf, index->id); - - dfield_set_data(dfield, buf, 8); - dict_index_copy_types(tuple, sys_index, 1); + dfield_t dfield; + dtuple_t tuple{ + 0,1,1,&dfield,0,nullptr +#ifdef UNIV_DEBUG + , DATA_TUPLE_MAGIC_N +#endif + }; + byte index_id[8]; + mach_write_to_8(index_id, index->id); + dfield_set_data(&dfield, index_id, 8); + dict_index_copy_types(&tuple, sys_index, 1); + + dberr_t error = btr_pcur_open_on_user_rec(sys_index, &tuple, + PAGE_CUR_GE, BTR_SEARCH_LEAF, + &pcur, &mtr); + if (error != DB_SUCCESS) { + goto func_exit; + } - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - for (i = 0; i < index->n_fields; i++) { + for (ulint i = 0; i < index->n_fields; i++) { const char *err_msg = btr_pcur_is_on_user_rec(&pcur) - ? dict_load_field_low(buf, index, NULL, NULL, NULL, + ? 
dict_load_field_low(index_id, index, + NULL, NULL, NULL, heap, &mtr, btr_pcur_get_rec(&pcur)) : dict_load_field_none; @@ -1721,13 +1688,12 @@ dict_load_fields( << index->table->name; } error = DB_CORRUPTION; - goto func_exit; + break; } btr_pcur_move_to_next_user_rec(&pcur, &mtr); } - error = DB_SUCCESS; func_exit: mtr.commit(); return error; @@ -1925,12 +1891,8 @@ dict_load_indexes( { dict_index_t* sys_index; btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - const rec_t* rec; - byte buf[8]; + byte table_id[8]; mtr_t mtr; - dberr_t error = DB_SUCCESS; ut_ad(dict_sys.locked()); @@ -1943,21 +1905,28 @@ dict_load_indexes( ut_ad(name_of_col_is(dict_sys.sys_indexes, sys_index, DICT_FLD__SYS_INDEXES__PAGE_NO, "PAGE_NO")); - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - mach_write_to_8(buf, table->id); - - dfield_set_data(dfield, buf, 8); - dict_index_copy_types(tuple, sys_index, 1); + dfield_t dfield; + dtuple_t tuple{ + 0,1,1,&dfield,0,nullptr +#ifdef UNIV_DEBUG + , DATA_TUPLE_MAGIC_N +#endif + }; + mach_write_to_8(table_id, table->id); + dfield_set_data(&dfield, table_id, 8); + dict_index_copy_types(&tuple, sys_index, 1); + + dberr_t error = btr_pcur_open_on_user_rec(sys_index, &tuple, + PAGE_CUR_GE, BTR_SEARCH_LEAF, + &pcur, &mtr); + if (error != DB_SUCCESS) { + goto func_exit; + } - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); while (btr_pcur_is_on_user_rec(&pcur)) { dict_index_t* index = NULL; const char* err_msg; - - rec = btr_pcur_get_rec(&pcur); + const rec_t* rec = btr_pcur_get_rec(&pcur); if ((ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK) && (rec_get_n_fields_old(rec) == DICT_NUM_FIELDS__SYS_INDEXES @@ -1981,7 +1950,7 @@ dict_load_indexes( } } - err_msg = dict_load_index_low(buf, heap, rec, &mtr, table, + err_msg = dict_load_index_low(table_id, heap, rec, &mtr, table, &index); ut_ad(!index == !!err_msg); @@ -2018,7 +1987,7 @@ dict_load_indexes( if (ignore_err != 
DICT_ERR_IGNORE_DROP && index->is_corrupted() && index->is_clust()) { dict_mem_index_free(index); - error = DB_INDEX_CORRUPT; + error = DB_TABLE_CORRUPT; goto func_exit; } @@ -2067,7 +2036,11 @@ corrupted: dictionary cache for such metadata corruption, since we would always be able to set it when loading the dictionary cache */ - dict_set_corrupted_index_cache_only(index); + if (index->is_clust()) { + index->table->corrupted = true; + index->table->file_unreadable = true; + } + index->type |= DICT_CORRUPT; } else if (!dict_index_is_clust(index) && NULL == dict_table_get_first_index(table)) { @@ -2300,12 +2273,7 @@ static dict_table_t *dict_load_table_one(const span &name, dict_err_ignore_t ignore_err, dict_names_t &fk_tables) { - dberr_t err; btr_pcur_t pcur; - dtuple_t* tuple; - mem_heap_t* heap; - dfield_t* dfield; - const rec_t* rec; mtr_t mtr; DBUG_ENTER("dict_load_table_one"); @@ -2314,8 +2282,6 @@ static dict_table_t *dict_load_table_one(const span &name, ut_ad(dict_sys.locked()); - heap = mem_heap_create(32000); - mtr.start(); dict_index_t *sys_index = dict_sys.sys_tables->indexes.start; @@ -2331,25 +2297,28 @@ static dict_table_t *dict_load_table_one(const span &name, ut_ad(name_of_col_is(dict_sys.sys_tables, sys_index, DICT_FLD__SYS_TABLES__SPACE, "SPACE")); - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, name.data(), name.size()); - dict_index_copy_types(tuple, sys_index, 1); + dfield_t dfield; + dtuple_t tuple{ + 0,1,1,&dfield,0,nullptr +#ifdef UNIV_DEBUG + , DATA_TUPLE_MAGIC_N +#endif + }; + dfield_set_data(&dfield, name.data(), name.size()); + dict_index_copy_types(&tuple, sys_index, 1); - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - rec = btr_pcur_get_rec(&pcur); + dberr_t err = btr_pcur_open_on_user_rec(sys_index, &tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); - if (!btr_pcur_is_on_user_rec(&pcur)) { + if (err != DB_SUCCESS || 
!btr_pcur_is_on_user_rec(&pcur)) { /* Not found */ err_exit: mtr.commit(); - mem_heap_free(heap); - - DBUG_RETURN(NULL); + DBUG_RETURN(nullptr); } + const rec_t* rec = btr_pcur_get_rec(&pcur); + /* Check if the table name in record is the searched one */ if (rec_get_field_start_offs(rec, 1) != name.size() || memcmp(name.data(), rec, name.size())) { @@ -2369,11 +2338,16 @@ err_exit: mtr.commit(); - dict_load_tablespace(table, ignore_err); + mem_heap_t* heap = mem_heap_create(32000); - dict_load_columns(table, heap); + dict_load_tablespace(table, ignore_err); - dict_load_virtual(table, heap); + if (dict_load_columns(table, heap) || dict_load_virtual(table)) { +evict: + dict_sys.remove(table); + mem_heap_free(heap); + DBUG_RETURN(nullptr); + } dict_table_add_system_columns(table, heap); @@ -2397,16 +2371,13 @@ err_exit: err = dict_load_indexes(table, heap, index_load_err); - if (err == DB_INDEX_CORRUPT || !UT_LIST_GET_FIRST(table->indexes)) { + if (err == DB_TABLE_CORRUPT) { /* Refuse to load the table if the table has a corrupted cluster index */ ut_ad(index_load_err != DICT_ERR_IGNORE_DROP); ib::error() << "Refusing to load corrupted table " << table->name; -evict: - dict_sys.remove(table); - table = NULL; - goto func_exit; + goto evict; } if (err != DB_SUCCESS || !table->is_readable()) { @@ -2417,7 +2388,7 @@ evict: corrupted: table->corrupted = true; table->file_unreadable = true; - err = DB_CORRUPTION; + err = DB_TABLE_CORRUPT; } else if (table->space->id && ignore_err == DICT_ERR_IGNORE_DROP) { /* Do not bother to load data from .ibd files @@ -2483,7 +2454,6 @@ corrupted: } } -func_exit: mem_heap_free(heap); ut_ad(!table @@ -2573,17 +2543,16 @@ dict_load_table_on_id( dfield_set_data(&dfield, id_buf, 8); dict_index_copy_types(&tuple, sys_table_ids, 1); - btr_pcur_open_on_user_rec(sys_table_ids, &tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - - - const rec_t* rec = btr_pcur_get_rec(&pcur); dict_table_t* table = nullptr; - if (page_rec_is_user_rec(rec)) { + 
if (btr_pcur_open_on_user_rec(sys_table_ids, &tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr) + == DB_SUCCESS + && btr_pcur_is_on_user_rec(&pcur)) { /*---------------------------------------------------*/ /* Now we have the record in the secondary index containing the table ID and NAME */ + const rec_t* rec = btr_pcur_get_rec(&pcur); check_rec: field = rec_get_nth_field_old( rec, DICT_FLD__SYS_TABLE_IDS__ID, &len); @@ -2636,6 +2605,7 @@ dict_load_sys_table( mem_heap_free(heap); } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /********************************************************************//** Loads foreign key constraint col names (also for the referenced table). Members that must be set (and valid) in foreign: @@ -2646,15 +2616,9 @@ Members that will be created and set by this function: foreign->foreign_col_names[i] foreign->referenced_col_names[i] (for i=0..foreign->n_fields-1) */ -static void dict_load_foreign_cols(dict_foreign_t *foreign, trx_id_t trx_id) +static dberr_t dict_load_foreign_cols(dict_foreign_t *foreign, trx_id_t trx_id) { btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - const rec_t* rec; - const byte* field; - ulint len; - ulint i; mtr_t mtr; size_t id_len; @@ -2675,22 +2639,30 @@ static void dict_load_foreign_cols(dict_foreign_t *foreign, trx_id_t trx_id) dict_index_t* sys_index = dict_sys.sys_foreign_cols->indexes.start; ut_ad(!dict_sys.sys_foreign_cols->not_redundant()); - tuple = dtuple_create(foreign->heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); + dfield_t dfield; + dtuple_t tuple{ + 0,1,1,&dfield,0,nullptr +#ifdef UNIV_DEBUG + , DATA_TUPLE_MAGIC_N +#endif + }; - dfield_set_data(dfield, foreign->id, id_len); - dict_index_copy_types(tuple, sys_index, 1); + dfield_set_data(&dfield, foreign->id, id_len); + dict_index_copy_types(&tuple, sys_index, 1); mem_heap_t* heap = nullptr; - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - for (i = 0; i < foreign->n_fields; i++) { + 
dberr_t err = btr_pcur_open_on_user_rec(sys_index, &tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + if (err != DB_SUCCESS) { + goto func_exit; + } + for (ulint i = 0; i < foreign->n_fields; i++) { retry: ut_a(btr_pcur_is_on_user_rec(&pcur)); - rec = btr_pcur_get_rec(&pcur); - - field = rec_get_nth_field_old( + const rec_t* rec = btr_pcur_get_rec(&pcur); + ulint len; + const byte* field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID, &len); ut_a(len == DATA_TRX_ID_LEN); @@ -2780,11 +2752,12 @@ next: btr_pcur_move_to_next_user_rec(&pcur, &mtr); } - +func_exit: mtr.commit(); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } + return err; } /***********************************************************************//** @@ -2827,7 +2800,6 @@ dict_load_foreign( mtr_t mtr; dict_table_t* for_table; dict_table_t* ref_table; - byte dtuple_buf[DTUPLE_EST_ALLOC(1)]; DBUG_ENTER("dict_load_foreign"); DBUG_PRINT("dict_load_foreign", @@ -2839,27 +2811,37 @@ dict_load_foreign( dict_index_t* sys_index = dict_sys.sys_foreign->indexes.start; ut_ad(!dict_sys.sys_foreign->not_redundant()); - dtuple_t* tuple = dtuple_create_from_mem(dtuple_buf, sizeof dtuple_buf, - 1, 0); - dfield_set_data(dtuple_get_nth_field(tuple, 0), id.data(), id.size()); - dict_index_copy_types(tuple, sys_index, 1); + dfield_t dfield; + dtuple_t tuple{ + 0,1,1,&dfield,0,nullptr +#ifdef UNIV_DEBUG + , DATA_TUPLE_MAGIC_N +#endif + }; + dfield_set_data(&dfield, id.data(), id.size()); + dict_index_copy_types(&tuple, sys_index, 1); mtr.start(); - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - const rec_t* rec = btr_pcur_get_rec(&pcur); mem_heap_t* heap = nullptr; + dberr_t err = btr_pcur_open_on_user_rec(sys_index, &tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + if (err != DB_SUCCESS) { + goto err_exit; + } if (!btr_pcur_is_on_user_rec(&pcur)) { - not_found: +not_found: + err = DB_NOT_FOUND; +err_exit: mtr.commit(); if 
(UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } - DBUG_RETURN(DB_NOT_FOUND); + DBUG_RETURN(err); } + const rec_t* rec = btr_pcur_get_rec(&pcur); static_assert(DICT_FLD__SYS_FOREIGN__ID == 0, "compatibility"); field = rec_get_nth_field_old(rec, DICT_FLD__SYS_FOREIGN__ID, &len); @@ -2945,7 +2927,10 @@ dict_load_foreign( mem_heap_free(heap); } - dict_load_foreign_cols(foreign, trx_id); + err = dict_load_foreign_cols(foreign, trx_id); + if (err != DB_SUCCESS) { + goto load_error; + } ref_table = dict_sys.find_table( {foreign->referenced_table_name_lookup, @@ -2965,9 +2950,9 @@ dict_load_foreign( mem_heap_strdupl(ref_table->heap, foreign->foreign_table_name_lookup, foreign_table_name_len)); - +load_error: dict_foreign_remove_from_cache(foreign); - DBUG_RETURN(DB_SUCCESS); + DBUG_RETURN(err); } ut_a(for_table || ref_table); @@ -3013,11 +2998,7 @@ dict_load_foreigns( subsequently to load all the foreign key constraints. */ { - ulint tuple_buf[(DTUPLE_EST_ALLOC(1) + sizeof(ulint) - 1) - / sizeof(ulint)]; btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; mtr_t mtr; DBUG_ENTER("dict_load_foreigns"); @@ -3039,18 +3020,24 @@ dict_load_foreigns( dict_table_get_first_index(dict_sys.sys_foreign)); ut_ad(!strcmp(sec_index->fields[0].name, "FOR_NAME")); bool check_recursive = !trx_id; + dfield_t dfield; + dtuple_t tuple{ + 0,1,1,&dfield,0,nullptr +#ifdef UNIV_DEBUG + , DATA_TUPLE_MAGIC_N +#endif + }; start_load: mtr.start(); + dfield_set_data(&dfield, table_name, strlen(table_name)); + dict_index_copy_types(&tuple, sec_index, 1); - tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1, 0); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, table_name, strlen(table_name)); - dict_index_copy_types(tuple, sec_index, 1); - - btr_pcur_open_on_user_rec(sec_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); + dberr_t err = btr_pcur_open_on_user_rec(sec_index, &tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + if (err != DB_SUCCESS) { + 
DBUG_RETURN(err); + } loop: const rec_t* rec = btr_pcur_get_rec(&pcur); const byte* field; @@ -3073,11 +3060,10 @@ loop: following call does the comparison in the latin1_swedish_ci charset-collation, in a case-insensitive way. */ - if (0 != cmp_data_data(dfield_get_type(dfield)->mtype, - dfield_get_type(dfield)->prtype, - static_cast( - dfield_get_data(dfield)), - dfield_get_len(dfield), + if (0 != cmp_data_data(dfield_get_type(&dfield)->mtype, + dfield_get_type(&dfield)->prtype, + reinterpret_cast(table_name), + dfield_get_len(&dfield), field, len)) { goto load_next_index; @@ -3111,10 +3097,11 @@ loop: /* Load the foreign constraint definition to the dictionary cache */ - switch (dberr_t err - = dict_load_foreign(table_name, col_names, trx_id, - check_recursive, check_charsets, - {fk_id, len}, ignore_err, fk_tables)) { + err = dict_load_foreign(table_name, col_names, trx_id, + check_recursive, check_charsets, + {fk_id, len}, ignore_err, fk_tables); + + switch (err) { case DB_SUCCESS: break; case DB_NOT_FOUND: @@ -3126,12 +3113,17 @@ loop: "SYS_FOREIGN", int(len), fk_id); /* fall through */ default: +corrupted: ut_free(pcur.old_rec_buf); DBUG_RETURN(err); } mtr.start(); - pcur.restore_position(BTR_SEARCH_LEAF, &mtr); + if (pcur.restore_position(BTR_SEARCH_LEAF, &mtr) + == btr_pcur_t::CORRUPTED) { + mtr.commit(); + goto corrupted; + } next_rec: btr_pcur_move_to_next_user_rec(&pcur, &mtr); @@ -3141,9 +3133,7 @@ load_next_index: mtr.commit(); ut_free(pcur.old_rec_buf); - sec_index = dict_table_get_next_index(sec_index); - - if (sec_index) { + if ((sec_index = dict_table_get_next_index(sec_index))) { /* Switch to scan index on REF_NAME, fk_max_recusive_level already been updated when scanning FOR_NAME index, no need to update again */ diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 307023bd519..ff0b64f9b12 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -167,10 +167,7 @@ 
dict_stats_should_ignore_index( /*===========================*/ const dict_index_t* index) /*!< in: index */ { - return((index->type & (DICT_FTS | DICT_SPATIAL)) - || index->is_corrupted() - || index->to_be_dropped - || !index->is_committed()); + return !index->is_btree() || index->to_be_dropped || !index->is_committed(); } @@ -1113,8 +1110,6 @@ btr_estimate_number_of_different_key_vals(dict_index_t* index, uintmax_t n_sample_pages=1; /* number of pages to sample */ ulint not_empty_flag = 0; ulint total_external_size = 0; - ulint i; - ulint j; uintmax_t add_on; mtr_t mtr; mem_heap_t* heap = NULL; @@ -1221,19 +1216,15 @@ btr_estimate_number_of_different_key_vals(dict_index_t* index, /* We sample some pages in the index to get an estimate */ - for (i = 0; i < n_sample_pages; i++) { + for (ulint i = 0; i < n_sample_pages; i++) { mtr.start(); - bool available; - - available = btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, - &cursor, &mtr); - - if (!available || index->table->bulk_trx_id != bulk_trx_id) { + if (!btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, + &cursor, &mtr) + || index->table->bulk_trx_id != bulk_trx_id + || !index->is_readable()) { mtr.commit(); - mem_heap_free(heap); - - return result; + goto exit_loop; } /* Count the number of different key values for each prefix of @@ -1242,11 +1233,6 @@ btr_estimate_number_of_different_key_vals(dict_index_t* index, because otherwise our algorithm would give a wrong estimate for an index where there is just one key value. 
*/ - if (!index->is_readable()) { - mtr.commit(); - goto exit_loop; - } - page = btr_cur_get_page(&cursor); rec = page_rec_get_next(page_get_infimum_rec(page)); @@ -1286,7 +1272,7 @@ btr_estimate_number_of_different_key_vals(dict_index_t* index, index, stats_null_not_equal, &matched_fields); - for (j = matched_fields; j < n_cols; j++) { + for (ulint j = matched_fields; j < n_cols; j++) { /* We add one if this index record has a different prefix from the previous */ @@ -1342,7 +1328,7 @@ exit_loop: result.reserve(n_cols); - for (j = 0; j < n_cols; j++) { + for (ulint j = 0; j < n_cols; j++) { index_field_stats_t stat; stat.n_diff_key_vals @@ -1381,7 +1367,6 @@ exit_loop: } mem_heap_free(heap); - return result; } @@ -1423,8 +1408,9 @@ dummy_empty: mtr.start(); mtr_s_lock_index(index, &mtr); + dberr_t err; buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, - &mtr); + &mtr, &err); if (!root) { invalid: mtr.commit(); @@ -1642,9 +1628,12 @@ dict_stats_analyze_index_level( /* Position pcur on the leftmost record on the leftmost page on the desired level. 
*/ - btr_pcur_open_at_index_side( - true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED, - &pcur, true, level, mtr); + if (btr_pcur_open_at_index_side( + true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED, + &pcur, true, level, mtr) != DB_SUCCESS) { + goto func_exit; + } + btr_pcur_move_to_next_on_page(&pcur); page = btr_pcur_get_page(&pcur); @@ -1655,11 +1644,17 @@ dict_stats_analyze_index_level( ut_ad(btr_pcur_get_rec(&pcur) == page_rec_get_next_const(page_get_infimum_rec(page))); - /* check that we are indeed on the desired level */ - ut_a(btr_page_get_level(page) == level); + prev_rec = NULL; + prev_rec_is_copied = false; - /* there should not be any pages on the left */ - ut_a(!page_has_prev(page)); + /* no records by default */ + *total_recs = 0; + + *total_pages = 0; + + if (page_has_prev(page) || btr_page_get_level(page) != level) { + goto func_exit; + } if (REC_INFO_MIN_REC_FLAG & rec_get_info_bits( btr_pcur_get_rec(&pcur), page_is_comp(page))) { @@ -1669,20 +1664,12 @@ dict_stats_analyze_index_level( ut_ad(index->is_instant()); btr_pcur_move_to_next_user_rec(&pcur, mtr); } - } else { + } else if (UNIV_UNLIKELY(level != 0)) { /* The first record on the leftmost page must be marked as such on each level except the leaf level. 
*/ - ut_a(level == 0); + goto func_exit; } - prev_rec = NULL; - prev_rec_is_copied = false; - - /* no records by default */ - *total_recs = 0; - - *total_pages = 0; - /* iterate over all user records on this level and compare each two adjacent ones, even the last on page X and the fist on page X+1 */ @@ -1885,7 +1872,7 @@ dict_stats_analyze_index_level( #endif /* UNIV_STATS_DEBUG */ btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr); - +func_exit: ut_free(prev_rec_buf); mem_heap_free(heap); } @@ -2088,16 +2075,18 @@ dict_stats_analyze_index_below_cur( /* descend to the leaf level on the B-tree */ for (;;) { - - dberr_t err = DB_SUCCESS; + dberr_t err; block = buf_page_get_gen(page_id, zip_size, RW_S_LATCH, NULL, BUF_GET, &mtr, &err, !index->is_clust() && 1 == btr_page_get_level(page)); + if (!block) { + goto func_exit; + } - page = buf_block_get_frame(block); + page = block->page.frame; if (page_is_leaf(page)) { /* leaf level */ @@ -2161,6 +2150,7 @@ dict_stats_analyze_index_below_cur( __func__, page_no, n_diff); #endif +func_exit: mtr_commit(&mtr); mem_heap_free(heap); } @@ -2241,48 +2231,43 @@ dict_stats_analyze_index_for_n_prefix( #endif ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_SX_LOCK)); + ut_ad(n_diff_data->level); /* Position pcur on the leftmost record on the leftmost page on the desired level. */ - btr_pcur_open_at_index_side( - true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED, - &pcur, true, n_diff_data->level, mtr); + n_diff_data->n_diff_all_analyzed_pages = 0; + n_diff_data->n_external_pages_sum = 0; + + if (btr_pcur_open_at_index_side(true, index, + BTR_SEARCH_TREE_ALREADY_S_LATCHED, + &pcur, true, n_diff_data->level, mtr) + != DB_SUCCESS) { + return; + } + btr_pcur_move_to_next_on_page(&pcur); page = btr_pcur_get_page(&pcur); const rec_t* first_rec = btr_pcur_get_rec(&pcur); - /* We shouldn't be scanning the leaf level. The caller of this function - should have stopped the descend on level 1 or higher. 
*/ - ut_ad(n_diff_data->level > 0); - ut_ad(!page_is_leaf(page)); - /* The page must not be empty, except when it is the root page (and the whole index is empty). */ - ut_ad(btr_pcur_is_on_user_rec(&pcur)); - ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page))); - - /* check that we are indeed on the desired level */ - ut_a(btr_page_get_level(page) == n_diff_data->level); - - /* there should not be any pages on the left */ - ut_a(!page_has_prev(page)); - - /* check whether the first record on the leftmost page is marked - as such; we are on a non-leaf level */ - ut_a(rec_get_info_bits(first_rec, page_is_comp(page)) - & REC_INFO_MIN_REC_FLAG); + if (page_has_prev(page) + || !btr_pcur_is_on_user_rec(&pcur) + || btr_page_get_level(page) != n_diff_data->level + || first_rec != page_rec_get_next_const(page_get_infimum_rec(page)) + || !(rec_get_info_bits(first_rec, page_is_comp(page)) + & REC_INFO_MIN_REC_FLAG)) { + return; + } const ib_uint64_t last_idx_on_level = boundaries->at( static_cast(n_diff_data->n_diff_on_level - 1)); rec_idx = 0; - n_diff_data->n_diff_all_analyzed_pages = 0; - n_diff_data->n_external_pages_sum = 0; - for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) { /* there are n_diff_on_level elements in 'boundaries' and we divide those elements @@ -2514,34 +2499,25 @@ static index_stats_t dict_stats_analyze_index(dict_index_t* index) mtr.start(); mtr_s_lock_index(index, &mtr); - uint16_t root_level; - - { - buf_block_t* root; - root = btr_root_block_get(index, RW_SX_LATCH, &mtr); - if (!root) { + dberr_t err; + buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, &mtr, &err); + if (!root) { empty_index: - mtr.commit(); - dict_stats_assert_initialized_index(index); - DBUG_RETURN(result); - } - - root_level = btr_page_get_level(root->page.frame); - - mtr.x_lock_space(index->table->space); - ulint dummy, size; - result.index_size - = fseg_n_reserved_pages(*root, PAGE_HEADER - + PAGE_BTR_SEG_LEAF - + root->page.frame, - 
&size, &mtr) - + fseg_n_reserved_pages(*root, PAGE_HEADER - + PAGE_BTR_SEG_TOP - + root->page.frame, - &dummy, &mtr); - result.n_leaf_pages = size ? size : 1; + mtr.commit(); + dict_stats_assert_initialized_index(index); + DBUG_RETURN(result); } + uint16_t root_level = btr_page_get_level(root->page.frame); + mtr.x_lock_space(index->table->space); + ulint dummy, size; + result.index_size + = fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_LEAF + + root->page.frame, &size, &mtr) + + fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_TOP + + root->page.frame, &dummy, &mtr); + result.n_leaf_pages = size ? size : 1; + const auto bulk_trx_id = index->table->bulk_trx_id; if (bulk_trx_id && trx_sys.find(nullptr, bulk_trx_id, false)) { result.index_size = 1; @@ -2645,7 +2621,7 @@ empty_index: mtr.start(); mtr_sx_lock_index(index, &mtr); buf_block_t *root = btr_root_block_get(index, RW_S_LATCH, - &mtr); + &mtr, &err); if (!root || root_level != btr_page_get_level(root->page.frame) || index->table->bulk_trx_id != bulk_trx_id) { /* Just quit if the tree has changed beyond @@ -3633,6 +3609,41 @@ dict_stats_fetch_from_ps( stats. 
*/ dict_stats_empty_table(table, true); + THD* thd = current_thd; + MDL_ticket *mdl_table = nullptr, *mdl_index = nullptr; + dict_table_t* table_stats = dict_table_open_on_name( + TABLE_STATS_NAME, false, DICT_ERR_IGNORE_NONE); + if (table_stats) { + dict_sys.freeze(SRW_LOCK_CALL); + table_stats = dict_acquire_mdl_shared(table_stats, thd, + &mdl_table); + dict_sys.unfreeze(); + } + if (!table_stats + || strcmp(table_stats->name.m_name, TABLE_STATS_NAME)) { +release_and_exit: + if (table_stats) { + dict_table_close(table_stats, false, thd, mdl_table); + } + return DB_STATS_DO_NOT_EXIST; + } + + dict_table_t* index_stats = dict_table_open_on_name( + INDEX_STATS_NAME, false, DICT_ERR_IGNORE_NONE); + if (index_stats) { + dict_sys.freeze(SRW_LOCK_CALL); + index_stats = dict_acquire_mdl_shared(index_stats, thd, + &mdl_index); + dict_sys.unfreeze(); + } + if (!index_stats) { + goto release_and_exit; + } + if (strcmp(index_stats->name.m_name, INDEX_STATS_NAME)) { + dict_table_close(index_stats, false, thd, mdl_index); + goto release_and_exit; + } + trx = trx_create(); trx_start_internal_read_only(trx); @@ -3715,6 +3726,9 @@ dict_stats_fetch_from_ps( /* pinfo is freed by que_eval_sql() */ dict_sys.unlock(); + dict_table_close(table_stats, false, thd, mdl_table); + dict_table_close(index_stats, false, thd, mdl_index); + trx_commit_for_mysql(trx); trx->free(); diff --git a/storage/innobase/dict/drop.cc b/storage/innobase/dict/drop.cc index ff5ceee5e43..4a4e10b45a8 100644 --- a/storage/innobase/dict/drop.cc +++ b/storage/innobase/dict/drop.cc @@ -83,7 +83,10 @@ dberr_t trx_t::drop_table_foreign(const table_name_t &name) ut_ad(dict_operation); ut_ad(dict_operation_lock_mode); - if (!dict_sys.sys_foreign || !dict_sys.sys_foreign_cols) + if (!dict_sys.sys_foreign || dict_sys.sys_foreign->corrupted) + return DB_SUCCESS; + + if (!dict_sys.sys_foreign_cols || dict_sys.sys_foreign_cols->corrupted) return DB_SUCCESS; pars_info_t *info= pars_info_create(); @@ -172,7 +175,7 @@ dberr_t 
trx_t::drop_table(const dict_table_t &table) ut_ad(found_x); #endif - if (dict_sys.sys_virtual) + if (dict_sys.sys_virtual && !dict_sys.sys_virtual->corrupted) { pars_info_t *info= pars_info_create(); pars_info_add_ull_literal(info, "id", table.id); diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index cd0c98d6fe2..6c8498f96a3 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -913,43 +913,32 @@ fil_crypt_needs_rotation( /** Read page 0 and possible crypt data from there. @param[in,out] space Tablespace */ -static inline -void -fil_crypt_read_crypt_data(fil_space_t* space) +static inline void fil_crypt_read_crypt_data(fil_space_t *space) { - if (space->crypt_data || space->size || !space->get_size()) { - /* The encryption metadata has already been read, or - the tablespace is not encrypted and the file has been - opened already, or the file cannot be accessed, - likely due to a concurrent DROP - (possibly as part of TRUNCATE or ALTER TABLE). - FIXME: The file can become unaccessible any time - after this check! We should really remove this - function and instead make crypt_data an integral - part of fil_space_t. */ - return; - } + if (space->crypt_data || space->size || !space->get_size()) + /* The encryption metadata has already been read, or the + tablespace is not encrypted and the file has been opened already, + or the file cannot be accessed, likely due to a concurrent DROP + (possibly as part of TRUNCATE or ALTER TABLE). + + FIXME: The file can become unaccessible any time after this check! + We should really remove this function and instead make crypt_data + an integral part of fil_space_t. 
*/ + return; - const ulint zip_size = space->zip_size(); - mtr_t mtr; - mtr.start(); - if (buf_block_t* block = buf_page_get_gen(page_id_t(space->id, 0), - zip_size, RW_S_LATCH, - nullptr, - BUF_GET_POSSIBLY_FREED, - &mtr)) { - if (block->page.is_freed()) { - goto func_exit; - } - mysql_mutex_lock(&fil_system.mutex); - if (!space->crypt_data && !space->is_stopping()) { - space->crypt_data = fil_space_read_crypt_data( - zip_size, block->page.frame); - } - mysql_mutex_unlock(&fil_system.mutex); - } -func_exit: - mtr.commit(); + const ulint zip_size= space->zip_size(); + mtr_t mtr; + mtr.start(); + if (buf_block_t* b= buf_page_get_gen(page_id_t{space->id, 0}, zip_size, + RW_S_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, &mtr)) + { + mysql_mutex_lock(&fil_system.mutex); + if (!space->crypt_data && !space->is_stopping()) + space->crypt_data= fil_space_read_crypt_data(zip_size, b->page.frame); + mysql_mutex_unlock(&fil_system.mutex); + } + mtr.commit(); } /** Start encrypting a space @@ -996,15 +985,9 @@ func_exit: mtr.start(); /* 2 - get page 0 */ - dberr_t err = DB_SUCCESS; if (buf_block_t* block = buf_page_get_gen( page_id_t(space->id, 0), space->zip_size(), - RW_X_LATCH, NULL, BUF_GET_POSSIBLY_FREED, - &mtr, &err)) { - if (block->page.is_freed()) { - goto abort; - } - + RW_X_LATCH, NULL, BUF_GET_POSSIBLY_FREED, &mtr)) { crypt_data->type = CRYPT_SCHEME_1; crypt_data->min_key_version = 0; // all pages are unencrypted crypt_data->rotate_state.start_time = time(0); @@ -1715,7 +1698,8 @@ fil_crypt_get_page_throttle( return NULL; } - if (fseg_page_is_free(space, state->offset)) { + if (DB_SUCCESS_LOCKED_REC + != fseg_page_is_allocated(space, state->offset)) { /* page is already freed */ return NULL; } @@ -1793,10 +1777,7 @@ fil_crypt_rotate_page( const lsn_t block_lsn = mach_read_from_8(FIL_PAGE_LSN + frame); uint kv = buf_page_get_key_version(frame, space->flags); - if (block->page.is_freed()) { - /* Do not modify freed pages to avoid an assertion - failure on recovery.*/ - 
} else if (block->page.oldest_modification() > 1) { + if (block->page.oldest_modification() > 1) { /* Do not unnecessarily touch pages that are already dirty. */ } else if (space->is_stopping()) { @@ -1808,11 +1789,11 @@ fil_crypt_rotate_page( allocated. Because key rotation is accessing pages in a pattern that is unlike the normal B-tree and undo log access pattern, we cannot - invoke fseg_page_is_free() here, because that + invoke fseg_page_is_allocated() here, because that could result in a deadlock. If we invoked - fseg_page_is_free() and released the + fseg_page_is_allocated() and released the tablespace latch before acquiring block->lock, - then the fseg_page_is_free() information + then the fseg_page_is_allocated() information could be stale already. */ /* If the data file was originally created @@ -1972,10 +1953,8 @@ fil_crypt_flush_space( if (buf_block_t* block = buf_page_get_gen( page_id_t(space->id, 0), space->zip_size(), RW_X_LATCH, NULL, BUF_GET_POSSIBLY_FREED, &mtr)) { - if (!block->page.is_freed()) { - mtr.set_named_space(space); - crypt_data->write_page0(block, &mtr); - } + mtr.set_named_space(space); + crypt_data->write_page0(block, &mtr); } mtr.commit(); diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index b261fdce8b7..f91d127215f 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -54,6 +54,12 @@ Created 10/25/1995 Heikki Tuuri # include #endif +ATTRIBUTE_COLD void fil_space_t::set_corrupted() const +{ + if (!is_stopping() && !is_corrupted.test_and_set()) + sql_print_error("InnoDB: File '%s' is corrupted", chain.start->name); +} + /** Determine if the space id is a user tablespace id or not. 
@param space_id tablespace identifier @return true if it is a user tablespace ID */ @@ -1435,8 +1441,6 @@ fil_write_flushed_lsn( fio = fil_system.sys_space->io(IORequestWrite, 0, srv_page_size, buf); fil_flush_file_spaces(); - } else { - fil_system.sys_space->release(); } aligned_free(buf); @@ -2104,9 +2108,8 @@ err_exit: IF_WIN(node->find_metadata(), node->find_metadata(file, true)); mtr.start(); mtr.set_named_space(space); - fsp_header_init(space, size, &mtr); + ut_a(fsp_header_init(space, size, &mtr) == DB_SUCCESS); mtr.commit(); - *err = DB_SUCCESS; return space; } @@ -2764,16 +2767,16 @@ func_exit: /** Report information about an invalid page access. */ ATTRIBUTE_COLD -static void fil_invalid_page_access_msg(bool fatal, const char *name, +static void fil_invalid_page_access_msg(const char *name, os_offset_t offset, ulint len, bool is_read) { - sql_print_error("%s%s %zu bytes at " UINT64PF + sql_print_error("%s %zu bytes at " UINT64PF " outside the bounds of the file: %s", - fatal ? "[FATAL] InnoDB: " : "InnoDB: ", - is_read ? "Trying to read" : "Trying to write", - len, offset, name); - if (fatal) + is_read + ? 
"InnoDB: Trying to read" + : "[FATAL] InnoDB: Trying to write", len, offset, name); + if (!is_read) abort(); } @@ -2822,15 +2825,15 @@ fil_io_t fil_space_t::io(const IORequest &type, os_offset_t offset, size_t len, fil_node_t* node= UT_LIST_GET_FIRST(chain); ut_ad(node); + ulint p = static_cast(offset >> srv_page_size_shift); + dberr_t err; if (type.type == IORequest::READ_ASYNC && is_stopping()) { - release(); - return {DB_TABLESPACE_DELETED, nullptr}; + err = DB_TABLESPACE_DELETED; + node = nullptr; + goto release; } - ulint p = static_cast(offset >> srv_page_size_shift); - bool fatal; - if (UNIV_LIKELY_NULL(UT_LIST_GET_NEXT(chain, node))) { ut_ad(this == fil_system.sys_space || this == fil_system.temp_space); @@ -2840,16 +2843,18 @@ fil_io_t fil_space_t::io(const IORequest &type, os_offset_t offset, size_t len, p -= node->size; node = UT_LIST_GET_NEXT(chain, node); if (!node) { - release(); - if (type.type != IORequest::READ_ASYNC) { - fatal = true; fail: + if (type.type != IORequest::READ_ASYNC) { fil_invalid_page_access_msg( - fatal, node->name, + node->name, offset, len, type.is_read()); } - return {DB_IO_ERROR, nullptr}; + + set_corrupted(); + err = DB_IO_ERROR; + node = nullptr; + goto release; } } @@ -2857,21 +2862,9 @@ fail: } if (UNIV_UNLIKELY(node->size <= p)) { - release(); - - if (type.type == IORequest::READ_ASYNC) { - /* If we can tolerate the non-existent pages, we - should return with DB_ERROR and let caller decide - what to do. 
*/ - return {DB_ERROR, nullptr}; - } - - fatal = node->space->purpose != FIL_TYPE_IMPORT; goto fail; } - dberr_t err; - if (type.type == IORequest::PUNCH_RANGE) { err = os_file_punch_hole(node->handle, offset, len); /* Punch hole is not supported, make space not to @@ -2897,12 +2890,14 @@ release_sync_write: node->complete_write(); release: release(); + goto func_exit; } ut_ad(fil_validate_skip()); } if (err != DB_SUCCESS) { goto release; } +func_exit: return {err, node}; } @@ -2948,8 +2943,9 @@ write_completed: mysql_mutex_unlock(&recv_sys.mutex); } - ib::error() << "Failed to read page " << id.page_no() - << " from file '" << request.node->name << "': " << err; + if (err != DB_FAIL) + ib::error() << "Failed to read page " << id.page_no() + << " from file '" << request.node->name << "': " << err; } } diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index d563e0868f1..b1cf62fc160 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -31,7 +31,6 @@ Created 11/29/1995 Heikki Tuuri #include "mtr0log.h" #include "ut0byte.h" #include "page0page.h" -#include "fut0fut.h" #include "srv0srv.h" #include "srv0start.h" #include "ibuf0ibuf.h" @@ -41,30 +40,17 @@ Created 11/29/1995 Heikki Tuuri #include "log0log.h" #include "dict0mem.h" #include "fsp0types.h" - -// JAN: MySQL 5.7 Encryption -// #include +#include "log.h" typedef uint32_t page_no_t; -/** Return an extent to the free list of a space. 
-@param[in,out] space tablespace -@param[in] offset page number in the extent -@param[in,out] mtr mini-transaction */ -MY_ATTRIBUTE((nonnull)) -static -void -fsp_free_extent( - fil_space_t* space, - page_no_t offset, - mtr_t* mtr); - /** Returns the first extent descriptor for a segment. We think of the extent lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE. @param[in] inode segment inode @param[in] space tablespace @param[in,out] mtr mini-transaction +@param[out] err error code @return the first extent descriptor, or NULL if none */ MY_ATTRIBUTE((nonnull, warn_unused_result)) static @@ -72,8 +58,10 @@ xdes_t* fseg_get_first_extent( fseg_inode_t* inode, const fil_space_t* space, - mtr_t* mtr); + mtr_t* mtr, + dberr_t* err); +ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Put new extents to the free list if there are free extents above the free limit. If an extent happens to contain an extent descriptor page, the extent is put to the FSP_FREE_FRAG list with the page marked as used. @@ -83,8 +71,8 @@ then we will not allocate more extents @param[in,out] space tablespace @param[in,out] header tablespace header @param[in,out] mtr mini-transaction */ -static ATTRIBUTE_COLD -void +static +dberr_t fsp_fill_free_list( bool init_space, fil_space_t* space, @@ -104,7 +92,9 @@ direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR @param[in,out] mtr mini-transaction @param[in,out] init_mtr mtr or another mini-transaction in which the page should be initialized. 
-@retval NULL if no page could be allocated */ +@param[out] err error code +@return the allocated page +@retval nullptr if no page could be allocated */ static buf_block_t* fseg_alloc_free_page_low( @@ -118,22 +108,30 @@ fseg_alloc_free_page_low( /*!< whether the space has already been reserved */ #endif /* UNIV_DEBUG */ mtr_t* mtr, - mtr_t* init_mtr) - MY_ATTRIBUTE((warn_unused_result)); + mtr_t* init_mtr, + dberr_t* err) + MY_ATTRIBUTE((nonnull, warn_unused_result)); +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Get the tablespace header block, SX-latched @param[in] space tablespace @param[in,out] mtr mini-transaction -@return pointer to the space header, page x-locked */ -static buf_block_t *fsp_get_header(const fil_space_t *space, mtr_t *mtr) +@param[out] err error code +@return pointer to the space header, page x-locked +@retval nullptr if the page cannot be retrieved or is corrupted */ +static buf_block_t *fsp_get_header(const fil_space_t *space, mtr_t *mtr, + dberr_t *err) { buf_block_t *block= buf_page_get_gen(page_id_t(space->id, 0), space->zip_size(), RW_SX_LATCH, - nullptr, BUF_GET_POSSIBLY_FREED, mtr); - if (!block || block->page.is_freed()) - return nullptr; - ut_ad(space->id == mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + - block->page.frame)); + nullptr, BUF_GET_POSSIBLY_FREED, + mtr, err); + if (block && space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + + block->page.frame)) + { + *err= DB_CORRUPTION; + block= nullptr; + } return block; } @@ -215,7 +213,7 @@ inline bool xdes_is_full(const xdes_t *descr) @param[in] state the state @param[in,out] mtr mini-transaction */ inline void xdes_set_state(const buf_block_t &block, xdes_t *descr, - byte state, mtr_t *mtr) + byte state, mtr_t *mtr) { ut_ad(descr && mtr); ut_ad(state >= XDES_FREE); @@ -261,15 +259,16 @@ inline void xdes_init(const buf_block_t &block, xdes_t *descr, mtr_t *mtr) @param[in] page page number @param[in,out] descr extent descriptor @param[in,out] xdes extent 
descriptor page -@param[in,out] mtr mini-transaction */ -static MY_ATTRIBUTE((nonnull)) -void +@param[in,out] mtr mini-transaction +@return error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t fseg_mark_page_used(fseg_inode_t *seg_inode, buf_block_t *iblock, ulint page, xdes_t *descr, buf_block_t *xdes, mtr_t *mtr) { ut_ad(fil_page_get_type(iblock->page.frame) == FIL_PAGE_INODE); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); - ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); + ut_ad(!memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + seg_inode, 4)); ut_ad(!memcmp(seg_inode + FSEG_ID, descr + XDES_ID, 4)); const uint16_t xoffset= uint16_t(descr - xdes->page.frame + XDES_FLST_NODE); @@ -278,12 +277,16 @@ fseg_mark_page_used(fseg_inode_t *seg_inode, buf_block_t *iblock, if (!xdes_get_n_used(descr)) { /* We move the extent from the free list to the NOT_FULL list */ - flst_remove(iblock, uint16_t(FSEG_FREE + ioffset), xdes, xoffset, mtr); - flst_add_last(iblock, uint16_t(FSEG_NOT_FULL + ioffset), - xdes, xoffset, mtr); + if (dberr_t err= flst_remove(iblock, uint16_t(FSEG_FREE + ioffset), + xdes, xoffset, mtr)) + return err; + if (dberr_t err= flst_add_last(iblock, uint16_t(FSEG_NOT_FULL + ioffset), + xdes, xoffset, mtr)) + return err; } - ut_ad(xdes_is_free(descr, page % FSP_EXTENT_SIZE)); + if (UNIV_UNLIKELY(!xdes_is_free(descr, page % FSP_EXTENT_SIZE))) + return DB_CORRUPTION; /* We mark the page as used */ xdes_set_free(*xdes, descr, page % FSP_EXTENT_SIZE, mtr); @@ -294,11 +297,17 @@ fseg_mark_page_used(fseg_inode_t *seg_inode, buf_block_t *iblock, if (xdes_is_full(descr)) { /* We move the extent from the NOT_FULL list to the FULL list */ - flst_remove(iblock, uint16_t(FSEG_NOT_FULL + ioffset), xdes, xoffset, mtr); - flst_add_last(iblock, uint16_t(FSEG_FULL + ioffset), xdes, xoffset, mtr); + if (dberr_t err= flst_remove(iblock, uint16_t(FSEG_NOT_FULL + ioffset), + xdes, xoffset, mtr)) + return err; + if 
(dberr_t err= flst_add_last(iblock, uint16_t(FSEG_FULL + ioffset), + xdes, xoffset, mtr)) + return err; mtr->write<4>(*iblock, seg_inode + FSEG_NOT_FULL_N_USED, not_full_n_used - FSP_EXTENT_SIZE); } + + return DB_SUCCESS; } /** Get pointer to a the extent descriptor of a page. @@ -306,6 +315,7 @@ fseg_mark_page_used(fseg_inode_t *seg_inode, buf_block_t *iblock, @param[in] space tablespace @param[in] offset page offset @param[in,out] mtr mini-transaction +@param[out] err error code @param[out] desc_block descriptor block @param[in] init_space whether the tablespace is being initialized @return pointer to the extent descriptor, NULL if the page does not @@ -317,6 +327,7 @@ xdes_get_descriptor_with_space_hdr( const fil_space_t* space, page_no_t offset, mtr_t* mtr, + dberr_t* err = nullptr, buf_block_t** desc_block = nullptr, bool init_space = false) { @@ -337,8 +348,8 @@ xdes_get_descriptor_with_space_hdr( || srv_is_undo_tablespace(space->id)))))); ut_ad(size == space->size_in_header); - if ((offset >= size) || (offset >= limit)) { - return(NULL); + if (offset >= size || offset >= limit) { + return nullptr; } const unsigned zip_size = space->zip_size(); @@ -349,14 +360,11 @@ xdes_get_descriptor_with_space_hdr( if (descr_page_no) { block = buf_page_get_gen(page_id_t(space->id, descr_page_no), - zip_size, RW_SX_LATCH, nullptr, - BUF_GET_POSSIBLY_FREED, mtr); - if (block && block->page.is_freed()) { - block = nullptr; - } + zip_size, RW_SX_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, mtr, err); } - if (desc_block != NULL) { + if (desc_block) { *desc_block = block; } @@ -367,6 +375,7 @@ xdes_get_descriptor_with_space_hdr( : nullptr; } +MY_ATTRIBUTE((nonnull(1,3), warn_unused_result)) /** Get the extent descriptor of a page. The page where the extent descriptor resides is x-locked. If the page offset is equal to the free limit of the space, we will add new @@ -377,67 +386,19 @@ defined, as they are uninitialized above the free limit. 
@param[in] offset page offset; if equal to the free limit, we try to add new extents to the space free list @param[in,out] mtr mini-transaction +@param[out] err error code @param[out] xdes extent descriptor page @return the extent descriptor */ static xdes_t *xdes_get_descriptor(const fil_space_t *space, page_no_t offset, - mtr_t *mtr, buf_block_t **xdes= nullptr) -{ - buf_block_t *block= buf_page_get_gen(page_id_t(space->id, 0), - space->zip_size(), RW_SX_LATCH, - nullptr, BUF_GET_POSSIBLY_FREED, mtr); - if (!block || block->page.is_freed()) - return nullptr; - return xdes_get_descriptor_with_space_hdr(block, space, offset, mtr, xdes); -} - -/** Get the extent descriptor of a page. -The page where the extent descriptor resides is x-locked. If the page -offset is equal to the free limit of the space, we will add new -extents from above the free limit to the space free list, if not free -limit == space size. This adding is necessary to make the descriptor -defined, as they are uninitialized above the free limit. 
-@param[in] space tablespace -@param[in] page descriptor page offset -@param[in] offset page offset -@param[in,out] mtr mini-transaction -@return the extent descriptor -@retval NULL if the descriptor is not available */ -MY_ATTRIBUTE((warn_unused_result)) -static -const xdes_t* -xdes_get_descriptor_const( - const fil_space_t* space, - page_no_t page, - page_no_t offset, - mtr_t* mtr) + mtr_t *mtr, dberr_t *err= nullptr, + buf_block_t **xdes= nullptr) { - ut_ad(space->is_owner() || mtr->memo_contains(*space, true)); - ut_ad(offset < space->free_limit); - ut_ad(offset < space->size_in_header); - - const ulint zip_size = space->zip_size(); - - if (buf_block_t* block = buf_page_get_gen(page_id_t(space->id, page), - zip_size, RW_S_LATCH, - nullptr, - BUF_GET_POSSIBLY_FREED, - mtr)) { - if (block->page.is_freed()) { - return nullptr; - } - - ut_ad(page != 0 || space->free_limit == mach_read_from_4( - FSP_FREE_LIMIT + FSP_HEADER_OFFSET - + block->page.frame)); - ut_ad(page != 0 || space->size_in_header == mach_read_from_4( - FSP_SIZE + FSP_HEADER_OFFSET - + block->page.frame)); - - return(block->page.frame + XDES_ARR_OFFSET + XDES_SIZE - * xdes_calc_descriptor_index(zip_size, offset)); - } - - return(NULL); + if (buf_block_t *block= + buf_page_get_gen(page_id_t(space->id, 0), space->zip_size(), RW_SX_LATCH, + nullptr, BUF_GET_POSSIBLY_FREED, mtr, err)) + return xdes_get_descriptor_with_space_hdr(block, space, offset, mtr, + err, xdes); + return nullptr; } MY_ATTRIBUTE((nonnull(3), warn_unused_result)) @@ -446,16 +407,27 @@ extent descriptor resides is x-locked. 
@param space tablespace @param lst_node file address of the list node contained in the descriptor @param mtr mini-transaction +@param err error code @param block extent descriptor block @return pointer to the extent descriptor */ static inline xdes_t *xdes_lst_get_descriptor(const fil_space_t &space, fil_addr_t lst_node, - mtr_t *mtr, buf_block_t **block= nullptr) + mtr_t *mtr, buf_block_t **block= nullptr, + dberr_t *err= nullptr) { ut_ad(mtr->memo_contains(space)); - auto b= fut_get_ptr(space.id, space.zip_size(), lst_node, RW_SX_LATCH, - mtr, block); - return b ? b - XDES_FLST_NODE : nullptr; + ut_ad(lst_node.boffset < space.physical_size()); + buf_block_t *b; + if (!block) + block= &b; + *block= buf_page_get_gen(page_id_t{space.id, lst_node.page}, + space.zip_size(), RW_SX_LATCH, + nullptr, BUF_GET_POSSIBLY_FREED, mtr, err); + if (*block) + return (*block)->page.frame + lst_node.boffset - XDES_FLST_NODE; + + space.set_corrupted(); + return nullptr; } /********************************************************************//** @@ -529,8 +501,9 @@ void fil_space_t::modify_check(const mtr_t& mtr) const /** Initialize a tablespace header. @param[in,out] space tablespace @param[in] size current size in blocks -@param[in,out] mtr mini-transaction */ -void fsp_header_init(fil_space_t* space, uint32_t size, mtr_t* mtr) +@param[in,out] mtr mini-transaction +@return error code */ +dberr_t fsp_header_init(fil_space_t *space, uint32_t size, mtr_t *mtr) { const page_id_t page_id(space->id, 0); const ulint zip_size = space->zip_size(); @@ -586,8 +559,10 @@ void fsp_header_init(fil_space_t* space, uint32_t size, mtr_t* mtr) + block->page.frame, 1U); - fsp_fill_free_list(!is_system_tablespace(space->id), - space, block, mtr); + if (dberr_t err = fsp_fill_free_list(!is_system_tablespace(space->id), + space, block, mtr)) { + return err; + } /* Write encryption metadata to page 0 if tablespace is encrypted or encryption is disabled by table option. 
*/ @@ -596,6 +571,8 @@ void fsp_header_init(fil_space_t* space, uint32_t size, mtr_t* mtr) space->crypt_data->not_encrypted())) { space->crypt_data->write_page0(block, mtr); } + + return DB_SUCCESS; } /** Try to extend a single-table tablespace so that a page would fit in the @@ -616,7 +593,7 @@ fsp_try_extend_data_file_with_pages( bool success; ulint size; - ut_a(!is_system_tablespace(space->id)); + ut_ad(!is_system_tablespace(space->id)); ut_d(space->modify_check(*mtr)); size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE @@ -702,9 +679,9 @@ fsp_try_extend_data_file(fil_space_t *space, buf_block_t *header, mtr_t *mtr) to reset the flag to false as dealing with this error requires server restart. */ if (!srv_sys_space.get_tablespace_full_status()) { - ib::error() << "The InnoDB system tablespace " - << OUT_OF_SPACE_MSG - << " innodb_data_file_path."; + sql_print_error("InnoDB: The InnoDB system tablespace " + "%s" " innodb_data_file_path.", + OUT_OF_SPACE_MSG); srv_sys_space.set_tablespace_full_status(true); } return(0); @@ -716,9 +693,10 @@ fsp_try_extend_data_file(fil_space_t *space, buf_block_t *header, mtr_t *mtr) to reset the flag to false as dealing with this error requires server restart. 
*/ if (!srv_tmp_space.get_tablespace_full_status()) { - ib::error() << "The InnoDB temporary tablespace " - << OUT_OF_SPACE_MSG - << " innodb_temp_data_file_path."; + sql_print_error("InnoDB: The InnoDB temporary" + " tablespace %s" + " innodb_temp_data_file_path.", + OUT_OF_SPACE_MSG); srv_tmp_space.set_tablespace_full_status(true); } return(0); @@ -802,170 +780,163 @@ and we are only initializing the first extent and the first bitmap pages; then we will not allocate more extents @param[in,out] space tablespace @param[in,out] header tablespace header -@param[in,out] mtr mini-transaction */ +@param[in,out] mtr mini-transaction +@return error code */ static -void +dberr_t fsp_fill_free_list( bool init_space, fil_space_t* space, buf_block_t* header, mtr_t* mtr) { - ut_d(space->modify_check(*mtr)); - - /* Check if we can fill free list from above the free list limit */ - uint32_t size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE - + header->page.frame); - uint32_t limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT - + header->page.frame); - - ut_ad(size == space->size_in_header); - ut_ad(limit == space->free_limit); + ut_d(space->modify_check(*mtr)); - const ulint zip_size = space->zip_size(); - - if (size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) { - bool skip_resize = init_space; - switch (space->id) { - case TRX_SYS_SPACE: - skip_resize = !srv_sys_space.can_auto_extend_last_file(); - break; - case SRV_TMP_SPACE_ID: - skip_resize = !srv_tmp_space.can_auto_extend_last_file(); - break; - } - - if (!skip_resize) { - fsp_try_extend_data_file(space, header, mtr); - size = space->size_in_header; - } - } - - uint32_t count = 0; + /* Check if we can fill free list from above the free list limit */ + uint32_t size= + mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE + header->page.frame); + uint32_t limit= + mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + header->page.frame); - for (uint32_t i = limit, extent_size = FSP_EXTENT_SIZE, - physical_size = 
space->physical_size(); - (init_space && i < 1) - || (i + extent_size <= size && count < FSP_FREE_ADD); - i += extent_size) { - const bool init_xdes = !ut_2pow_remainder(i, physical_size); + ut_ad(size == space->size_in_header); + ut_ad(limit == space->free_limit); - space->free_limit = i + extent_size; - mtr->write<4>(*header, FSP_HEADER_OFFSET + FSP_FREE_LIMIT - + header->page.frame, i + extent_size); + const auto zip_size= space->zip_size(); - if (init_xdes) { - - buf_block_t* block; + if (size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) + { + bool skip_resize= init_space; + switch (space->id) { + case TRX_SYS_SPACE: + skip_resize= !srv_sys_space.can_auto_extend_last_file(); + break; + case SRV_TMP_SPACE_ID: + skip_resize= !srv_tmp_space.can_auto_extend_last_file(); + break; + } - /* We are going to initialize a new descriptor page - and a new ibuf bitmap page: the prior contents of the - pages should be ignored. */ + if (!skip_resize) + { + fsp_try_extend_data_file(space, header, mtr); + size= space->size_in_header; + } + } - if (i > 0) { - buf_block_t *f= buf_LRU_get_free_block(false); - block= buf_page_create( - space, static_cast(i), - zip_size, mtr, f); - if (UNIV_UNLIKELY(block != f)) { - buf_pool.free_block(f); - } - fsp_init_file_page(space, block, mtr); - mtr->write<2>(*block, FIL_PAGE_TYPE - + block->page.frame, - FIL_PAGE_TYPE_XDES); - } + uint32_t count= 0; + for (uint32_t i= limit, extent_size= FSP_EXTENT_SIZE, + physical_size= space->physical_size(); + (init_space && i < 1) || + (i + extent_size <= size && count < FSP_FREE_ADD); + i += extent_size) + { + const bool init_xdes= !ut_2pow_remainder(i, physical_size); + space->free_limit= i + extent_size; + mtr->write<4>(*header, FSP_HEADER_OFFSET + FSP_FREE_LIMIT + + header->page.frame, i + extent_size); - if (space->purpose != FIL_TYPE_TEMPORARY) { - buf_block_t *f= buf_LRU_get_free_block(false); - block = buf_page_create( - space, - static_cast( - i + FSP_IBUF_BITMAP_OFFSET), - zip_size, mtr, f); - if 
(UNIV_UNLIKELY(block != f)) { - buf_pool.free_block(f); - } - fsp_init_file_page(space, block, mtr); - mtr->write<2>(*block, FIL_PAGE_TYPE - + block->page.frame, - FIL_PAGE_IBUF_BITMAP); - } - } + if (init_xdes) + { + /* We are going to initialize a new descriptor page + and a new ibuf bitmap page: the prior contents of the + pages should be ignored. */ + + if (i) + { + buf_block_t *f= buf_LRU_get_free_block(false); + buf_block_t *block= buf_page_create(space, static_cast(i), + zip_size, mtr, f); + if (UNIV_UNLIKELY(block != f)) + buf_pool.free_block(f); + fsp_init_file_page(space, block, mtr); + mtr->write<2>(*block, FIL_PAGE_TYPE + block->page.frame, + FIL_PAGE_TYPE_XDES); + } + + if (space->purpose != FIL_TYPE_TEMPORARY) + { + buf_block_t *f= buf_LRU_get_free_block(false); + buf_block_t *block= + buf_page_create(space, + static_cast(i + FSP_IBUF_BITMAP_OFFSET), + zip_size, mtr, f); + if (UNIV_UNLIKELY(block != f)) + buf_pool.free_block(f); + fsp_init_file_page(space, block, mtr); + mtr->write<2>(*block, FIL_PAGE_TYPE + block->page.frame, + FIL_PAGE_IBUF_BITMAP); + } + } - buf_block_t* xdes = nullptr; - xdes_t* descr = xdes_get_descriptor_with_space_hdr( - header, space, i, mtr, &xdes, init_space); - if (!descr) { - ut_ad("corruption" == 0); - return; - } + buf_block_t *xdes= nullptr; + xdes_t *descr; + { + dberr_t err= DB_SUCCESS; + descr= xdes_get_descriptor_with_space_hdr(header, space, i, mtr, + &err, &xdes, init_space); + if (!descr) + return err; + } - if (xdes != header && !space->full_crc32()) { - fil_block_check_type(*xdes, FIL_PAGE_TYPE_XDES, mtr); - } - xdes_init(*xdes, descr, mtr); - const uint16_t xoffset= static_cast( - descr - xdes->page.frame + XDES_FLST_NODE); - - if (UNIV_UNLIKELY(init_xdes)) { - - /* The first page in the extent is a descriptor page - and the second is an ibuf bitmap page: mark them - used */ - - xdes_set_free(*xdes, descr, 0, mtr); - xdes_set_free(*xdes, descr, - FSP_IBUF_BITMAP_OFFSET, mtr); - xdes_set_state(*xdes, descr, 
XDES_FREE_FRAG, mtr); - - flst_add_last(header, - FSP_HEADER_OFFSET + FSP_FREE_FRAG, - xdes, xoffset, mtr); - byte* n_used = FSP_HEADER_OFFSET + FSP_FRAG_N_USED - + header->page.frame; - mtr->write<4>(*header, n_used, - 2U + mach_read_from_4(n_used)); - } else { - flst_add_last(header, FSP_HEADER_OFFSET + FSP_FREE, - xdes, xoffset, mtr); - count++; - } - } + if (xdes != header && !space->full_crc32()) + fil_block_check_type(*xdes, FIL_PAGE_TYPE_XDES, mtr); + xdes_init(*xdes, descr, mtr); + const uint16_t xoffset= + static_cast(descr - xdes->page.frame + XDES_FLST_NODE); + if (UNIV_UNLIKELY(init_xdes)) + { + /* The first page in the extent is a descriptor page and the + second is an ibuf bitmap page: mark them used */ + xdes_set_free(*xdes, descr, 0, mtr); + xdes_set_free(*xdes, descr, FSP_IBUF_BITMAP_OFFSET, mtr); + xdes_set_state(*xdes, descr, XDES_FREE_FRAG, mtr); + if (dberr_t err= flst_add_last(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG, + xdes, xoffset, mtr)) + return err; + byte *n_used= FSP_HEADER_OFFSET + FSP_FRAG_N_USED + header->page.frame; + mtr->write<4>(*header, n_used, 2U + mach_read_from_4(n_used)); + } + else + { + if (dberr_t err= + flst_add_last(header, FSP_HEADER_OFFSET + FSP_FREE, + xdes, xoffset, mtr)) + return err; + count++; + } + } - space->free_len += count; + space->free_len+= count; + return DB_SUCCESS; } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Allocates a new free extent. 
@param[in,out] space tablespace @param[in] hint hint of which extent would be desirable: any page offset in the extent goes; the hint must not be > FSP_FREE_LIMIT @param[out] xdes extent descriptor page @param[in,out] mtr mini-transaction -@return extent descriptor, NULL if cannot be allocated */ -static -xdes_t* -fsp_alloc_free_extent( - fil_space_t* space, - uint32_t hint, - buf_block_t** xdes, - mtr_t* mtr) +@return extent descriptor +@retval nullptr if cannot be allocated */ +static xdes_t *fsp_alloc_free_extent(fil_space_t *space, uint32_t hint, + buf_block_t **xdes, mtr_t *mtr, + dberr_t *err) { fil_addr_t first; xdes_t* descr; buf_block_t* desc_block; - buf_block_t* header = fsp_get_header(space, mtr); + buf_block_t* header = fsp_get_header(space, mtr, err); if (!header) { - ut_ad("corruption" == 0); +corrupted: + space->set_corrupted(); return nullptr; } descr = xdes_get_descriptor_with_space_hdr( - header, space, hint, mtr, &desc_block); + header, space, hint, mtr, err, &desc_block); if (!descr) { - ut_ad("corruption" == 0); - return nullptr; + goto corrupted; } if (desc_block != header && !space->full_crc32()) { @@ -980,7 +951,10 @@ fsp_alloc_free_extent( + header->page.frame); if (first.page == FIL_NULL) { - fsp_fill_free_list(false, space, header, mtr); + *err = fsp_fill_free_list(false, space, header, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + goto corrupted; + } first = flst_get_first(FSP_HEADER_OFFSET + FSP_FREE + header->page.frame); @@ -990,57 +964,64 @@ fsp_alloc_free_extent( } descr = xdes_lst_get_descriptor(*space, first, mtr, - &desc_block); + &desc_block, err); if (!descr) { - ut_ad("corruption" == 0); - return nullptr; + return descr; } } - flst_remove(header, FSP_HEADER_OFFSET + FSP_FREE, desc_block, - static_cast( - descr - desc_block->page.frame + XDES_FLST_NODE), - mtr); + *err = flst_remove(header, FSP_HEADER_OFFSET + FSP_FREE, desc_block, + static_cast(descr - desc_block->page.frame + + XDES_FLST_NODE), + mtr); + if 
(UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return nullptr; + } + space->free_len--; *xdes = desc_block; return(descr); } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Allocate a single free page. @param[in,out] header tablespace header @param[in,out] xdes extent descriptor page @param[in,out] descr extent descriptor @param[in] bit slot to allocate in the extent -@param[in,out] mtr mini-transaction */ -static void +@param[in,out] mtr mini-transaction +@return error code */ +static dberr_t fsp_alloc_from_free_frag(buf_block_t *header, buf_block_t *xdes, xdes_t *descr, ulint bit, mtr_t *mtr) { - ut_ad(xdes_get_state(descr) == XDES_FREE_FRAG); - ut_a(xdes_is_free(descr, bit)); - xdes_set_free(*xdes, descr, bit, mtr); - - /* Update the FRAG_N_USED field */ - byte* n_used_p = FSP_HEADER_OFFSET + FSP_FRAG_N_USED - + header->page.frame; - - uint32_t n_used = mach_read_from_4(n_used_p) + 1; + if (UNIV_UNLIKELY(xdes_get_state(descr) != XDES_FREE_FRAG || + !xdes_is_free(descr, bit))) + return DB_CORRUPTION; + xdes_set_free(*xdes, descr, bit, mtr); - if (xdes_is_full(descr)) { - /* The fragment is full: move it to another list */ - const uint16_t xoffset= static_cast( - descr - xdes->page.frame + XDES_FLST_NODE); - flst_remove(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG, - xdes, xoffset, mtr); - xdes_set_state(*xdes, descr, XDES_FULL_FRAG, mtr); + /* Update the FRAG_N_USED field */ + byte *n_used_p= FSP_HEADER_OFFSET + FSP_FRAG_N_USED + header->page.frame; + uint32_t n_used = mach_read_from_4(n_used_p) + 1; - flst_add_last(header, FSP_HEADER_OFFSET + FSP_FULL_FRAG, - xdes, xoffset, mtr); - n_used -= FSP_EXTENT_SIZE; - } + if (xdes_is_full(descr)) + { + /* The fragment is full: move it to another list */ + const uint16_t xoffset= + static_cast(descr - xdes->page.frame + XDES_FLST_NODE); + if (dberr_t err= flst_remove(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG, + xdes, xoffset, mtr)) + return err; + if (dberr_t err= flst_add_last(header, FSP_HEADER_OFFSET + FSP_FULL_FRAG, + xdes, 
xoffset, mtr)) + return err; + xdes_set_state(*xdes, descr, XDES_FULL_FRAG, mtr); + n_used-= FSP_EXTENT_SIZE; + } - mtr->write<4>(*header, n_used_p, n_used); + mtr->write<4>(*header, n_used_p, n_used); + return DB_SUCCESS; } /** Gets a buffer block for an allocated page. @@ -1093,126 +1074,152 @@ The page is marked as used. @param[in,out] mtr mini-transaction @param[in,out] init_mtr mini-transaction in which the page should be initialized (may be the same as mtr) -@retval NULL if no page could be allocated */ +@param[out] err error code +@return allocated block +@retval nullptr if no page could be allocated */ static MY_ATTRIBUTE((warn_unused_result, nonnull)) -buf_block_t* -fsp_alloc_free_page( - fil_space_t* space, - uint32_t hint, - mtr_t* mtr, - mtr_t* init_mtr) +buf_block_t *fsp_alloc_free_page(fil_space_t *space, uint32_t hint, + mtr_t *mtr, mtr_t *init_mtr, dberr_t *err) { - fil_addr_t first; - xdes_t* descr; - const ulint space_id = space->id; - - ut_d(space->modify_check(*mtr)); - buf_block_t* block = fsp_get_header(space, mtr); - - if (!block) { - return nullptr; - } - - buf_block_t *xdes; - - /* Get the hinted descriptor */ - descr = xdes_get_descriptor_with_space_hdr(block, space, hint, mtr, - &xdes); - - if (descr && (xdes_get_state(descr) == XDES_FREE_FRAG)) { - /* Ok, we can take this extent */ - } else { - /* Else take the first extent in free_frag list */ - first = flst_get_first(FSP_HEADER_OFFSET + FSP_FREE_FRAG - + block->page.frame); - - if (first.page == FIL_NULL) { - /* There are no partially full fragments: allocate - a free extent and add it to the FREE_FRAG list. NOTE - that the allocation may have as a side-effect that an - extent containing a descriptor page is added to the - FREE_FRAG list. But we will allocate our page from the - the free extent anyway. 
*/ - - descr = fsp_alloc_free_extent(space, hint, &xdes, mtr); - - if (!descr) { - /* No free space left */ - return nullptr; - } - - xdes_set_state(*xdes, descr, XDES_FREE_FRAG, mtr); - flst_add_last(block, FSP_HEADER_OFFSET + FSP_FREE_FRAG, - xdes, static_cast( - descr - xdes->page.frame - + XDES_FLST_NODE), mtr); - } else { - descr = xdes_lst_get_descriptor(*space, first, mtr, - &xdes); - if (!descr) { - ut_ad("corruption" == 0); - return nullptr; - } - } + ut_d(space->modify_check(*mtr)); + buf_block_t *block= fsp_get_header(space, mtr, err); + if (!block) + return block; - /* Reset the hint */ - hint = 0; - } + buf_block_t *xdes; + /* Get the hinted descriptor */ + xdes_t *descr= xdes_get_descriptor_with_space_hdr(block, space, hint, mtr, + err, &xdes); + if (descr && xdes_get_state(descr) == XDES_FREE_FRAG) + /* Ok, we can take this extent */; + else if (*err != DB_SUCCESS) + { + err_exit: + space->set_corrupted(); + return nullptr; + } + else + { + /* Else take the first extent in free_frag list */ + fil_addr_t first = flst_get_first(FSP_HEADER_OFFSET + FSP_FREE_FRAG + + block->page.frame); + if (first.page == FIL_NULL) + { + /* There are no partially full fragments: allocate a free extent + and add it to the FREE_FRAG list. NOTE that the allocation may + have as a side-effect that an extent containing a descriptor + page is added to the FREE_FRAG list. But we will allocate our + page from the the free extent anyway. 
*/ + descr= fsp_alloc_free_extent(space, hint, &xdes, mtr, err); + if (!descr) + return nullptr; + *err= flst_add_last(block, FSP_HEADER_OFFSET + FSP_FREE_FRAG, xdes, + static_cast(descr - xdes->page.frame + + XDES_FLST_NODE), mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) + return nullptr; + xdes_set_state(*xdes, descr, XDES_FREE_FRAG, mtr); + } + else + { + descr= xdes_lst_get_descriptor(*space, first, mtr, &xdes, err); + if (!descr) + return nullptr; + /* Reset the hint */ + hint= 0; + } + } - /* Now we have in descr an extent with at least one free page. Look - for a free page in the extent. */ + /* Now we have in descr an extent with at least one free page. Look + for a free page in the extent. */ + uint32_t free= xdes_find_free(descr, hint % FSP_EXTENT_SIZE); + if (free == FIL_NULL) + { + corrupted: + *err= DB_CORRUPTION; + goto err_exit; + } - uint32_t free = xdes_find_free(descr, hint % FSP_EXTENT_SIZE); - if (free == FIL_NULL) { - ib::error() << "Allocation metadata for file '" - << space->chain.start->name - << "' is corrupted"; - ut_ad("corruption" == 0); - return nullptr; - } + uint32_t page_no= xdes_get_offset(descr) + free; + uint32_t space_size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE + + block->page.frame); + ut_ad(space_size == space->size_in_header || + (space->id == TRX_SYS_SPACE && + srv_startup_is_before_trx_rollback_phase)); - uint32_t page_no = xdes_get_offset(descr) + free; + if (space_size <= page_no) + { + /* It must be that we are extending a single-table tablespace + whose size is still < 64 pages */ + ut_ad(!is_system_tablespace(space->id)); + if (page_no >= FSP_EXTENT_SIZE) + { + sql_print_error("InnoDB: Trying to extend %s" + " by single page(s) though the size is " UINT32PF "." 
+ " Page no " UINT32PF ".", + space->chain.start->name, space_size, page_no); + goto corrupted; + } - uint32_t space_size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE - + block->page.frame); - ut_ad(space_size == space->size_in_header - || (space_id == TRX_SYS_SPACE - && srv_startup_is_before_trx_rollback_phase)); + if (!fsp_try_extend_data_file_with_pages(space, page_no, block, mtr)) + { + *err= DB_OUT_OF_FILE_SPACE; + return nullptr; + } + } - if (space_size <= page_no) { - /* It must be that we are extending a single-table tablespace - whose size is still < 64 pages */ + *err= fsp_alloc_from_free_frag(block, xdes, descr, free, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) + goto corrupted; + return fsp_page_create(space, page_no, init_mtr); +} - ut_a(!is_system_tablespace(space_id)); - if (page_no >= FSP_EXTENT_SIZE) { - ib::error() << "Trying to extend " - << space->chain.start->name - << " by single page(s) though the size is " - << space_size - << ". Page no " << page_no << "."; - return(NULL); - } +MY_ATTRIBUTE((nonnull, warn_unused_result)) +/** Return an extent to the free list of a space. 
+@param[in,out] space tablespace +@param[in] offset page number in the extent +@param[in,out] mtr mini-transaction +@return error code */ +static dberr_t fsp_free_extent(fil_space_t* space, page_no_t offset, + mtr_t* mtr) +{ + ut_ad(space->is_owner()); + dberr_t err; + buf_block_t *block= fsp_get_header(space, mtr, &err); + if (!block) + return err; + buf_block_t *xdes; + xdes_t *descr= xdes_get_descriptor_with_space_hdr(block, space, offset, mtr, + &err, &xdes); + if (!descr) + { + ut_ad(err || space->is_stopping()); + return err; + } - if (!fsp_try_extend_data_file_with_pages(space, page_no, - block, mtr)) { - /* No disk space left */ - return(NULL); - } - } + if (UNIV_UNLIKELY(xdes_get_state(descr) == XDES_FREE)) + { + space->set_corrupted(); + return DB_CORRUPTION; + } - fsp_alloc_from_free_frag(block, xdes, descr, free, mtr); - return fsp_page_create(space, page_no, init_mtr); + xdes_init(*xdes, descr, mtr); + space->free_len++; + return flst_add_last(block, FSP_HEADER_OFFSET + FSP_FREE, + xdes, static_cast(descr - xdes->page.frame + + XDES_FLST_NODE), mtr); } +MY_ATTRIBUTE((nonnull)) /** Frees a single page of a space. The page is marked as free and clean. 
@param[in,out] space tablespace @param[in] offset page number -@param[in,out] mtr mini-transaction */ -static void fsp_free_page(fil_space_t* space, page_no_t offset, mtr_t* mtr) +@param[in,out] mtr mini-transaction +@return error code */ +static dberr_t fsp_free_page(fil_space_t *space, page_no_t offset, mtr_t *mtr) { xdes_t* descr; - ulint state; ulint frag_n_used; ut_ad(mtr); @@ -1220,61 +1227,35 @@ static void fsp_free_page(fil_space_t* space, page_no_t offset, mtr_t* mtr) /* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */ - buf_block_t* header = fsp_get_header(space, mtr); + dberr_t err; + buf_block_t* header = fsp_get_header(space, mtr, &err); if (!header) { ut_ad(space->is_stopping()); - return; + return err; } buf_block_t* xdes; descr = xdes_get_descriptor_with_space_hdr(header, space, offset, mtr, - &xdes); + &err, &xdes); if (!descr) { - ut_ad(space->is_stopping()); - return; + ut_ad(err || space->is_stopping()); + return err; } - state = xdes_get_state(descr); - - if (UNIV_UNLIKELY(state != XDES_FREE_FRAG - && state != XDES_FULL_FRAG)) { - ib::error() << "File space extent descriptor of page " - << page_id_t(space->id, offset) - << " has state " << state; - /* Crash in debug version, so that we get a core dump - of this corruption. */ - ut_ad(0); + const auto state = xdes_get_state(descr); - if (state == XDES_FREE) { - /* We put here some fault tolerance: if the page - is already free, return without doing anything! */ - - return; + switch (state) { + case XDES_FREE_FRAG: + case XDES_FULL_FRAG: + if (!xdes_is_free(descr, offset % FSP_EXTENT_SIZE)) { + break; } - - ut_error; - } - - if (xdes_is_free(descr, offset % FSP_EXTENT_SIZE)) { - ib::error() << "File space extent descriptor of page " - << page_id_t(space->id, offset) - << " says it is free."; - /* Crash in debug version, so that we get a core dump - of this corruption. 
*/ - ut_ad(0); - - /* We put here some fault tolerance: if the page - is already free, return without doing anything! */ - - return; + /* fall through */ + default: + space->set_corrupted(); + return DB_CORRUPTION; } - mtr->free(*space, static_cast(offset)); - - const ulint bit = offset % FSP_EXTENT_SIZE; - - xdes_set_free(*xdes, descr, bit, mtr); - frag_n_used = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FRAG_N_USED + header->page.frame); @@ -1283,56 +1264,44 @@ static void fsp_free_page(fil_space_t* space, page_no_t offset, mtr_t* mtr) if (state == XDES_FULL_FRAG) { /* The fragment was full: move it to another list */ - flst_remove(header, FSP_HEADER_OFFSET + FSP_FULL_FRAG, - xdes, xoffset, mtr); + err = flst_remove(header, FSP_HEADER_OFFSET + FSP_FULL_FRAG, + xdes, xoffset, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } + err = flst_add_last(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG, + xdes, xoffset, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } xdes_set_state(*xdes, descr, XDES_FREE_FRAG, mtr); - flst_add_last(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG, - xdes, xoffset, mtr); mtr->write<4>(*header, FSP_HEADER_OFFSET + FSP_FRAG_N_USED + header->page.frame, frag_n_used + FSP_EXTENT_SIZE - 1); + } else if (UNIV_UNLIKELY(!frag_n_used)) { + return DB_CORRUPTION; } else { - ut_a(frag_n_used > 0); mtr->write<4>(*header, FSP_HEADER_OFFSET + FSP_FRAG_N_USED + header->page.frame, frag_n_used - 1); } if (!xdes_get_n_used(descr)) { /* The extent has become free: move it to another list */ - flst_remove(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG, - xdes, xoffset, mtr); - fsp_free_extent(space, offset, mtr); + err = flst_remove(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG, + xdes, xoffset, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } + err = fsp_free_extent(space, offset, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } } -} -/** Return an extent to the free list of a space. 
-@param[in,out] space tablespace -@param[in] offset page number in the extent -@param[in,out] mtr mini-transaction */ -static void fsp_free_extent(fil_space_t* space, page_no_t offset, mtr_t* mtr) -{ - ut_ad(space->is_owner()); - - buf_block_t *block= fsp_get_header(space, mtr); - if (!block) - return; - buf_block_t *xdes; - xdes_t* descr= xdes_get_descriptor_with_space_hdr(block, space, offset, mtr, - &xdes); - if (!descr) - { - ut_ad(space->is_stopping()); - return; - } - - ut_a(xdes_get_state(descr) != XDES_FREE); - - xdes_init(*xdes, descr, mtr); + mtr->free(*space, static_cast(offset)); + xdes_set_free(*xdes, descr, offset % FSP_EXTENT_SIZE, mtr); - flst_add_last(block, FSP_HEADER_OFFSET + FSP_FREE, - xdes, static_cast(descr - xdes->page.frame + - XDES_FLST_NODE), mtr); - space->free_len++; + return DB_SUCCESS; } /** @return Number of segment inodes which fit on a single page */ @@ -1349,69 +1318,64 @@ inline ulint FSP_SEG_INODES_PER_PAGE(ulint physical_size) FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i + page /** Looks for a used segment inode on a segment inode page. 
-@param[in] page segment inode page -@param[in] physical_size page size -@return segment inode index, or ULINT_UNDEFINED if not found */ +@param page segment inode page +@param physical_size page size +@return segment inode index +@retval ULINT_UNDEFINED if not found */ static ulint -fsp_seg_inode_page_find_used(const page_t* page, ulint physical_size) +fsp_seg_inode_page_find_used(const page_t *page, ulint physical_size) { - for (ulint i = 0; i < FSP_SEG_INODES_PER_PAGE(physical_size); i++) { - if (!mach_read_from_8( - FSEG_ID - + fsp_seg_inode_page_get_nth_inode(page, i))) { - continue; - } - /* This is used */ - ut_ad(FSEG_MAGIC_N_VALUE == mach_read_from_4( - FSEG_MAGIC_N - + fsp_seg_inode_page_get_nth_inode(page, i))); - return i; - } + for (ulint i= 0; i < FSP_SEG_INODES_PER_PAGE(physical_size); i++) + { + const byte *inode= fsp_seg_inode_page_get_nth_inode(page, i); + if (mach_read_from_8(FSEG_ID + inode)) + { + ut_ad(!memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + inode, 4)); + return i; + } + } - return(ULINT_UNDEFINED); + return ULINT_UNDEFINED; } /** Looks for an unused segment inode on a segment inode page. 
@param[in] page segment inode page @param[in] i search forward starting from this index @param[in] physical_size page size -@return segment inode index, or ULINT_UNDEFINED if not found */ +@return segment inode index +@retval ULINT_UNDEFINED if not found */ static ulint -fsp_seg_inode_page_find_free(const page_t* page, ulint i, ulint physical_size) +fsp_seg_inode_page_find_free(const page_t *page, ulint i, ulint physical_size) { - for (; i < FSP_SEG_INODES_PER_PAGE(physical_size); i++) { - if (!mach_read_from_8( - FSEG_ID - + fsp_seg_inode_page_get_nth_inode(page, i))) { - /* This is unused */ - return i; - } - - ut_ad(FSEG_MAGIC_N_VALUE == mach_read_from_4( - FSEG_MAGIC_N - + fsp_seg_inode_page_get_nth_inode(page, i))); - } - - return ULINT_UNDEFINED; + for (; i < FSP_SEG_INODES_PER_PAGE(physical_size); i++) + { + const byte *inode= fsp_seg_inode_page_get_nth_inode(page, i); + if (mach_read_from_8(FSEG_ID + inode)) + ut_ad(!memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + inode, 4)); + else + /* This is unused */ + return i; + } + return ULINT_UNDEFINED; } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Allocate a file segment inode page. 
@param[in,out] space tablespace @param[in,out] header tablespace header @param[in,out] mtr mini-transaction -@return whether the allocation succeeded */ -MY_ATTRIBUTE((nonnull, warn_unused_result)) -static -bool -fsp_alloc_seg_inode_page(fil_space_t *space, buf_block_t *header, mtr_t *mtr) +@return error code */ +static dberr_t fsp_alloc_seg_inode_page(fil_space_t *space, + buf_block_t *header, mtr_t *mtr) { ut_ad(header->page.id().space() == space->id); - buf_block_t *block= fsp_alloc_free_page(space, 0, mtr, mtr); + dberr_t err; + buf_block_t *block= fsp_alloc_free_page(space, 0, mtr, mtr, &err); if (!block) - return false; + return err; ut_ad(block->page.lock.not_recursive()); @@ -1424,123 +1388,136 @@ fsp_alloc_seg_inode_page(fil_space_t *space, buf_block_t *header, mtr_t *mtr) ut_ad(!mach_read_from_8(inode)); #endif - flst_add_last(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, - block, FSEG_INODE_PAGE_NODE, mtr); - return true; + return flst_add_last(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, + block, FSEG_INODE_PAGE_NODE, mtr); } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Allocate a file segment inode. @param[in,out] space tablespace @param[in,out] header tablespace header @param[out] iblock segment inode page @param[in,out] mtr mini-transaction +@param[out] err error code @return segment inode -@retval NULL if not enough space */ -MY_ATTRIBUTE((nonnull, warn_unused_result)) +@retval nullptr on failure */ static fseg_inode_t* fsp_alloc_seg_inode(fil_space_t *space, buf_block_t *header, - buf_block_t **iblock, mtr_t *mtr) + buf_block_t **iblock, mtr_t *mtr, dberr_t *err) { - buf_block_t* block; - fseg_inode_t* inode; - - /* Allocate a new segment inode page if needed. 
*/ - if (!flst_get_len(FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE - + header->page.frame) - && !fsp_alloc_seg_inode_page(space, header, mtr)) { - return(NULL); - } - const page_id_t page_id( - space->id, - flst_get_first(FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE - + header->page.frame).page); - - block = buf_page_get_gen(page_id, space->zip_size(), RW_SX_LATCH, - nullptr, BUF_GET_POSSIBLY_FREED, mtr); - if (!block || block->page.is_freed()) { - return nullptr; - } - - if (!space->full_crc32()) { - fil_block_check_type(*block, FIL_PAGE_INODE, mtr); - } + /* Allocate a new segment inode page if needed. */ + if (!flst_get_len(FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE + + header->page.frame)) + { + *err= fsp_alloc_seg_inode_page(space, header, mtr); + if (*err != DB_SUCCESS) + return nullptr; + } - const ulint physical_size = space->physical_size(); + const page_id_t page_id + { + space->id, + mach_read_from_4(FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE + FLST_FIRST + + FIL_ADDR_PAGE + header->page.frame) + }; + + buf_block_t *block= + buf_page_get_gen(page_id, space->zip_size(), RW_SX_LATCH, + nullptr, BUF_GET_POSSIBLY_FREED, mtr, err); + if (!block) + return nullptr; - ulint n = fsp_seg_inode_page_find_free(block->page.frame, 0, - physical_size); + if (!space->full_crc32()) + fil_block_check_type(*block, FIL_PAGE_INODE, mtr); - ut_a(n < FSP_SEG_INODES_PER_PAGE(physical_size)); + const ulint physical_size= space->physical_size(); + ulint n= fsp_seg_inode_page_find_free(block->page.frame, 0, physical_size); - inode = fsp_seg_inode_page_get_nth_inode(block->page.frame, n); + if (UNIV_UNLIKELY(n >= FSP_SEG_INODES_PER_PAGE(physical_size))) + { + *err= DB_CORRUPTION; + return nullptr; + } + fseg_inode_t *inode= fsp_seg_inode_page_get_nth_inode(block->page.frame, n); - if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(block->page.frame, - n + 1, - physical_size)) { - /* There are no other unused headers left on the page: move it - to another list */ - flst_remove(header, 
FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, - block, FSEG_INODE_PAGE_NODE, mtr); - flst_add_last(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL, - block, FSEG_INODE_PAGE_NODE, mtr); - } + if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(block->page.frame, n + 1, + physical_size)) + { + /* There are no other unused headers left on the page: move it + to another list */ + *err= flst_remove(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, + block, FSEG_INODE_PAGE_NODE, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) + return nullptr; + *err= flst_add_last(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL, + block, FSEG_INODE_PAGE_NODE, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) + return nullptr; + } - ut_ad(!mach_read_from_8(inode + FSEG_ID) - || mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - *iblock = block; - return(inode); + ut_ad(!mach_read_from_8(inode + FSEG_ID) || + !memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + inode, 4)); + *iblock= block; + return inode; } +MY_ATTRIBUTE((nonnull)) /** Frees a file segment inode. 
@param[in,out] space tablespace @param[in,out] inode segment inode @param[in,out] iblock segment inode page @param[in,out] mtr mini-transaction */ -static void fsp_free_seg_inode( - fil_space_t* space, - fseg_inode_t* inode, - buf_block_t* iblock, - mtr_t* mtr) +static void fsp_free_seg_inode(fil_space_t *space, fseg_inode_t *inode, + buf_block_t *iblock, mtr_t *mtr) { - ut_d(space->modify_check(*mtr)); + ut_d(space->modify_check(*mtr)); - buf_block_t* header = fsp_get_header(space, mtr); - if (!header) { - return; - } + dberr_t err; + buf_block_t *header= fsp_get_header(space, mtr, &err); + if (!header) + return; + if (UNIV_UNLIKELY(memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + inode, 4))) + { + space->set_corrupted(); + return; + } - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); + const ulint physical_size= space->physical_size(); - const ulint physical_size = space->physical_size(); + if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(iblock->page.frame, 0, + physical_size)) + { + /* Move the page to another list */ + if (flst_remove(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL, + iblock, FSEG_INODE_PAGE_NODE, mtr) != DB_SUCCESS) + return; + if (flst_add_last(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, + iblock, FSEG_INODE_PAGE_NODE, mtr) != DB_SUCCESS) + return; + } - if (ULINT_UNDEFINED - == fsp_seg_inode_page_find_free(iblock->page.frame, 0, - physical_size)) { - /* Move the page to another list */ - flst_remove(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL, - iblock, FSEG_INODE_PAGE_NODE, mtr); - flst_add_last(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, - iblock, FSEG_INODE_PAGE_NODE, mtr); - } + mtr->memset(iblock, page_offset(inode) + FSEG_ID, FSEG_INODE_SIZE, 0); - mtr->memset(iblock, page_offset(inode) + FSEG_ID, FSEG_INODE_SIZE, 0); + if (ULINT_UNDEFINED != fsp_seg_inode_page_find_used(iblock->page.frame, + physical_size)) + return; - if (ULINT_UNDEFINED == fsp_seg_inode_page_find_used(iblock->page.frame, - 
physical_size)) { - /* There are no other used headers left on the page: free it */ - flst_remove(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, - iblock, FSEG_INODE_PAGE_NODE, mtr); - fsp_free_page(space, iblock->page.id().page_no(), mtr); - } + /* There are no other used headers left on the page: free it */ + if (flst_remove(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, + iblock, FSEG_INODE_PAGE_NODE, mtr) == DB_SUCCESS) + fsp_free_page(space, iblock->page.id().page_no(), mtr); } +MY_ATTRIBUTE((nonnull(1,4,5), warn_unused_result)) /** Returns the file segment inode, page x-latched. @param[in] header segment header @param[in] space space id @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in,out] mtr mini-transaction -@param[out] block inode block, or NULL to ignore -@return segment inode, page x-latched; NULL if the inode is free */ +@param[out] block inode block +@param[out] err error code +@return segment inode, page x-latched +@retrval nullptr if the inode is free or corruption was noticed */ static fseg_inode_t* fseg_inode_try_get( @@ -1548,47 +1525,34 @@ fseg_inode_try_get( ulint space, ulint zip_size, mtr_t* mtr, - buf_block_t** block) + buf_block_t** block, + dberr_t* err = nullptr) { - fil_addr_t inode_addr; - fseg_inode_t* inode; - - inode_addr.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO); - inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET); - ut_ad(space == mach_read_from_4(header + FSEG_HDR_SPACE)); + if (UNIV_UNLIKELY(space != mach_read_from_4(header + FSEG_HDR_SPACE))) + { + corrupted: + if (err) + *err= DB_CORRUPTION; + return nullptr; + } - inode = fut_get_ptr(space, zip_size, inode_addr, RW_SX_LATCH, - mtr, block); + *block= + buf_page_get_gen(page_id_t(space, + mach_read_from_4(header + FSEG_HDR_PAGE_NO)), + zip_size, RW_SX_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, + mtr, err); + if (!*block) + return nullptr; - if (UNIV_UNLIKELY(!inode)) { - } else if (UNIV_UNLIKELY(!mach_read_from_8(inode + FSEG_ID))) { - 
inode = NULL; - } else { - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); - } + const uint16_t offset= mach_read_from_2(header + FSEG_HDR_OFFSET); + if (UNIV_UNLIKELY(offset >= (*block)->physical_size())) + goto corrupted; - return(inode); -} + fseg_inode_t *inode= (*block)->page.frame + offset; + if (UNIV_UNLIKELY(!mach_read_from_8(inode + FSEG_ID) || + memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + inode, 4))) + goto corrupted; -/** Returns the file segment inode, page x-latched. -@param[in] header segment header -@param[in] space space id -@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 -@param[in,out] mtr mini-transaction -@param[out] block inode block -@return segment inode, page x-latched */ -static -fseg_inode_t* -fseg_inode_get( - const fseg_header_t* header, - ulint space, - ulint zip_size, - mtr_t* mtr, - buf_block_t** block = NULL) -{ - fseg_inode_t *inode= fseg_inode_try_get(header, space, zip_size, mtr, block); - ut_a(inode); return inode; } @@ -1601,7 +1565,7 @@ static uint32_t fseg_get_nth_frag_page_no(const fseg_inode_t *inode, ulint n) { ut_ad(inode); ut_ad(n < FSEG_FRAG_ARR_N_SLOTS); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); + ut_ad(!memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + inode, 4)); return(mach_read_from_4(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE)); } @@ -1617,7 +1581,7 @@ inline void fseg_set_nth_frag_page_no(fseg_inode_t *inode, buf_block_t *iblock, { ut_ad(n < FSEG_FRAG_ARR_N_SLOTS); ut_ad(mtr->memo_contains_flagged(iblock, MTR_MEMO_PAGE_SX_FIX)); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); + ut_ad(!memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + inode, 4)); mtr->write<4>(*iblock, inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE, page_no); @@ -1693,13 +1657,14 @@ static ulint fseg_get_n_frag_pages(const fseg_inode_t *inode) @param space tablespace @param byte_offset byte offset of the created segment header @param mtr mini-transaction +@param err error code 
@param has_done_reservation whether fsp_reserve_free_extents() was invoked @param block block where segment header is placed, or NULL to allocate an additional page for that @return the block where the segment header is placed, x-latched -@retval NULL if could not create segment because of lack of space */ +@retval nullptr if could not create segment */ buf_block_t* -fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr, +fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr, dberr_t *err, bool has_done_reservation, buf_block_t *block) { fseg_inode_t* inode; @@ -1718,23 +1683,26 @@ fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr, ut_ad(!block || block->page.id().space() == space->id); - if (!has_done_reservation - && !fsp_reserve_free_extents(&n_reserved, space, 2, - FSP_NORMAL, mtr)) { - DBUG_RETURN(NULL); + if (!has_done_reservation) { + *err = fsp_reserve_free_extents(&n_reserved, space, 2, + FSP_NORMAL, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + DBUG_RETURN(nullptr); + } } - buf_block_t* header = fsp_get_header(space, mtr); + buf_block_t* header = fsp_get_header(space, mtr, err); if (!header) { - ut_ad("corruption" == 0); + block = nullptr; goto funct_exit; } buf_block_t* iblock; - inode = fsp_alloc_seg_inode(space, header, &iblock, mtr); + inode = fsp_alloc_seg_inode(space, header, &iblock, mtr, err); if (inode == NULL) { + block = nullptr; goto funct_exit; } @@ -1754,7 +1722,7 @@ fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr, flst_init(*iblock, inode + FSEG_NOT_FULL, mtr); flst_init(*iblock, inode + FSEG_FULL, mtr); - mtr->write<4>(*iblock, inode + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE); + mtr->memcpy(*iblock, inode + FSEG_MAGIC_N, FSEG_MAGIC_N_BYTES, 4); compile_time_assert(FSEG_FRAG_SLOT_SIZE == 4); compile_time_assert(FIL_NULL == 0xffffffff); mtr->memset(iblock, @@ -1767,13 +1735,13 @@ fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr, #ifdef UNIV_DEBUG has_done_reservation, #endif /* UNIV_DEBUG 
*/ - mtr, mtr); + mtr, mtr, err); /* The allocation cannot fail if we have already reserved a space for the page. */ ut_ad(!has_done_reservation || block != NULL); - if (block == NULL) { + if (!block) { fsp_free_seg_inode(space, inode, iblock, mtr); goto funct_exit; } @@ -1838,149 +1806,127 @@ ulint fseg_n_reserved_pages(const buf_block_t &block, mtr_t *mtr) { ut_ad(page_align(header) == block.page.frame); - return fseg_n_reserved_pages_low(fseg_inode_get(header, - block.page.id().space(), - block.zip_size(), mtr), - used); + buf_block_t *iblock; + if (fseg_inode_t *inode= + fseg_inode_try_get(header, block.page.id().space(), block.zip_size(), + mtr, &iblock)) + return fseg_n_reserved_pages_low(inode, used); + return *used= 0; } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Tries to fill the free list of a segment with consecutive free extents. This happens if the segment is big enough to allow extents in the free list, the free list is empty, and the extents can be allocated consecutively from the hint onward. 
-@param[in,out] inode segment inode +@param[in] inode segment inode @param[in,out] iblock segment inode page @param[in] space tablespace @param[in] hint hint which extent would be good as the first extent @param[in,out] mtr mini-transaction */ -static -void -fseg_fill_free_list( - fseg_inode_t* inode, - buf_block_t* iblock, - fil_space_t* space, - uint32_t hint, - mtr_t* mtr) +static dberr_t fseg_fill_free_list(const fseg_inode_t *inode, + buf_block_t *iblock, fil_space_t *space, + uint32_t hint, mtr_t *mtr) { - xdes_t* descr; - ulint i; - ib_id_t seg_id; - ulint reserved; - ulint used; - - ut_ad(inode && mtr); - ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); - ut_d(space->modify_check(*mtr)); - - reserved = fseg_n_reserved_pages_low(inode, &used); - - if (reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) { - - /* The segment is too small to allow extents in free list */ - - return; - } - - if (flst_get_len(inode + FSEG_FREE) > 0) { - /* Free list is not empty */ - - return; - } + ulint used; - for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) { - buf_block_t* xdes; - descr = xdes_get_descriptor(space, hint, mtr, &xdes); + ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); + ut_d(space->modify_check(*mtr)); - if (!descr || (XDES_FREE != xdes_get_state(descr))) { - /* We cannot allocate the desired extent: stop */ - return; - } + if (fseg_n_reserved_pages_low(inode, &used) < + FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) + /* The segment is too small to allow extents in free list */ + return DB_SUCCESS; - descr = fsp_alloc_free_extent(space, hint, &xdes, mtr); + if (UNIV_UNLIKELY(memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + inode, 4))) + { + space->set_corrupted(); + return DB_CORRUPTION; + } - xdes_set_state(*xdes, descr, XDES_FSEG, mtr); + if (flst_get_len(inode + FSEG_FREE) > 0) + /* Free list is not empty */ + return DB_SUCCESS; - seg_id = mach_read_from_8(inode + FSEG_ID); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) - == 
FSEG_MAGIC_N_VALUE); - mtr->write<8>(*xdes, descr + XDES_ID, seg_id); + for (ulint i= 0; i < FSEG_FREE_LIST_MAX_LEN; i++, hint += FSP_EXTENT_SIZE) + { + buf_block_t *xdes; + dberr_t err; + xdes_t *descr= xdes_get_descriptor(space, hint, mtr, &err, &xdes); + if (!descr || XDES_FREE != xdes_get_state(descr)) + /* We cannot allocate the desired extent: stop */ + return err; + + descr= fsp_alloc_free_extent(space, hint, &xdes, mtr, &err); + if (UNIV_UNLIKELY(!descr)) + return err; + + if (dberr_t err= + flst_add_last(iblock, + static_cast(inode - iblock->page.frame + + FSEG_FREE), xdes, + static_cast(descr - xdes->page.frame + + XDES_FLST_NODE), mtr)) + return err; + xdes_set_state(*xdes, descr, XDES_FSEG, mtr); + mtr->memcpy(*xdes, descr + XDES_ID, inode + FSEG_ID, 8); + } - flst_add_last(iblock, - static_cast(inode - iblock->page.frame - + FSEG_FREE), xdes, - static_cast(descr - xdes->page.frame - + XDES_FLST_NODE), mtr); - hint += FSP_EXTENT_SIZE; - } + return DB_SUCCESS; } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Allocates a free extent for the segment: looks first in the free list of the segment, then tries to allocate from the space free list. NOTE that the extent returned still resides in the segment free list, it is not yet taken off it! 
-@param[in,out] inode segment inode +@param[in] inode segment inode @param[in,out] iblock segment inode page @param[out] xdes extent descriptor page @param[in,out] space tablespace @param[in,out] mtr mini-transaction -@retval NULL if no page could be allocated */ +@param[out] err error code +@retval nullptr if no page could be allocated */ static xdes_t* fseg_alloc_free_extent( - fseg_inode_t* inode, + const fseg_inode_t* inode, buf_block_t* iblock, buf_block_t** xdes, fil_space_t* space, - mtr_t* mtr) + mtr_t* mtr, + dberr_t* err) { - xdes_t* descr; - ib_id_t seg_id; - fil_addr_t first; - - ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - ut_d(space->modify_check(*mtr)); + ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); + ut_ad(!memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + inode, 4)); + ut_d(space->modify_check(*mtr)); - if (flst_get_len(inode + FSEG_FREE) > 0) { - /* Segment free list is not empty, allocate from it */ - - first = flst_get_first(inode + FSEG_FREE); - - descr = xdes_lst_get_descriptor(*space, first, mtr, xdes); - if (UNIV_UNLIKELY(!descr)) { - ib::error() << "Allocation metadata for file '" - << space->chain.start->name - << "' is corrupted"; - ut_ad("corruption" == 0); - return nullptr; - } - } else { - /* Segment free list was empty, allocate from space */ - descr = fsp_alloc_free_extent(space, 0, xdes, mtr); - - if (descr == NULL) { - - return(NULL); - } - - seg_id = mach_read_from_8(inode + FSEG_ID); - - xdes_set_state(**xdes, descr, XDES_FSEG, mtr); - mtr->write<8,mtr_t::MAYBE_NOP>(**xdes, descr + XDES_ID, - seg_id); - flst_add_last(iblock, - static_cast(inode - iblock->page.frame - + FSEG_FREE), *xdes, - static_cast(descr - (*xdes)->page.frame - + XDES_FLST_NODE), mtr); + if (flst_get_len(inode + FSEG_FREE)) + { + /* Segment free list is not empty, allocate from it */ + return xdes_lst_get_descriptor(*space, flst_get_first(inode + 
FSEG_FREE), + mtr, xdes, err); + } - /* Try to fill the segment free list */ - fseg_fill_free_list(inode, iblock, space, - xdes_get_offset(descr) + FSP_EXTENT_SIZE, - mtr); - } + xdes_t* descr= fsp_alloc_free_extent(space, 0, xdes, mtr, err); + if (UNIV_UNLIKELY(!descr)) + return descr; + xdes_set_state(**xdes, descr, XDES_FSEG, mtr); + mtr->memcpy(**xdes, descr + XDES_ID, inode + FSEG_ID, 8); + *err= flst_add_last(iblock, + static_cast(inode - iblock->page.frame + + FSEG_FREE), *xdes, + static_cast(descr - (*xdes)->page.frame + + XDES_FLST_NODE), mtr); + if (UNIV_LIKELY(*err != DB_SUCCESS)) + return nullptr; + /* Try to fill the segment free list */ + *err= fseg_fill_free_list(inode, iblock, space, + xdes_get_offset(descr) + FSP_EXTENT_SIZE, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) + return nullptr; - return(descr); + return descr; } /** Allocates a single free page from a segment. @@ -1996,7 +1942,9 @@ direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR @param[in,out] mtr mini-transaction @param[in,out] init_mtr mtr or another mini-transaction in which the page should be initialized. 
-@retval NULL if no page could be allocated */ +@param[out] err error code +@return the allocated page +@retval nullptr if no page could be allocated */ static buf_block_t* fseg_alloc_free_page_low( @@ -2010,7 +1958,8 @@ fseg_alloc_free_page_low( /*!< whether the space has already been reserved */ #endif /* UNIV_DEBUG */ mtr_t* mtr, - mtr_t* init_mtr) + mtr_t* init_mtr, + dberr_t* err) { ib_id_t seg_id; ulint used; @@ -2021,11 +1970,9 @@ fseg_alloc_free_page_low( xdes_t* ret_descr; /*!< the extent of the allocated page */ buf_block_t* xdes; ulint n; - const ulint space_id = space->id; ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR)); - ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); + ut_ad(!memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + seg_inode, 4)); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); seg_id = mach_read_from_8(seg_inode + FSEG_ID); @@ -2035,20 +1982,22 @@ fseg_alloc_free_page_low( reserved = fseg_n_reserved_pages_low(seg_inode, &used); - buf_block_t* header = fsp_get_header(space, mtr); + buf_block_t* header = fsp_get_header(space, mtr, err); if (!header) { - ut_ad("corruption" == 0); - return nullptr; + return header; } descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr, - &xdes); + err, &xdes); if (!descr) { + if (*err != DB_SUCCESS) { + return nullptr; + } /* Hint outside space or too high above free limit: reset hint */ /* The file space header page is always allocated. 
*/ hint = 0; - descr = xdes_get_descriptor(space, hint, mtr, &xdes); + descr = xdes_get_descriptor(space, hint, mtr, err, &xdes); if (!descr) { return nullptr; } @@ -2077,32 +2026,44 @@ take_hinted_page: ========================================================= the hinted page ===============*/ - ret_descr = fsp_alloc_free_extent(space, hint, &xdes, mtr); + ret_descr = fsp_alloc_free_extent(space, hint, &xdes, + mtr, err); - ut_a(ret_descr == descr); + if (UNIV_UNLIKELY(ret_descr != descr)) { + if (*err != DB_SUCCESS) { + *err = DB_CORRUPTION; + } + return nullptr; + } xdes_set_state(*xdes, ret_descr, XDES_FSEG, mtr); mtr->write<8,mtr_t::MAYBE_NOP>(*xdes, ret_descr + XDES_ID, seg_id); - flst_add_last(iblock, - static_cast(seg_inode - - iblock->page.frame - + FSEG_FREE), xdes, - static_cast(ret_descr - - xdes->page.frame - + XDES_FLST_NODE), mtr); + *err = flst_add_last( + iblock, + static_cast(seg_inode - iblock->page.frame + + FSEG_FREE), xdes, + static_cast(ret_descr + - xdes->page.frame + + XDES_FLST_NODE), mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return nullptr; + } /* Try to fill the segment free list */ - fseg_fill_free_list(seg_inode, iblock, space, - hint + FSP_EXTENT_SIZE, mtr); + *err = fseg_fill_free_list(seg_inode, iblock, space, + hint + FSP_EXTENT_SIZE, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return nullptr; + } goto take_hinted_page; /*-----------------------------------------------------------*/ } else if ((direction != FSP_NO_DIR) && ((reserved - used) < reserved / FSEG_FILLFACTOR) && (used >= FSEG_FRAG_LIMIT) - && !!(ret_descr = fseg_alloc_free_extent(seg_inode, iblock, - &xdes, space, - mtr))) { + && (ret_descr = fseg_alloc_free_extent(seg_inode, iblock, + &xdes, space, + mtr, err))) { /* 3. 
We take any free extent (which was already assigned above =============================================================== in the if-condition to ret_descr) and take the lowest or @@ -2116,6 +2077,8 @@ take_hinted_page: } ut_ad(!has_done_reservation || ret_page != FIL_NULL); /*-----------------------------------------------------------*/ + } else if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return nullptr; } else if ((xdes_get_state(descr) == XDES_FSEG) && mach_read_from_8(descr + XDES_ID) == seg_id && (!xdes_is_full(descr))) { @@ -2150,10 +2113,6 @@ take_hinted_page: ret_descr = xdes_lst_get_descriptor(*space, first, mtr, &xdes); if (!ret_descr) { - ib::error() << "Allocation metadata for file '" - << space->chain.start->name - << "' is corrupted"; - ut_ad("corruption" == 0); return nullptr; } @@ -2168,15 +2127,18 @@ take_hinted_page: /* 6. We allocate an individual page from the space ===================================================*/ buf_block_t* block = fsp_alloc_free_page( - space, hint, mtr, init_mtr); + space, hint, mtr, init_mtr, err); - ut_ad(!has_done_reservation || block); + ut_ad(block || !has_done_reservation || *err); if (block) { /* Put the page in the fragment page array of the segment */ n = fseg_find_free_frag_page_slot(seg_inode); - ut_a(n != ULINT_UNDEFINED); + if (UNIV_UNLIKELY(n == ULINT_UNDEFINED)) { + *err = DB_CORRUPTION; + return nullptr; + } fseg_set_nth_frag_page_no( seg_inode, iblock, n, @@ -2191,14 +2153,13 @@ take_hinted_page: /* 7. 
We allocate a new extent and take its first page ======================================================*/ ret_descr = fseg_alloc_free_extent(seg_inode, iblock, &xdes, - space, mtr); + space, mtr, err); - if (ret_descr == NULL) { - ret_page = FIL_NULL; - ut_ad(!has_done_reservation); + if (!ret_descr) { + ut_ad(!has_done_reservation || *err); + return nullptr; } else { ret_page = xdes_get_offset(ret_descr); - ut_ad(!has_done_reservation || ret_page != FIL_NULL); } } @@ -2209,16 +2170,17 @@ take_hinted_page: return(NULL); } - if (space->size <= ret_page && !is_predefined_tablespace(space_id)) { + if (space->size <= ret_page && !is_predefined_tablespace(space->id)) { /* It must be that we are extending a single-table tablespace whose size is still < 64 pages */ if (ret_page >= FSP_EXTENT_SIZE) { - ib::error() << "Trying to extend '" - << space->chain.start->name - << "' by single page(s) though the" - << " space size " << space->size - << ". Page no " << ret_page << "."; + sql_print_error("InnoDB: Trying to extend '%s'" + " by single page(s) though the" + " space size " UINT32PF "." + " Page no " UINT32PF ".", + space->chain.start->name, space->size, + ret_page); ut_ad(!has_done_reservation); return(NULL); } @@ -2240,13 +2202,16 @@ got_hinted_page: or FSEG_FREE), and the page is not yet marked as used. 
*/ ut_d(buf_block_t* xxdes); - ut_ad(xdes_get_descriptor(space, ret_page, mtr, &xxdes) + ut_ad(xdes_get_descriptor(space, ret_page, mtr, err, &xxdes) == ret_descr); ut_ad(xdes == xxdes); ut_ad(xdes_is_free(ret_descr, ret_page % FSP_EXTENT_SIZE)); - fseg_mark_page_used(seg_inode, iblock, ret_page, ret_descr, - xdes, mtr); + *err = fseg_mark_page_used(seg_inode, iblock, ret_page, + ret_descr, xdes, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return nullptr; + } } return fsp_page_create(space, ret_page, init_mtr); @@ -2274,8 +2239,9 @@ fseg_alloc_free_page_general( is no need to do the check for this individual page */ mtr_t* mtr, /*!< in/out: mini-transaction */ - mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction + mtr_t* init_mtr,/*!< in/out: mtr or another mini-transaction in which the page should be initialized. */ + dberr_t* err) /*!< out: error code */ { fseg_inode_t* inode; ulint space_id; @@ -2286,16 +2252,21 @@ fseg_alloc_free_page_general( space_id = page_get_space_id(page_align(seg_header)); space = mtr->x_lock_space(space_id); - inode = fseg_inode_get(seg_header, space_id, space->zip_size(), - mtr, &iblock); + inode = fseg_inode_try_get(seg_header, space_id, space->zip_size(), + mtr, &iblock, err); + if (!inode) { + return nullptr; + } if (!space->full_crc32()) { fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr); } - if (!has_done_reservation - && !fsp_reserve_free_extents(&n_reserved, space, 2, - FSP_NORMAL, mtr)) { - return(NULL); + if (!has_done_reservation) { + *err = fsp_reserve_free_extents(&n_reserved, space, 2, + FSP_NORMAL, mtr); + if (*err != DB_SUCCESS) { + return nullptr; + } } block = fseg_alloc_free_page_low(space, @@ -2303,11 +2274,11 @@ fseg_alloc_free_page_general( #ifdef UNIV_DEBUG has_done_reservation, #endif /* UNIV_DEBUG */ - mtr, init_mtr); + mtr, init_mtr, err); /* The allocation cannot fail if we have already reserved a space for the page. 
*/ - ut_ad(!has_done_reservation || block != NULL); + ut_ad(block || !has_done_reservation || *err); if (!has_done_reservation) { space->release_free_extents(n_reserved); @@ -2316,6 +2287,7 @@ fseg_alloc_free_page_general( return(block); } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Check that we have at least n_pages frag pages free in the first extent of a single-table tablespace, and they are also physically initialized to the data file. That is we have already extended the data file so that those @@ -2326,10 +2298,9 @@ with pages. @param[in] size tablespace size in pages, less than FSP_EXTENT_SIZE @param[in,out] mtr mini-transaction @param[in] n_pages number of pages to reserve -@return true if there were at least n_pages free pages, or we were able -to extend */ +@return error code */ static -bool +dberr_t fsp_reserve_free_pages( fil_space_t* space, buf_block_t* header, @@ -2337,25 +2308,23 @@ fsp_reserve_free_pages( mtr_t* mtr, uint32_t n_pages) { - xdes_t* descr; + ut_ad(space != fil_system.sys_space && space != fil_system.temp_space); + ut_ad(size < FSP_EXTENT_SIZE); - ut_a(!is_system_tablespace(space->id)); - ut_a(size < FSP_EXTENT_SIZE); - - descr = xdes_get_descriptor_with_space_hdr(header, space, 0, mtr); - if (!descr) { - return false; - } - uint32_t n_used = xdes_get_n_used(descr); - - if (n_used > size) { - ut_ad("corruption" == 0); - return false; - } - - return(size >= n_used + n_pages - || fsp_try_extend_data_file_with_pages( - space, n_used + n_pages - 1, header, mtr)); + dberr_t err= DB_OUT_OF_FILE_SPACE; + const xdes_t *descr= + xdes_get_descriptor_with_space_hdr(header, space, 0, mtr, &err); + if (!descr) + return err; + const uint32_t n_used= xdes_get_n_used(descr); + if (size >= n_used + n_pages) + return DB_SUCCESS; + if (n_used > size) + return DB_CORRUPTION; + return fsp_try_extend_data_file_with_pages(space, n_used + n_pages - 1, + header, mtr) + ? DB_SUCCESS + : DB_OUT_OF_FILE_SPACE; } /** Reserves free pages from a tablespace. 
All mini-transactions which may @@ -2395,8 +2364,9 @@ free pages available. @param[in] n_pages for small tablespaces (tablespace size is less than FSP_EXTENT_SIZE), number of free pages to reserve. -@return true if we were able to make the reservation */ -bool +@return error code +@retval DB_SUCCESS if we were able to make the reservation */ +dberr_t fsp_reserve_free_extents( uint32_t* n_reserved, fil_space_t* space, @@ -2415,9 +2385,10 @@ fsp_reserve_free_extents( mtr->x_lock_space(space); const unsigned physical_size = space->physical_size(); - buf_block_t* header = fsp_get_header(space, mtr); + dberr_t err; + buf_block_t* header = fsp_get_header(space, mtr, &err); if (!header) { - ut_ad("corruption" == 0); + return err; } try_again: uint32_t size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE @@ -2427,8 +2398,8 @@ try_again: if (size < extent_size && n_pages < extent_size / 2) { /* Use different rules for small single-table tablespaces */ *n_reserved = 0; - return(fsp_reserve_free_pages(space, header, size, - mtr, n_pages)); + return fsp_reserve_free_pages(space, header, size, + mtr, n_pages); } uint32_t n_free_list_ext = flst_get_len(FSP_HEADER_OFFSET + FSP_FREE @@ -2491,24 +2462,26 @@ try_again: } if (space->reserve_free_extents(n_free, n_ext)) { - return(true); + return DB_SUCCESS; } try_to_extend: if (fsp_try_extend_data_file(space, header, mtr)) { goto try_again; } - return(false); + return DB_OUT_OF_FILE_SPACE; } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Frees a single page of a segment. 
@param[in] seg_inode segment inode @param[in,out] space tablespace @param[in] offset page number @param[in,out] mtr mini-transaction -@param[in] ahi Drop adaptive hash index */ +@param[in] ahi Drop adaptive hash index +@return error code */ static -void +dberr_t fseg_free_page_low( fseg_inode_t* seg_inode, buf_block_t* iblock, @@ -2520,13 +2493,7 @@ fseg_free_page_low( #endif /* BTR_CUR_HASH_ADAPT */ ) { - ib_id_t descr_id; - ib_id_t seg_id; - - ut_ad(seg_inode != NULL); - ut_ad(mtr != NULL); - ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); + ut_ad(!memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + seg_inode, 4)); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); ut_ad(iblock->page.frame == page_align(seg_inode)); ut_d(space->modify_check(*mtr)); @@ -2541,16 +2508,16 @@ fseg_free_page_low( const uint32_t extent_size = FSP_EXTENT_SIZE; ut_ad(ut_is_2pow(extent_size)); buf_block_t* xdes; - xdes_t* descr = xdes_get_descriptor(space, offset, mtr, &xdes); + dberr_t err; + xdes_t* descr = xdes_get_descriptor(space, offset, mtr, &err, &xdes); - if (!descr || xdes_is_free(descr, offset & (extent_size - 1))) { - if (space->is_stopping()) { - return; - } - ib::error() << "Page " << offset << " in file '" - << space->chain.start->name - << "' is already marked as free"; - return; + if (!descr) { + return err; + } + if (UNIV_UNLIKELY(xdes_is_free(descr, offset & (extent_size - 1)))) { +corrupted: + space->set_corrupted(); + return DB_CORRUPTION; } if (xdes_get_state(descr) != XDES_FSEG) { @@ -2569,22 +2536,13 @@ fseg_free_page_low( break; } - fsp_free_page(space, offset, mtr); - return; + return fsp_free_page(space, offset, mtr); } /* If we get here, the page is in some extent of the segment */ - descr_id = mach_read_from_8(descr + XDES_ID); - seg_id = mach_read_from_8(seg_inode + FSEG_ID); - - if (UNIV_UNLIKELY(descr_id != seg_id)) { - ib::error() << "InnoDB is trying to free page " << offset - << " in file '" << 
space->chain.start->name - << "' which does not belong to segment " - << descr_id - << " but belongs to segment " << seg_id; - return; + if (UNIV_UNLIKELY(memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8))) { + goto corrupted; } byte* p_not_full = seg_inode + FSEG_NOT_FULL_N_USED; @@ -2595,32 +2553,45 @@ fseg_free_page_low( if (xdes_is_full(descr)) { /* The fragment is full: move it to another list */ - flst_remove(iblock, static_cast(FSEG_FULL + ioffset), - xdes, xoffset, mtr); - flst_add_last(iblock, static_cast(FSEG_NOT_FULL - + ioffset), - xdes, xoffset, mtr); + err = flst_remove(iblock, + static_cast(FSEG_FULL + ioffset), + xdes, xoffset, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } + err = flst_add_last(iblock, static_cast(FSEG_NOT_FULL + + ioffset), + xdes, xoffset, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } not_full_n_used += extent_size - 1; } else { - ut_a(not_full_n_used > 0); + if (!not_full_n_used) { + goto corrupted; + } not_full_n_used--; } mtr->write<4>(*iblock, p_not_full, not_full_n_used); - - const ulint bit = offset & (extent_size - 1); - - xdes_set_free(*xdes, descr, bit, mtr); + xdes_set_free(*xdes, descr, offset & (extent_size - 1), mtr); if (!xdes_get_n_used(descr)) { - /* The extent has become free: free it to space */ - flst_remove(iblock, static_cast(FSEG_NOT_FULL - + ioffset), - xdes, xoffset, mtr); - fsp_free_extent(space, offset, mtr); + err = flst_remove(iblock, static_cast(FSEG_NOT_FULL + + ioffset), + xdes, xoffset, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } + err = fsp_free_extent(space, offset, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } + return err; } mtr->free(*space, static_cast(offset)); + return DB_SUCCESS; } /** Free a page in a file segment. 
@@ -2628,11 +2599,11 @@ fseg_free_page_low( @param[in,out] space tablespace @param[in] offset page number @param[in,out] mtr mini-transaction -@param[in] have_latch whether space->x_lock() was already called */ -void fseg_free_page(fseg_header_t *seg_header, fil_space_t *space, - uint32_t offset, mtr_t *mtr, bool have_latch) +@param[in] have_latch whether space->x_lock() was already called +@return error code */ +dberr_t fseg_free_page(fseg_header_t *seg_header, fil_space_t *space, + uint32_t offset, mtr_t *mtr, bool have_latch) { - DBUG_ENTER("fseg_free_page"); buf_block_t *iblock; if (have_latch) ut_ad(space->is_owner()); @@ -2642,56 +2613,70 @@ void fseg_free_page(fseg_header_t *seg_header, fil_space_t *space, DBUG_PRINT("fseg_free_page", ("space_id: " ULINTPF ", page_no: %u", space->id, offset)); + dberr_t err; if (fseg_inode_t *seg_inode= fseg_inode_try_get(seg_header, space->id, space->zip_size(), - mtr, &iblock)) + mtr, &iblock, &err)) { if (!space->full_crc32()) fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr); - fseg_free_page_low(seg_inode, iblock, space, offset, mtr); + return fseg_free_page_low(seg_inode, iblock, space, offset, mtr); } - DBUG_VOID_RETURN; + return err; } -/** Determine whether a page is free. -@param[in,out] space tablespace -@param[in] page page number -@return whether the page is marked as free */ -bool -fseg_page_is_free(fil_space_t* space, unsigned page) +/** Determine whether a page is allocated. 
+@param space tablespace +@param page page number +@return error code +@retval DB_SUCCESS if the page is marked as free +@retval DB_SUCCESS_LOCKED_REC if the page is marked as allocated */ +dberr_t fseg_page_is_allocated(fil_space_t *space, unsigned page) { - bool is_free; - mtr_t mtr; - page_no_t dpage = xdes_calc_descriptor_page(space->zip_size(), - page); - - mtr.start(); - if (!space->is_owner()) { - mtr.s_lock_space(space); - } - - if (page >= space->free_limit || page >= space->size_in_header) { - is_free = true; - } else if (const xdes_t* descr = xdes_get_descriptor_const( - space, dpage, page, &mtr)) { - is_free = xdes_is_free(descr, page % FSP_EXTENT_SIZE); - } else { - is_free = true; - } - mtr.commit(); + mtr_t mtr; + uint32_t dpage= xdes_calc_descriptor_page(space->zip_size(), page); + const unsigned zip_size= space->zip_size(); + dberr_t err= DB_SUCCESS; + + mtr.start(); + if (!space->is_owner()) + mtr.s_lock_space(space); + + if (page >= space->free_limit || page >= space->size_in_header); + else if (const buf_block_t *b= + buf_page_get_gen(page_id_t(space->id, dpage), space->zip_size(), + RW_S_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, + &mtr, &err)) + { + if (!dpage && + (space->free_limit != + mach_read_from_4(FSP_FREE_LIMIT + FSP_HEADER_OFFSET + + b->page.frame) || + space->size_in_header != + mach_read_from_4(FSP_SIZE + FSP_HEADER_OFFSET + b->page.frame))) + err= DB_CORRUPTION; + else + err= xdes_is_free(b->page.frame + XDES_ARR_OFFSET + XDES_SIZE + * xdes_calc_descriptor_index(zip_size, page), + page & (FSP_EXTENT_SIZE - 1)) + ? DB_SUCCESS + : DB_SUCCESS_LOCKED_REC; + } - return(is_free); + mtr.commit(); + return err; } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Free an extent of a segment to the space free list. 
@param[in,out] seg_inode segment inode @param[in,out] space tablespace @param[in] page page number in the extent -@param[in,out] mtr mini-transaction */ -MY_ATTRIBUTE((nonnull)) +@param[in,out] mtr mini-transaction +@return error code */ static -void +dberr_t fseg_free_extent( fseg_inode_t* seg_inode, buf_block_t* iblock, @@ -2704,17 +2689,19 @@ fseg_free_extent( ) { buf_block_t* xdes; - xdes_t* descr = xdes_get_descriptor(space, page, mtr, &xdes); + dberr_t err; + xdes_t* descr = xdes_get_descriptor(space, page, mtr, &err, &xdes); if (!descr) { - ut_ad(space->is_stopping()); - return; + return err; } - ut_a(xdes_get_state(descr) == XDES_FSEG); - ut_a(!memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8)); - ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); + if (UNIV_UNLIKELY(xdes_get_state(descr) != XDES_FSEG + || memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8) + || memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + + seg_inode, 4))) { + return DB_CORRUPTION; + } ut_d(space->modify_check(*mtr)); const uint32_t first_page_in_extent = page - (page % FSP_EXTENT_SIZE); @@ -2737,31 +2724,47 @@ fseg_free_extent( } #endif /* BTR_CUR_HASH_ADAPT */ + uint16_t lst; + if (xdes_is_full(descr)) { - flst_remove(iblock, static_cast(FSEG_FULL + ioffset), - xdes, xoffset, mtr); + lst = static_cast(FSEG_FULL + ioffset); +remove: + err = flst_remove(iblock, lst, xdes, xoffset, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } } else if (!xdes_get_n_used(descr)) { - flst_remove(iblock, static_cast(FSEG_FREE + ioffset), - xdes, xoffset, mtr); + lst = static_cast(FSEG_FREE + ioffset); + goto remove; } else { - flst_remove(iblock, static_cast(FSEG_NOT_FULL - + ioffset), - xdes, xoffset, mtr); + err = flst_remove( + iblock, static_cast(FSEG_NOT_FULL + ioffset), + xdes, xoffset, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } uint32_t not_full_n_used = mach_read_from_4( FSEG_NOT_FULL_N_USED + seg_inode); uint32_t descr_n_used = 
xdes_get_n_used(descr); - ut_a(not_full_n_used >= descr_n_used); + if (not_full_n_used < descr_n_used) { + return DB_CORRUPTION; + } mtr->write<4>(*iblock, seg_inode + FSEG_NOT_FULL_N_USED, not_full_n_used - descr_n_used); } - fsp_free_extent(space, page, mtr); + err = fsp_free_extent(space, page, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } for (uint32_t i = 0; i < FSP_EXTENT_SIZE; i++) { if (!xdes_is_free(descr, i)) { buf_page_free(space, first_page_in_extent + i, mtr); } } + + return DB_SUCCESS; } /** Frees part of a segment. This function can be used to free @@ -2786,8 +2789,6 @@ fseg_free_step( ulint n; fseg_inode_t* inode; - DBUG_ENTER("fseg_free_step"); - const uint32_t space_id = page_get_space_id(page_align(header)); const uint32_t header_page = page_get_page_no(page_align(header)); @@ -2795,45 +2796,44 @@ fseg_free_step( xdes_t* descr = xdes_get_descriptor(space, header_page, mtr); if (!descr) { - ut_ad(space->is_stopping()); - DBUG_RETURN(true); + return true; } /* Check that the header resides on a page which has not been freed yet */ - ut_a(!xdes_is_free(descr, header_page % FSP_EXTENT_SIZE)); + if (UNIV_UNLIKELY(xdes_is_free(descr, + header_page & (FSP_EXTENT_SIZE - 1)))) { + /* Some corruption was detected: stop the freeing + in order to prevent a crash. 
*/ + return true; + } buf_block_t* iblock; const ulint zip_size = space->zip_size(); inode = fseg_inode_try_get(header, space_id, zip_size, mtr, &iblock); - if (space->is_stopping()) { - DBUG_RETURN(true); - } - - if (inode == NULL) { - ib::warn() << "Double free of inode from " - << page_id_t(space_id, header_page); - DBUG_RETURN(true); + if (!inode || space->is_stopping()) { + return true; } if (!space->full_crc32()) { fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr); } - descr = fseg_get_first_extent(inode, space, mtr); - if (space->is_stopping()) { - DBUG_RETURN(true); - } + dberr_t err; + descr = fseg_get_first_extent(inode, space, mtr, &err); - if (descr != NULL) { + if (descr) { /* Free the extent held by the segment */ - fseg_free_extent(inode, iblock, space, xdes_get_offset(descr), - mtr + return fseg_free_extent(inode, iblock, space, + xdes_get_offset(descr), mtr #ifdef BTR_CUR_HASH_ADAPT - , ahi + , ahi #endif /* BTR_CUR_HASH_ADAPT */ - ); - DBUG_RETURN(false); + ) != DB_SUCCESS; + } + + if (err != DB_SUCCESS || space->is_stopping()) { + return true; } /* Free a frag page */ @@ -2842,17 +2842,18 @@ fseg_free_step( if (n == ULINT_UNDEFINED) { /* Freeing completed: free the segment inode */ fsp_free_seg_inode(space, inode, iblock, mtr); - - DBUG_RETURN(true); + return true; } page_no_t page_no = fseg_get_nth_frag_page_no(inode, n); - fseg_free_page_low(inode, iblock, space, page_no, mtr + if (fseg_free_page_low(inode, iblock, space, page_no, mtr #ifdef BTR_CUR_HASH_ADAPT - , ahi + , ahi #endif /* BTR_CUR_HASH_ADAPT */ - ); + ) != DB_SUCCESS) { + return true; + } buf_page_free(space, page_no, mtr); @@ -2862,10 +2863,10 @@ fseg_free_step( /* Freeing completed: free the segment inode */ fsp_free_seg_inode(space, inode, iblock, mtr); - DBUG_RETURN(true); + return true; } - DBUG_RETURN(false); + return false; } bool @@ -2902,22 +2903,27 @@ fseg_free_step_not_header( fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr); } - if (xdes_t* descr = 
fseg_get_first_extent(inode, space, mtr)) { + dberr_t err; + if (xdes_t* descr = fseg_get_first_extent(inode, space, mtr, &err)) { /* Free the extent held by the segment */ - fseg_free_extent(inode, iblock, space, xdes_get_offset(descr), - mtr + return fseg_free_extent(inode, iblock, space, + xdes_get_offset(descr), + mtr #ifdef BTR_CUR_HASH_ADAPT - , ahi + , ahi #endif /* BTR_CUR_HASH_ADAPT */ - ); - return false; + ) != DB_SUCCESS; + } else if (err != DB_SUCCESS) { + return true; } /* Free a frag page */ ulint n = fseg_find_last_used_frag_page_slot(inode); - ut_a(n != ULINT_UNDEFINED); + if (UNIV_UNLIKELY(n == ULINT_UNDEFINED)) { + return true; + } uint32_t page_no = fseg_get_nth_frag_page_no(inode, n); @@ -2925,11 +2931,13 @@ fseg_free_step_not_header( return true; } - fseg_free_page_low(inode, iblock, space, page_no, mtr + if (fseg_free_page_low(inode, iblock, space, page_no, mtr #ifdef BTR_CUR_HASH_ADAPT - , ahi + , ahi #endif /* BTR_CUR_HASH_ADAPT */ - ); + ) != DB_SUCCESS) { + return true; + } buf_page_free(space, page_no, mtr); return false; } @@ -2940,36 +2948,43 @@ FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE. 
@param[in] inode segment inode @param[in] space tablespace @param[in,out] mtr mini-transaction -@return the first extent descriptor, or NULL if none */ +@return the first extent descriptor +@retval nullptr if none, or on corruption */ MY_ATTRIBUTE((nonnull, warn_unused_result)) static xdes_t* fseg_get_first_extent( fseg_inode_t* inode, const fil_space_t* space, - mtr_t* mtr) + mtr_t* mtr, + dberr_t* err) { - fil_addr_t first; + if (UNIV_UNLIKELY(space->id != page_get_space_id(page_align(inode)) || + memcmp(inode + FSEG_MAGIC_N, FSEG_MAGIC_N_BYTES, 4))) + { + corrupted: + *err= DB_CORRUPTION; + return nullptr; + } - ut_ad(space->id == page_get_space_id(page_align(inode))); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); + fil_addr_t first; - if (flst_get_len(inode + FSEG_FULL) > 0) { - first = flst_get_first(inode + FSEG_FULL); - } else if (flst_get_len(inode + FSEG_NOT_FULL) > 0) { - first = flst_get_first(inode + FSEG_NOT_FULL); - } else if (flst_get_len(inode + FSEG_FREE) > 0) { - first = flst_get_first(inode + FSEG_FREE); - } else { - return nullptr; - } + if (flst_get_len(inode + FSEG_FULL)) + first= flst_get_first(inode + FSEG_FULL); + else if (flst_get_len(inode + FSEG_NOT_FULL)) + first= flst_get_first(inode + FSEG_NOT_FULL); + else if (flst_get_len(inode + FSEG_FREE)) + first= flst_get_first(inode + FSEG_FREE); + else + { + *err= DB_SUCCESS; + return nullptr; + } - if (first.page == FIL_NULL) { - ut_ad("corruption" == 0); - return nullptr; - } + if (first.page == FIL_NULL) + goto corrupted; - return xdes_lst_get_descriptor(*space, first, mtr); + return xdes_lst_get_descriptor(*space, first, mtr, nullptr, err); } #ifdef UNIV_BTR_PRINT @@ -3009,7 +3024,7 @@ static void fseg_print_low(const fseg_inode_t *inode) << " free extents " << n_free << ";" << " not full extents " << n_not_full << ": pages " << n_used; - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); + ut_ad(!memcmp(FSEG_MAGIC_N_BYTES, FSEG_MAGIC_N + inode, 
4)); } /*******************************************************************//** @@ -3020,15 +3035,12 @@ fseg_print( fseg_header_t* header, /*!< in: segment header */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - fseg_inode_t* inode; - ulint space_id; - - space_id = page_get_space_id(page_align(header)); - const fil_space_t* space = mtr_x_lock_space(space_id, mtr); - - inode = fseg_inode_get(header, space_id, space->zip_size(), mtr); - - fseg_print_low(inode); + const fil_space_t *space= + mtr->x_lock_space(page_get_space_id(page_align(header))); + buf_block_t *block; + if (fseg_inode_t *inode= + fseg_inode_try_get(header, space->id, space->zip_size(), mtr, &block)) + fseg_print_low(inode); } #endif /* UNIV_BTR_PRINT */ diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index fd8826af2c5..0a82dc39303 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -216,10 +216,9 @@ fts_update_max_cache_size( /*********************************************************************//** This function fetches the document just inserted right before we commit the transaction, and tokenize the inserted text data -and insert into FTS auxiliary table and its cache. -@return TRUE if successful */ +and insert into FTS auxiliary table and its cache. */ static -ulint +void fts_add_doc_by_id( /*==============*/ fts_trx_table_t*ftt, /*!< in: FTS trx table */ @@ -1648,24 +1647,26 @@ on the given table. 
static dberr_t fts_drop_common_tables(trx_t *trx, fts_table_t *fts_table, bool rename) { - dberr_t error = DB_SUCCESS; - - for (ulint i = 0; fts_common_tables[i] != NULL; ++i) { - dberr_t err; - char table_name[MAX_FULL_NAME_LEN]; + dberr_t error= DB_SUCCESS; - fts_table->suffix = fts_common_tables[i]; - fts_get_table_name(fts_table, table_name, true); + for (ulint i= 0; fts_common_tables[i]; ++i) + { + char table_name[MAX_FULL_NAME_LEN]; - err = fts_drop_table(trx, table_name, rename); + fts_table->suffix= fts_common_tables[i]; + fts_get_table_name(fts_table, table_name, true); - /* We only return the status of the last error. */ - if (err != DB_SUCCESS && err != DB_FAIL) { - error = err; - } - } + if (dberr_t err= fts_drop_table(trx, table_name, rename)) + { + if (trx->state != TRX_STATE_ACTIVE) + return err; + /* We only return the status of the last error. */ + if (err != DB_FAIL) + error= err; + } + } - return(error); + return error; } /****************************************************************//** @@ -3299,10 +3300,9 @@ fts_add_doc_from_tuple( /*********************************************************************//** This function fetches the document inserted during the committing transaction, and tokenize the inserted text data and insert into -FTS auxiliary table and its cache. -@return TRUE if successful */ +FTS auxiliary table and its cache. 
*/ static -ulint +void fts_add_doc_by_id( /*==============*/ fts_trx_table_t*ftt, /*!< in: FTS trx table */ @@ -3358,12 +3358,11 @@ fts_add_doc_by_id( mach_write_to_8((byte*) &temp_doc_id, doc_id); dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id)); - btr_pcur_open_with_no_init( - fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF, - &pcur, 0, &mtr); - /* If we have a match, add the data to doc structure */ - if (btr_pcur_get_low_match(&pcur) == 1) { + if (btr_pcur_open_with_no_init(fts_id_index, tuple, PAGE_CUR_LE, + BTR_SEARCH_LEAF, &pcur, 0, &mtr) + == DB_SUCCESS + && btr_pcur_get_low_match(&pcur) == 1) { const rec_t* rec; btr_pcur_t* doc_pcur; const rec_t* clust_rec; @@ -3396,13 +3395,16 @@ fts_add_doc_by_id( row_build_row_ref_in_tuple( clust_ref, rec, fts_id_index, NULL); - btr_pcur_open_with_no_init( - clust_index, clust_ref, PAGE_CUR_LE, - BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr); + if (btr_pcur_open_with_no_init(clust_index, clust_ref, + PAGE_CUR_LE, + BTR_SEARCH_LEAF, + &clust_pcur, 0, &mtr) + != DB_SUCCESS) { + goto func_exit; + } doc_pcur = &clust_pcur; clust_rec = btr_pcur_get_rec(&clust_pcur); - } offsets = rec_get_offsets(clust_rec, clust_index, NULL, @@ -3483,10 +3485,12 @@ fts_add_doc_by_id( mtr_start(&mtr); if (i < num_idx - 1) { - ut_d(auto status=) - doc_pcur->restore_position( - BTR_SEARCH_LEAF, &mtr); - ut_ad(status == btr_pcur_t::SAME_ALL); + if (doc_pcur->restore_position( + BTR_SEARCH_LEAF, &mtr) + != btr_pcur_t::SAME_ALL) { + ut_ad("invalid state" == 0); + i = num_idx - 1; + } } } @@ -3503,7 +3507,6 @@ func_exit: ut_free(pcur.old_rec_buf); mem_heap_free(heap); - return(TRUE); } @@ -3559,10 +3562,9 @@ fts_get_max_doc_id( mtr_start(&mtr); /* fetch the largest indexes value */ - btr_pcur_open_at_index_side( - false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); - - if (!page_is_empty(btr_pcur_get_page(&pcur))) { + if (btr_pcur_open_at_index_side(false, index, BTR_SEARCH_LEAF, &pcur, + true, 0, &mtr) != DB_SUCCESS) { + } else if 
(!page_is_empty(btr_pcur_get_page(&pcur))) { const rec_t* rec = NULL; do { @@ -4291,16 +4293,11 @@ FTS auxiliary INDEX table and clear the cache at the end. @return DB_SUCCESS on success, error code on failure. */ dberr_t fts_sync_table(dict_table_t* table, bool wait) { - dberr_t err = DB_SUCCESS; - - ut_ad(table->fts); + ut_ad(table->fts); - if (table->space && table->fts->cache - && !dict_table_is_corrupted(table)) { - err = fts_sync(table->fts->cache->sync, !wait, wait); - } - - return(err); + return table->space && !table->corrupted && table->fts->cache + ? fts_sync(table->fts->cache->sync, !wait, wait) + : DB_SUCCESS; } /** Check if a fts token is a stopword or less than fts_min_token_size diff --git a/storage/innobase/fut/fut0lst.cc b/storage/innobase/fut/fut0lst.cc index 98f37be002a..a52027f28bc 100644 --- a/storage/innobase/fut/fut0lst.cc +++ b/storage/innobase/fut/fut0lst.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2019, 2021, MariaDB Corporation. +Copyright (c) 2019, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -38,9 +38,8 @@ Created 11/28/1995 Heikki Tuuri static void flst_write_addr(const buf_block_t& block, byte *faddr, uint32_t page, uint16_t boffset, mtr_t* mtr) { - ut_ad(mtr->memo_contains_page_flagged(faddr, - MTR_MEMO_PAGE_X_FIX - | MTR_MEMO_PAGE_SX_FIX)); + ut_ad(mtr->memo_contains_page_flagged(faddr, MTR_MEMO_PAGE_X_FIX | + MTR_MEMO_PAGE_SX_FIX)); ut_a(page == FIL_NULL || boffset >= FIL_PAGE_DATA); ut_a(ut_align_offset(faddr, srv_page_size) >= FIL_PAGE_DATA); @@ -69,7 +68,7 @@ static void flst_write_addr(const buf_block_t& block, byte *faddr, /** Write 2 null file addresses. 
@param[in] b file page -@param[in,out] addr file address to be zeroed out +@param[in,out] addr file address to be zeroed out @param[in,out] mtr mini-transaction */ static void flst_zero_both(const buf_block_t& b, byte *addr, mtr_t *mtr) { @@ -120,10 +119,11 @@ static void flst_add_to_empty(buf_block_t *base, uint16_t boffset, @param[in] coffset byte offset of the insert position @param[in,out] add block to be added @param[in] aoffset byte offset of the block to be added -@param[in,outr] mtr mini-transaction */ -static void flst_insert_after(buf_block_t *base, uint16_t boffset, - buf_block_t *cur, uint16_t coffset, - buf_block_t *add, uint16_t aoffset, mtr_t *mtr) +@param[in,out] mtr mini-transaction */ +static dberr_t flst_insert_after(buf_block_t *base, uint16_t boffset, + buf_block_t *cur, uint16_t coffset, + buf_block_t *add, uint16_t aoffset, + mtr_t *mtr) { ut_ad(base != cur || boffset != coffset); ut_ad(base != add || boffset != aoffset); @@ -145,23 +145,25 @@ static void flst_insert_after(buf_block_t *base, uint16_t boffset, flst_write_addr(*add, add->page.frame + aoffset + FLST_NEXT, next_addr.page, next_addr.boffset, mtr); + dberr_t err= DB_SUCCESS; + if (next_addr.page == FIL_NULL) flst_write_addr(*base, base->page.frame + boffset + FLST_LAST, add->page.id().page_no(), aoffset, mtr); - else - { - buf_block_t *block; - if (flst_node_t *next= fut_get_ptr(add->page.id().space(), add->zip_size(), - next_addr, RW_SX_LATCH, mtr, &block)) - flst_write_addr(*block, next + FLST_PREV, - add->page.id().page_no(), aoffset, mtr); - } + else if (buf_block_t *block= + buf_page_get_gen(page_id_t{add->page.id().space(), next_addr.page}, + add->zip_size(), RW_SX_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, mtr, &err)) + flst_write_addr(*block, block->page.frame + + next_addr.boffset + FLST_PREV, + add->page.id().page_no(), aoffset, mtr); flst_write_addr(*cur, cur->page.frame + coffset + FLST_NEXT, add->page.id().page_no(), aoffset, mtr); byte *len= &base->page.frame[boffset + 
FLST_LEN]; mtr->write<4>(*base, len, mach_read_from_4(len) + 1); + return err; } /** Insert a node before another one. @@ -171,10 +173,12 @@ static void flst_insert_after(buf_block_t *base, uint16_t boffset, @param[in] coffset byte offset of the insert position @param[in,out] add block to be added @param[in] aoffset byte offset of the block to be added -@param[in,outr] mtr mini-transaction */ -static void flst_insert_before(buf_block_t *base, uint16_t boffset, - buf_block_t *cur, uint16_t coffset, - buf_block_t *add, uint16_t aoffset, mtr_t *mtr) +@param[in,out] mtr mini-transaction +@return error code */ +static dberr_t flst_insert_before(buf_block_t *base, uint16_t boffset, + buf_block_t *cur, uint16_t coffset, + buf_block_t *add, uint16_t aoffset, + mtr_t *mtr) { ut_ad(base != cur || boffset != coffset); ut_ad(base != add || boffset != aoffset); @@ -194,25 +198,27 @@ static void flst_insert_before(buf_block_t *base, uint16_t boffset, flst_write_addr(*add, add->page.frame + aoffset + FLST_PREV, prev_addr.page, prev_addr.boffset, mtr); flst_write_addr(*add, add->page.frame + aoffset + FLST_NEXT, - cur->page.id().page_no(), coffset, mtr); + cur->page.id().page_no(), coffset, mtr); + + dberr_t err= DB_SUCCESS; if (prev_addr.page == FIL_NULL) flst_write_addr(*base, base->page.frame + boffset + FLST_FIRST, add->page.id().page_no(), aoffset, mtr); - else - { - buf_block_t *block; - if (flst_node_t *prev= fut_get_ptr(add->page.id().space(), add->zip_size(), - prev_addr, RW_SX_LATCH, mtr, &block)) - flst_write_addr(*block, prev + FLST_NEXT, - add->page.id().page_no(), aoffset, mtr); - } + else if (buf_block_t *block= + buf_page_get_gen(page_id_t{add->page.id().space(), prev_addr.page}, + add->zip_size(), RW_SX_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, mtr, &err)) + flst_write_addr(*block, block->page.frame + + prev_addr.boffset + FLST_NEXT, + add->page.id().page_no(), aoffset, mtr); flst_write_addr(*cur, cur->page.frame + coffset + FLST_PREV, add->page.id().page_no(), 
aoffset, mtr); byte *len= &base->page.frame[boffset + FLST_LEN]; mtr->write<4>(*base, len, mach_read_from_4(len) + 1); + return err; } /** Initialize a list base node. @@ -234,8 +240,8 @@ void flst_init(const buf_block_t& block, byte *base, mtr_t *mtr) @param[in,out] add block to be added @param[in] aoffset byte offset of the node to be added @param[in,outr] mtr mini-transaction */ -void flst_add_last(buf_block_t *base, uint16_t boffset, - buf_block_t *add, uint16_t aoffset, mtr_t *mtr) +dberr_t flst_add_last(buf_block_t *base, uint16_t boffset, + buf_block_t *add, uint16_t aoffset, mtr_t *mtr) { ut_ad(base != add || boffset != aoffset); ut_ad(boffset < base->physical_size()); @@ -244,21 +250,23 @@ void flst_add_last(buf_block_t *base, uint16_t boffset, MTR_MEMO_PAGE_SX_FIX)); ut_ad(mtr->memo_contains_flagged(add, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - if (!flst_get_len(base->page.frame + boffset)) + { flst_add_to_empty(base, boffset, add, aoffset, mtr); + return DB_SUCCESS; + } else { fil_addr_t addr= flst_get_last(base->page.frame + boffset); buf_block_t *cur= add; - const flst_node_t *c= addr.page == add->page.id().page_no() - ? 
add->page.frame + addr.boffset - : fut_get_ptr(add->page.id().space(), add->zip_size(), addr, - RW_SX_LATCH, mtr, &cur); - if (c) - flst_insert_after(base, boffset, cur, - static_cast(c - cur->page.frame), - add, aoffset, mtr); + dberr_t err; + if (addr.page != add->page.id().page_no() && + !(cur= buf_page_get_gen(page_id_t{add->page.id().space(), addr.page}, + add->zip_size(), RW_SX_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, mtr, &err))) + return err; + return flst_insert_after(base, boffset, cur, addr.boffset, + add, aoffset, mtr); } } @@ -267,9 +275,10 @@ void flst_add_last(buf_block_t *base, uint16_t boffset, @param[in] boffset byte offset of the base node @param[in,out] add block to be added @param[in] aoffset byte offset of the node to be added -@param[in,outr] mtr mini-transaction */ -void flst_add_first(buf_block_t *base, uint16_t boffset, - buf_block_t *add, uint16_t aoffset, mtr_t *mtr) +@param[in,out] mtr mini-transaction +@return error code */ +dberr_t flst_add_first(buf_block_t *base, uint16_t boffset, + buf_block_t *add, uint16_t aoffset, mtr_t *mtr) { ut_ad(base != add || boffset != aoffset); ut_ad(boffset < base->physical_size()); @@ -280,19 +289,22 @@ void flst_add_first(buf_block_t *base, uint16_t boffset, MTR_MEMO_PAGE_SX_FIX)); if (!flst_get_len(base->page.frame + boffset)) + { flst_add_to_empty(base, boffset, add, aoffset, mtr); + return DB_SUCCESS; + } else { fil_addr_t addr= flst_get_first(base->page.frame + boffset); buf_block_t *cur= add; - const flst_node_t *c= addr.page == add->page.id().page_no() - ? 
add->page.frame + addr.boffset - : fut_get_ptr(add->page.id().space(), add->zip_size(), addr, - RW_SX_LATCH, mtr, &cur); - if (c) - flst_insert_before(base, boffset, cur, - static_cast(c - cur->page.frame), - add, aoffset, mtr); + dberr_t err; + if (addr.page != add->page.id().page_no() && + !(cur= buf_page_get_gen(page_id_t{add->page.id().space(), addr.page}, + add->zip_size(), RW_SX_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, mtr, &err))) + return err; + return flst_insert_before(base, boffset, cur, addr.boffset, + add, aoffset, mtr); } } @@ -301,9 +313,10 @@ void flst_add_first(buf_block_t *base, uint16_t boffset, @param[in] boffset byte offset of the base node @param[in,out] cur block to be removed @param[in] coffset byte offset of the current record to be removed -@param[in,outr] mtr mini-transaction */ -void flst_remove(buf_block_t *base, uint16_t boffset, - buf_block_t *cur, uint16_t coffset, mtr_t *mtr) +@param[in,out] mtr mini-transaction +@return error code */ +dberr_t flst_remove(buf_block_t *base, uint16_t boffset, + buf_block_t *cur, uint16_t coffset, mtr_t *mtr) { ut_ad(boffset < base->physical_size()); ut_ad(coffset < cur->physical_size()); @@ -314,18 +327,19 @@ void flst_remove(buf_block_t *base, uint16_t boffset, const fil_addr_t prev_addr= flst_get_prev_addr(cur->page.frame + coffset); const fil_addr_t next_addr= flst_get_next_addr(cur->page.frame + coffset); + dberr_t err= DB_SUCCESS; if (prev_addr.page == FIL_NULL) flst_write_addr(*base, base->page.frame + boffset + FLST_FIRST, next_addr.page, next_addr.boffset, mtr); else { - buf_block_t *block= cur; - if (flst_node_t *prev= prev_addr.page == cur->page.id().page_no() - ? 
cur->page.frame + prev_addr.boffset - : fut_get_ptr(cur->page.id().space(), cur->zip_size(), prev_addr, - RW_SX_LATCH, mtr, &block)) - flst_write_addr(*block, prev + FLST_NEXT, + buf_block_t *b= cur; + if (prev_addr.page == b->page.id().page_no() || + (b= buf_page_get_gen(page_id_t(b->page.id().space(), prev_addr.page), + b->zip_size(), RW_SX_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, mtr, &err))) + flst_write_addr(*b, b->page.frame + prev_addr.boffset + FLST_NEXT, next_addr.page, next_addr.boffset, mtr); } @@ -334,18 +348,23 @@ void flst_remove(buf_block_t *base, uint16_t boffset, prev_addr.page, prev_addr.boffset, mtr); else { - buf_block_t *block= cur; - if (flst_node_t *next= next_addr.page == cur->page.id().page_no() - ? cur->page.frame + next_addr.boffset - : fut_get_ptr(cur->page.id().space(), cur->zip_size(), next_addr, - RW_SX_LATCH, mtr, &block)) - flst_write_addr(*block, next + FLST_PREV, + dberr_t err2; + if (next_addr.page == cur->page.id().page_no() || + (cur= buf_page_get_gen(page_id_t(cur->page.id().space(), + next_addr.page), + cur->zip_size(), RW_SX_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, mtr, &err2))) + flst_write_addr(*cur, cur->page.frame + next_addr.boffset + FLST_PREV, prev_addr.page, prev_addr.boffset, mtr); + else if (err == DB_SUCCESS) + err= err2; } byte *len= &base->page.frame[boffset + FLST_LEN]; - ut_ad(mach_read_from_4(len) > 0); + if (UNIV_UNLIKELY(!mach_read_from_4(len))) + return DB_CORRUPTION; mtr->write<4>(*base, len, mach_read_from_4(len) - 1); + return err; } #ifdef UNIV_DEBUG @@ -369,11 +388,11 @@ void flst_validate(const buf_block_t *base, uint16_t boffset, mtr_t *mtr) for (uint32_t i= len; i--; ) { mtr2.start(); - const flst_node_t *node= fut_get_ptr(base->page.id().space(), - base->zip_size(), addr, - RW_SX_LATCH, &mtr2); - ut_ad(node); - addr= flst_get_next_addr(node); + const buf_block_t *b= + buf_page_get_gen(page_id_t(base->page.id().space(), addr.page), + base->zip_size(), RW_SX_LATCH, nullptr, BUF_GET, mtr); + 
ut_ad(b); + addr= flst_get_next_addr(b->page.frame + addr.boffset); mtr2.commit(); } @@ -384,11 +403,11 @@ void flst_validate(const buf_block_t *base, uint16_t boffset, mtr_t *mtr) for (uint32_t i= len; i--; ) { mtr2.start(); - const flst_node_t *node= fut_get_ptr(base->page.id().space(), - base->zip_size(), addr, - RW_SX_LATCH, &mtr2); - ut_ad(node); - addr= flst_get_prev_addr(node); + const buf_block_t *b= + buf_page_get_gen(page_id_t(base->page.id().space(), addr.page), + base->zip_size(), RW_SX_LATCH, nullptr, BUF_GET, mtr); + ut_ad(b); + addr= flst_get_prev_addr(b->page.frame + addr.boffset); mtr2.commit(); } diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc index f17c532414b..8a5e1dc3ac9 100644 --- a/storage/innobase/gis/gis0rtree.cc +++ b/storage/innobase/gis/gis0rtree.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2019, 2022, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -420,17 +420,14 @@ update_mbr: /* If optimistic insert fail, try reorganize the page and insert again. 
*/ if (err != DB_SUCCESS && ins_suc) { - btr_page_reorganize(btr_cur_get_page_cur(cursor), - index, mtr); - - err = btr_cur_optimistic_insert(flags, - cursor, - &insert_offsets, - &heap, - node_ptr, - &insert_rec, - &dummy_big_rec, - 0, NULL, mtr); + err = btr_page_reorganize(btr_cur_get_page_cur(cursor), + index, mtr); + if (err == DB_SUCCESS) { + err = btr_cur_optimistic_insert( + flags, cursor, &insert_offsets, &heap, + node_ptr, &insert_rec, &dummy_big_rec, + 0, NULL, mtr); + } /* Will do pessimistic insert */ if (err != DB_SUCCESS) { @@ -536,10 +533,11 @@ update_mbr: mem_heap_free(heap); } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /**************************************************************//** Update parent page's MBR and Predicate lock information during a split */ -static MY_ATTRIBUTE((nonnull)) -void +static +dberr_t rtr_adjust_upper_level( /*===================*/ btr_cur_t* sea_cur, /*!< in: search cursor */ @@ -659,23 +657,26 @@ rtr_adjust_upper_level( const uint32_t next_page_no = btr_page_get_next(block->page.frame); - if (next_page_no != FIL_NULL) { - buf_block_t* next_block = btr_block_get( - *index, next_page_no, RW_X_LATCH, false, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(next_block->page.frame) - == page_is_comp(block->page.frame)); - ut_a(btr_page_get_prev(next_block->page.frame) - == block->page.id().page_no()); -#endif /* UNIV_BTR_DEBUG */ - + if (next_page_no == FIL_NULL) { + } else if (buf_block_t* next_block = + btr_block_get(*index, next_page_no, RW_X_LATCH, + false, mtr, &err)) { + if (UNIV_UNLIKELY(memcmp_aligned<4>(next_block->page.frame + + FIL_PAGE_PREV, + block->page.frame + + FIL_PAGE_OFFSET, 4))) { + return DB_CORRUPTION; + } btr_page_set_prev(next_block, new_page_no, mtr); + } else { + return err; } btr_page_set_next(block, new_page_no, mtr); btr_page_set_prev(new_block, page_no, mtr); btr_page_set_next(new_block, next_page_no, mtr); + return DB_SUCCESS; } 
/*************************************************************//** @@ -686,9 +687,10 @@ if new_block is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(). -@return TRUE on success; FALSE on compression failure */ +@return error code +@retval DB_FAIL on ROW_FORMAT=COMPRESSED compression failure */ static -ibool +dberr_t rtr_split_page_move_rec_list( /*=========================*/ rtr_split_node_t* node_array, /*!< in: split node array. */ @@ -802,21 +804,19 @@ rtr_split_page_move_rec_list( if (!page_zip_compress(new_block, index, page_zip_level, mtr)) { - ulint ret_pos; - /* Before trying to reorganize the page, store the number of preceding records on the page. */ - ret_pos = page_rec_get_n_recs_before(ret); + ulint ret_pos = page_rec_get_n_recs_before(ret); /* Before copying, "ret" was the predecessor of the predefined supremum record. If it was the predefined infimum record, then it would still be the infimum, and we would have ret_pos == 0. */ - if (UNIV_UNLIKELY - (!page_zip_reorganize(new_block, index, - page_zip_level, mtr))) { - + switch (dberr_t err = + page_zip_reorganize(new_block, index, + page_zip_level, mtr)) { + case DB_FAIL: if (UNIV_UNLIKELY (!page_zip_decompress(new_page_zip, new_page, FALSE))) { @@ -825,12 +825,12 @@ rtr_split_page_move_rec_list( #ifdef UNIV_GIS_DEBUG ut_ad(page_validate(new_page, index)); #endif - - return(false); + /* fall through */ + default: + return err; + case DB_SUCCESS: + ret = page_rec_get_nth(new_page, ret_pos); } - - /* The page was reorganized: Seek to ret_pos. 
*/ - ret = page_rec_get_nth(new_page, ret_pos); } } @@ -852,7 +852,7 @@ rtr_split_page_move_rec_list( } } - return(true); + return DB_SUCCESS; } /*************************************************************//** @@ -874,7 +874,8 @@ rtr_page_split_and_insert( mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ const dtuple_t* tuple, /*!< in: tuple to insert */ ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in: mtr */ + dberr_t* err) /*!< out: error code */ { buf_block_t* block; page_t* page; @@ -966,9 +967,9 @@ func_start: /* Allocate a new page to the index */ const uint16_t page_level = btr_page_get_level(page); new_block = btr_page_alloc(cursor->index, page_id.page_no() + 1, - FSP_UP, page_level, mtr, mtr); - if (!new_block) { - return NULL; + FSP_UP, page_level, mtr, mtr, err); + if (UNIV_UNLIKELY(!new_block)) { + return nullptr; } new_page_zip = buf_block_get_page_zip(new_block); @@ -995,10 +996,15 @@ func_start: #ifdef UNIV_ZIP_COPY || page_zip #endif - || !rtr_split_page_move_rec_list(rtr_split_node_array, - first_rec_group, - new_block, block, first_rec, - cursor->index, *heap, mtr)) { + || (*err = rtr_split_page_move_rec_list(rtr_split_node_array, + first_rec_group, + new_block, block, + first_rec, cursor->index, + *heap, mtr))) { + if (*err != DB_FAIL) { + return nullptr; + } + ulint n = 0; rec_t* rec; ulint moved = 0; @@ -1155,13 +1161,19 @@ after_insert: lock_prdt_update_split(new_block, &prdt, &new_prdt, page_id); /* Adjust the upper level. */ - rtr_adjust_upper_level(cursor, flags, block, new_block, - &mbr, &new_mbr, mtr); + *err = rtr_adjust_upper_level(cursor, flags, block, new_block, + &mbr, &new_mbr, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return nullptr; + } /* Save the new ssn to the root page, since we need to reinit the first ssn value from it after restart server. 
*/ - root_block = btr_root_block_get(cursor->index, RW_SX_LATCH, mtr); + root_block = btr_root_block_get(cursor->index, RW_SX_LATCH, mtr, err); + if (UNIV_UNLIKELY(!root_block)) { + return nullptr; + } page_zip = buf_block_get_page_zip(root_block); page_set_ssn_id(root_block, page_zip, next_ssn, mtr); @@ -1313,7 +1325,6 @@ rtr_page_copy_rec_list_end_no_locks( page_cur_move_to_next(&cur1); } - btr_assert_not_corrupted(new_block, index); ut_a(page_is_comp(new_page) == page_rec_is_comp(rec)); ut_a(mach_read_from_2(new_page + srv_page_size - 10) == (ulint) (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); @@ -1834,7 +1845,8 @@ rtr_estimate_n_rows_in_range( index->set_modified(mtr); mtr_s_lock_index(index, &mtr); - buf_block_t* block = btr_root_block_get(index, RW_S_LATCH, &mtr); + dberr_t err; + buf_block_t* block = btr_root_block_get(index, RW_S_LATCH, &mtr, &err); if (!block) { err_exit: mtr.commit(); diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc index 86c9d06be21..9eb56f1d46c 100644 --- a/storage/innobase/gis/gis0sea.cc +++ b/storage/innobase/gis/gis0sea.cc @@ -260,8 +260,9 @@ rtr_pcur_getnext_from_path( next_rec.page_no), zip_size, rw_latch, NULL, BUF_GET, mtr); - if (block == NULL) { - continue; + if (!block) { + found = false; + break; } rtr_info->tree_blocks[tree_idx] = block; @@ -497,59 +498,48 @@ rtr_pcur_move_to_next( false, mtr)); } +#ifdef UNIV_DEBUG /*************************************************************//** Check if the cursor holds record pointing to the specified child page @return true if it is (pointing to the child page) false otherwise */ -static -bool -rtr_compare_cursor_rec( -/*===================*/ - dict_index_t* index, /*!< in: index */ - btr_cur_t* cursor, /*!< in: Cursor to check */ - ulint page_no, /*!< in: desired child page number */ - mem_heap_t** heap) /*!< in: memory heap */ +static void rtr_compare_cursor_rec(const rec_t *rec, dict_index_t *index, + ulint page_no) { - const rec_t* 
rec; - rec_offs* offsets; - - rec = btr_cur_get_rec(cursor); - - offsets = rec_get_offsets(rec, index, NULL, 0, ULINT_UNDEFINED, heap); - - return(btr_node_ptr_get_child_page_no(rec, offsets) == page_no); + if (!rec) + return; + mem_heap_t *heap= nullptr; + rec_offs *offsets= rec_get_offsets(rec, index, nullptr, 0, + ULINT_UNDEFINED, &heap); + ut_ad(btr_node_ptr_get_child_page_no(rec, offsets) == page_no); + mem_heap_free(heap); } +#endif /**************************************************************//** Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. Mainly called by row_search_index_entry() */ -void +bool rtr_pcur_open( dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_RTREE_LOCATE, ... */ ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ { - btr_cur_t* btr_cursor; - ulint n_fields; - ulint low_match; - rec_t* rec; - static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), ""); ut_ad(latch_mode & BTR_MODIFY_LEAF); - ut_ad(mode == PAGE_CUR_RTREE_LOCATE); /* Initialize the cursor */ btr_pcur_init(cursor); cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - cursor->search_mode = mode; + cursor->search_mode = PAGE_CUR_RTREE_LOCATE; + cursor->trx_if_known = NULL; /* Search with the tree cursor */ - btr_cursor = btr_pcur_get_btr_cur(cursor); + btr_cur_t* btr_cursor = btr_pcur_get_btr_cur(cursor); btr_cursor->rtr_info = rtr_create_rtr_info(false, false, btr_cursor, index); @@ -565,21 +555,20 @@ rtr_pcur_open( mtr->lock_upgrade(index->lock); } - btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, - btr_cursor, 0, mtr); - cursor->pos_state = BTR_PCUR_IS_POSITIONED; - - cursor->trx_if_known = NULL; - - low_match = btr_pcur_get_low_match(cursor); + if (btr_cur_search_to_nth_level(index, 0, tuple, 
PAGE_CUR_RTREE_LOCATE, + latch_mode, + btr_cursor, 0, mtr) != DB_SUCCESS) { + return true; + } - rec = btr_pcur_get_rec(cursor); + cursor->pos_state = BTR_PCUR_IS_POSITIONED; - n_fields = dtuple_get_n_fields(tuple); + const rec_t* rec = btr_pcur_get_rec(cursor); const bool d= rec_get_deleted_flag(rec, index->table->not_redundant()); - if (page_rec_is_infimum(rec) || low_match != n_fields + if (page_rec_is_infimum(rec) + || btr_pcur_get_low_match(cursor) != dtuple_get_n_fields(tuple) || (d && latch_mode & (BTR_RTREE_DELETE_MARK | BTR_RTREE_UNDO_INS))) { @@ -602,27 +591,31 @@ rtr_pcur_open( } } - bool ret = rtr_pcur_getnext_from_path( - tuple, mode, btr_cursor, 0, latch_mode, - latch_mode & (8 | BTR_ALREADY_S_LATCHED), - mtr); - - if (ret) { - low_match = btr_pcur_get_low_match(cursor); - ut_ad(low_match == n_fields); + if (!rtr_pcur_getnext_from_path(tuple, PAGE_CUR_RTREE_LOCATE, + btr_cursor, 0, latch_mode, + latch_mode + & (8 | BTR_ALREADY_S_LATCHED), + mtr)) { + return true; } + + ut_ad(btr_pcur_get_low_match(cursor) + == dtuple_get_n_fields(tuple)); } + + return false; } /* Get the rtree page father. 
@param[in] index rtree index @param[in] block child page in the index -@param[in] mtr mtr +@param[in,out] mtr mtr @param[in] sea_cur search cursor, contains information about parent nodes in search -@param[in] cursor cursor on node pointer record, - its page x-latched */ -void +@param[out] cursor cursor on node pointer record, + its page x-latched +@return whether the cursor was successfully positioned */ +bool rtr_page_get_father( dict_index_t* index, buf_block_t* block, @@ -630,29 +623,18 @@ rtr_page_get_father( btr_cur_t* sea_cur, btr_cur_t* cursor) { - mem_heap_t* heap = mem_heap_create(100); -#ifdef UNIV_DEBUG - rec_offs* offsets; - - offsets = rtr_page_get_father_block( - NULL, heap, index, block, mtr, sea_cur, cursor); - - ulint page_no = btr_node_ptr_get_child_page_no(cursor->page_cur.rec, - offsets); - - ut_ad(page_no == block->page.id().page_no()); -#else - rtr_page_get_father_block( - NULL, heap, index, block, mtr, sea_cur, cursor); -#endif - - mem_heap_free(heap); + mem_heap_t *heap = mem_heap_create(100); + rec_offs *offsets= rtr_page_get_father_block(nullptr, heap, index, block, + mtr, sea_cur, cursor); + mem_heap_free(heap); + return offsets != nullptr; } +MY_ATTRIBUTE((warn_unused_result)) /********************************************************************//** Returns the upper level node pointer to a R-Tree page. It is assumed that mtr holds an x-latch on the tree. */ -static void rtr_get_father_node( +static const rec_t* rtr_get_father_node( dict_index_t* index, /*!< in: index */ ulint level, /*!< in: the tree level of search */ const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in @@ -664,28 +646,18 @@ static void rtr_get_father_node( ulint page_no,/*!< Current page no */ mtr_t* mtr) /*!< in: mtr */ { - mem_heap_t* heap = NULL; - bool ret = false; - const rec_t* rec; - ulint n_fields; - bool new_rtr = false; + const rec_t* rec = nullptr; + auto had_rtr = btr_cur->rtr_info; /* Try to optimally locate the parent node. 
Level should always less than sea_cur->tree_height unless the root is splitting */ if (sea_cur && sea_cur->tree_height > level) { ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); - ret = rtr_cur_restore_position( - BTR_CONT_MODIFY_TREE, sea_cur, level, mtr); - - /* Once we block shrink tree nodes while there are - active search on it, this optimal locating should always - succeeds */ - ut_ad(ret); - - if (ret) { + if (rtr_cur_restore_position(BTR_CONT_MODIFY_TREE, sea_cur, + level, mtr)) { btr_pcur_t* r_cursor = rtr_get_parent_cursor( - sea_cur, level, false); + sea_cur, level, false); rec = btr_pcur_get_rec(r_cursor); @@ -693,70 +665,58 @@ static void rtr_get_father_node( page_cur_position(rec, btr_pcur_get_block(r_cursor), btr_cur_get_page_cur(btr_cur)); - btr_cur->rtr_info = sea_cur->rtr_info; + had_rtr = btr_cur->rtr_info = sea_cur->rtr_info; btr_cur->tree_height = sea_cur->tree_height; - ut_ad(rtr_compare_cursor_rec( - index, btr_cur, page_no, &heap)); - goto func_exit; } + goto func_exit; } /* We arrive here in one of two scenario 1) check table and btr_valide 2) index root page being raised */ - ut_ad(!sea_cur || sea_cur->tree_height == level); if (btr_cur->rtr_info) { rtr_clean_rtr_info(btr_cur->rtr_info, true); - } else { - new_rtr = true; } btr_cur->rtr_info = rtr_create_rtr_info(false, false, btr_cur, index); - if (sea_cur && sea_cur->tree_height == level) { - /* root split, and search the new root */ - btr_cur_search_to_nth_level( - index, level, tuple, PAGE_CUR_RTREE_LOCATE, - BTR_CONT_MODIFY_TREE, btr_cur, 0, mtr); - + if (btr_cur_search_to_nth_level(index, level, tuple, + PAGE_CUR_RTREE_LOCATE, + BTR_CONT_MODIFY_TREE, btr_cur, 0, mtr) + != DB_SUCCESS) { + } else if (sea_cur && sea_cur->tree_height == level) { + rec = btr_cur_get_rec(btr_cur); } else { /* btr_validate */ ut_ad(level >= 1); ut_ad(!sea_cur); - btr_cur_search_to_nth_level( - index, level, tuple, PAGE_CUR_RTREE_LOCATE, - BTR_CONT_MODIFY_TREE, btr_cur, 
0, mtr); - rec = btr_cur_get_rec(btr_cur); - n_fields = dtuple_get_n_fields_cmp(tuple); + const ulint n_fields = dtuple_get_n_fields_cmp(tuple); if (page_rec_is_infimum(rec) || (btr_cur->low_match != n_fields)) { - ret = rtr_pcur_getnext_from_path( - tuple, PAGE_CUR_RTREE_LOCATE, btr_cur, - level, BTR_CONT_MODIFY_TREE, - true, mtr); - - ut_ad(ret && btr_cur->low_match == n_fields); + if (!rtr_pcur_getnext_from_path( + tuple, PAGE_CUR_RTREE_LOCATE, btr_cur, + level, BTR_CONT_MODIFY_TREE, true, mtr)) { + rec = nullptr; + } else { + ut_ad(btr_cur->low_match == n_fields); + rec = btr_cur_get_rec(btr_cur); + } } } - ret = rtr_compare_cursor_rec( - index, btr_cur, page_no, &heap); - - ut_ad(ret); - func_exit: - if (heap) { - mem_heap_free(heap); - } + ut_d(rtr_compare_cursor_rec(rec, index, page_no)); - if (new_rtr && btr_cur->rtr_info) { + if (!had_rtr && btr_cur->rtr_info) { rtr_clean_rtr_info(btr_cur->rtr_info, true); btr_cur->rtr_info = NULL; } + + return rec; } /** Returns the upper level node pointer to a R-Tree page. 
It is assumed @@ -774,8 +734,6 @@ rtr_page_get_father_node_ptr( mtr_t* mtr) /*!< in: mtr */ { dtuple_t* tuple; - rec_t* user_rec; - rec_t* node_ptr; ulint level; ulint page_no; dict_index_t* index; @@ -792,7 +750,7 @@ rtr_page_get_father_node_ptr( level = btr_page_get_level(btr_cur_get_page(cursor)); - user_rec = btr_cur_get_rec(cursor); + const rec_t* user_rec = btr_cur_get_rec(cursor); ut_a(page_rec_is_user_rec(user_rec)); offsets = rec_get_offsets(user_rec, index, offsets, @@ -807,50 +765,20 @@ rtr_page_get_father_node_ptr( sea_cur = NULL; } - rtr_get_father_node(index, level + 1, tuple, sea_cur, cursor, - page_no, mtr); + const rec_t* node_ptr = rtr_get_father_node(index, level + 1, tuple, + sea_cur, cursor, + page_no, mtr); + if (!node_ptr) { + return nullptr; + } - node_ptr = btr_cur_get_rec(cursor); ut_ad(!page_rec_is_comp(node_ptr) || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR); offsets = rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED, &heap); - ulint child_page = btr_node_ptr_get_child_page_no(node_ptr, offsets); - - if (child_page != page_no) { - const rec_t* print_rec; - - ib::fatal error; - - error << "Corruption of index " << index->name - << " of table " << index->table->name - << " parent page " << page_no - << " child page " << child_page; - - print_rec = page_rec_get_next( - page_get_infimum_rec(page_align(user_rec))); - offsets = rec_get_offsets(print_rec, index, offsets, - page_rec_is_leaf(user_rec) - ? index->n_fields : 0, - ULINT_UNDEFINED, &heap); - error << "; child "; - rec_print(error.m_oss, print_rec, - rec_get_info_bits(print_rec, rec_offs_comp(offsets)), - offsets); - offsets = rec_get_offsets(node_ptr, index, offsets, 0, - ULINT_UNDEFINED, &heap); - error << "; parent "; - rec_print(error.m_oss, print_rec, - rec_get_info_bits(print_rec, rec_offs_comp(offsets)), - offsets); - - error << ". You should dump + drop + reimport the table to" - " fix the corruption. 
If the crash happens at" - " database startup, see " - "https://mariadb.com/kb/en/library/innodb-recovery-modes/" - " about forcing" - " recovery. Then dump + drop + reimport."; + if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) { + offsets = nullptr; } return(offsets); @@ -1314,16 +1242,17 @@ search_again: page_id_t(index->table->space_id, page_no), zip_size, RW_X_LATCH, NULL, BUF_GET, mtr); - ut_ad(block); + if (!block) { + ret = false; + goto func_exit; + } /* Get the page SSN */ page = buf_block_get_frame(block); page_ssn = page_get_ssn_id(page); - ulint low_match = page_cur_search( - block, index, tuple, PAGE_CUR_LE, page_cursor); - - if (low_match == r_cursor->old_n_fields) { + if (page_cur_search(block, index, tuple, PAGE_CUR_LE, page_cursor) + == r_cursor->old_n_fields) { const rec_t* rec; const rec_offs* offsets1; const rec_offs* offsets2; @@ -1359,6 +1288,7 @@ search_again: goto search_again; } +func_exit: mem_heap_free(heap); return(ret); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 572496d2eb7..11b17cdf5f1 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1533,20 +1533,27 @@ static void innodb_drop_database(handlerton*, char *path) we will "manually" purge the tablespaces that belong to the records that we delete-marked. 
*/ - mem_heap_t *heap= mem_heap_create(100); - dtuple_t *tuple= dtuple_create(heap, 1); - dfield_t *dfield= dtuple_get_nth_field(tuple, 0); + dfield_t dfield; + dtuple_t tuple{ + 0,1,1,&dfield,0,nullptr +#ifdef UNIV_DEBUG + , DATA_TUPLE_MAGIC_N +#endif + }; dict_index_t* sys_index= UT_LIST_GET_FIRST(dict_sys.sys_tables->indexes); btr_pcur_t pcur; namebuf[len++]= '/'; - dfield_set_data(dfield, namebuf, len); - dict_index_copy_types(tuple, sys_index, 1); + dfield_set_data(&dfield, namebuf, len); + dict_index_copy_types(&tuple, sys_index, 1); std::vector to_close; mtr_t mtr; mtr.start(); - for (btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + err= btr_pcur_open_on_user_rec(sys_index, &tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); - btr_pcur_is_on_user_rec(&pcur); + if (err != DB_SUCCESS) + goto err_exit; + + for (; btr_pcur_is_on_user_rec(&pcur); btr_pcur_move_to_next_user_rec(&pcur, &mtr)) { const rec_t *rec= btr_pcur_get_rec(&pcur); @@ -1587,8 +1594,8 @@ static void innodb_drop_database(handlerton*, char *path) to_close.emplace_back(detached); } } + err_exit: mtr.commit(); - mem_heap_free(heap); for (pfs_os_file_t detached : to_close) os_file_close(detached); /* Any changes must be persisted before we return. 
*/ @@ -2067,8 +2074,10 @@ static void drop_garbage_tables_after_restore() ut_d(purge_sys.stop_FTS()); mtr.start(); - btr_pcur_open_at_index_side(true, dict_sys.sys_tables->indexes.start, - BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); + if (btr_pcur_open_at_index_side(true, dict_sys.sys_tables->indexes.start, + BTR_SEARCH_LEAF, &pcur, true, 0, &mtr) != + DB_SUCCESS) + goto all_fail; for (;;) { btr_pcur_move_to_next_user_rec(&pcur, &mtr); @@ -2152,9 +2161,11 @@ fail: os_file_close(d); mtr.start(); - pcur.restore_position(BTR_SEARCH_LEAF, &mtr); + if (pcur.restore_position(BTR_SEARCH_LEAF, &mtr) == btr_pcur_t::CORRUPTED) + break; } +all_fail: mtr.commit(); trx->free(); ut_free(pcur.old_rec_buf); @@ -2278,6 +2289,7 @@ convert_error_code_to_mysql( code should be introduced */ case DB_CORRUPTION: + case DB_PAGE_CORRUPTED: return(HA_ERR_CRASHED); case DB_OUT_OF_FILE_SPACE: @@ -15052,15 +15064,8 @@ inline int ha_innobase::defragment_table() for (dict_index_t *index= dict_table_get_first_index(m_prebuilt->table); index; index= dict_table_get_next_index(index)) { - if (index->is_corrupted() || index->is_spatial()) - continue; - - if (index->page == FIL_NULL) - { - /* Do not defragment auxiliary tables related to FULLTEXT INDEX. 
*/ - ut_ad(index->type & DICT_FTS); + if (!index->is_btree()) continue; - } if (btr_defragment_find_index(index)) { @@ -15171,7 +15176,6 @@ ha_innobase::check( THD* thd, /*!< in: user thread handle */ HA_CHECK_OPT* check_opt) /*!< in: check options */ { - dict_index_t* index; ulint n_rows; ulint n_rows_in_table = ULINT_UNDEFINED; bool is_ok = true; @@ -15212,30 +15216,6 @@ ha_innobase::check( m_prebuilt->trx->op_info = "checking table"; - if (m_prebuilt->table->corrupted) { - /* If some previous operation has marked the table as - corrupted in memory, and has not propagated such to - clustered index, we will do so here */ - index = dict_table_get_first_index(m_prebuilt->table); - - if (!index->is_corrupted()) { - dict_set_corrupted(index, "CHECK TABLE", false); - } - - push_warning_printf(m_user_thd, - Sql_condition::WARN_LEVEL_WARN, - HA_ERR_INDEX_CORRUPT, - "InnoDB: Index %s is marked as" - " corrupted", - index->name()); - - /* Now that the table is already marked as corrupted, - there is no need to check any index of this table */ - m_prebuilt->trx->op_info = ""; - - DBUG_RETURN(HA_ADMIN_CORRUPT); - } - uint old_isolation_level = m_prebuilt->trx->isolation_level; /* We must run the index record counts at an isolation level @@ -15246,10 +15226,9 @@ ha_innobase::check( ? TRX_ISO_READ_UNCOMMITTED : TRX_ISO_REPEATABLE_READ; - ut_ad(!m_prebuilt->table->corrupted); - - for (index = dict_table_get_first_index(m_prebuilt->table); - index != NULL; + for (dict_index_t* index + = dict_table_get_first_index(m_prebuilt->table); + index; index = dict_table_get_next_index(index)) { /* If this is an index being created or dropped, skip */ if (!index->is_committed()) { @@ -15265,25 +15244,13 @@ ha_innobase::check( if (err != DB_SUCCESS) { is_ok = false; - if (err == DB_DECRYPTION_FAILED) { - push_warning_printf( - thd, - Sql_condition::WARN_LEVEL_WARN, - ER_NO_SUCH_TABLE, - "Table %s is encrypted but encryption service or" - " used key_id is not available. 
" - " Can't continue checking table.", - index->table->name.m_name); - } else { - push_warning_printf( - thd, - Sql_condition::WARN_LEVEL_WARN, - ER_NOT_KEYFILE, - "InnoDB: The B-tree of" - " index %s is corrupted.", - index->name()); - } - + push_warning_printf( + thd, + Sql_condition::WARN_LEVEL_WARN, + ER_NOT_KEYFILE, + "InnoDB: The B-tree of" + " index %s is corrupted.", + index->name()); continue; } } @@ -15301,8 +15268,7 @@ ha_innobase::check( if (!index->is_primary()) { m_prebuilt->index_usable = FALSE; dict_set_corrupted(index, - "dict_set_index_corrupted", - false); + "dict_set_index_corrupted"); }); if (UNIV_UNLIKELY(!m_prebuilt->index_usable)) { @@ -15364,8 +15330,7 @@ ha_innobase::check( " index %s is corrupted.", index->name()); is_ok = false; - dict_set_corrupted(index, "CHECK TABLE-check index", - false); + dict_set_corrupted(index, "CHECK TABLE-check index"); } @@ -15380,8 +15345,7 @@ ha_innobase::check( " entries, should be " ULINTPF ".", index->name(), n_rows, n_rows_in_table); is_ok = false; - dict_set_corrupted(index, "CHECK TABLE; Wrong count", - false); + dict_set_corrupted(index, "CHECK TABLE; Wrong count"); } } diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index ad5d9d5b0e4..d63f5b33654 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -1997,8 +1997,13 @@ static bool innobase_table_is_empty(const dict_table_t *table, bool next_page= false; mtr.start(); - btr_pcur_open_at_index_side(true, clust_index, BTR_SEARCH_LEAF, - &pcur, true, 0, &mtr); + if (btr_pcur_open_at_index_side(true, clust_index, BTR_SEARCH_LEAF, + &pcur, true, 0, &mtr) != DB_SUCCESS) + { +non_empty: + mtr.commit(); + return false; + } btr_pcur_move_to_next_user_rec(&pcur, &mtr); if (!rec_is_metadata(btr_pcur_get_rec(&pcur), *clust_index)) btr_pcur_move_to_prev_on_page(&pcur); @@ -2016,9 +2021,10 @@ next_page: } next_page= false; - block= page_cur_get_block(cur); block= 
btr_block_get(*clust_index, next_page_no, BTR_SEARCH_LEAF, false, &mtr); + if (!block) + goto non_empty; btr_leaf_page_release(page_cur_get_block(cur), BTR_SEARCH_LEAF, &mtr); page_cur_set_before_first(block, cur); page_cur_move_to_next(cur); @@ -2029,9 +2035,7 @@ next_page: { if (ignore_delete_marked) goto scan_leaf; -non_empty: - mtr.commit(); - return false; + goto non_empty; } else if (!page_rec_is_supremum(rec)) goto non_empty; @@ -5884,8 +5888,19 @@ add_all_virtual: mtr.start(); index->set_modified(mtr); btr_pcur_t pcur; - btr_pcur_open_at_index_side(true, index, BTR_MODIFY_TREE, &pcur, true, - 0, &mtr); + dberr_t err = btr_pcur_open_at_index_side(true, index, BTR_MODIFY_TREE, + &pcur, true, 0, &mtr); + if (err != DB_SUCCESS) { +func_exit: + mtr.commit(); + + if (err != DB_SUCCESS) { + my_error_innodb(err, table->s->table_name.str, + user_table->flags); + return true; + } + return false; + } ut_ad(btr_pcur_is_before_first_on_page(&pcur)); btr_pcur_move_to_next_on_page(&pcur); @@ -5898,7 +5913,6 @@ add_all_virtual: NULL, trx, ctx->heap, NULL); const bool is_root = block->page.id().page_no() == index->page; - dberr_t err = DB_SUCCESS; if (rec_is_metadata(rec, *index)) { ut_ad(page_rec_is_user_rec(rec)); if (is_root @@ -5915,8 +5929,11 @@ add_all_virtual: /* Ensure that the root page is in the correct format. 
*/ buf_block_t* root = btr_root_block_get(index, RW_X_LATCH, - &mtr); - DBUG_ASSERT(root); + &mtr, &err); + if (UNIV_UNLIKELY(!root)) { + goto func_exit; + } + if (fil_page_get_type(root->page.frame) != FIL_PAGE_TYPE_INSTANT) { DBUG_ASSERT("wrong page type" == 0); @@ -6007,10 +6024,12 @@ empty_table: mtr.commit(); mtr.start(); index->set_modified(mtr); - if (buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, &mtr)) { + if (buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, &mtr, + &err)) { if (fil_page_get_type(root->page.frame) != FIL_PAGE_INDEX) { DBUG_ASSERT("wrong page type" == 0); - goto err_exit; + err = DB_CORRUPTION; + goto func_exit; } btr_set_instant(root, *index, &mtr); @@ -6020,21 +6039,9 @@ empty_table: err = row_ins_clust_index_entry_low( BTR_NO_LOCKING_FLAG, BTR_MODIFY_TREE, index, index->n_uniq, entry, 0, thr); - } else { -err_exit: - err = DB_CORRUPTION; - } - -func_exit: - mtr.commit(); - - if (err != DB_SUCCESS) { - my_error_innodb(err, table->s->table_name.str, - user_table->flags); - return true; } - return false; + goto func_exit; } /** Adjust the create index column number from "New table" to @@ -8542,6 +8549,11 @@ oom: switch (error) { KEY* dup_key; + default: + my_error_innodb(error, + table_share->table_name.str, + m_prebuilt->table->flags); + break; all_done: case DB_SUCCESS: ut_d(dict_sys.freeze(SRW_LOCK_CALL)); @@ -8578,18 +8590,14 @@ oom: get_error_key_name(m_prebuilt->trx->error_key_num, ha_alter_info, m_prebuilt->table)); break; - case DB_DECRYPTION_FAILED: { + case DB_DECRYPTION_FAILED: String str; const char* engine= table_type(); get_error_message(HA_ERR_DECRYPTION_FAILED, &str); - my_error(ER_GET_ERRMSG, MYF(0), HA_ERR_DECRYPTION_FAILED, str.c_ptr(), engine); + my_error(ER_GET_ERRMSG, MYF(0), HA_ERR_DECRYPTION_FAILED, + str.c_ptr(), engine); break; } - default: - my_error_innodb(error, - table_share->table_name.str, - m_prebuilt->table->flags); - } /* prebuilt->table->n_ref_count can be anything here, given that 
we hold at most a shared lock on the table. */ @@ -10414,7 +10422,10 @@ handle_error: default: sql_print_error("InnoDB: %s: %s\n", op, ut_strerr(error)); - DBUG_ASSERT(0); + DBUG_ASSERT(error == DB_IO_ERROR + || error == DB_DECRYPTION_FAILED + || error == DB_PAGE_CORRUPTED + || error == DB_CORRUPTION); my_error(ER_INTERNAL_ERROR, MYF(0), op); } diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 97f7013465c..0a00b29a90b 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -46,7 +46,6 @@ Created July 18, 2007 Vasil Dimov #include "trx0i_s.h" #include "trx0trx.h" #include "srv0mon.h" -#include "fut0fut.h" #include "pars0pars.h" #include "fts0types.h" #include "fts0opt.h" diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 847e20563f8..94c01f5108a 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -325,23 +325,18 @@ ibuf_header_page_get( /** Acquire the change buffer root page. 
@param[in,out] mtr mini-transaction @return change buffer root page, SX-latched */ -static buf_block_t *ibuf_tree_root_get(mtr_t *mtr) +static buf_block_t *ibuf_tree_root_get(mtr_t *mtr, dberr_t *err= nullptr) { - buf_block_t* block; - - ut_ad(ibuf_inside(mtr)); - mysql_mutex_assert_owner(&ibuf_mutex); + ut_ad(ibuf_inside(mtr)); + mysql_mutex_assert_owner(&ibuf_mutex); - mtr_sx_lock_index(ibuf.index, mtr); + mtr_sx_lock_index(ibuf.index, mtr); - /* only segment list access is exclusive each other */ - block = buf_page_get( - page_id_t(IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO), - 0, RW_SX_LATCH, mtr); - - ut_ad(ibuf.empty == page_is_empty(block->page.frame)); - - return block; + buf_block_t *block= + buf_page_get_gen(page_id_t{IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO}, + 0, RW_SX_LATCH, nullptr, BUF_GET, mtr, err); + ut_ad(!block || ibuf.empty == page_is_empty(block->page.frame)); + return block; } /******************************************************************//** @@ -393,7 +388,6 @@ ibuf_init_at_db_start(void) /*=======================*/ { page_t* root; - ulint n_used; ut_ad(!ibuf.index); mtr_t mtr; @@ -401,13 +395,15 @@ ibuf_init_at_db_start(void) compile_time_assert(IBUF_SPACE_ID == TRX_SYS_SPACE); compile_time_assert(IBUF_SPACE_ID == 0); mtr.x_lock_space(fil_system.sys_space); - buf_block_t* header_page = buf_page_get( + dberr_t err; + buf_block_t* header_page = buf_page_get_gen( page_id_t(IBUF_SPACE_ID, FSP_IBUF_HEADER_PAGE_NO), - 0, RW_X_LATCH, &mtr); + 0, RW_X_LATCH, nullptr, BUF_GET, &mtr, &err); if (!header_page) { +err_exit: mtr.commit(); - return DB_DECRYPTION_FAILED; + return err; } /* At startup we intialize ibuf to have a maximum of @@ -426,20 +422,20 @@ ibuf_init_at_db_start(void) fseg_n_reserved_pages(*header_page, IBUF_HEADER + IBUF_TREE_SEG_HEADER - + header_page->page.frame, &n_used, &mtr); - - ut_ad(n_used >= 2); - - ibuf.seg_size = n_used; - - { - buf_block_t* block; - - block = buf_page_get( - page_id_t(IBUF_SPACE_ID, 
FSP_IBUF_TREE_ROOT_PAGE_NO), - 0, RW_X_LATCH, &mtr); + + header_page->page.frame, &ibuf.seg_size, &mtr); + do { + DBUG_EXECUTE_IF("intermittent_read_failure", continue;); + ut_ad(ibuf.seg_size >= 2); + } while (0); + + if (buf_block_t* block = + buf_page_get_gen(page_id_t(IBUF_SPACE_ID, + FSP_IBUF_TREE_ROOT_PAGE_NO), + 0, RW_X_LATCH, nullptr, BUF_GET, &mtr, &err)) { root = buf_block_get_frame(block); + } else { + goto err_exit; } ibuf_size_update(root); @@ -662,9 +658,9 @@ ibuf_bitmap_get_map_page( ulint zip_size, mtr_t* mtr) { - return buf_page_get_gen( - ibuf_bitmap_page_no_calc(page_id, zip_size), - zip_size, RW_X_LATCH, NULL, BUF_GET_POSSIBLY_FREED, mtr); + return buf_page_get_gen(ibuf_bitmap_page_no_calc(page_id, zip_size), + zip_size, RW_X_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, mtr); } /************************************************************************//** @@ -717,42 +713,36 @@ ibuf_set_free_bits_func( #endif /* UNIV_IBUF_DEBUG */ ulint val) /*!< in: value to set: < 4 */ { - if (!page_is_leaf(block->page.frame)) { - return; - } - - mtr_t mtr; - mtr.start(); - const page_id_t id(block->page.id()); - - const fil_space_t* space = mtr.set_named_space_id(id.space()); + if (!page_is_leaf(block->page.frame)) + return; - buf_block_t* bitmap_page = ibuf_bitmap_get_map_page(id, - block->zip_size(), - &mtr); + mtr_t mtr; + mtr.start(); + const page_id_t id(block->page.id()); + const fil_space_t *space= mtr.set_named_space_id(id.space()); - if (space->purpose != FIL_TYPE_TABLESPACE) { - mtr.set_log_mode(MTR_LOG_NO_REDO); - } + if (buf_block_t *bitmap_page= + ibuf_bitmap_get_map_page(id, block->zip_size(), &mtr)) + { + if (space->purpose != FIL_TYPE_TABLESPACE) + mtr.set_log_mode(MTR_LOG_NO_REDO); #ifdef UNIV_IBUF_DEBUG - if (max_val != ULINT_UNDEFINED) { - ulint old_val; - - old_val = ibuf_bitmap_page_get_bits( - bitmap_page, id, - IBUF_BITMAP_FREE, &mtr); - ut_a(old_val <= max_val); - } - - ut_a(val <= ibuf_index_page_calc_free(block)); + if (max_val != 
ULINT_UNDEFINED) + { + ulint old_val= ibuf_bitmap_page_get_bits(bitmap_page, id, + IBUF_BITMAP_FREE, &mtr); + ut_a(old_val <= max_val); + } + + ut_a(val <= ibuf_index_page_calc_free(block)); #endif /* UNIV_IBUF_DEBUG */ - ibuf_bitmap_page_set_bits( - bitmap_page, id, block->physical_size(), - val, &mtr); + ibuf_bitmap_page_set_bits + (bitmap_page, id, block->physical_size(), val, &mtr); + } - mtr.commit(); + mtr.commit(); } /************************************************************************//** @@ -934,14 +924,12 @@ ibuf_page_low( not be modified by any other thread. Nobody should be calling ibuf_add_free_page() or ibuf_remove_free_page() while the page is linked to the insert buffer b-tree. */ - dberr_t err = DB_SUCCESS; - buf_block_t* block = buf_page_get_gen( ibuf_bitmap_page_no_calc(page_id, zip_size), - zip_size, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, - &local_mtr, &err); + zip_size, RW_NO_LATCH, nullptr, BUF_GET, &local_mtr); - ret = ibuf_bitmap_page_get_bits_low( + ret = block + && ibuf_bitmap_page_get_bits_low( block->page.frame, page_id, zip_size, MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF); @@ -955,11 +943,12 @@ ibuf_page_low( mtr_start(mtr); } - ret = ibuf_bitmap_page_get_bits(ibuf_bitmap_get_map_page( - page_id, zip_size, - mtr)->page.frame, - page_id, zip_size, - IBUF_BITMAP_IBUF, mtr); + buf_block_t *block = ibuf_bitmap_get_map_page(page_id, zip_size, + mtr); + ret = block + && ibuf_bitmap_page_get_bits(block->page.frame, + page_id, zip_size, + IBUF_BITMAP_IBUF, mtr); if (mtr == &local_mtr) { mtr_commit(mtr); @@ -1780,6 +1769,10 @@ static bool ibuf_add_free_page() order */ mtr.x_lock_space(fil_system.sys_space); header_page = ibuf_header_page_get(&mtr); + if (!header_page) { + mtr.commit(); + return false; + } /* Allocate a new page: NOTE that if the page has been a part of a non-clustered index which has subsequently been dropped, then the @@ -1791,11 +1784,12 @@ static bool ibuf_add_free_page() of a deadlock. 
This is the reason why we created a special ibuf header page apart from the ibuf tree. */ - block = fseg_alloc_free_page( + dberr_t err; + block = fseg_alloc_free_page_general( header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP, - &mtr); + false, &mtr, &mtr, &err); - if (block == NULL) { + if (!block) { mtr.commit(); return false; } @@ -1806,15 +1800,24 @@ static bool ibuf_add_free_page() mtr.write<2>(*block, block->page.frame + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST); + buf_block_t* ibuf_root = ibuf_tree_root_get(&mtr); + if (UNIV_UNLIKELY(!ibuf_root)) { +corrupted: + /* Do not bother to try to free the allocated block, because + the change buffer is seriously corrupted already. */ + mysql_mutex_unlock(&ibuf_mutex); + ibuf_mtr_commit(&mtr); + return false; + } /* Add the page to the free list and update the ibuf size data */ - flst_add_last(ibuf_tree_root_get(&mtr), - PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr); - - ibuf.seg_size++; - ibuf.free_list_len++; + err = flst_add_last(ibuf_root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, + block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, + &mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + goto corrupted; + } /* Set the bit indicating that this page is now an ibuf tree page (level 2 page) */ @@ -1822,23 +1825,24 @@ static bool ibuf_add_free_page() const page_id_t page_id(block->page.id()); buf_block_t* bitmap_page = ibuf_bitmap_get_map_page(page_id, 0, &mtr); + if (UNIV_UNLIKELY(!bitmap_page)) { + goto corrupted; + } + + ibuf.seg_size++; + ibuf.free_list_len++; + mysql_mutex_unlock(&ibuf_mutex); ibuf_bitmap_page_set_bits(bitmap_page, page_id, - srv_page_size, true, - &mtr); - + srv_page_size, true, &mtr); ibuf_mtr_commit(&mtr); - return true; } /*********************************************************************//** Removes a page from the free list and frees it to the fsp system. 
*/ -static -void -ibuf_remove_free_page(void) -/*=======================*/ +static void ibuf_remove_free_page() { mtr_t mtr; mtr_t mtr2; @@ -1858,8 +1862,8 @@ ibuf_remove_free_page(void) mysql_mutex_lock(&ibuf_pessimistic_insert_mutex); mysql_mutex_lock(&ibuf_mutex); - if (!ibuf_data_too_much_free()) { - + if (!header_page || !ibuf_data_too_much_free()) { +early_exit: mysql_mutex_unlock(&ibuf_mutex); mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex); @@ -1872,10 +1876,16 @@ ibuf_remove_free_page(void) buf_block_t* root = ibuf_tree_root_get(&mtr2); + if (UNIV_UNLIKELY(!root)) { + ibuf_mtr_commit(&mtr2); + goto early_exit; + } + mysql_mutex_unlock(&ibuf_mutex); - uint32_t page_no = flst_get_last(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST - + root->page.frame).page; + const uint32_t page_no = flst_get_last(PAGE_HEADER + + PAGE_BTR_IBUF_FREE_LIST + + root->page.frame).page; /* NOTE that we must release the latch on the ibuf tree root because in fseg_free_page we access level 1 pages, and the root @@ -1891,43 +1901,60 @@ ibuf_remove_free_page(void) page from it. 
*/ compile_time_assert(IBUF_SPACE_ID == 0); - fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, - fil_system.sys_space, page_no, &mtr); + const page_id_t page_id{IBUF_SPACE_ID, page_no}; + buf_block_t* bitmap_page = nullptr; + dberr_t err = fseg_free_page( + header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, + fil_system.sys_space, page_no, &mtr); - const page_id_t page_id(IBUF_SPACE_ID, page_no); + if (err != DB_SUCCESS) { + goto func_exit; + } ibuf_enter(&mtr); mysql_mutex_lock(&ibuf_mutex); - root = ibuf_tree_root_get(&mtr); + root = ibuf_tree_root_get(&mtr, &err); + if (UNIV_UNLIKELY(!root)) { + mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex); + goto func_exit; + } ut_ad(page_no == flst_get_last(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST + root->page.frame).page); - buf_block_t* block = buf_page_get(page_id, 0, RW_X_LATCH, &mtr); - /* Remove the page from the free list and update the ibuf size data */ - - flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr); + if (buf_block_t* block = + buf_page_get_gen(page_id, 0, RW_X_LATCH, nullptr, BUF_GET, + &mtr, &err)) { + err = flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, + block, + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, + &mtr); + } mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex); - ibuf.seg_size--; - ibuf.free_list_len--; - - /* Set the bit indicating that this page is no more an ibuf tree page - (level 2 page) */ - - buf_block_t* bitmap_page = ibuf_bitmap_get_map_page(page_id, 0, &mtr); + if (err == DB_SUCCESS) { + ibuf.seg_size--; + ibuf.free_list_len--; + bitmap_page = ibuf_bitmap_get_map_page(page_id, 0, &mtr); + } +func_exit: mysql_mutex_unlock(&ibuf_mutex); - ibuf_bitmap_page_set_bits( - bitmap_page, page_id, srv_page_size, false, &mtr); + if (bitmap_page) { + /* Set the bit indicating that this page is no more an + ibuf tree page (level 2 page) */ + ibuf_bitmap_page_set_bits( + bitmap_page, page_id, srv_page_size, 
false, &mtr); + } - buf_page_free(fil_system.sys_space, page_no, &mtr); + if (err == DB_SUCCESS) { + buf_page_free(fil_system.sys_space, page_no, &mtr); + } ibuf_mtr_commit(&mtr); } @@ -2263,13 +2290,17 @@ tablespace_deleted: if (UNIV_LIKELY(page_nos[i] < size)) { mtr.start(); dberr_t err; + buf_block_t *b = buf_page_get_gen(page_id_t(space_id, page_nos[i]), zip_size, RW_X_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, &mtr, &err, true); mtr.commit(); - if (err == DB_TABLESPACE_DELETED) { + if (b) { + } else if (err == DB_TABLESPACE_DELETED) { goto tablespace_deleted; + } else { + continue; } } #ifndef DBUG_OFF @@ -2302,8 +2333,11 @@ work_around: loop: btr_pcur_t pcur; ibuf_mtr_start(&mtr); - btr_pcur_open(ibuf.index, tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, - &pcur, &mtr); + if (btr_pcur_open(ibuf.index, tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, &pcur, &mtr) + != DB_SUCCESS) { + goto done; + } if (!btr_pcur_is_on_user_rec(&pcur)) { ut_ad(btr_pcur_is_after_last_on_page(&pcur)); goto done; @@ -2370,12 +2404,18 @@ ibuf_merge_pages( /* Open a cursor to a randomly chosen leaf of the tree, at a random position within the leaf */ - bool available; + pcur.pos_state = BTR_PCUR_IS_POSITIONED; + pcur.old_stored = false; + pcur.trx_if_known = NULL; + pcur.search_mode = PAGE_CUR_G; + pcur.latch_mode = BTR_SEARCH_LEAF; - available = btr_pcur_open_at_rnd_pos(ibuf.index, BTR_SEARCH_LEAF, - &pcur, &mtr); - /* No one should make this index unavailable when server is running */ - ut_a(available); + btr_pcur_init(&pcur); + + if (!btr_cur_open_at_rnd_pos(ibuf.index, BTR_SEARCH_LEAF, + btr_pcur_get_btr_cur(&pcur), &mtr)) { + return 0; + } ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf.index)); @@ -2424,19 +2464,19 @@ ibuf_merge_space( /* Position the cursor on the first matching record. 
*/ - btr_pcur_open( - ibuf.index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, - &mtr); + dberr_t err = btr_pcur_open(ibuf.index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + ut_ad(err != DB_SUCCESS || page_validate(btr_pcur_get_page(&pcur), + ibuf.index)); mem_heap_free(heap); - ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf.index)); - ulint sum_sizes = 0; uint32_t pages[IBUF_MAX_N_PAGES_MERGED]; uint32_t spaces[IBUF_MAX_N_PAGES_MERGED]; - if (page_is_empty(btr_pcur_get_page(&pcur))) { + if (err != DB_SUCCESS) { + } else if (page_is_empty(btr_pcur_get_page(&pcur))) { /* If a B-tree page is empty, it must be the root page and the whole B-tree must be empty. InnoDB does not allow empty B-tree pages other than the root. */ @@ -2799,23 +2839,22 @@ ibuf_get_volume_buffered( goto count_later; } - { - buf_block_t* block; - - block = buf_page_get( - page_id_t(IBUF_SPACE_ID, prev_page_no), - 0, RW_X_LATCH, mtr); - + if (buf_block_t* block = + buf_page_get(page_id_t(IBUF_SPACE_ID, prev_page_no), + 0, RW_X_LATCH, mtr)) { prev_page = buf_block_get_frame(block); ut_ad(page_validate(prev_page, ibuf.index)); + } else { + return srv_page_size; } -#ifdef UNIV_BTR_DEBUG static_assert(FIL_PAGE_NEXT % 4 == 0, "alignment"); static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); - ut_a(!memcmp_aligned<4>(prev_page + FIL_PAGE_NEXT, - page + FIL_PAGE_OFFSET, 4)); -#endif /* UNIV_BTR_DEBUG */ + + if (UNIV_UNLIKELY(memcmp_aligned<4>(prev_page + FIL_PAGE_NEXT, + page + FIL_PAGE_OFFSET, 4))) { + return 0; + } rec = page_get_supremum_rec(prev_page); rec = page_rec_get_prev_const(rec); @@ -2872,23 +2911,22 @@ count_later: return(volume); } - { - buf_block_t* block; - - block = buf_page_get( - page_id_t(IBUF_SPACE_ID, next_page_no), - 0, RW_X_LATCH, mtr); - + if (buf_block_t* block = + buf_page_get(page_id_t(IBUF_SPACE_ID, next_page_no), + 0, RW_X_LATCH, mtr)) { next_page = buf_block_get_frame(block); ut_ad(page_validate(next_page, ibuf.index)); + } else { + return srv_page_size; } 
-#ifdef UNIV_BTR_DEBUG static_assert(FIL_PAGE_PREV % 4 == 0, "alignment"); static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); - ut_a(!memcmp_aligned<4>(next_page + FIL_PAGE_PREV, - page + FIL_PAGE_OFFSET, 4)); -#endif /* UNIV_BTR_DEBUG */ + + if (UNIV_UNLIKELY(memcmp_aligned<4>(next_page + FIL_PAGE_PREV, + page + FIL_PAGE_OFFSET, 4))) { + return 0; + } rec = page_get_infimum_rec(next_page); rec = page_rec_get_next_const(rec); @@ -2922,7 +2960,6 @@ void ibuf_update_max_tablespace_id(void) /*===============================*/ { - ulint max_space_id; const rec_t* rec; const byte* field; ulint len; @@ -2933,26 +2970,27 @@ ibuf_update_max_tablespace_id(void) ibuf_mtr_start(&mtr); - btr_pcur_open_at_index_side( - false, ibuf.index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); + if (btr_pcur_open_at_index_side(false, ibuf.index, BTR_SEARCH_LEAF, + &pcur, true, 0, &mtr) != DB_SUCCESS) { +func_exit: + ibuf_mtr_commit(&mtr); + return; + } ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf.index)); - btr_pcur_move_to_prev(&pcur, &mtr); - - if (btr_pcur_is_before_first_on_page(&pcur)) { - /* The tree is empty */ + if (!btr_pcur_move_to_prev(&pcur, &mtr) + || btr_pcur_is_before_first_on_page(&pcur)) { + goto func_exit; + } - max_space_id = 0; - } else { - rec = btr_pcur_get_rec(&pcur); + rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len); + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len); - ut_a(len == 4); + ut_a(len == 4); - max_space_id = mach_read_from_4(field); - } + const uint32_t max_space_id = mach_read_from_4(field); ibuf_mtr_commit(&mtr); @@ -3147,7 +3185,7 @@ ibuf_insert_low( lint min_n_recs; rec_t* ins_rec; buf_block_t* bitmap_page; - buf_block_t* block; + buf_block_t* block = NULL; page_t* root; dberr_t err; ibool do_merge; @@ -3229,7 +3267,29 @@ ibuf_insert_low( ibuf_mtr_start(&mtr); - btr_pcur_open(ibuf.index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr); + err = btr_pcur_open(ibuf.index, ibuf_entry, 
PAGE_CUR_LE, mode, &pcur, + &mtr); + if (err != DB_SUCCESS) { +func_exit: + ibuf_mtr_commit(&mtr); + ut_free(pcur.old_rec_buf); + mem_heap_free(heap); + + if (err == DB_SUCCESS + && BTR_LATCH_MODE_WITHOUT_INTENTION(mode) + == BTR_MODIFY_TREE) { + ibuf_contract_after_insert(entry_size); + } + + if (do_merge) { +#ifdef UNIV_IBUF_DEBUG + ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED); +#endif + ibuf_read_merge_pages(space_ids, page_nos, n_stored); + } + return err; + } + ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf.index)); /* Find out the volume of already buffered inserts for the same index @@ -3288,7 +3348,7 @@ fail_exit: /* We check if the index page is suitable for buffered entries */ - if (buf_pool.page_hash_contains( + if (!bitmap_page || buf_pool.page_hash_contains( page_id, buf_pool.page_hash.cell_get(page_id.fold()))) { commit_exit: ibuf_mtr_commit(&bitmap_mtr); @@ -3387,8 +3447,14 @@ commit_exit: because a pessimistic insert releases the tree x-latch, which would cause the sx-latching of the root after that to break the latching order. 
*/ - - root = ibuf_tree_root_get(&mtr)->page.frame; + if (buf_block_t* ibuf_root = ibuf_tree_root_get(&mtr)) { + root = ibuf_root->page.frame; + } else { + err = DB_CORRUPTION; + mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex); + mysql_mutex_unlock(&ibuf_mutex); + goto ibuf_insert_done; + } err = btr_cur_optimistic_insert( BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG, @@ -3413,6 +3479,7 @@ commit_exit: ut_ad(block->page.id().space() == IBUF_SPACE_ID); } +ibuf_insert_done: if (offsets_heap) { mem_heap_free(offsets_heap); } @@ -3423,24 +3490,7 @@ commit_exit: thr_get_trx(thr)->id, &mtr); } -func_exit: - ibuf_mtr_commit(&mtr); - ut_free(pcur.old_rec_buf); - mem_heap_free(heap); - - if (err == DB_SUCCESS - && BTR_LATCH_MODE_WITHOUT_INTENTION(mode) == BTR_MODIFY_TREE) { - ibuf_contract_after_insert(entry_size); - } - - if (do_merge) { -#ifdef UNIV_IBUF_DEBUG - ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED); -#endif - ibuf_read_merge_pages(space_ids, page_nos, n_stored); - } - - return(err); + goto func_exit; } /** Buffer an operation in the change buffer, instead of applying it @@ -3577,12 +3627,13 @@ skip_watch: DBUG_RETURN(err == DB_SUCCESS); } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /********************************************************************//** During merge, inserts to an index page a secondary index entry extracted from the insert buffer. 
-@return newly inserted record */ -static MY_ATTRIBUTE((nonnull)) -rec_t* +@return error code */ +static +dberr_t ibuf_insert_to_index_page_low( /*==========================*/ const dtuple_t* entry, /*!< in: buffered entry to insert */ @@ -3595,66 +3646,31 @@ ibuf_insert_to_index_page_low( page_cur_t* page_cur)/*!< in/out: cursor positioned on the record after which to insert the buffered entry */ { - rec_t* rec; - DBUG_ENTER("ibuf_insert_to_index_page_low"); - - rec = page_cur_tuple_insert(page_cur, entry, index, - offsets, &heap, 0, mtr); - if (rec != NULL) { - DBUG_RETURN(rec); - } - - /* Page reorganization or recompression should already have - been attempted by page_cur_tuple_insert(). Besides, per - ibuf_index_page_calc_free_zip() the page should not have been - recompressed or reorganized. */ - ut_ad(!is_buf_block_get_page_zip(block)); + if (page_cur_tuple_insert(page_cur, entry, index, offsets, &heap, 0, mtr)) + return DB_SUCCESS; - /* If the record did not fit, reorganize */ + /* Page reorganization or recompression should already have been + attempted by page_cur_tuple_insert(). Besides, per + ibuf_index_page_calc_free_zip() the page should not have been + recompressed or reorganized. 
*/ + ut_ad(!is_buf_block_get_page_zip(block)); - btr_page_reorganize(page_cur, index, mtr); - - /* This time the record must fit */ - - rec = page_cur_tuple_insert(page_cur, entry, index, - offsets, &heap, 0, mtr); - if (rec != NULL) { - DBUG_RETURN(rec); - } + /* If the record did not fit, reorganize */ + if (dberr_t err= btr_page_reorganize(page_cur, index, mtr)) + return err; - ib::error() << "Insert buffer insert fails; page free " - << page_get_max_insert_size(block->page.frame, 1) - << ", dtuple size " - << rec_get_converted_size(index, entry, 0); + /* This time the record must fit */ + if (page_cur_tuple_insert(page_cur, entry, index, offsets, &heap, 0, mtr)) + return DB_SUCCESS; - fputs("InnoDB: Cannot insert index record ", stderr); - dtuple_print(stderr, entry); - fputs("\nInnoDB: The table where this index record belongs\n" - "InnoDB: is now probably corrupt. Please run CHECK TABLE on\n" - "InnoDB: that table.\n", stderr); - - if (buf_block_t *bitmap_page = ibuf_bitmap_get_map_page( - block->page.id(), block->zip_size(), mtr)) { - - ib::error() << "page " << block->page.id() << ", size " - << block->physical_size() << ", bitmap bits " - << ibuf_bitmap_page_get_bits( - bitmap_page->page.frame, - block->page.id(), block->zip_size(), - IBUF_BITMAP_FREE, mtr); - } - - ib::error() << BUG_REPORT_MSG; - - ut_ad(0); - DBUG_RETURN(NULL); + return DB_CORRUPTION; } /************************************************************************ During merge, inserts to an index page a secondary index entry extracted from the insert buffer. 
*/ static -void +dberr_t ibuf_insert_to_index_page( /*======================*/ const dtuple_t* entry, /*!< in: buffered entry to insert */ @@ -3670,8 +3686,6 @@ ibuf_insert_to_index_page( rec_offs* offsets; mem_heap_t* heap; - DBUG_ENTER("ibuf_insert_to_index_page"); - DBUG_PRINT("ibuf", ("page " UINT32PF ":" UINT32PF, block->page.id().space(), block->page.id().page_no())); @@ -3690,37 +3704,20 @@ ibuf_insert_to_index_page( if (UNIV_UNLIKELY(dict_table_is_comp(index->table) != (ibool)!!page_is_comp(page))) { - ib::warn() << "Trying to insert a record from the insert" - " buffer to an index page but the 'compact' flag does" - " not match!"; - goto dump; + return DB_CORRUPTION; } rec = page_rec_get_next(page_get_infimum_rec(page)); if (page_rec_is_supremum(rec)) { - ib::warn() << "Trying to insert a record from the insert" - " buffer to an index page but the index page" - " is empty!"; - goto dump; + return DB_CORRUPTION; } if (!rec_n_fields_is_sane(index, rec, entry)) { - ib::warn() << "Trying to insert a record from the insert" - " buffer to an index page but the number of fields" - " does not match!"; - rec_print(stderr, rec, index); -dump: - dtuple_print(stderr, entry); - ut_ad(0); - - ib::warn() << "The table where this index record belongs" - " is now probably corrupt. Please run CHECK TABLE on" - " your tables. 
" << BUG_REPORT_MSG; - - DBUG_VOID_RETURN; + return DB_CORRUPTION; } + dberr_t err = DB_SUCCESS; low_match = page_cur_search(block, index, entry, &page_cur); heap = mem_heap_create( @@ -3805,21 +3802,16 @@ dump: page_cur_delete_rec(&page_cur, index, offsets, mtr); page_cur_move_to_prev(&page_cur); - rec = ibuf_insert_to_index_page_low(entry, block, index, - &offsets, heap, mtr, - &page_cur); - - ut_ad(!cmp_dtuple_rec(entry, rec, offsets)); } else { offsets = NULL; - ibuf_insert_to_index_page_low(entry, block, index, - &offsets, heap, mtr, - &page_cur); - } + } + + err = ibuf_insert_to_index_page_low(entry, block, index, + &offsets, heap, mtr, &page_cur); updated_in_place: mem_heap_free(heap); - DBUG_VOID_RETURN; + return err; } /****************************************************************//** @@ -4024,7 +4016,6 @@ static MY_ATTRIBUTE((warn_unused_result, nonnull)) bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, const dtuple_t* search_tuple, mtr_t* mtr) { - page_t* root; dberr_t err; ut_ad(ibuf_inside(mtr)); @@ -4034,13 +4025,16 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == page_id.space()); - if (btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), - BTR_CREATE_FLAG, mtr)) { + switch (btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), + BTR_CREATE_FLAG, mtr)) { + case DB_FAIL: + break; + case DB_SUCCESS: if (page_is_empty(btr_pcur_get_page(pcur))) { /* If a B-tree page is empty, it must be the root page and the whole B-tree must be empty. InnoDB does not allow empty B-tree pages other than the root. 
*/ - root = btr_pcur_get_page(pcur); + ut_d(const page_t* root = btr_pcur_get_page(pcur)); ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); ut_ad(page_get_page_no(root) @@ -4051,7 +4045,8 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, ut_ad(!ibuf.empty); ibuf.empty = true; } - + /* fall through */ + default: return(FALSE); } @@ -4077,16 +4072,20 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, goto func_exit; } - root = ibuf_tree_root_get(mtr)->page.frame; + if (buf_block_t* ibuf_root = ibuf_tree_root_get(mtr)) { + btr_cur_pessimistic_delete(&err, TRUE, + btr_pcur_get_btr_cur(pcur), + BTR_CREATE_FLAG, false, mtr); + ut_a(err == DB_SUCCESS); - btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), - BTR_CREATE_FLAG, false, mtr); - ut_a(err == DB_SUCCESS); + ibuf_size_update(ibuf_root->page.frame); + mysql_mutex_unlock(&ibuf_mutex); - ibuf_size_update(root); - mysql_mutex_unlock(&ibuf_mutex); + ibuf.empty = page_is_empty(ibuf_root->page.frame); + } else { + mysql_mutex_unlock(&ibuf_mutex); + } - ibuf.empty = page_is_empty(root); ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); func_exit: @@ -4156,25 +4155,16 @@ exist entries for such a page if the page belonged to an index which subsequently was dropped. @param block X-latched page to try to apply changes to, or NULL to discard @param page_id page identifier -@param zip_size ROW_FORMAT=COMPRESSED page size, or 0 */ -void ibuf_merge_or_delete_for_page(buf_block_t *block, const page_id_t page_id, - ulint zip_size) +@param zip_size ROW_FORMAT=COMPRESSED page size, or 0 +@return error code */ +dberr_t ibuf_merge_or_delete_for_page(buf_block_t *block, + const page_id_t page_id, + ulint zip_size) { if (trx_sys_hdr_page(page_id)) { - return; + return DB_SUCCESS; } - btr_pcur_t pcur; -#ifdef UNIV_IBUF_DEBUG - ulint volume = 0; -#endif /* UNIV_IBUF_DEBUG */ - bool corruption_noticed = false; - mtr_t mtr; - - /* Counts for merged & discarded operations. 
*/ - ulint mops[IBUF_OP_COUNT]; - ulint dops[IBUF_OP_COUNT]; - ut_ad(!block || page_id == block->page.id()); ut_ad(!block || block->page.frame); ut_ad(!block || !block->page.is_ibuf_exist()); @@ -4186,13 +4176,20 @@ void ibuf_merge_or_delete_for_page(buf_block_t *block, const page_id_t page_id, if (ibuf_fixed_addr_page(page_id, physical_size) || fsp_descr_page(page_id, physical_size)) { - return; + return DB_SUCCESS; } + btr_pcur_t pcur; +#ifdef UNIV_IBUF_DEBUG + ulint volume = 0; +#endif /* UNIV_IBUF_DEBUG */ + dberr_t err = DB_SUCCESS; + mtr_t mtr; + fil_space_t* space = fil_space_t::get(page_id.space()); if (UNIV_UNLIKELY(!space)) { - block = NULL; + block = nullptr; } else { ulint bitmap_bits = 0; @@ -4211,8 +4208,9 @@ void ibuf_merge_or_delete_for_page(buf_block_t *block, const page_id_t page_id, ibuf_mtr_commit(&mtr); - if (bitmap_bits && fseg_page_is_free( - space, page_id.page_no())) { + if (bitmap_bits + && DB_SUCCESS + == fseg_page_is_allocated(space, page_id.page_no())) { ibuf_mtr_start(&mtr); mtr.set_named_space(space); ibuf_reset_bitmap(block, page_id, zip_size, &mtr); @@ -4223,40 +4221,33 @@ void ibuf_merge_or_delete_for_page(buf_block_t *block, const page_id_t page_id, if (!bitmap_bits) { /* No changes are buffered for this page. */ space->release(); - return; + return DB_SUCCESS; } } - mem_heap_t* heap = mem_heap_create(512); - - const dtuple_t* search_tuple = ibuf_search_tuple_build( - page_id.space(), page_id.page_no(), heap); - - if (block != NULL) { + if (!block) { + } else if (!fil_page_index_page_check(block->page.frame) + || !page_is_leaf(block->page.frame)) { + space->set_corrupted(); + err = DB_CORRUPTION; + block = nullptr; + } else { /* Move the ownership of the x-latch on the page to this OS thread, so that we can acquire a second x-latch on it. This is needed for the insert operations to the index page to pass the debug checks. 
*/ block->page.lock.claim_ownership(); + } - if (!fil_page_index_page_check(block->page.frame) - || !page_is_leaf(block->page.frame)) { + mem_heap_t* heap = mem_heap_create(512); - corruption_noticed = true; + const dtuple_t* search_tuple = ibuf_search_tuple_build( + page_id.space(), page_id.page_no(), heap); - ib::error() << "Corruption in the tablespace. Bitmap" - " shows insert buffer records to page " - << page_id << " though the page type is " - << fil_page_get_type(block->page.frame) - << ", which is not an index leaf page. We try" - " to resolve the problem by skipping the" - " insert buffer merge for this page. Please" - " run CHECK TABLE on your tables to determine" - " if they are corrupt after this."; - ut_ad(0); - } - } + /* Counts for merged & discarded operations. */ + ulint mops[IBUF_OP_COUNT]; + ulint dops[IBUF_OP_COUNT]; memset(mops, 0, sizeof(mops)); memset(dops, 0, sizeof(dops)); @@ -4266,9 +4257,12 @@ loop: /* Position pcur in the insert buffer at the first entry for this index page */ - btr_pcur_open_on_user_rec( - ibuf.index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, - &pcur, &mtr); + if (btr_pcur_open_on_user_rec(ibuf.index, search_tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, &pcur, &mtr) + != DB_SUCCESS) { + err = DB_CORRUPTION; + goto reset_bit; + } if (block) { block->page.fix(); @@ -4303,7 +4297,7 @@ loop: goto reset_bit; } - if (corruption_noticed) { + if (err) { fputs("InnoDB: Discarding record\n ", stderr); rec_print_old(stderr, rec); fputs("\nInnoDB: from the insert buffer!\n\n", stderr); @@ -4438,6 +4432,8 @@ reset_bit: ibuf.n_merges++; ibuf_add_ops(ibuf.n_merged_ops, mops); ibuf_add_ops(ibuf.n_discarded_ops, dops); + + return err; } /** Delete all change buffer entries for a tablespace, @@ -4467,9 +4463,11 @@ loop: /* Position pcur in the insert buffer at the first entry for the space */ - btr_pcur_open_on_user_rec( - ibuf.index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, - &pcur, &mtr); + if (btr_pcur_open_on_user_rec(ibuf.index, 
search_tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, &pcur, &mtr) + != DB_SUCCESS) { + goto leave_loop; + } if (!btr_pcur_is_on_user_rec(&pcur)) { ut_ad(btr_pcur_is_after_last_on_page(&pcur)); @@ -4531,8 +4529,8 @@ ibuf_is_empty(void) ut_d(mysql_mutex_lock(&ibuf_mutex)); const buf_block_t* root = ibuf_tree_root_get(&mtr); - bool is_empty = page_is_empty(root->page.frame); - ut_a(is_empty == ibuf.empty); + bool is_empty = root && page_is_empty(root->page.frame); + ut_ad(!root || is_empty == ibuf.empty); ut_d(mysql_mutex_unlock(&ibuf_mutex)); ibuf_mtr_commit(&mtr); @@ -4609,6 +4607,7 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space) page_id_t(space->id, page_no), zip_size, &mtr); if (!bitmap_page) { mysql_mutex_unlock(&ibuf_mutex); + ibuf_exit(&mtr); mtr.commit(); return DB_CORRUPTION; } @@ -4697,29 +4696,21 @@ ibuf_set_bitmap_for_bulk_load( buf_block_t* block, bool reset) { - mtr_t mtr; - ulint free_val; - - ut_a(page_is_leaf(buf_block_get_frame(block))); - - free_val = ibuf_index_page_calc_free(block); - - mtr.start(); - fil_space_t* space = mtr.set_named_space_id(block->page.id().space()); - - buf_block_t* bitmap_page = ibuf_bitmap_get_map_page(block->page.id(), - space->zip_size(), - &mtr); - - free_val = reset ? 0 : ibuf_index_page_calc_free(block); - /* FIXME: update the bitmap byte only once! */ - ibuf_bitmap_page_set_bits( - bitmap_page, block->page.id(), block->physical_size(), - free_val, &mtr); - - ibuf_bitmap_page_set_bits( - bitmap_page, block->page.id(), block->physical_size(), - false, &mtr); - - mtr.commit(); + mtr_t mtr; + + ut_a(page_is_leaf(block->page.frame)); + mtr.start(); + fil_space_t *space= mtr.set_named_space_id(block->page.id().space()); + + if (buf_block_t *bitmap_page= + ibuf_bitmap_get_map_page(block->page.id(), space->zip_size(), &mtr)) + { + ulint free_val= reset ? 0 : ibuf_index_page_calc_free(block); + /* FIXME: update the bitmap byte only once! 
*/ + ibuf_bitmap_page_set_bits + (bitmap_page, block->page.id(), block->physical_size(), free_val, &mtr); + ibuf_bitmap_page_set_bits + (bitmap_page, block->page.id(), block->physical_size(), false, &mtr); + } + mtr.commit(); } diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index e4cfc42c88c..5a6b836819a 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -172,20 +172,6 @@ record is in spatial index */ | BTR_LATCH_FOR_DELETE \ | BTR_MODIFY_EXTERNAL))) -/** Report that an index page is corrupted. -@param[in] buffer block -@param[in] index tree */ -ATTRIBUTE_COLD ATTRIBUTE_NORETURN __attribute__((nonnull)) -void btr_corruption_report(const buf_block_t* block,const dict_index_t* index); - -/** Assert that a B-tree page is not corrupted. -@param block buffer block containing a B-tree page -@param index the B-tree index */ -#define btr_assert_not_corrupted(block, index) \ - if (!!page_is_comp(buf_block_get_frame(block)) \ - != index->table->not_redundant()) \ - btr_corruption_report(block, index) - /**************************************************************//** Checks and adjusts the root node of a tree during IMPORT TABLESPACE. @return error code, or DB_SUCCESS */ @@ -195,37 +181,20 @@ btr_root_adjust_on_import( const dict_index_t* index) /*!< in: index tree */ MY_ATTRIBUTE((warn_unused_result)); +/** Report a decryption failure. */ +ATTRIBUTE_COLD void btr_decryption_failed(const dict_index_t &index); + /** Get an index page and declare its latching order level. 
@param[in] index index tree @param[in] page page number @param[in] mode latch mode @param[in] merge whether change buffer merge should be attempted @param[in,out] mtr mini-transaction +@param[out] err error code @return block */ -inline buf_block_t *btr_block_get(const dict_index_t &index, - uint32_t page, ulint mode, bool merge, - mtr_t *mtr) -{ - dberr_t err; - - if (buf_block_t* block = buf_page_get_gen( - page_id_t(index.table->space->id, page), - index.table->space->zip_size(), mode, NULL, BUF_GET, - mtr, &err, merge && !index.is_clust())) { - ut_ad(err == DB_SUCCESS); - return block; - } else { - ut_ad(err != DB_SUCCESS); - - if (err == DB_DECRYPTION_FAILED) { - if (index.table) { - index.table->file_unreadable = true; - } - } - - return NULL; - } -} +buf_block_t *btr_block_get(const dict_index_t &index, + uint32_t page, ulint mode, bool merge, + mtr_t *mtr, dberr_t *err= nullptr); /**************************************************************//** Gets the index id field of a page. @@ -296,6 +265,7 @@ btr_node_ptr_get_child_page_no( @param[in] index_id index id @param[in] index index, or NULL to create a system table @param[in,out] mtr mini-transaction +@param[out] err error code @return page number of the created root @retval FIL_NULL if did not succeed */ uint32_t @@ -304,7 +274,9 @@ btr_create( fil_space_t* space, index_id_t index_id, dict_index_t* index, - mtr_t* mtr); + mtr_t* mtr, + dberr_t* err) + MY_ATTRIBUTE((nonnull(2,5,6), warn_unused_result)); /** Free a persistent index tree if it exists. @param[in,out] space tablespce @@ -352,12 +324,13 @@ btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset = false) @param[in,out] mtr mini-transaction */ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr); +ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result)) /** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE. 
@param[in] index clustered index with instant ALTER TABLE @param[in] all whether to reset FIL_PAGE_TYPE as well -@param[in,out] mtr mini-transaction */ -ATTRIBUTE_COLD __attribute__((nonnull)) -void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr); +@param[in,out] mtr mini-transaction +@return error code */ +dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr); /*************************************************************//** Makes tree one level higher by splitting the root, and inserts @@ -379,7 +352,8 @@ btr_root_raise_and_insert( that can be emptied, or NULL */ const dtuple_t* tuple, /*!< in: tuple to insert */ ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in: mtr */ + dberr_t* err) /*!< out: error code */ MY_ATTRIBUTE((warn_unused_result)); /*************************************************************//** Reorganizes an index page. @@ -390,15 +364,15 @@ be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages, IBUF_BITMAP_FREE is unaffected by reorganization. -@retval true if the operation was successful -@retval false if it is a compressed page, and recompression failed */ -bool +@return error code +@retval DB_FAIL if reorganizing a ROW_FORMAT=COMPRESSED page failed */ +dberr_t btr_page_reorganize( /*================*/ page_cur_t* cursor, /*!< in/out: page cursor */ dict_index_t* index, /*!< in: the index tree of the page */ mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); + MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Decide if the page should be split at the convergence point of inserts converging to the left. 
@param[in] cursor insert position @@ -437,18 +411,20 @@ btr_page_split_and_insert( that can be emptied, or NULL */ const dtuple_t* tuple, /*!< in: tuple to insert */ ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in: mtr */ + dberr_t* err) /*!< out: error code */ MY_ATTRIBUTE((nonnull, warn_unused_result)); /*******************************************************//** Inserts a data tuple to a tree on a non-leaf level. It is assumed that mtr holds an x-latch on the tree. */ -void +dberr_t btr_insert_on_non_leaf_level( ulint flags, /*!< in: undo logging and locking flags */ dict_index_t* index, /*!< in: index */ ulint level, /*!< in: level, must be > 0 */ dtuple_t* tuple, /*!< in: the record to be inserted */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr) /*!< in: mtr */ + MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Set a child page pointer record as the predefined minimum record. @tparam has_prev whether the page is supposed to have a left sibling @@ -477,10 +453,11 @@ inline void btr_set_min_rec_mark(rec_t *rec, const buf_block_t &block, @param[in,out] index b-tree @param[in] block child page @param[in,out] mtr mini-transaction -@param[out] cursor cursor pointing to the x-latched parent page */ -void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr, +@param[out] cursor cursor pointing to the x-latched parent page +@return whether the cursor was successfully positioned */ +bool btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr, btr_cur_t* cursor) - MY_ATTRIBUTE((nonnull)); + MY_ATTRIBUTE((nonnull,warn_unused_result)); #ifdef UNIV_DEBUG /************************************************************//** Checks that the node pointer to a page is appropriate. @@ -502,23 +479,24 @@ level lifts the records of the page to the father page, thus reducing the tree height. It is assumed that mtr holds an x-latch on the tree and on the page. 
If cursor is on the leaf level, mtr must also hold x-latches to the brothers, if they exist. -@return TRUE on success */ -ibool +@return error code +@retval DB_FAIL if the tree could not be merged */ +dberr_t btr_compress( /*=========*/ btr_cur_t* cursor, /*!< in/out: cursor on the page to merge or lift; the page must not be empty: when deleting records, use btr_discard_page() if the page would become empty */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ + bool adjust, /*!< in: whether the cursor position should be + adjusted even when compression occurs */ mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); + MY_ATTRIBUTE((nonnull, warn_unused_result)); /*************************************************************//** Discards a page from a B-tree. This is used to remove the last record from a B-tree page: the whole page must be removed at the same time. This cannot be used for the root page, which is allowed to be empty. */ -void +dberr_t btr_discard_page( /*=============*/ btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on @@ -540,9 +518,10 @@ btr_page_alloc( in the tree */ mtr_t* mtr, /*!< in/out: mini-transaction for the allocation */ - mtr_t* init_mtr) /*!< in/out: mini-transaction + mtr_t* init_mtr, /*!< in/out: mini-transaction for x-latching and initializing the page */ + dberr_t* err) /*!< out: error code */ MY_ATTRIBUTE((warn_unused_result)); /** Empty an index page (possibly the root page). @see btr_page_create(). 
@param[in,out] block page to be emptied @@ -577,8 +556,8 @@ btr_page_create( @param[in] blob whether this is freeing a BLOB page @param[in] latched whether index->table->space->x_lock() was called */ MY_ATTRIBUTE((nonnull)) -void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, - bool blob = false, bool space_latched = false); +dberr_t btr_page_free(dict_index_t *index, buf_block_t *block, mtr_t *mtr, + bool blob= false, bool space_latched= false); /**************************************************************//** Gets the root node of a tree and x- or s-latches it. @@ -589,7 +568,8 @@ btr_root_block_get( const dict_index_t* index, /*!< in: index tree */ rw_lock_type_t mode, /*!< in: either RW_S_LATCH or RW_X_LATCH */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr, /*!< in: mtr */ + dberr_t* err); /*!< out: error code */ /*************************************************************//** Reorganizes an index page. @@ -599,15 +579,15 @@ be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages, IBUF_BITMAP_FREE is unaffected by reorganization. 
-@retval true if the operation was successful -@retval false if it is a compressed page, and recompression failed */ -bool btr_page_reorganize_block( +@return error code +@retval DB_FAIL if reorganizing a ROW_FORMAT=COMPRESSED page failed */ +dberr_t btr_page_reorganize_block( ulint z_level,/*!< in: compression level to be used if dealing with compressed page */ buf_block_t* block, /*!< in/out: B-tree page */ dict_index_t* index, /*!< in: the index tree of the page */ mtr_t* mtr) /*!< in/out: mini-transaction */ - __attribute__((nonnull)); + __attribute__((nonnull, warn_unused_result)); #ifdef UNIV_BTR_PRINT /*************************************************************//** @@ -669,7 +649,8 @@ btr_lift_page_up( must not be empty: use btr_discard_only_page_on_level if the last record from the page should be removed */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + dberr_t* err) /*!< out: error code */ __attribute__((nonnull)); #define BTR_N_LEAF_PAGES 1 diff --git a/storage/innobase/include/btr0bulk.h b/storage/innobase/include/btr0bulk.h index 943836f8759..9fcea86d95d 100644 --- a/storage/innobase/include/btr0bulk.h +++ b/storage/innobase/include/btr0bulk.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2019, 2020, MariaDB Corporation. +Copyright (c) 2019, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -170,7 +170,7 @@ public: inline void release(); /** Start mtr and latch block */ - inline dberr_t latch(); + inline void latch(); /** Check if required space is available in the page for the rec to be inserted. We check fill factor & padding here. 
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 49fa5df6390..32dc2a1d9c6 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -65,7 +65,6 @@ enum { /* btr_cur_latch_leaves() returns latched blocks and savepoints. */ struct btr_latch_leaves_t { - /* left block, target block and right block */ buf_block_t* blocks[3]; ulint savepoints[3]; }; @@ -142,6 +141,7 @@ btr_cur_optimistic_latch_leaves( btr_cur_t* cursor, mtr_t* mtr); +MY_ATTRIBUTE((warn_unused_result)) /********************************************************************//** Searches an index tree and positions a tree cursor on a given level. NOTE: n_fields_cmp in tuple must be set so that it cannot be compared @@ -203,7 +203,7 @@ btr_cur_open_at_index_side( ulint level, /*!< in: level to search for (0=leaf) */ mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); + MY_ATTRIBUTE((nonnull, warn_unused_result)); /**********************************************************************//** Positions a cursor at a randomly chosen position within a B-tree. @@ -214,7 +214,8 @@ btr_cur_open_at_rnd_pos( dict_index_t* index, /*!< in: index */ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ btr_cur_t* cursor, /*!< in/out: B-tree cursor */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr) /*!< in: mtr */ + MY_ATTRIBUTE((nonnull,warn_unused_result)); /*************************************************************//** Tries to perform an insert to a page in an index tree, next to cursor. It is assumed that mtr holds an x-latch on the page. The operation does @@ -442,25 +443,26 @@ that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. NOTE: it is assumed that the caller has reserved enough free extents so that the compression will always succeed if done! 
-@return TRUE if compression occurred */ -ibool +@return whether compression occurred */ +bool btr_cur_compress_if_useful( /*=======================*/ btr_cur_t* cursor, /*!< in/out: cursor on the page to compress; - cursor does not stay valid if compression - occurs */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ + cursor does not stay valid if !adjust and + compression occurs */ + bool adjust, /*!< in: whether the cursor position should be + adjusted even when compression occurs */ mtr_t* mtr) /*!< in/out: mini-transaction */ MY_ATTRIBUTE((nonnull)); /*******************************************************//** Removes the record on which the tree cursor is positioned. It is assumed that the mtr has an x-latch on the page where the cursor is positioned, but no latch on the whole tree. -@return TRUE if success, i.e., the page did not become too empty */ -ibool +@return error code +@retval DB_FAIL if the page would become too empty */ +dberr_t btr_cur_optimistic_delete( -/*===========================*/ +/*======================*/ btr_cur_t* cursor, /*!< in: cursor on the record to delete; cursor stays valid: if deletion succeeds, on function exit it points to the successor @@ -502,8 +504,8 @@ btr_cur_pessimistic_delete( /** Delete the node pointer in a parent page. @param[in,out] parent cursor pointing to parent record @param[in,out] mtr mini-transaction */ -void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr) - MY_ATTRIBUTE((nonnull)); +dberr_t btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr) + MY_ATTRIBUTE((nonnull, warn_unused_result)); /***********************************************************//** Parses a redo log record of updating a record in-place. @return end of log record or NULL */ @@ -696,14 +698,15 @@ btr_rec_copy_externally_stored_field( @param[in] block leaf page where the search converged @param[in] latch_mode BTR_SEARCH_LEAF, ... 
@param[in] cursor cursor -@param[in] mtr mini-transaction -@return blocks and savepoints which actually latched. */ -btr_latch_leaves_t +@param[in,out] mtr mini-transaction +@param[out] latch_leaves latched blocks and savepoints */ +void btr_cur_latch_leaves( buf_block_t* block, ulint latch_mode, btr_cur_t* cursor, - mtr_t* mtr); + mtr_t* mtr, + btr_latch_leaves_t* latch_leaves = nullptr); /*######################################################################*/ diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index 57ee40e9ee6..5fc4e28527f 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -24,8 +24,7 @@ The index tree persistent cursor Created 2/23/1996 Heikki Tuuri *******************************************************/ -#ifndef btr0pcur_h -#define btr0pcur_h +#pragma once #include "dict0dict.h" #include "btr0cur.h" @@ -93,7 +92,7 @@ btr_pcur_free( /**************************************************************//** Initializes and opens a persistent cursor to an index tree. 
*/ -UNIV_INLINE +inline dberr_t btr_pcur_open_low( /*==============*/ @@ -110,7 +109,8 @@ btr_pcur_open_low( btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written (0 if none) */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr) /*!< in: mtr */ + MY_ATTRIBUTE((nonnull, warn_unused_result)); #define btr_pcur_open(i,t,md,l,c,m) \ btr_pcur_open_low(i,0,t,md,l,c,0,m) /**************************************************************//** @@ -137,7 +137,8 @@ btr_pcur_open_with_no_init_func( srw_spin_lock* ahi_latch, /*!< in: currently held AHI rdlock, or NULL */ #endif /* BTR_CUR_HASH_ADAPT */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr) /*!< in: mtr */ + MY_ATTRIBUTE((warn_unused_result)); #ifdef BTR_CUR_HASH_ADAPT # define btr_pcur_open_with_no_init(ix,t,md,l,cur,ahi,m) \ btr_pcur_open_with_no_init_func(ix,t,md,l,cur,ahi,m) @@ -161,7 +162,7 @@ btr_pcur_open_at_index_side( ulint level, /*!< in: level to search for (0=leaf) */ mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); + MY_ATTRIBUTE((nonnull,warn_unused_result)); /**************************************************************//** Gets the up_match value for a pcur after a search. @return number of matched fields at the cursor or to the right if @@ -180,34 +181,7 @@ ulint btr_pcur_get_low_match( /*===================*/ const btr_pcur_t* cursor); /*!< in: persistent cursor */ -/**************************************************************//** -If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first -user record satisfying the search condition, in the case PAGE_CUR_L or -PAGE_CUR_LE, on the last user record. If no such user record exists, then -in the first case sets the cursor after last in tree, and in the latter case -before first in tree. The latching mode must be BTR_SEARCH_LEAF or -BTR_MODIFY_LEAF. 
*/ -void -btr_pcur_open_on_user_rec( - dict_index_t* index, /*!< in: index */ - const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or - BTR_MODIFY_LEAF */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent - cursor */ - mtr_t* mtr); /*!< in: mtr */ -/**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree. -@return true if the index is available and we have put the cursor, false -if the index is unavailable */ -UNIV_INLINE -bool -btr_pcur_open_at_rnd_pos( - dict_index_t* index, /*!< in: index */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in/out: B-tree pcur */ - mtr_t* mtr); /*!< in: mtr */ + /**************************************************************//** Frees the possible memory heap of a persistent cursor and sets the latch mode of the persistent cursor to BTR_NO_LATCHES. @@ -283,13 +257,14 @@ btr_pcur_move_to_next( /*********************************************************//** Moves the persistent cursor to the previous record in the tree. If no records are left, the cursor stays 'before first in tree'. -@return TRUE if the cursor was not before first in tree */ -ibool +@return true if the cursor was not before first in tree */ +bool btr_pcur_move_to_prev( /*==================*/ btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the function may release the page latch */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr) /*!< in: mtr */ + MY_ATTRIBUTE((nonnull, warn_unused_result)); /*********************************************************//** Moves the persistent cursor to the next user record in the tree. If no user records are left, the cursor ends up 'after last in tree'. @@ -306,12 +281,13 @@ Moves the persistent cursor to the first record on the next page. 
Releases the latch on the current page, and bufferunfixes it. Note that there must not be modifications on the current page, as then the x-latch can be released only in mtr_commit. */ -void +dberr_t btr_pcur_move_to_next_page( /*=======================*/ btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the last record of the current page */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr) /*!< in: mtr */ + MY_ATTRIBUTE((nonnull, warn_unused_result)); #define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur) #define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur) @@ -406,7 +382,9 @@ struct btr_pcur_t{ SAME_UNIQ, /** cursor position is not on user rec or points on the record with not the same uniq field values as in the stored record */ - NOT_SAME + NOT_SAME, + /** the index tree is corrupted */ + CORRUPTED }; /** a B-tree cursor */ btr_cur_t btr_cur; @@ -465,6 +443,7 @@ struct btr_pcur_t{ /** Return the index of this persistent cursor */ dict_index_t* index() const { return(btr_cur.index); } + MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Restores the stored position of a persistent cursor bufferfixing the page and obtaining the specified latches. If the cursor position was saved when the @@ -480,12 +459,13 @@ struct btr_pcur_t{ empty tree: restores to before first or after the last in the tree. @param restore_latch_mode BTR_SEARCH_LEAF, ... 
@param mtr mtr - @return btr_pcur_t::SAME_ALL cursor position on user rec and points on + @retval SAME_ALL cursor position on user rec and points on the record with the same field values as in the stored record, - btr_pcur_t::SAME_UNIQ cursor position is on user rec and points on the + @retval SAME_UNIQ cursor position is on user rec and points on the record with the same unique field values as in the stored record, - btr_pcur_t::NOT_SAME cursor position is not on user rec or points on - the record with not the samebuniq field values as in the stored */ + @retval NOT_SAME cursor position is not on user rec or points on + the record with not the same uniq field values as in the stored + @retval CORRUPTED if the index is corrupted */ restore_status restore_position(ulint latch_mode, mtr_t *mtr); }; @@ -508,6 +488,32 @@ inline rec_t *btr_pcur_get_rec(const btr_pcur_t *cursor) return cursor->btr_cur.page_cur.rec; } -#include "btr0pcur.inl" +/** Open a cursor on the first user record satisfying the search condition; +in case of no match, after the last index record. */ +MY_ATTRIBUTE((nonnull, warn_unused_result)) +inline +dberr_t +btr_pcur_open_on_user_rec( + dict_index_t* index, /*!< in: index */ + const dtuple_t* tuple, /*!< in: tuple on which search done */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... 
*/ + ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or + BTR_MODIFY_LEAF */ + btr_pcur_t* cursor, /*!< in: memory buffer for persistent + cursor */ + mtr_t* mtr) /*!< in: mtr */ +{ + ut_ad(mode == PAGE_CUR_GE || mode == PAGE_CUR_G); + ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); + if (dberr_t err= btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr)) + return err; + if (!btr_pcur_is_after_last_on_page(cursor) || + btr_pcur_is_after_last_in_tree(cursor)) + return DB_SUCCESS; + if (dberr_t err= btr_pcur_move_to_next_page(cursor, mtr)) + return err; + btr_pcur_move_to_next_on_page(cursor); + return DB_SUCCESS; +} -#endif +#include "btr0pcur.inl" diff --git a/storage/innobase/include/btr0pcur.inl b/storage/innobase/include/btr0pcur.inl index 4bd15fd7287..fd4eeb9392a 100644 --- a/storage/innobase/include/btr0pcur.inl +++ b/storage/innobase/include/btr0pcur.inl @@ -124,16 +124,8 @@ btr_pcur_is_on_user_rec( /*====================*/ const btr_pcur_t* cursor) /*!< in: persistent cursor */ { - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - if (btr_pcur_is_before_first_on_page(cursor) - || btr_pcur_is_after_last_on_page(cursor)) { - - return(FALSE); - } - - return(TRUE); + return !btr_pcur_is_before_first_on_page(cursor) && + !btr_pcur_is_after_last_on_page(cursor); } /*********************************************************//** @@ -209,11 +201,10 @@ btr_pcur_move_to_next_user_rec( cursor->old_stored = false; loop: if (btr_pcur_is_after_last_on_page(cursor)) { - if (btr_pcur_is_after_last_in_tree(cursor)) { + if (btr_pcur_is_after_last_in_tree(cursor) + || btr_pcur_move_to_next_page(cursor, mtr) != DB_SUCCESS) { return(FALSE); } - - btr_pcur_move_to_next_page(cursor, mtr); } else { btr_pcur_move_to_next_on_page(cursor); } @@ -244,15 +235,13 @@ btr_pcur_move_to_next( cursor->old_stored = false; if (btr_pcur_is_after_last_on_page(cursor)) { - if (btr_pcur_is_after_last_in_tree(cursor)) { + if 
(btr_pcur_is_after_last_in_tree(cursor) + || btr_pcur_move_to_next_page(cursor, mtr) != DB_SUCCESS) { return(FALSE); } - - btr_pcur_move_to_next_page(cursor, mtr); - return(TRUE); + } else { + btr_pcur_move_to_next_on_page(cursor); } - - btr_pcur_move_to_next_on_page(cursor); return(TRUE); } @@ -330,7 +319,7 @@ btr_pcur_free( /**************************************************************//** Initializes and opens a persistent cursor to an index tree. */ -UNIV_INLINE +inline dberr_t btr_pcur_open_low( /*==============*/ @@ -349,42 +338,18 @@ btr_pcur_open_low( (0 if none) */ mtr_t* mtr) /*!< in: mtr */ { - btr_cur_t* btr_cursor; - dberr_t err = DB_SUCCESS; - - /* Initialize the cursor */ - - btr_pcur_init(cursor); - - cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - cursor->search_mode = mode; - - /* Search with the tree cursor */ - - btr_cursor = btr_pcur_get_btr_cur(cursor); - - ut_ad(!dict_index_is_spatial(index)); - - err = btr_cur_search_to_nth_level_func( - index, level, tuple, mode, latch_mode, btr_cursor, + ut_ad(!index->is_spatial()); + btr_pcur_init(cursor); + cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + cursor->search_mode= mode; + cursor->pos_state= BTR_PCUR_IS_POSITIONED; + cursor->trx_if_known= nullptr; + return btr_cur_search_to_nth_level_func(index, level, tuple, mode, latch_mode, + btr_pcur_get_btr_cur(cursor), #ifdef BTR_CUR_HASH_ADAPT - NULL, -#endif /* BTR_CUR_HASH_ADAPT */ - mtr, autoinc); - - if (UNIV_UNLIKELY(err != DB_SUCCESS)) { - ib::warn() << "btr_pcur_open_low" - << " level: " << level - << " table: " << index->table->name - << " index: " << index->name - << " error: " << err; - } - - cursor->pos_state = BTR_PCUR_IS_POSITIONED; - - cursor->trx_if_known = NULL; - - return(err); + nullptr, +#endif + mtr, autoinc); } /**************************************************************//** @@ -476,38 +441,6 @@ btr_pcur_open_at_index_side( return (err); } 
-/**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree. -@return true if the index is available and we have put the cursor, false -if the index is unavailable */ -UNIV_INLINE -bool -btr_pcur_open_at_rnd_pos( - dict_index_t* index, /*!< in: index */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in/out: B-tree pcur */ - mtr_t* mtr) /*!< in: mtr */ -{ - /* Initialize the cursor */ - - cursor->latch_mode = latch_mode; - cursor->search_mode = PAGE_CUR_G; - - btr_pcur_init(cursor); - - bool available; - - available = btr_cur_open_at_rnd_pos(index, latch_mode, - btr_pcur_get_btr_cur(cursor), - mtr); - cursor->pos_state = BTR_PCUR_IS_POSITIONED; - cursor->old_stored = false; - - cursor->trx_if_known = NULL; - - return(available); -} - /**************************************************************//** Frees the possible memory heap of a persistent cursor and sets the latch mode of the persistent cursor to BTR_NO_LATCHES. 
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index c9534a2f455..22a07e8d86a 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -43,21 +43,12 @@ Created 11/5/1995 Heikki Tuuri #include "transactional_lock_guard.h" #include -// Forward declaration -struct fil_addr_t; - /** @name Modes for buf_page_get_gen */ /* @{ */ #define BUF_GET 10 /*!< get always */ #define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */ #define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make the block young in the LRU list */ -#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but - set no latch; we have - separated this case, because - it is error-prone programming - not to set a latch, and it - should be used with care */ #define BUF_GET_IF_IN_POOL_OR_WATCH 15 /*!< Get the page only if it's in the buffer pool, if not then set a watch @@ -65,7 +56,6 @@ struct fil_addr_t; #define BUF_GET_POSSIBLY_FREED 16 /*!< Like BUF_GET, but do not mind if the file page has been freed. */ -#define BUF_EVICT_IF_IN_POOL 20 /*!< evict a clean block if found */ /* @} */ /** If LRU list of a buf_pool is less than this size then LRU eviction @@ -167,21 +157,9 @@ buf_block_free( /*===========*/ buf_block_t* block); /*!< in, own: block to be freed */ -/**************************************************************//** -NOTE! The following macros should be used instead of buf_page_get_gen, -to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed -in LA! */ #define buf_page_get(ID, SIZE, LA, MTR) \ buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, MTR) -/**************************************************************//** -Use these macros to bufferfix a page with no latching. Remember not to -read the contents of the page unless you know it is safe. Do not modify -the contents of the page! We have separated this case, because it is -error-prone programming not to set a latch, and it should be used -with care. 
*/ -#define buf_page_get_with_no_latch(ID, SIZE, MTR) \ - buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, MTR) /** Try to acquire a page latch. @param rw_latch RW_S_LATCH or RW_X_LATCH @param block guessed block @@ -217,8 +195,8 @@ buf_page_t *buf_page_get_zip(const page_id_t page_id, ulint zip_size); @param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH @param[in] guess guessed block or NULL @param[in] mode BUF_GET, BUF_GET_IF_IN_POOL, -BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH -@param[in] mtr mini-transaction +BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH +@param[in,out] mtr mini-transaction @param[out] err DB_SUCCESS or error code @param[in] allow_ibuf_merge Allow change buffer merge while reading the pages from file. @@ -232,7 +210,8 @@ buf_page_get_gen( ulint mode, mtr_t* mtr, dberr_t* err = NULL, - bool allow_ibuf_merge = false); + bool allow_ibuf_merge = false) + MY_ATTRIBUTE((nonnull(6), warn_unused_result)); /** This is the low level function used to get access to a database page. @param[in] page_id page id @@ -240,8 +219,9 @@ buf_page_get_gen( @param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH @param[in] guess guessed block or NULL @param[in] mode BUF_GET, BUF_GET_IF_IN_POOL, -BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH -@param[in] mtr mini-transaction +BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH +@param[in,out] mtr mini-transaction, or NULL if a + block with page_id is to be evicted @param[out] err DB_SUCCESS or error code @param[in] allow_ibuf_merge Allow change buffer merge to happen while reading the page from file @@ -1394,8 +1374,9 @@ public: } /** Release and evict a corrupted page. 
- @param bpage page that was being read */ - ATTRIBUTE_COLD void corrupted_evict(buf_page_t *bpage); + @param bpage x-latched page that was found corrupted + @param state expected current state of the page */ + ATTRIBUTE_COLD void corrupted_evict(buf_page_t *bpage, uint32_t state); /** Release a memory block to the buffer pool. */ ATTRIBUTE_COLD void free_block(buf_block_t *block); diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h index 869905c6b97..3e14e0ace69 100644 --- a/storage/innobase/include/dict0boot.h +++ b/storage/innobase/include/dict0boot.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2020, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -33,9 +33,6 @@ Created 4/18/1996 Heikki Tuuri #include "buf0buf.h" #include "dict0dict.h" -/** @return the DICT_HDR block, x-latched */ -#define dict_hdr_get(mtr) buf_page_get \ - (page_id_t(DICT_HDR_SPACE, DICT_HDR_PAGE_NO), 0, RW_X_LATCH, mtr) /**********************************************************************//** Returns a new table, index, or space id. */ void diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h index cc7ccbfb9d8..c40df12babe 100644 --- a/storage/innobase/include/dict0crea.h +++ b/storage/innobase/include/dict0crea.h @@ -110,7 +110,7 @@ uint32_t dict_drop_index_tree(btr_pcur_t *pcur, trx_t *trx, mtr_t *mtr) /***************************************************************//** Creates an index tree for the index if it is not a member of a cluster. Don't update SYSTEM TABLES. 
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +@return error code */ dberr_t dict_create_index_tree_in_mem( /*==========================*/ diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index f3a00a81f6b..f02ee0ddf9d 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -369,12 +369,8 @@ dberr_t dict_table_rename_in_cache( /*=======================*/ dict_table_t* table, /*!< in/out: table */ - const char* new_name, /*!< in: new name */ - bool rename_also_foreigns, - /*!< in: in ALTER TABLE we want - to preserve the original table name - in constraints which reference it */ - bool replace_new_file = false) + span new_name, /*!< in: new name */ + bool replace_new_file) /*!< in: whether to replace the file with the new name (as part of rolling back TRUNCATE) */ @@ -642,19 +638,6 @@ dict_table_get_next_index( # define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index) #endif /* UNIV_DEBUG */ -/* Skip corrupted index */ -#define dict_table_skip_corrupt_index(index) \ - while (index && index->is_corrupted()) { \ - index = dict_table_get_next_index(index); \ - } - -/* Get the next non-corrupt index */ -#define dict_table_next_uncorrupted_index(index) \ -do { \ - index = dict_table_get_next_index(index); \ - dict_table_skip_corrupt_index(index); \ -} while (0) - #define dict_index_is_clust(index) (index)->is_clust() #define dict_index_is_auto_gen_clust(index) (index)->is_gen_clust() #define dict_index_is_unique(index) (index)->is_unique() @@ -1677,41 +1660,13 @@ dict_fs2utf8( size_t table_utf8_size)/*!< in: table_utf8 size */ MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Check whether the table is corrupted. 
-@return nonzero for corrupted table, zero for valid tables */ -UNIV_INLINE -ulint -dict_table_is_corrupted( -/*====================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/** Flag an index and table corrupted both in the data dictionary cache +/** Flag an index corrupted both in the data dictionary cache and in the system table SYS_INDEXES. @param index index to be flagged as corrupted -@param ctx context (for error log reporting) -@param dict_locked whether dict_sys.latch is held in exclusive mode */ -void dict_set_corrupted(dict_index_t *index, const char *ctx, bool dict_locked) +@param ctx context (for error log reporting) */ +void dict_set_corrupted(dict_index_t *index, const char *ctx) ATTRIBUTE_COLD __attribute__((nonnull)); -/** Flags an index corrupted in the data dictionary cache only. This -is used mostly to mark a corrupted index when index's own dictionary -is corrupted, and we force to load such index for repair purpose -@param[in,out] index index that is corrupted */ -void -dict_set_corrupted_index_cache_only( - dict_index_t* index); - -/**********************************************************************//** -Flags a table with specified space_id corrupted in the table dictionary -cache. -@return TRUE if successful */ -bool dict_set_corrupted_by_space(const fil_space_t* space); - -/** Flag a table encrypted in the data dictionary cache. */ -void dict_set_encrypted_by_space(const fil_space_t* space); - /** Sets merge_threshold in the SYS_INDEXES @param[in,out] index index @param[in] merge_threshold value to set */ diff --git a/storage/innobase/include/dict0dict.inl b/storage/innobase/include/dict0dict.inl index a98ca38d9b3..a210c839020 100644 --- a/storage/innobase/include/dict0dict.inl +++ b/storage/innobase/include/dict0dict.inl @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. 
All Rights Reserved. -Copyright (c) 2013, 2021, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1100,19 +1100,6 @@ dict_max_v_field_len_store_undo( return(max_log_len); } -/********************************************************************//** -Check whether the table is corrupted. -@return nonzero for corrupted table, zero for valid tables */ -UNIV_INLINE -ulint -dict_table_is_corrupted( -/*====================*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - return(table->corrupted); -} - /** Check if the table is found is a file_per_table tablespace. This test does not use table flags2 since some REDUNDANT tables in the system tablespace may have garbage in the MIX_LEN field where flags2 is diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 622a453c136..da96d07e260 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -1187,6 +1187,13 @@ public: /** @return whether this index requires locking */ bool has_locking() const { return !is_ibuf(); } + /** @return whether this is a normal B-tree index + (not the change buffer, not SPATIAL or FULLTEXT) */ + bool is_btree() const { + return UNIV_LIKELY(!(type & (DICT_IBUF | DICT_SPATIAL + | DICT_FTS | DICT_CORRUPT))); + } + /** @return whether the index includes virtual columns */ bool has_virtual() const { return type & DICT_VIRTUAL; } @@ -1400,8 +1407,9 @@ public: /** Clear the index tree and reinitialize the root page, in the rollback of TRX_UNDO_EMPTY. The BTR_SEG_LEAF is freed and reinitialized. 
@param thr query thread + @return error code */ + dberr_t clear(que_thr_t *thr); /** Check whether the online log is dummy value to indicate whether table undergoes active DDL. @@ -1963,8 +1971,9 @@ struct dict_table_t { char (&tbl_name)[NAME_LEN + 1], size_t *db_name_len, size_t *tbl_name_len) const; - /** Clear the table when rolling back TRX_UNDO_EMPTY */ - void clear(que_thr_t *thr); + /** Clear the table when rolling back TRX_UNDO_EMPTY + @return error code */ + dberr_t clear(que_thr_t *thr); #ifdef UNIV_DEBUG /** @return whether the current thread holds the lock_mutex */ @@ -2015,7 +2024,7 @@ struct dict_table_t { @param new_name name of the table @param replace whether to replace the file with the new name (as part of rolling back TRUNCATE) */ - dberr_t rename_tablespace(const char *new_name, bool replace) const; + dberr_t rename_tablespace(span new_name, bool replace) const; private: /** Initialize instant->field_map. diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 3bcb3be563d..aff5109300a 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -418,12 +418,16 @@ public: /** MariaDB encryption data */ fil_space_crypt_t *crypt_data; - /** Checks that this tablespace in a list of unflushed tablespaces. */ + /** Whether needs_flush(), or this is in fil_system.unflushed_spaces */ bool is_in_unflushed_spaces; - /** Checks that this tablespace needs key rotation. */ + + /** Whether this is in fil_system.default_encrypt_tables (needs key rotation) */ bool is_in_default_encrypt; private: + /** Whether any corruption of this tablespace has been reported */ + mutable std::atomic_flag is_corrupted; + /** mutex to protect freed_ranges and last_freed_lsn */ std::mutex freed_range_mutex; @@ -501,6 +505,9 @@ public: written while the space ID is being updated in each page.
*/ inline void set_imported(); + /** Report the tablespace as corrupted */ + ATTRIBUTE_COLD void set_corrupted() const; + /** @return whether the storage device is rotational (HDD, not SSD) */ inline bool is_rotational() const; diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index a519fa096b1..7b66e58e488 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2021, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -207,16 +207,17 @@ typedef byte fseg_inode_t; (16 + 3 * FLST_BASE_NODE_SIZE \ + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE) -static constexpr uint32_t FSEG_MAGIC_N_VALUE= 97937874; +static constexpr byte FSEG_MAGIC_N_BYTES[4]={0x05,0xd6,0x69,0xd2}; -#define FSEG_FILLFACTOR 8 /* If this value is x, then if - the number of unused but reserved +#define FSEG_FILLFACTOR 8 /* If the number of unused but reserved pages in a segment is less than - reserved pages * 1/x, and there are + reserved pages / FSEG_FILLFACTOR, + and there are at least FSEG_FRAG_LIMIT used pages, then we allow a new empty extent to be added to the segment in - fseg_alloc_free_page. Otherwise, we + fseg_alloc_free_page_general(). + Otherwise, we use unused pages of the segment. */ #define FSEG_FRAG_LIMIT FSEG_FRAG_ARR_N_SLOTS @@ -345,22 +346,25 @@ fsp_header_check_encryption_key( /** Initialize a tablespace header. 
@param[in,out] space tablespace @param[in] size current size in blocks -@param[in,out] mtr mini-transaction */ -void fsp_header_init(fil_space_t* space, uint32_t size, mtr_t* mtr) - MY_ATTRIBUTE((nonnull)); +@param[in,out] mtr mini-transaction +@return error code */ +dberr_t fsp_header_init(fil_space_t *space, uint32_t size, mtr_t *mtr) + MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Create a new segment. @param space tablespace @param byte_offset byte offset of the created segment header @param mtr mini-transaction +@param err error code @param has_done_reservation whether fsp_reserve_free_extents() was invoked @param block block where segment header is placed, or NULL to allocate an additional page for that @return the block where the segment header is placed, x-latched -@retval NULL if could not create segment because of lack of space */ +@retval nullptr if could not create segment */ buf_block_t* -fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr, - bool has_done_reservation= false, buf_block_t *block= NULL); +fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr, dberr_t *err, + bool has_done_reservation= false, buf_block_t *block= nullptr) + MY_ATTRIBUTE((nonnull(1,3,4), warn_unused_result)); /** Calculate the number of pages reserved by a segment, and how many pages are currently used. @@ -375,22 +379,6 @@ ulint fseg_n_reserved_pages(const buf_block_t &block, MY_ATTRIBUTE((nonnull)); /**********************************************************************//** Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize -file space fragmentation. 
-@param[in,out] seg_header segment header -@param[in] hint hint of which page would be desirable -@param[in] direction if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR -@param[in,out] mtr mini-transaction -@return X-latched block, or NULL if no page could be allocated */ -#define fseg_alloc_free_page(seg_header, hint, direction, mtr) \ - fseg_alloc_free_page_general(seg_header, hint, direction, \ - false, mtr, mtr) -/**********************************************************************//** -Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space fragmentation. @retval NULL if no page could be allocated */ @@ -411,8 +399,9 @@ fseg_alloc_free_page_general( is no need to do the check for this individual page */ mtr_t* mtr, /*!< in/out: mini-transaction */ - mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction + mtr_t* init_mtr,/*!< in/out: mtr or another mini-transaction in which the page should be initialized. */ + dberr_t* err) /*!< out: error code */ MY_ATTRIBUTE((warn_unused_result, nonnull)); /** Reserves free pages from a tablespace. All mini-transactions which may @@ -441,19 +430,21 @@ if the table only occupies < FSP_EXTENT_SIZE pages. That is why we apply different rules in that special case, just ensuring that there are n_pages free pages available. -@param[out] n_reserved number of extents actually reserved; if we - return true and the tablespace size is < - FSP_EXTENT_SIZE pages, then this can be 0, - otherwise it is n_ext -@param[in,out] space tablespace -@param[in] n_ext number of extents to reserve -@param[in] alloc_type page reservation type (FSP_BLOB, etc) -@param[in,out] mtr the mini transaction -@param[in] n_pages for small tablespaces (tablespace size is - less than FSP_EXTENT_SIZE), number of free - pages to reserve. 
-@return true if we were able to make the reservation */ -bool +@param[out] n_reserved number of extents actually reserved; if we + return true and the tablespace size is < + FSP_EXTENT_SIZE pages, then this can be 0, + otherwise it is n_ext +@param[in,out] space tablespace +@param[in] n_ext number of extents to reserve +@param[in] alloc_type page reservation type (FSP_BLOB, etc) +@param[in,out] mtr the mini transaction +@param[out] err error code +@param[in] n_pages for small tablespaces (tablespace size is + less than FSP_EXTENT_SIZE), number of free + pages to reserve. +@return error code +@retval DB_SUCCESS if we were able to make the reservation */ +dberr_t fsp_reserve_free_extents( uint32_t* n_reserved, fil_space_t* space, @@ -467,22 +458,26 @@ fsp_reserve_free_extents( @param[in,out] space tablespace @param[in] offset page number @param[in,out] mtr mini-transaction -@param[in] have_latch whether space->x_lock() was already called */ -void +@param[in] have_latch whether space->x_lock() was already called +@return error code */ +dberr_t fseg_free_page( fseg_header_t* seg_header, fil_space_t* space, uint32_t offset, mtr_t* mtr, - bool have_latch = false); -/** Determine whether a page is free. -@param[in,out] space tablespace -@param[in] page page number -@return whether the page is marked as free */ -bool -fseg_page_is_free(fil_space_t* space, unsigned page) + bool have_latch = false) MY_ATTRIBUTE((nonnull, warn_unused_result)); +/** Determine whether a page is allocated. +@param space tablespace +@param page page number +@return error code +@retval DB_SUCCESS if the page is marked as free +@retval DB_SUCCESS_LOCKED_REC if the page is marked as allocated */ +dberr_t fseg_page_is_allocated(fil_space_t *space, unsigned page) + MY_ATTRIBUTE((nonnull, warn_unused_result)); + /** Frees part of a segment. This function can be used to free a segment by repeatedly calling this function in different mini-transactions. 
Doing the freeing in a single mini-transaction diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h index f8e4c06baae..1912c31b744 100644 --- a/storage/innobase/include/fsp0types.h +++ b/storage/innobase/include/fsp0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2020, MariaDB Corporation. +Copyright (c) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -49,7 +49,7 @@ static constexpr size_t SRV_SPACE_ID_UPPER_BOUND= 0xFFFFFFF0; If records are inserted in order, there are the following flags to tell this (their type is made byte for the compiler to warn if direction and hint parameters are switched in -fseg_alloc_free_page) */ +fseg_alloc_free_page_general) */ /* @{ */ #define FSP_UP ((byte)111) /*!< alphabetically upwards */ #define FSP_DOWN ((byte)112) /*!< alphabetically downwards */ diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h index 41eacc21b81..fb278d543c4 100644 --- a/storage/innobase/include/fts0types.h +++ b/storage/innobase/include/fts0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -28,7 +28,6 @@ Created 2007-03-27 Sunny Bains #define INNOBASE_FTS0TYPES_H #include "fts0fts.h" -#include "fut0fut.h" #include "pars0pars.h" #include "que0types.h" #include "ut0byte.h" diff --git a/storage/innobase/include/fut0fut.h b/storage/innobase/include/fut0fut.h deleted file mode 100644 index 58c33c12a29..00000000000 --- a/storage/innobase/include/fut0fut.h +++ /dev/null @@ -1,77 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2019, 2021, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fut0fut.h -File-based utilities - -Created 12/13/1995 Heikki Tuuri -***********************************************************************/ - - -#ifndef fut0fut_h -#define fut0fut_h - -#include "mtr0mtr.h" - -/** Gets a pointer to a file address and latches the page. 
-@param[in] space space id -@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 -@param[in] addr file address -@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_SX_LATCH -@param[out] ptr_block file page -@param[in,out] mtr mini-transaction -@return pointer to a byte in (*ptr_block)->frame; the *ptr_block is -bufferfixed and latched */ -inline -byte* -fut_get_ptr( - ulint space, - ulint zip_size, - fil_addr_t addr, - rw_lock_type_t rw_latch, - mtr_t* mtr, - buf_block_t** ptr_block = NULL) -{ - buf_block_t* block; - byte* ptr = NULL; - - ut_ad(addr.boffset < srv_page_size); - ut_ad((rw_latch == RW_S_LATCH) - || (rw_latch == RW_X_LATCH) - || (rw_latch == RW_SX_LATCH)); - - block = buf_page_get_gen(page_id_t(space, addr.page), zip_size, - rw_latch, nullptr, BUF_GET_POSSIBLY_FREED, - mtr); - if (!block) { - } else if (block->page.is_freed()) { - block = nullptr; - } else { - ptr = buf_block_get_frame(block) + addr.boffset; - } - - if (ptr_block != NULL) { - *ptr_block = block; - } - - return(ptr); -} - -#endif /* fut0fut_h */ diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h index c27de3db786..746dab80400 100644 --- a/storage/innobase/include/fut0lst.h +++ b/storage/innobase/include/fut0lst.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2021, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,30 +24,21 @@ File-based list utilities Created 11/28/1995 Heikki Tuuri ***********************************************************************/ -#ifndef fut0lst_h -#define fut0lst_h +#pragma once + +/* The physical size of a list base node in bytes */ +#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE) +/* The physical size of a list node in bytes */ +#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE) #ifdef UNIV_INNOCHECKSUM # include "fil0fil.h" #else -#include "fut0fut.h" -#include "mtr0log.h" - -/* The C 'types' of base node and list node: these should be used to -write self-documenting code. Of course, the sizeof macro cannot be -applied to these types! */ +# include "mtr0log.h" typedef byte flst_base_node_t; typedef byte flst_node_t; -#endif /* !UNIV_INNOCHECKSUM */ - -/* The physical size of a list base node in bytes */ -#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE) -/* The physical size of a list node in bytes */ -#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE) - -#ifndef UNIV_INNOCHECKSUM /* We define the field offsets of a node for the list */ #define FLST_PREV 0 /* 6-byte address of the previous list element; the page part of address is FIL_NULL, if no @@ -83,7 +74,7 @@ inline void flst_init(const buf_block_t* block, uint16_t ofs, mtr_t* mtr) @param[in] block file page @param[in,out] base base node @param[in,out] mtr mini-transaction */ -void flst_init(const buf_block_t& block, byte *base, mtr_t *mtr) +void flst_init(const buf_block_t &block, byte *base, mtr_t *mtr) MY_ATTRIBUTE((nonnull)); /** Append a file list node to a list. 
@@ -91,28 +82,31 @@ void flst_init(const buf_block_t& block, byte *base, mtr_t *mtr) @param[in] boffset byte offset of the base node @param[in,out] add block to be added @param[in] aoffset byte offset of the node to be added -@param[in,outr] mtr mini-transaction */ -void flst_add_last(buf_block_t *base, uint16_t boffset, - buf_block_t *add, uint16_t aoffset, mtr_t *mtr) - MY_ATTRIBUTE((nonnull)); +@param[in,out] mtr mini-transaction +@return error code */ +dberr_t flst_add_last(buf_block_t *base, uint16_t boffset, + buf_block_t *add, uint16_t aoffset, mtr_t *mtr) + MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Prepend a file list node to a list. @param[in,out] base base node block @param[in] boffset byte offset of the base node @param[in,out] add block to be added @param[in] aoffset byte offset of the node to be added -@param[in,outr] mtr mini-transaction */ -void flst_add_first(buf_block_t *base, uint16_t boffset, +@param[in,out] mtr mini-transaction +@return error code */ +dberr_t flst_add_first(buf_block_t *base, uint16_t boffset, buf_block_t *add, uint16_t aoffset, mtr_t *mtr) - MY_ATTRIBUTE((nonnull)); + MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Remove a file list node. 
@param[in,out] base base node block @param[in] boffset byte offset of the base node @param[in,out] cur block to be removed @param[in] coffset byte offset of the current record to be removed -@param[in,outr] mtr mini-transaction */ -void flst_remove(buf_block_t *base, uint16_t boffset, - buf_block_t *cur, uint16_t coffset, mtr_t *mtr) - MY_ATTRIBUTE((nonnull)); +@param[in,out] mtr mini-transaction +@return error code */ +dberr_t flst_remove(buf_block_t *base, uint16_t boffset, + buf_block_t *cur, uint16_t coffset, mtr_t *mtr) + MY_ATTRIBUTE((nonnull, warn_unused_result)); /** @return the length of a list */ inline uint32_t flst_get_len(const flst_base_node_t *base) @@ -154,11 +148,9 @@ inline fil_addr_t flst_get_prev_addr(const flst_node_t *node) return flst_read_addr(node + FLST_PREV); } -#ifdef UNIV_DEBUG +# ifdef UNIV_DEBUG /** Validate a file-based list. */ void flst_validate(const buf_block_t *base, uint16_t boffset, mtr_t *mtr); -#endif +# endif #endif /* !UNIV_INNOCHECKSUM */ - -#endif diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h index 024df3e4094..4e10b90173e 100644 --- a/storage/innobase/include/gis0rtree.h +++ b/storage/innobase/include/gis0rtree.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2020, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -93,7 +93,8 @@ rtr_page_split_and_insert( mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ const dtuple_t* tuple, /*!< in: tuple to insert */ ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr, /*!< in: mtr */ + dberr_t* err); /*!< out: error code */ /**************************************************************//** Sets the child node mbr in a node pointer. */ @@ -123,7 +124,8 @@ rtr_pcur_move_to_next( function may release the page latch */ ulint cur_level, /*!< in: current level */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr) /*!< in: mtr */ + MY_ATTRIBUTE((warn_unused_result)); /****************************************************************//** Searches the right position in rtree for a page cursor. */ @@ -257,18 +259,20 @@ rtr_get_mbr_from_tuple( @param[in] offsets work area for the return value @param[in] index rtree index @param[in] block child page in the index -@param[in] mtr mtr +@param[in,out] mtr mtr @param[in] sea_cur search cursor, contains information about parent nodes in search -@param[in] cursor cursor on node pointer record, - its page x-latched */ -void +@param[out] cursor cursor on node pointer record, + its page x-latched +@return whether the cursor was successfully positioned */ +bool rtr_page_get_father( dict_index_t* index, buf_block_t* block, mtr_t* mtr, btr_cur_t* sea_cur, - btr_cur_t* cursor); + btr_cur_t* cursor) + MY_ATTRIBUTE((nonnull(1,2,3,5), warn_unused_result)); /************************************************************//** Returns the father block to a page. It is assumed that mtr holds @@ -302,21 +306,14 @@ rtr_store_parent_path( /**************************************************************//** Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. 
*/ -void +bool rtr_pcur_open( dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page from the - record! */ ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - mtr_t* mtr); /*!< in: mtr */ - -struct btr_cur_t; + mtr_t* mtr) /*!< in: mtr */ + MY_ATTRIBUTE((warn_unused_result)); /*********************************************************//** Returns the R-Tree node stored in the parent search path diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h index b1abb90ff8e..93e46e7b0be 100644 --- a/storage/innobase/include/ibuf0ibuf.h +++ b/storage/innobase/include/ibuf0ibuf.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2020, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -327,9 +327,11 @@ exist entries for such a page if the page belonged to an index which subsequently was dropped. 
@param block X-latched page to try to apply changes to, or NULL to discard @param page_id page identifier -@param zip_size ROW_FORMAT=COMPRESSED page size, or 0 */ -void ibuf_merge_or_delete_for_page(buf_block_t *block, const page_id_t page_id, - ulint zip_size); +@param zip_size ROW_FORMAT=COMPRESSED page size, or 0 +@return error code */ +dberr_t ibuf_merge_or_delete_for_page(buf_block_t *block, + const page_id_t page_id, + ulint zip_size); /** Delete all change buffer entries for a tablespace, in DISCARD TABLESPACE, IMPORT TABLESPACE, or crash recovery. diff --git a/storage/innobase/include/lock0lock.inl b/storage/innobase/include/lock0lock.inl index ca64587628a..1b9255ffb3e 100644 --- a/storage/innobase/include/lock0lock.inl +++ b/storage/innobase/include/lock0lock.inl @@ -71,7 +71,6 @@ lock_rec_create( /*!< in: TRUE if caller owns trx mutex */ { - btr_assert_not_corrupted(block, index); return lock_rec_create_low( c_lock, type_mode, block->page.id(), block->page.frame, heap_no, diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 9bca993b0f7..b1c09cfa2bc 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -44,11 +44,12 @@ dberr_t recv_find_max_checkpoint(ulint* max_field) MY_ATTRIBUTE((nonnull, warn_unused_result)); +ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Apply any buffered redo log to a page that was just read from a data file. 
@param[in,out] space tablespace -@param[in,out] bpage buffer pool page */ -ATTRIBUTE_COLD void recv_recover_page(fil_space_t* space, buf_page_t* bpage) - MY_ATTRIBUTE((nonnull)); +@param[in,out] bpage buffer pool page +@return whether the page was recovered correctly */ +bool recv_recover_page(fil_space_t* space, buf_page_t* bpage); /** Start recovering from a redo log checkpoint. @param[in] flush_lsn FIL_PAGE_FILE_FLUSH_LSN @@ -296,13 +297,16 @@ private: @param p iterator pointing to page_id @param mtr mini-transaction @param b pre-allocated buffer pool block - @return whether the page was successfully initialized */ + @return the recovered block + @retval nullptr if the page cannot be initialized based on log records + @retval -1 if the page cannot be recovered due to corruption */ inline buf_block_t *recover_low(const page_id_t page_id, map::iterator &p, mtr_t &mtr, buf_block_t *b); /** Attempt to initialize a page based on redo log records. @param page_id page identifier @return the recovered block - @retval nullptr if the page cannot be initialized based on log records */ + @retval nullptr if the page cannot be initialized based on log records + @retval -1 if the page cannot be recovered due to corruption */ buf_block_t *recover_low(const page_id_t page_id); /** All found log files (multiple ones are possible if we are upgrading @@ -404,7 +408,8 @@ public: /** Attempt to initialize a page based on redo log records. @param page_id page identifier @return the recovered block - @retval nullptr if the page cannot be initialized based on log records */ + @retval nullptr if the page cannot be initialized based on log records + @retval -1 if the page cannot be recovered due to corruption */ buf_block_t *recover(const page_id_t page_id) { return UNIV_UNLIKELY(recovery_on) ? 
recover_low(page_id) : nullptr; diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 1fb5b7707e9..3208e492c2f 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -24,8 +24,7 @@ Mini-transaction buffer Created 11/26/1995 Heikki Tuuri *******************************************************/ -#ifndef mtr0mtr_h -#define mtr0mtr_h +#pragma once #include "fil0fil.h" #include "dyn0buf.h" @@ -54,9 +53,6 @@ savepoint. */ #define mtr_memo_release(m, o, t) \ (m)->memo_release((o), (t)) -/** Push an object to an mtr memo stack. */ -#define mtr_memo_push(m, o, t) (m)->memo_push(o, t) - #ifdef UNIV_PFS_RWLOCK # define mtr_s_lock_index(i,m) (m)->s_lock(__FILE__, __LINE__, &(i)->lock) # define mtr_x_lock_index(i,m) (m)->x_lock(__FILE__, __LINE__, &(i)->lock) @@ -587,6 +583,12 @@ public: PAGE_FLUSH_SYNC }; +#ifdef BTR_CUR_HASH_ADAPT + /** If a stale adaptive hash index exists on the block, drop it. */ + ATTRIBUTE_COLD + static void defer_drop_ahi(buf_block_t *block, mtr_memo_type_t fix_type); +#endif + private: /** Log a write of a byte string to a page. @param block buffer page @@ -691,5 +693,3 @@ private: }; #include "mtr0mtr.inl" - -#endif /* mtr0mtr_h */ diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 66d646cab7c..fe977c10633 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -2,7 +2,7 @@ Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2021, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are @@ -1056,23 +1056,6 @@ os_file_status( bool* exists, os_file_type_t* type); -/** This function returns a new path name after replacing the basename -in an old path with a new basename. 
The old_path is a full path -name including the extension. The tablename is in the normal -form "databasename/tablename". The new base name is found after -the forward slash. Both input strings are null terminated. - -This function allocates memory to be returned. It is the callers -responsibility to free the return value after it is no longer needed. - -@param[in] old_path pathname -@param[in] new_name new file name -@return own: new full pathname */ -char* -os_file_make_new_pathname( - const char* old_path, - const char* new_name); - /** This function reduces a null-terminated full remote path name into the path that is sent by MySQL for DATA DIRECTORY clause. It replaces the 'databasename/tablename.ibd' found at the end of the path with just diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h index ff96202c1c2..a4a86791cd8 100644 --- a/storage/innobase/include/page0page.h +++ b/storage/innobase/include/page0page.h @@ -1,6 +1,6 @@ /***************************************************************************** Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2021, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -930,6 +930,8 @@ page_create_empty( dict_index_t* index, /*!< in: the index of the page */ mtr_t* mtr) /*!< in/out: mini-transaction */ MY_ATTRIBUTE((nonnull(1,2))); + +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*************************************************************//** Differs from page_copy_rec_list_end, because this function does not touch the lock table and max trx id on page or compress the page. @@ -937,8 +939,10 @@ touch the lock table and max trx id on page or compress the page. 
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if new_block is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). */ -void +or by invoking ibuf_reset_free_bits() before mtr_t::commit(). + +@return error code */ +dberr_t page_copy_rec_list_end_no_locks( /*============================*/ buf_block_t* new_block, /*!< in: index page to copy to */ @@ -954,10 +958,10 @@ The records are copied to the start of the record list on new_page. IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if new_block is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). +or by invoking ibuf_reset_free_bits() before mtr_t::commit(). -@return pointer to the original successor of the infimum record on -new_page, or NULL on zip overflow (new_block will be decompressed) */ +@return pointer to the original successor of the infimum record on new_block +@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */ rec_t* page_copy_rec_list_end( /*===================*/ @@ -965,8 +969,9 @@ page_copy_rec_list_end( buf_block_t* block, /*!< in: index page containing rec */ rec_t* rec, /*!< in: record on page */ dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); + mtr_t* mtr, /*!< in/out: mini-transaction */ + dberr_t* err) /*!< out: error code */ + MY_ATTRIBUTE((nonnull(1,2,3,4,5), warn_unused_result)); /*************************************************************//** Copies records from page to new_page, up to the given record, NOT including that record. Infimum and supremum records are not copied. @@ -977,8 +982,8 @@ if new_block is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(). 
-@return pointer to the original predecessor of the supremum record on -new_page, or NULL on zip overflow (new_block will be decompressed) */ +@return pointer to the original predecessor of the supremum record on new_block +@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */ rec_t* page_copy_rec_list_start( /*=====================*/ @@ -986,8 +991,9 @@ page_copy_rec_list_start( buf_block_t* block, /*!< in: index page containing rec */ rec_t* rec, /*!< in: record on page */ dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); + mtr_t* mtr, /*!< in/out: mini-transaction */ + dberr_t* err) /*!< out: error code */ + MY_ATTRIBUTE((nonnull, warn_unused_result)); /*************************************************************//** Deletes records from a page from a given record onward, including that record. The infimum and supremum records are not deleted. */ @@ -1015,45 +1021,6 @@ page_delete_rec_list_start( dict_index_t* index, /*!< in: record descriptor */ mtr_t* mtr) /*!< in: mtr */ MY_ATTRIBUTE((nonnull)); -/*************************************************************//** -Moves record list end to another page. Moved records include -split_rec. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if new_block is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). 
- -@return TRUE on success; FALSE on compression failure (new_block will -be decompressed) */ -ibool -page_move_rec_list_end( -/*===================*/ - buf_block_t* new_block, /*!< in/out: index page where to move */ - buf_block_t* block, /*!< in: index page from where to move */ - rec_t* split_rec, /*!< in: first record to move */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull(1, 2, 4, 5))); -/*************************************************************//** -Moves record list start to another page. Moved records do not include -split_rec. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if new_block is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). - -@return TRUE on success; FALSE on compression failure */ -ibool -page_move_rec_list_start( -/*=====================*/ - buf_block_t* new_block, /*!< in/out: index page where to move */ - buf_block_t* block, /*!< in/out: page containing split_rec */ - rec_t* split_rec, /*!< in: first record not to move */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull(1, 2, 4, 5))); /** Create an index page. @param[in,out] block buffer block @param[in] comp nonzero=compact page format */ diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h index 3a28384138b..4332990619e 100644 --- a/storage/innobase/include/page0zip.h +++ b/storage/innobase/include/page0zip.h @@ -2,7 +2,7 @@ Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -327,9 +327,9 @@ IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a non-clustered index, the caller must update the insert buffer free bits in the same mini-transaction in such a way that the modification will be redo-logged. -@retval true on success -@retval false on failure; the block_zip will be left intact */ -bool +@return error code +@retval DB_FAIL on overflow; the block_zip will be left intact */ +dberr_t page_zip_reorganize( buf_block_t* block, /*!< in/out: page with compressed page; on the compressed page, in: size; @@ -339,7 +339,7 @@ page_zip_reorganize( ulint z_level,/*!< in: compression level */ mtr_t* mtr, /*!< in: mini-transaction */ bool restore = false)/*!< whether to restore on failure */ - MY_ATTRIBUTE((nonnull)); + MY_ATTRIBUTE((nonnull, warn_unused_result)); /**********************************************************************//** Copy the records of a page byte for byte. Do not copy the page header diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h index 83cea9bef0a..8143fd419fa 100644 --- a/storage/innobase/include/trx0rec.h +++ b/storage/innobase/include/trx0rec.h @@ -69,10 +69,10 @@ Returns the start of the undo record data area. */ /**********************************************************************//** Reads from an undo log record the general parameters. @return remaining part of undo log record after reading these values */ -byte* +const byte* trx_undo_rec_get_pars( /*==================*/ - trx_undo_rec_t* undo_rec, /*!< in: undo log record */ + const trx_undo_rec_t* undo_rec, /*!< in: undo log record */ ulint* type, /*!< out: undo record type: TRX_UNDO_INSERT_REC, ... 
*/ ulint* cmpl_info, /*!< out: compiler info, relevant only @@ -86,10 +86,10 @@ trx_undo_rec_get_pars( /*******************************************************************//** Builds a row reference from an undo log record. @return pointer to remaining part of undo record */ -byte* +const byte* trx_undo_rec_get_row_ref( /*=====================*/ - byte* ptr, /*!< in: remaining part of a copy of an undo log + const byte* ptr, /*!< in: remaining part of a copy of an undo log record, at the start of the row reference; NOTE that this copy of the undo log record must be preserved as long as the row reference is @@ -97,8 +97,9 @@ trx_undo_rec_get_row_ref( record! */ dict_index_t* index, /*!< in: clustered index */ const dtuple_t**ref, /*!< out, own: row reference */ - mem_heap_t* heap); /*!< in: memory heap from which the memory + mem_heap_t* heap) /*!< in: memory heap from which the memory needed is allocated */ + MY_ATTRIBUTE((nonnull)); /**********************************************************************//** Reads from an undo log update record the system field values of the old version. @@ -250,14 +251,14 @@ trx_undo_prev_version_build( ulint v_status); /** Read from an undo log record a non-virtual column value. 
-@param[in,out] ptr pointer to remaining part of the undo record -@param[in,out] field stored field -@param[in,out] len length of the field, or UNIV_SQL_NULL -@param[in,out] orig_len original length of the locally stored part +@param ptr pointer to remaining part of the undo record +@param field stored field +@param len length of the field, or UNIV_SQL_NULL +@param orig_len original length of the locally stored part of an externally stored column, or 0 @return remaining part of undo log record after reading these values */ -byte *trx_undo_rec_get_col_val(const byte *ptr, const byte **field, - uint32_t *len, uint32_t *orig_len); +const byte *trx_undo_rec_get_col_val(const byte *ptr, const byte **field, + uint32_t *len, uint32_t *orig_len); /** Read virtual column value from undo log @param[in] table the table diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h index f03e33db6f3..7ad20b0fff0 100644 --- a/storage/innobase/include/trx0rseg.h +++ b/storage/innobase/include/trx0rseg.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -27,52 +27,25 @@ Created 3/26/1996 Heikki Tuuri #pragma once #include "trx0types.h" #include "fut0lst.h" -#ifdef WITH_WSREP -# include "trx0xa.h" -#endif /* WITH_WSREP */ - -/** Gets a rollback segment header. -@param[in] space space where placed -@param[in] page_no page number of the header -@param[in,out] mtr mini-transaction -@return rollback segment header, page x-latched */ -UNIV_INLINE -buf_block_t* -trx_rsegf_get(fil_space_t* space, uint32_t page_no, mtr_t* mtr); - -/** Gets a newly created rollback segment header. 
-@param[in] space space where placed -@param[in] page_no page number of the header -@param[in,out] mtr mini-transaction -@return rollback segment header, page x-latched */ -UNIV_INLINE -buf_block_t* -trx_rsegf_get_new( - ulint space, - uint32_t page_no, - mtr_t* mtr); /** Create a rollback segment header. -@param[in,out] space system, undo, or temporary tablespace -@param[in] rseg_id rollback segment identifier -@param[in] max_trx_id new value of TRX_RSEG_MAX_TRX_ID -@param[in,out] sys_header the TRX_SYS page (NULL for temporary rseg) -@param[in,out] mtr mini-transaction +@param[in,out] space system, undo, or temporary tablespace +@param[in] rseg_id rollback segment identifier +@param[in] max_trx_id new value of TRX_RSEG_MAX_TRX_ID +@param[in,out] mtr mini-transaction +@param[out] err error code @return the created rollback segment -@retval NULL on failure */ -buf_block_t* -trx_rseg_header_create( - fil_space_t* space, - ulint rseg_id, - trx_id_t max_trx_id, - buf_block_t* sys_header, - mtr_t* mtr); +@retval nullptr on failure */ +buf_block_t *trx_rseg_header_create(fil_space_t *space, ulint rseg_id, + trx_id_t max_trx_id, mtr_t *mtr, + dberr_t *err) + MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Initialize or recover the rollback segments at startup. */ dberr_t trx_rseg_array_init(); /** Create the temporary rollback segments. 
*/ -void trx_temp_rseg_create(); +dberr_t trx_temp_rseg_create(mtr_t *mtr); /* Number of undo log slots in a rollback segment file copy */ #define TRX_RSEG_N_SLOTS (srv_page_size / 16) @@ -225,6 +198,12 @@ public: last_commit_and_offset= static_cast(last_offset) << 48 | trx_no; } + /** @return the page identifier */ + page_id_t page_id() const { return page_id_t{space->id, page_no}; } + + /** @return the rollback segment header page, exclusively latched */ + buf_block_t *get(mtr_t *mtr, dberr_t *err) const; + /** @return whether the rollback segment is persistent */ bool is_persistent() const { @@ -280,32 +259,8 @@ If no binlog information is present, the first byte is NUL. */ #define TRX_RSEG_BINLOG_NAME_LEN 512 #ifdef WITH_WSREP -/** The offset to WSREP XID headers */ -#define TRX_RSEG_WSREP_XID_INFO TRX_RSEG_MAX_TRX_ID + 16 + 512 - -/** WSREP XID format (1 if present and valid, 0 if not present) */ -#define TRX_RSEG_WSREP_XID_FORMAT TRX_RSEG_WSREP_XID_INFO -/** WSREP XID GTRID length */ -#define TRX_RSEG_WSREP_XID_GTRID_LEN TRX_RSEG_WSREP_XID_INFO + 4 -/** WSREP XID bqual length */ -#define TRX_RSEG_WSREP_XID_BQUAL_LEN TRX_RSEG_WSREP_XID_INFO + 8 -/** WSREP XID data (XIDDATASIZE bytes) */ -#define TRX_RSEG_WSREP_XID_DATA TRX_RSEG_WSREP_XID_INFO + 12 -#endif /* WITH_WSREP*/ - -/*-------------------------------------------------------------*/ - -/** Read the page number of an undo log slot. -@param[in] rseg_header rollback segment header -@param[in] n slot number */ -inline uint32_t trx_rsegf_get_nth_undo(const buf_block_t *rseg_header, ulint n) -{ - ut_ad(n < TRX_RSEG_N_SLOTS); - return mach_read_from_4(TRX_RSEG + TRX_RSEG_UNDO_SLOTS + - n * TRX_RSEG_SLOT_SIZE + rseg_header->page.frame); -} +# include "trx0xa.h" -#ifdef WITH_WSREP /** Update the WSREP XID information in rollback segment header. 
@param[in,out] rseg_header rollback segment header @param[in] xid WSREP XID @@ -331,6 +286,16 @@ void trx_rseg_update_wsrep_checkpoint(const XID* xid); bool trx_rseg_read_wsrep_checkpoint(XID& xid); #endif /* WITH_WSREP */ +/** Read the page number of an undo log slot. +@param[in] rseg_header rollback segment header +@param[in] n slot number */ +inline uint32_t trx_rsegf_get_nth_undo(const buf_block_t *rseg_header, ulint n) +{ + ut_ad(n < TRX_RSEG_N_SLOTS); + return mach_read_from_4(TRX_RSEG + TRX_RSEG_UNDO_SLOTS + + n * TRX_RSEG_SLOT_SIZE + rseg_header->page.frame); +} + /** Upgrade a rollback segment header page to MariaDB 10.3 format. @param[in,out] rseg_header rollback segment header page @param[in,out] mtr mini-transaction */ @@ -345,5 +310,3 @@ up to which replication has proceeded. @param[in,out] mtr mini-transaction */ void trx_rseg_update_binlog_offset(buf_block_t *rseg_header, const trx_t *trx, mtr_t *mtr); - -#include "trx0rseg.inl" diff --git a/storage/innobase/include/trx0rseg.inl b/storage/innobase/include/trx0rseg.inl deleted file mode 100644 index 8805a4c8ac3..00000000000 --- a/storage/innobase/include/trx0rseg.inl +++ /dev/null @@ -1,64 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2020, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0rseg.ic -Rollback segment - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "srv0srv.h" -#include "mtr0log.h" - -/** Gets a rollback segment header. -@param[in] space space where placed -@param[in] page_no page number of the header -@param[in,out] mtr mini-transaction -@return rollback segment header, page x-latched */ -UNIV_INLINE -buf_block_t* -trx_rsegf_get(fil_space_t* space, uint32_t page_no, mtr_t* mtr) -{ - ut_ad(space == fil_system.sys_space || space == fil_system.temp_space - || srv_is_undo_tablespace(space->id) - || !srv_was_started); - - return buf_page_get(page_id_t(space->id, page_no), - 0, RW_X_LATCH, mtr); -} - -/** Gets a newly created rollback segment header. 
-@param[in] space space where placed -@param[in] page_no page number of the header -@param[in,out] mtr mini-transaction -@return rollback segment header, page x-latched */ -UNIV_INLINE -buf_block_t* -trx_rsegf_get_new( - ulint space, - uint32_t page_no, - mtr_t* mtr) -{ - ut_ad(space <= srv_undo_tablespaces_active || space == SRV_TMP_SPACE_ID - || !srv_was_started); - ut_ad(space <= TRX_SYS_MAX_UNDO_SPACES || space == SRV_TMP_SPACE_ID); - - return buf_page_get(page_id_t(space, page_no), 0, RW_X_LATCH, mtr); -} diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index c59ae776bb6..30f2b431b54 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -52,9 +52,8 @@ inline bool trx_sys_hdr_page(const page_id_t page_id) /*****************************************************************//** Creates and initializes the transaction system at the database creation. */ -void -trx_sys_create_sys_pages(void); -/*==========================*/ +dberr_t trx_sys_create_sys_pages(mtr_t *mtr); + /** Find an available rollback segment. @param[in] sys_header @return an unallocated rollback segment slot in the TRX_SYS header @@ -131,9 +130,6 @@ trx_sys_print_mysql_binlog_offset(); bool trx_sys_create_rsegs(); -/** The automatically created system rollback segment has this id */ -#define TRX_SYS_SYSTEM_RSEG_ID 0 - /** The offset of the transaction system header on the page */ #define TRX_SYS FSEG_PAGE_DATA diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index cf2b99c6113..abfa7c61c1f 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -96,22 +96,6 @@ inline roll_ptr_t trx_read_roll_ptr(const byte* ptr) return mach_read_from_7(ptr); } -/** Gets an undo log page and x-latches it. 
-@param[in] page_id page id -@param[in,out] mtr mini-transaction -@return pointer to page x-latched */ -UNIV_INLINE -buf_block_t* -trx_undo_page_get(const page_id_t page_id, mtr_t* mtr); - -/** Gets an undo log page and s-latches it. -@param[in] page_id page id -@param[in,out] mtr mini-transaction -@return pointer to page s-latched */ -UNIV_INLINE -buf_block_t* -trx_undo_page_get_s_latched(const page_id_t page_id, mtr_t* mtr); - /** Get the next record in an undo log. @param[in] undo_page undo log page @param[in] rec undo record offset in the page @@ -140,8 +124,8 @@ trx_undo_get_prev_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no, @param[in,out] mtr mini-transaction @return undo log record, the page latched, NULL if none */ trx_undo_rec_t* -trx_undo_get_next_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no, - uint16_t offset, mtr_t *mtr); +trx_undo_get_next_rec(const buf_block_t *&block, uint16_t rec, + uint32_t page_no, uint16_t offset, mtr_t *mtr); /** Get the first record in an undo log. @param[in] space undo log header space @@ -150,11 +134,13 @@ trx_undo_get_next_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no, @param[in] mode latching mode: RW_S_LATCH or RW_X_LATCH @param[out] block undo log page @param[in,out] mtr mini-transaction -@return undo log record, the page latched, NULL if none */ +@param[out] err error code +@return undo log record, the page latched +@retval nullptr if none */ trx_undo_rec_t* trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no, - uint16_t offset, ulint mode, buf_block_t*& block, - mtr_t *mtr); + uint16_t offset, ulint mode, const buf_block_t*& block, + mtr_t *mtr, dberr_t *err); /** Initialize an undo log page. NOTE: This corresponds to a redo log record and must not be changed! @@ -165,24 +151,24 @@ void trx_undo_page_init(const buf_block_t &block); /** Allocate an undo log page. 
@param[in,out] undo undo log @param[in,out] mtr mini-transaction that does not hold any page latch +@param[out] err error code @return X-latched block if success -@retval NULL on failure */ -buf_block_t* trx_undo_add_page(trx_undo_t* undo, mtr_t* mtr) - MY_ATTRIBUTE((nonnull, warn_unused_result)); +@retval nullptr on failure */ +buf_block_t *trx_undo_add_page(trx_undo_t *undo, mtr_t *mtr, dberr_t *err) + MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Free the last undo log page. The caller must hold the rseg mutex. @param[in,out] undo undo log @param[in,out] mtr mini-transaction that does not hold any undo log page - or that has allocated the undo log page */ -void -trx_undo_free_last_page(trx_undo_t* undo, mtr_t* mtr) - MY_ATTRIBUTE((nonnull)); + or that has allocated the undo log page +@return error code */ +dberr_t trx_undo_free_last_page(trx_undo_t *undo, mtr_t *mtr) + MY_ATTRIBUTE((nonnull, warn_unused_result)); -/** Truncate the tail of an undo log during rollback. -@param[in,out] undo undo log -@param[in] limit all undo logs after this limit will be discarded -@param[in] is_temp whether this is temporary undo log */ -void trx_undo_truncate_end(trx_undo_t& undo, undo_no_t limit, bool is_temp); +/** Try to truncate the undo logs. +@param trx transaction +@return error code */ +dberr_t trx_undo_try_truncate(const trx_t &trx); /** Truncate the head of an undo log. NOTE that only whole pages are freed; the header page is not @@ -191,13 +177,15 @@ freed, but emptied, if all the records there are below the limit. 
@param[in] hdr_page_no header page number @param[in] hdr_offset header offset on the page @param[in] limit first undo number to preserve -(everything below the limit will be truncated) */ -void +(everything below the limit will be truncated) +@return error code */ +dberr_t trx_undo_truncate_start( trx_rseg_t* rseg, uint32_t hdr_page_no, uint16_t hdr_offset, - undo_no_t limit); + undo_no_t limit) + MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Mark that an undo log header belongs to a data dictionary transaction. @param[in] trx dictionary transaction @param[in,out] undo undo log @@ -335,7 +323,7 @@ class UndorecApplier /** Undo log block page id */ page_id_t page_id; /** Undo log record pointer */ - trx_undo_rec_t *undo_rec; + const trx_undo_rec_t *undo_rec; /** Offset of the undo log record within the block */ uint16_t offset; /** Transaction id of the undo log */ diff --git a/storage/innobase/include/trx0undo.inl b/storage/innobase/include/trx0undo.inl index 1a9c7774580..9f05989f634 100644 --- a/storage/innobase/include/trx0undo.inl +++ b/storage/innobase/include/trx0undo.inl @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -95,28 +95,6 @@ trx_undo_trx_id_is_insert( return bool(trx_id[DATA_TRX_ID_LEN] >> 7); } -/** Gets an undo log page and x-latches it. -@param[in] page_id page id -@param[in,out] mtr mini-transaction -@return pointer to page x-latched */ -UNIV_INLINE -buf_block_t* -trx_undo_page_get(const page_id_t page_id, mtr_t* mtr) -{ - return buf_page_get(page_id, 0, RW_X_LATCH, mtr); -} - -/** Gets an undo log page and s-latches it. 
-@param[in] page_id page id -@param[in,out] mtr mini-transaction -@return pointer to page s-latched */ -UNIV_INLINE -buf_block_t* -trx_undo_page_get_s_latched(const page_id_t page_id, mtr_t* mtr) -{ - return buf_page_get(page_id, 0, RW_S_LATCH, mtr); -} - /** Determine the end offset of undo log records of an undo log page. @param[in] undo_page undo log page @param[in] page_no undo log header page number diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index ced26762ff7..c2b60a9ea41 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -191,9 +191,6 @@ using the call command. */ info output */ #endif -#define UNIV_BTR_DEBUG /* check B-tree links */ -#define UNIV_LIGHT_MEM_DEBUG /* light memory debugging */ - // #define UNIV_SQL_DEBUG #ifndef MY_ATTRIBUTE diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 0e348344694..91791fb8a3a 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -4848,9 +4848,7 @@ Validate a record lock's block */ static void lock_rec_block_validate(const page_id_t page_id) { /* The lock and the block that it is referring to may be freed at - this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check. - If the lock exists in lock_rec_validate_page() we assert - block->page.status != FREED. */ + this point. 
*/ buf_block_t* block; mtr_t mtr; @@ -4867,11 +4865,11 @@ static void lock_rec_block_validate(const page_id_t page_id) block = buf_page_get_gen( page_id, space->zip_size(), - RW_X_LATCH, NULL, + RW_S_LATCH, NULL, BUF_GET_POSSIBLY_FREED, &mtr, &err); - ut_ad(!block || block->page.is_freed() + ut_ad(!block || lock_rec_validate_page(block, space->is_latched())); mtr_commit(&mtr); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 49331fcbe49..6f120d2c7fd 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -887,9 +887,10 @@ bool recv_sys_t::recover_deferred(recv_sys_t::map::iterator &p, { mtr_t mtr; buf_block_t *block= recover_low(first, p, mtr, free_block); - ut_ad(block == free_block); + ut_ad(block == free_block || block == reinterpret_cast(-1)); free_block= nullptr; - + if (UNIV_UNLIKELY(!block || block == reinterpret_cast(-1))) + goto fail; const byte *page= UNIV_LIKELY_NULL(block->page.zip.data) ? block->page.zip.data : block->page.frame; @@ -2633,11 +2634,13 @@ lsn of a log record. 
@param[in,out] mtr mini-transaction @param[in,out] p recovery address @param[in,out] space tablespace, or NULL if not looked up yet -@param[in,out] init page initialization operation, or NULL */ -static void recv_recover_page(buf_block_t* block, mtr_t& mtr, - const recv_sys_t::map::iterator& p, - fil_space_t* space = NULL, - mlog_init_t::init* init = NULL) +@param[in,out] init page initialization operation, or NULL +@return the recovered page +@retval nullptr on failure */ +static buf_block_t *recv_recover_page(buf_block_t *block, mtr_t &mtr, + const recv_sys_t::map::iterator &p, + fil_space_t *space= nullptr, + mlog_init_t::init *init= nullptr) { mysql_mutex_assert_owner(&recv_sys.mutex); ut_ad(recv_sys.apply_log_recs); @@ -2795,7 +2798,21 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, set_start_lsn: if (recv_sys.is_corrupt_log() && !srv_force_recovery) { - break; + if (init) { + init->created = false; + if (space || block->page.id().page_no()) { + block->page.lock.x_lock_recursive(); + } + } + + mtr.discard_modifications(); + mtr.commit(); + + buf_pool.corrupted_evict(&block->page, + block->page.state() & + buf_page_t::LRU_MASK); + block = nullptr; + goto done; } if (!start_lsn) { @@ -2834,6 +2851,7 @@ set_start_lsn: mtr.discard_modifications(); mtr.commit(); +done: time_t now = time(NULL); mysql_mutex_lock(&recv_sys.mutex); @@ -2842,8 +2860,8 @@ set_start_lsn: recv_max_page_lsn = page_lsn; } - ut_ad(p->second.is_being_processed()); - ut_ad(!recv_sys.pages.empty()); + ut_ad(!block || p->second.is_being_processed()); + ut_ad(!block || !recv_sys.pages.empty()); if (recv_sys.report(now)) { const ulint n = recv_sys.pages.size(); @@ -2851,6 +2869,8 @@ set_start_lsn: service_manager_extend_timeout( INNODB_EXTEND_TIMEOUT_INTERVAL, "To recover: " ULINTPF " pages from log", n); } + + return block; } /** Remove records for a corrupted page. @@ -2858,13 +2878,19 @@ This function should only be called when innodb_force_recovery is set. 
@param page_id corrupted page identifier */ ATTRIBUTE_COLD void recv_sys_t::free_corrupted_page(page_id_t page_id) { + if (!recovery_on) + return; + mysql_mutex_lock(&mutex); map::iterator p= pages.find(page_id); if (p != pages.end()) { p->second.log.clear(); pages.erase(p); + if (!srv_force_recovery) + set_corrupt_fs(); } + if (pages.empty()) pthread_cond_broadcast(&cond); mysql_mutex_unlock(&mutex); @@ -2896,8 +2922,9 @@ ATTRIBUTE_COLD void recv_sys_t::set_corrupt_fs() /** Apply any buffered redo log to a page that was just read from a data file. @param[in,out] space tablespace -@param[in,out] bpage buffer pool page */ -void recv_recover_page(fil_space_t* space, buf_page_t* bpage) +@param[in,out] bpage buffer pool page +@return whether the page was recovered correctly */ +bool recv_recover_page(fil_space_t* space, buf_page_t* bpage) { mtr_t mtr; mtr.start(); @@ -2914,16 +2941,18 @@ void recv_recover_page(fil_space_t* space, buf_page_t* bpage) mtr.memo_push(reinterpret_cast(bpage), MTR_MEMO_PAGE_X_FIX); + buf_block_t* success = reinterpret_cast(bpage); + mysql_mutex_lock(&recv_sys.mutex); if (recv_sys.apply_log_recs) { recv_sys_t::map::iterator p = recv_sys.pages.find(bpage->id()); if (p != recv_sys.pages.end() && !p->second.is_being_processed()) { - recv_recover_page( - reinterpret_cast(bpage), mtr, p, - space); - p->second.log.clear(); - recv_sys.pages.erase(p); + success = recv_recover_page(success, mtr, p, space); + if (UNIV_LIKELY(!!success)) { + p->second.log.clear(); + recv_sys.pages.erase(p); + } recv_sys.maybe_finish_batch(); goto func_exit; } @@ -2933,6 +2962,7 @@ void recv_recover_page(fil_space_t* space, buf_page_t* bpage) func_exit: mysql_mutex_unlock(&recv_sys.mutex); ut_ad(mtr.has_committed()); + return success; } /** Read pages for which log needs to be applied. 
@@ -2969,7 +2999,9 @@ static void recv_read_in_area(page_id_t page_id, recv_sys_t::map::iterator i) @param p iterator pointing to page_id @param mtr mini-transaction @param b pre-allocated buffer pool block -@return whether the page was successfully initialized */ +@return the recovered block +@retval nullptr if the page cannot be initialized based on log records +@retval -1 if the page cannot be recovered due to corruption */ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id, map::iterator &p, mtr_t &mtr, buf_block_t *b) @@ -2981,15 +3013,11 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id, buf_block_t* block= nullptr; mlog_init_t::init &i= mlog_init.last(page_id); const lsn_t end_lsn = recs.log.last()->lsn; - bool first_page= page_id.page_no() == 0; if (end_lsn < i.lsn) DBUG_LOG("ib_log", "skip log for page " << page_id << " LSN " << end_lsn << " < " << i.lsn); fil_space_t *space= fil_space_t::get(page_id.space()); - if (!space && !first_page) - return block; - mtr.start(); mtr.set_log_mode(MTR_LOG_NO_REDO); @@ -2997,41 +3025,51 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id, if (!space) { + if (page_id.page_no() != 0) + { + nothing_recoverable: + mtr.commit(); + return nullptr; + } auto it= recv_spaces.find(page_id.space()); ut_ad(it != recv_spaces.end()); uint32_t flags= it->second.flags; zip_size= fil_space_t::zip_size(flags); block= buf_page_create_deferred(page_id.space(), zip_size, &mtr, b); + ut_ad(block == b); + block->page.lock.x_lock_recursive(); } else + { block= buf_page_create(space, page_id.page_no(), zip_size, &mtr, b); - if (UNIV_UNLIKELY(block != b)) - { - /* The page happened to exist in the buffer pool, or it - was just being read in. Before buf_page_get_with_no_latch() - returned to buf_page_create(), all changes must have been - applied to the page already. 
*/ - ut_ad(pages.find(page_id) == pages.end()); - mtr.commit(); - block= nullptr; + if (UNIV_UNLIKELY(block != b)) + { + /* The page happened to exist in the buffer pool, or it + was just being read in. Before the exclusive page latch was acquired by + buf_page_create(), all changes to the page must have been applied. */ + ut_ad(pages.find(page_id) == pages.end()); + space->release(); + goto nothing_recoverable; + } } - else + + ut_ad(&recs == &pages.find(page_id)->second); + i.created= true; + map::iterator r= p++; + block= recv_recover_page(block, mtr, r, space, &i); + ut_ad(mtr.has_committed()); + + if (block) { - /* Buffer fix the first page while deferring the tablespace - and unfix it after creating defer tablespace */ - if (first_page && !space) - block->page.lock.x_lock(); - ut_ad(&recs == &pages.find(page_id)->second); - i.created= true; - recv_recover_page(block, mtr, p, space, &i); - ut_ad(mtr.has_committed()); recs.log.clear(); - map::iterator r= p++; pages.erase(r); - if (pages.empty()) - pthread_cond_signal(&cond); } + else + block= reinterpret_cast(-1); + + if (pages.empty()) + pthread_cond_signal(&cond); if (space) space->release(); @@ -3041,7 +3079,8 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id, /** Attempt to initialize a page based on redo log records. 
@param page_id page identifier -@return whether the page was successfully initialized */ +@return recovered block +@retval nullptr if the page cannot be initialized based on log records */ buf_block_t *recv_sys_t::recover_low(const page_id_t page_id) { buf_block_t *free_block= buf_LRU_get_free_block(false); @@ -3054,7 +3093,8 @@ buf_block_t *recv_sys_t::recover_low(const page_id_t page_id) { mtr_t mtr; block= recover_low(page_id, p, mtr, free_block); - ut_ad(!block || block == free_block); + ut_ad(!block || block == reinterpret_cast(-1) || + block == free_block); } mysql_mutex_unlock(&mutex); diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 7260b74723f..af3ac0c626c 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -1124,18 +1124,17 @@ bool mtr_t::memo_contains(const fil_space_t& space, bool shared) } #ifdef BTR_CUR_HASH_ADAPT -/** If a stale adaptive hash index exists on the block, drop it. -Multiple executions of btr_search_drop_page_hash_index() on the -same block must be prevented by exclusive page latch. */ +/** If a stale adaptive hash index exists on the block, drop it. */ ATTRIBUTE_COLD -static void mtr_defer_drop_ahi(buf_block_t *block, mtr_memo_type_t fix_type) +void mtr_t::defer_drop_ahi(buf_block_t *block, mtr_memo_type_t fix_type) { switch (fix_type) { - case MTR_MEMO_BUF_FIX: - /* We do not drop the adaptive hash index, because safely doing - so would require acquiring block->lock, and that is not safe - to acquire in some RW_NO_LATCH access paths. Those code paths - should have no business accessing the adaptive hash index anyway. */ + default: + ut_ad(fix_type == MTR_MEMO_BUF_FIX); + /* We do not drop the adaptive hash index, because safely doing so + would require acquiring exclusive block->page.lock, which could + lead to hangs in some access paths. Those code paths should have + no business accessing the adaptive hash index anyway. 
*/ break; case MTR_MEMO_PAGE_S_FIX: /* Temporarily release our S-latch. */ @@ -1155,8 +1154,7 @@ static void mtr_defer_drop_ahi(buf_block_t *block, mtr_memo_type_t fix_type) btr_search_drop_page_hash_index(block); block->page.lock.x_u_downgrade(); break; - default: - ut_ad(fix_type == MTR_MEMO_PAGE_X_FIX); + case MTR_MEMO_PAGE_X_FIX: btr_search_drop_page_hash_index(block); } } @@ -1245,7 +1243,7 @@ void mtr_t::page_lock(buf_block_t *block, ulint rw_latch) #ifdef BTR_CUR_HASH_ADAPT if (dict_index_t *index= block->index) if (index->freed()) - mtr_defer_drop_ahi(block, fix_type); + defer_drop_ahi(block, fix_type); #endif /* BTR_CUR_HASH_ADAPT */ done: diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index ff174bf69b9..5155f6d0de0 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -413,46 +413,6 @@ os_file_read_string( } } -/** This function returns a new path name after replacing the basename -in an old path with a new basename. The old_path is a full path -name including the extension. The tablename is in the normal -form "databasename/tablename". The new base name is found after -the forward slash. Both input strings are null terminated. - -This function allocates memory to be returned. It is the callers -responsibility to free the return value after it is no longer needed. - -@param[in] old_path Pathname -@param[in] tablename Contains new base name -@return own: new full pathname */ -char *os_file_make_new_pathname(const char *old_path, const char *tablename) -{ - /* Split the tablename into its database and table name components. - They are separated by a '/'. */ - const char *last_slash= strrchr(tablename, '/'); - const char *base_name= last_slash ? last_slash + 1 : tablename; - - /* Find the offset of the last slash. We will strip off the - old basename.ibd which starts after that slash. 
*/ - last_slash = strrchr(old_path, '/'); -#ifdef _WIN32 - if (const char *last= strrchr(old_path, '\\')) - if (last > last_slash) - last_slash= last; -#endif - - size_t dir_len= last_slash - ? size_t(last_slash - old_path) - : strlen(old_path); - - /* allocate a new path and move the old directory path to it. */ - size_t new_path_len= dir_len + strlen(base_name) + sizeof "/.ibd"; - char *new_path= static_cast(ut_malloc_nokey(new_path_len)); - memcpy(new_path, old_path, dir_len); - snprintf(new_path + dir_len, new_path_len - dir_len, "/%s.ibd", base_name); - return new_path; -} - /** This function reduces a null-terminated full remote path name into the path that is sent by MySQL for DATA DIRECTORY clause. It replaces the 'databasename/tablename.ibd' found at the end of the path with just diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index c8062ac3914..2c9e2f5572a 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -1784,10 +1784,14 @@ page_cur_insert_rec_zip( { ulint pos= page_rec_get_n_recs_before(cursor->rec); - if (!page_zip_reorganize(cursor->block, index, level, mtr, true)) - { + switch (page_zip_reorganize(cursor->block, index, level, mtr, true)) { + case DB_FAIL: ut_ad(cursor->rec == cursor_rec); return nullptr; + case DB_SUCCESS: + break; + default: + return nullptr; } if (pos) @@ -1814,28 +1818,30 @@ page_cur_insert_rec_zip( /* We are writing entire page images to the log. Reduce the redo log volume by reorganizing the page at the same time. */ - if (page_zip_reorganize(cursor->block, index, level, mtr)) - { + switch (page_zip_reorganize(cursor->block, index, level, mtr)) { + case DB_SUCCESS: /* The page was reorganized: Seek to pos. */ cursor->rec= pos > 1 ? 
page_rec_get_nth(page, pos - 1) : page + PAGE_NEW_INFIMUM; insert_rec= page + rec_get_next_offs(cursor->rec, 1); rec_offs_make_valid(insert_rec, index, page_is_leaf(page), offsets); - return insert_rec; + break; + case DB_FAIL: + /* Theoretically, we could try one last resort of + page_zip_reorganize() followed by page_zip_available(), but that + would be very unlikely to succeed. (If the full reorganized page + failed to compress, why would it succeed to compress the page, + plus log the insert of this record?) */ + + /* Out of space: restore the page */ + if (!page_zip_decompress(page_zip, page, false)) + ut_error; /* Memory corrupted? */ + ut_ad(page_validate(page, index)); + /* fall through */ + default: + insert_rec= nullptr; } - - /* Theoretically, we could try one last resort of - page_zip_reorganize() followed by page_zip_available(), but that - would be very unlikely to succeed. (If the full reorganized page - failed to compress, why would it succeed to compress the page, - plus log the insert of this record?) */ - - /* Out of space: restore the page */ - if (!page_zip_decompress(page_zip, page, false)) - ut_error; /* Memory corrupted? */ - ut_ad(page_validate(page, index)); - insert_rec= nullptr; } return insert_rec; } diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index 27aab0c475a..bb2c267c633 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -455,8 +455,10 @@ touch the lock table and max trx id on page or compress the page. IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if new_block is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). */ -void +or by invoking ibuf_reset_free_bits() before mtr_commit(). 
+ +@return error code */ +dberr_t page_copy_rec_list_end_no_locks( /*============================*/ buf_block_t* new_block, /*!< in: index page to copy to */ @@ -480,13 +482,17 @@ page_copy_rec_list_end_no_locks( page_cur_move_to_next(&cur1); } - btr_assert_not_corrupted(new_block, index); - ut_a(page_is_comp(new_page) == page_rec_is_comp(rec)); - ut_a(mach_read_from_2(new_page + srv_page_size - 10) == (ulint) - (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); + if (UNIV_UNLIKELY(page_is_comp(new_page) != page_rec_is_comp(rec) + || mach_read_from_2(new_page + srv_page_size - 10) + != ulint(page_is_comp(new_page) + ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM))) { + return DB_CORRUPTION; + } + const ulint n_core = page_is_leaf(block->page.frame) ? index->n_core_fields : 0; + dberr_t err = DB_SUCCESS; page_cur_set_before_first(new_block, &cur2); /* Copy records from the original page to the new page */ @@ -498,9 +504,8 @@ page_copy_rec_list_end_no_locks( ins_rec = page_cur_insert_rec_low(&cur2, index, cur1.rec, offsets, mtr); if (UNIV_UNLIKELY(!ins_rec)) { - ib::fatal() << "Rec offset " << page_offset(rec) - << ", cur1 offset " << page_offset(cur1.rec) - << ", cur2 offset " << page_offset(cur2.rec); + err = DB_CORRUPTION; + break; } page_cur_move_to_next(&cur1); @@ -512,6 +517,8 @@ page_copy_rec_list_end_no_locks( if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } + + return err; } /*************************************************************//** @@ -522,10 +529,10 @@ The records are copied to the start of the record list on new_page. IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if new_block is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). +or by invoking ibuf_reset_free_bits() before mtr_t::commit(). 
-@return pointer to the original successor of the infimum record on -new_page, or NULL on zip overflow (new_block will be decompressed) */ +@return pointer to the original successor of the infimum record on new_block +@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */ rec_t* page_copy_rec_list_end( /*===================*/ @@ -533,7 +540,8 @@ page_copy_rec_list_end( buf_block_t* block, /*!< in: index page containing rec */ rec_t* rec, /*!< in: record on page */ dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + dberr_t* err) /*!< out: error code */ { page_t* new_page = new_block->page.frame; page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block); @@ -588,8 +596,11 @@ page_copy_rec_list_end( &num_moved, mtr); } else { - page_copy_rec_list_end_no_locks(new_block, block, rec, - index, mtr); + *err = page_copy_rec_list_end_no_locks(new_block, block, rec, + index, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return nullptr; + } if (was_empty) { mtr->memcpy(*new_block, PAGE_HEADER + PAGE_LAST_INSERT @@ -630,21 +641,22 @@ page_copy_rec_list_end( that is smaller than "ret"). */ ut_a(ret_pos > 0); - if (!page_zip_reorganize(new_block, index, - page_zip_level, mtr)) { - + *err = page_zip_reorganize(new_block, index, + page_zip_level, mtr); + switch (*err) { + case DB_FAIL: if (!page_zip_decompress(new_page_zip, new_page, FALSE)) { ut_error; } ut_ad(page_validate(new_page, index)); - + /* fall through */ + default: if (heap) { mem_heap_free(heap); } - - return(NULL); - } else { + return nullptr; + case DB_SUCCESS: /* The page was reorganized: Seek to ret_pos. */ ret = page_rec_get_nth(new_page, ret_pos); @@ -680,8 +692,8 @@ if new_block is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(). 
-@return pointer to the original predecessor of the supremum record on -new_page, or NULL on zip overflow (new_block will be decompressed) */ +@return pointer to the original predecessor of the supremum record on new_block +@retval nullptr on ROW_FORMAT=COMPRESSED page overflow */ rec_t* page_copy_rec_list_start( /*=====================*/ @@ -689,7 +701,8 @@ page_copy_rec_list_start( buf_block_t* block, /*!< in: index page containing rec */ rec_t* rec, /*!< in: record on page */ dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + dberr_t* err) /*!< out: error code */ { ut_ad(page_align(rec) == block->page.frame); @@ -710,6 +723,7 @@ page_copy_rec_list_start( predefined infimum record. */ if (page_rec_is_infimum(rec)) { + *err = DB_SUCCESS; return(ret); } @@ -751,7 +765,10 @@ page_copy_rec_list_start( cur2.rec = page_cur_insert_rec_low(&cur2, index, cur1.rec, offsets, mtr); - ut_a(cur2.rec); + if (UNIV_UNLIKELY(!cur2.rec)) { + *err = DB_CORRUPTION; + return nullptr; + } page_cur_move_to_next(&cur1); ut_ad(!(rec_get_info_bits(cur1.rec, @@ -783,39 +800,38 @@ page_copy_rec_list_start( if (!page_zip_compress(new_block, index, page_zip_level, mtr)) { - ulint ret_pos; #ifndef DBUG_OFF zip_reorganize: #endif /* DBUG_OFF */ /* Before trying to reorganize the page, store the number of preceding records on the page. */ - ret_pos = page_rec_get_n_recs_before(ret); + ulint ret_pos = page_rec_get_n_recs_before(ret); /* Before copying, "ret" was the predecessor of the predefined supremum record. If it was the predefined infimum record, then it would still be the infimum, and we would have ret_pos == 0. 
*/ - - if (UNIV_UNLIKELY - (!page_zip_reorganize(new_block, index, - page_zip_level, mtr))) { - + *err = page_zip_reorganize(new_block, index, + page_zip_level, mtr); + switch (*err) { + case DB_SUCCESS: + ret = page_rec_get_nth(new_page, ret_pos); + break; + case DB_FAIL: if (UNIV_UNLIKELY (!page_zip_decompress(new_page_zip, new_page, FALSE))) { ut_error; } ut_ad(page_validate(new_page, index)); - + /* fall through */ + default: if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } - return(NULL); + return nullptr; } - - /* The page was reorganized: Seek to ret_pos. */ - ret = page_rec_get_nth(new_page, ret_pos); } } @@ -834,6 +850,7 @@ zip_reorganize: btr_search_move_or_delete_hash_entries(new_block, block); + *err = DB_SUCCESS; return(ret); } @@ -1115,97 +1132,6 @@ page_delete_rec_list_start( } } -/*************************************************************//** -Moves record list end to another page. Moved records include -split_rec. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if new_block is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). 
- -@return TRUE on success; FALSE on compression failure (new_block will -be decompressed) */ -ibool -page_move_rec_list_end( -/*===================*/ - buf_block_t* new_block, /*!< in/out: index page where to move */ - buf_block_t* block, /*!< in: index page from where to move */ - rec_t* split_rec, /*!< in: first record to move */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* new_page = buf_block_get_frame(new_block); - ulint old_data_size; - ulint new_data_size; - ulint old_n_recs; - ulint new_n_recs; - - ut_ad(!dict_index_is_spatial(index)); - - old_data_size = page_get_data_size(new_page); - old_n_recs = page_get_n_recs(new_page); -#ifdef UNIV_ZIP_DEBUG - { - page_zip_des_t* new_page_zip - = buf_block_get_page_zip(new_block); - page_zip_des_t* page_zip - = buf_block_get_page_zip(block); - ut_a(!new_page_zip == !page_zip); - ut_a(!new_page_zip - || page_zip_validate(new_page_zip, new_page, index)); - ut_a(!page_zip - || page_zip_validate(page_zip, page_align(split_rec), - index)); - } -#endif /* UNIV_ZIP_DEBUG */ - - if (UNIV_UNLIKELY(!page_copy_rec_list_end(new_block, block, - split_rec, index, mtr))) { - return(FALSE); - } - - new_data_size = page_get_data_size(new_page); - new_n_recs = page_get_n_recs(new_page); - - ut_ad(new_data_size >= old_data_size); - - page_delete_rec_list_end(split_rec, block, index, - new_n_recs - old_n_recs, - new_data_size - old_data_size, mtr); - - return(TRUE); -} - -/*************************************************************//** -Moves record list start to another page. Moved records do not include -split_rec. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if new_block is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). 
- -@return TRUE on success; FALSE on compression failure */ -ibool -page_move_rec_list_start( -/*=====================*/ - buf_block_t* new_block, /*!< in/out: index page where to move */ - buf_block_t* block, /*!< in/out: page containing split_rec */ - rec_t* split_rec, /*!< in: first record not to move */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ -{ - if (UNIV_UNLIKELY(!page_copy_rec_list_start(new_block, block, - split_rec, index, mtr))) { - return(FALSE); - } - - page_delete_rec_list_start(split_rec, block, index, mtr); - - return(TRUE); -} - /************************************************************//** Returns the nth record of the record list. This is the inverse function of page_rec_get_n_recs_before(). diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index 423cafe4629..1fd5c3c146c 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -4383,9 +4383,9 @@ IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a non-clustered index, the caller must update the insert buffer free bits in the same mini-transaction in such a way that the modification will be redo-logged. -@retval true on success -@retval false on failure; the block will be left intact */ -bool +@return error code +@retval DB_FAIL on overflow; the block_zip will be left intact */ +dberr_t page_zip_reorganize( buf_block_t* block, /*!< in/out: page with compressed page; on the compressed page, in: size; @@ -4436,20 +4436,22 @@ page_zip_reorganize( /* Copy the records from the temporary space to the recreated page; do not copy the lock bits yet */ - page_copy_rec_list_end_no_locks(block, temp_block, - page_get_infimum_rec(temp_page), - index, mtr); + dberr_t err = page_copy_rec_list_end_no_locks( + block, temp_block, page_get_infimum_rec(temp_page), + index, mtr); /* Copy the PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC. 
*/ memcpy_aligned<8>(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), temp_page + (PAGE_HEADER + PAGE_MAX_TRX_ID), 8); /* PAGE_MAX_TRX_ID must be set on secondary index leaf pages. */ - ut_ad(dict_index_is_clust(index) || !page_is_leaf(temp_page) + ut_ad(err != DB_SUCCESS + || index->is_clust() || !page_is_leaf(temp_page) || page_get_max_trx_id(page) != 0); /* PAGE_MAX_TRX_ID must be zero on non-leaf pages other than clustered index root pages. */ - ut_ad(page_get_max_trx_id(page) == 0 - || (dict_index_is_clust(index) + ut_ad(err != DB_SUCCESS + || page_get_max_trx_id(page) == 0 + || (index->is_clust() ? !page_has_siblings(temp_page) : page_is_leaf(temp_page))); @@ -4481,14 +4483,13 @@ page_zip_reorganize( #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ } - buf_block_free(temp_block); - return false; + err = DB_FAIL; + } else { + lock_move_reorganize_page(block, temp_block); } - lock_move_reorganize_page(block, temp_block); - buf_block_free(temp_block); - return true; + return err; } /**********************************************************************//** diff --git a/storage/innobase/pars/pars0opt.cc b/storage/innobase/pars/pars0opt.cc index e1a913b0179..f3b71132998 100644 --- a/storage/innobase/pars/pars0opt.cc +++ b/storage/innobase/pars/pars0opt.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2019, MariaDB Corporation. +Copyright (c) 2019, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -552,13 +552,8 @@ opt_search_plan_for_table( { plan_t* plan; dict_index_t* index; - dict_index_t* best_index; ulint n_fields; - ulint goodness; - ulint last_op = 75946965; /* Eliminate a Purify - warning */ - ulint best_goodness; - ulint best_last_op = 0; /* remove warning */ + ulint best_last_op; que_node_t* index_plan[256]; que_node_t* best_index_plan[256]; @@ -571,30 +566,28 @@ opt_search_plan_for_table( /* Calculate goodness for each index of the table */ - index = dict_table_get_first_index(table); - best_index = index; /* Eliminate compiler warning */ - best_goodness = 0; + plan->index = index = dict_table_get_first_index(table); + ulint best_goodness = opt_calc_index_goodness( + index, sel_node, i, best_index_plan, &best_last_op); - /* should be do ... until ? comment by Jani */ - while (index) { - goodness = opt_calc_index_goodness(index, sel_node, i, - index_plan, &last_op); + while ((index = dict_table_get_next_index(index))) { + if (!index->is_btree()) { + continue; + } + ulint last_op; + ulint goodness = opt_calc_index_goodness(index, sel_node, i, + index_plan, &last_op); if (goodness > best_goodness) { - - best_index = index; best_goodness = goodness; + plan->index = index; n_fields = opt_calc_n_fields_from_goodness(goodness); memcpy(best_index_plan, index_plan, n_fields * sizeof *index_plan); best_last_op = last_op; } - - dict_table_next_uncorrupted_index(index); } - plan->index = best_index; - n_fields = opt_calc_n_fields_from_goodness(best_goodness); if (n_fields == 0) { @@ -612,27 +605,25 @@ opt_search_plan_for_table( memcpy(plan->tuple_exps, best_index_plan, n_fields * sizeof *best_index_plan); - if (best_last_op == '=' - || best_last_op == PARS_LIKE_TOKEN_EXACT - || best_last_op == PARS_LIKE_TOKEN_PREFIX - || best_last_op == PARS_LIKE_TOKEN_SUFFIX - || best_last_op == 
PARS_LIKE_TOKEN_SUBSTR) { - plan->n_exact_match = n_fields; - } else { - plan->n_exact_match = n_fields - 1; + + switch (best_last_op) { + case '=': + case PARS_LIKE_TOKEN_EXACT: + case PARS_LIKE_TOKEN_PREFIX: + case PARS_LIKE_TOKEN_SUFFIX: + case PARS_LIKE_TOKEN_SUBSTR: + break; + default: + n_fields--; } + plan->n_exact_match = n_fields; plan->mode = opt_op_to_search_mode(sel_node->asc, best_last_op); } - if (dict_index_is_clust(best_index) - && (plan->n_exact_match >= dict_index_get_n_unique(best_index))) { - - plan->unique_search = TRUE; - } else { - plan->unique_search = FALSE; - } + plan->unique_search = plan->index->is_clust() + && plan->n_exact_match >= plan->index->n_uniq; plan->old_vers_heap = NULL; diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index 62e3a7021b1..96202311198 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -369,24 +369,23 @@ public: } private: - /** Begin import, position the cursor on the first record. */ - void open() UNIV_NOTHROW; + /** Begin import, position the cursor on the first record. */ + inline bool open() noexcept; - /** Close the persistent curosr and commit the mini-transaction. */ - void close() UNIV_NOTHROW; + /** Close the persistent cursor and commit the mini-transaction. */ + void close() noexcept { m_mtr.commit(); btr_pcur_close(&m_pcur); } - /** Position the cursor on the next record. - @return DB_SUCCESS or error code */ - dberr_t next() UNIV_NOTHROW; + /** Position the cursor on the next record. + @return DB_SUCCESS or error code */ + dberr_t next() noexcept; - /** Store the persistent cursor position and reopen the - B-tree cursor in BTR_MODIFY_TREE mode, because the - tree structure may be changed during a pessimistic delete. 
*/ - void purge_pessimistic_delete() UNIV_NOTHROW; + /** Store the persistent cursor position and reopen the + B-tree cursor in BTR_MODIFY_TREE mode, because the + tree structure may be changed during a pessimistic delete. */ + inline dberr_t purge_pessimistic_delete() noexcept; - /** Purge delete-marked records. - @param offsets current row offsets. */ - void purge() UNIV_NOTHROW; + /** Purge a delete-marked record. */ + dberr_t purge() noexcept; protected: // Disable copying @@ -1493,14 +1492,13 @@ Purge delete marked records. dberr_t IndexPurge::garbage_collect() UNIV_NOTHROW { - dberr_t err; ibool comp = dict_table_is_comp(m_index->table); /* Open the persistent cursor and start the mini-transaction. */ - open(); + dberr_t err = open() ? next() : DB_CORRUPTION; - while ((err = next()) == DB_SUCCESS) { + for (; err == DB_SUCCESS; err = next()) { rec_t* rec = btr_pcur_get_rec(&m_pcur); ibool deleted = rec_get_deleted_flag(rec, comp); @@ -1508,7 +1506,10 @@ IndexPurge::garbage_collect() UNIV_NOTHROW if (!deleted) { ++m_n_rows; } else { - purge(); + err = purge(); + if (err != DB_SUCCESS) { + break; + } } } @@ -1521,38 +1522,31 @@ IndexPurge::garbage_collect() UNIV_NOTHROW /** Begin import, position the cursor on the first record. */ -void -IndexPurge::open() UNIV_NOTHROW +inline bool IndexPurge::open() noexcept { - mtr_start(&m_mtr); + m_mtr.start(); + m_mtr.set_log_mode(MTR_LOG_NO_REDO); - mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO); - - btr_pcur_open_at_index_side( - true, m_index, BTR_MODIFY_LEAF, &m_pcur, true, 0, &m_mtr); - btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr); - if (rec_is_metadata(btr_pcur_get_rec(&m_pcur), *m_index)) { - ut_ad(btr_pcur_is_on_user_rec(&m_pcur)); - /* Skip the metadata pseudo-record. 
*/ - } else { - btr_pcur_move_to_prev_on_page(&m_pcur); - } -} + if (btr_pcur_open_at_index_side(true, m_index, BTR_MODIFY_LEAF, + &m_pcur, true, 0, &m_mtr) != DB_SUCCESS) + return false; -/** -Close the persistent curosr and commit the mini-transaction. */ -void -IndexPurge::close() UNIV_NOTHROW -{ - btr_pcur_close(&m_pcur); - mtr_commit(&m_mtr); + btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr); + if (rec_is_metadata(btr_pcur_get_rec(&m_pcur), *m_index)) + { + if (!btr_pcur_is_on_user_rec(&m_pcur)) + return false; + /* Skip the metadata pseudo-record. */ + } + else + btr_pcur_move_to_prev_on_page(&m_pcur); + return true; } /** Position the cursor on the next record. @return DB_SUCCESS or error code */ -dberr_t -IndexPurge::next() UNIV_NOTHROW +dberr_t IndexPurge::next() noexcept { btr_pcur_move_to_next_on_page(&m_pcur); @@ -1575,7 +1569,10 @@ IndexPurge::next() UNIV_NOTHROW mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO); - m_pcur.restore_position(BTR_MODIFY_LEAF, &m_mtr); + if (m_pcur.restore_position(BTR_MODIFY_LEAF, &m_mtr) + == btr_pcur_t::CORRUPTED) { + return DB_CORRUPTION; + } /* The following is based on btr_pcur_move_to_next_user_rec(). */ m_pcur.old_stored = false; ut_ad(m_pcur.latch_mode == BTR_MODIFY_LEAF); @@ -1646,40 +1643,36 @@ IndexPurge::next() UNIV_NOTHROW Store the persistent cursor position and reopen the B-tree cursor in BTR_MODIFY_TREE mode, because the tree structure may be changed during a pessimistic delete. 
*/ -void -IndexPurge::purge_pessimistic_delete() UNIV_NOTHROW +inline dberr_t IndexPurge::purge_pessimistic_delete() noexcept { - dberr_t err; - - m_pcur.restore_position(BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, &m_mtr); - - ut_ad(rec_get_deleted_flag( - btr_pcur_get_rec(&m_pcur), - dict_table_is_comp(m_index->table))); - - btr_cur_pessimistic_delete( - &err, FALSE, btr_pcur_get_btr_cur(&m_pcur), 0, false, &m_mtr); - - ut_a(err == DB_SUCCESS); + dberr_t err; + if (m_pcur.restore_position(BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, + &m_mtr) != btr_pcur_t::CORRUPTED) + { + ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(&m_pcur), + m_index->table->not_redundant())); + btr_cur_pessimistic_delete(&err, FALSE, btr_pcur_get_btr_cur(&m_pcur), 0, + false, &m_mtr); + } + else + err= DB_CORRUPTION; - /* Reopen the B-tree cursor in BTR_MODIFY_LEAF mode */ - mtr_commit(&m_mtr); + m_mtr.commit(); + return err; } -/** -Purge delete-marked records. */ -void -IndexPurge::purge() UNIV_NOTHROW +dberr_t IndexPurge::purge() noexcept { - btr_pcur_store_position(&m_pcur, &m_mtr); - - purge_pessimistic_delete(); - - mtr_start(&m_mtr); - - mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO); - - m_pcur.restore_position(BTR_MODIFY_LEAF, &m_mtr); + btr_pcur_store_position(&m_pcur, &m_mtr); + dberr_t err= purge_pessimistic_delete(); + + m_mtr.start(); + m_mtr.set_log_mode(MTR_LOG_NO_REDO); + if (err == DB_SUCCESS) + err= (m_pcur.restore_position(BTR_MODIFY_LEAF, &m_mtr) == + btr_pcur_t::CORRUPTED) + ? DB_CORRUPTION : DB_SUCCESS; + return err; } /** Adjust the BLOB reference for a single column that is externally stored @@ -2121,8 +2114,9 @@ dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW /* If we already had an old page with matching number in the buffer pool, evict it now, because we no longer evict the pages on DISCARD TABLESPACE. 
*/ - buf_page_get_gen(block->page.id(), get_zip_size(), - RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL, NULL); + buf_page_get_low(block->page.id(), get_zip_size(), RW_NO_LATCH, + nullptr, BUF_PEEK_IF_IN_POOL, + nullptr, nullptr, false); uint16_t page_type; @@ -2351,26 +2345,20 @@ row_import_set_sys_max_row_id( mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - btr_pcur_open_at_index_side( - false, // High end - index, - BTR_SEARCH_LEAF, - &pcur, - true, // Init cursor - 0, // Leaf level - &mtr); - - btr_pcur_move_to_prev_on_page(&pcur); - rec = btr_pcur_get_rec(&pcur); - - /* Check for empty table. */ - if (page_rec_is_infimum(rec)) { - /* The table is empty. */ - } else if (rec_is_metadata(rec, *index)) { - /* The clustered index contains the metadata record only, - that is, the table is empty. */ - } else { - row_id = mach_read_from_6(rec); + if (btr_pcur_open_at_index_side(false, index, BTR_SEARCH_LEAF, + &pcur, true, 0, &mtr) == DB_SUCCESS) { + btr_pcur_move_to_prev_on_page(&pcur); + rec = btr_pcur_get_rec(&pcur); + + /* Check for empty table. */ + if (page_rec_is_infimum(rec)) { + /* The table is empty. */ + } else if (rec_is_metadata(rec, *index)) { + /* The clustered index contains the metadata + record only, that is, the table is empty. 
*/ + } else { + row_id = mach_read_from_6(rec); + } } mtr_commit(&mtr); @@ -3216,14 +3204,6 @@ static dberr_t handle_instant_metadata(dict_table_t *table, { dict_index_t *index= dict_table_get_first_index(table); - auto tmp1= table->space_id; - table->space_id= page_get_space_id(page.get()); - SCOPE_EXIT([tmp1, table]() { table->space_id= tmp1; }); - - auto tmp2= index->page; - index->page= page_get_page_no(page.get()); - SCOPE_EXIT([tmp2, index]() { index->page= tmp2; }); - if (!page_is_comp(page.get()) != !dict_table_is_comp(table)) { ib_errf(current_thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH, @@ -3232,7 +3212,7 @@ static dberr_t handle_instant_metadata(dict_table_t *table, } if (btr_cur_instant_root_init(index, page.get())) - return DB_ERROR; + return DB_CORRUPTION; ut_ad(index->n_core_null_bytes != dict_index_t::NO_CORE_NULL_BYTES); diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 4fc40dca3f9..b51251094b9 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1137,9 +1137,12 @@ row_ins_foreign_check_on_constraint( ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec, tmp_heap); - btr_pcur_open_with_no_init(clust_index, ref, - PAGE_CUR_LE, BTR_SEARCH_LEAF, - cascade->pcur, 0, mtr); + err = btr_pcur_open_with_no_init(clust_index, ref, + PAGE_CUR_LE, BTR_SEARCH_LEAF, + cascade->pcur, 0, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + goto nonstandard_exit_func; + } clust_rec = btr_pcur_get_rec(cascade->pcur); clust_block = btr_pcur_get_block(cascade->pcur); @@ -1350,7 +1353,10 @@ row_ins_foreign_check_on_constraint( /* Restore pcur position */ - pcur->restore_position(BTR_SEARCH_LEAF, mtr); + if (pcur->restore_position(BTR_SEARCH_LEAF, mtr) + != btr_pcur_t::SAME_ALL) { + err = DB_CORRUPTION; + } if (tmp_heap) { mem_heap_free(tmp_heap); @@ -1369,7 +1375,10 @@ nonstandard_exit_func: mtr_commit(mtr); mtr_start(mtr); - pcur->restore_position(BTR_SEARCH_LEAF, mtr); + if 
(pcur->restore_position(BTR_SEARCH_LEAF, mtr) + != btr_pcur_t::SAME_ALL && err == DB_SUCCESS) { + err = DB_CORRUPTION; + } DBUG_RETURN(err); } @@ -1613,8 +1622,11 @@ row_ins_check_foreign_constraint( dtuple_set_n_fields_cmp(entry, foreign->n_fields); - btr_pcur_open(check_index, entry, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); + err = btr_pcur_open(check_index, entry, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + goto end_scan; + } /* Scan index records and check if there is a matching record */ @@ -1822,7 +1834,7 @@ do_possible_lock_wait: } exit_func: - if (heap != NULL) { + if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } @@ -2021,8 +2033,6 @@ row_ins_scan_sec_index_for_duplicate( int cmp; ulint n_fields_cmp; btr_pcur_t pcur; - dberr_t err = DB_SUCCESS; - ulint allow_duplicates; rec_offs offsets_[REC_OFFS_SEC_INDEX_SIZE]; rec_offs* offsets = offsets_; DBUG_ENTER("row_ins_scan_sec_index_for_duplicate"); @@ -2052,10 +2062,13 @@ row_ins_scan_sec_index_for_duplicate( n_fields_cmp = dtuple_get_n_fields_cmp(entry); dtuple_set_n_fields_cmp(entry, n_unique); + const auto allow_duplicates = thr_get_trx(thr)->duplicates; - btr_pcur_open(index, entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, mtr); - - allow_duplicates = thr_get_trx(thr)->duplicates; + dberr_t err = btr_pcur_open(index, entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, + &pcur, mtr); + if (err != DB_SUCCESS) { + goto end_scan; + } /* Scan index records and check if there is a duplicate */ @@ -2443,9 +2456,8 @@ row_ins_index_entry_big_rec( mtr_t mtr; btr_pcur_t pcur; rec_t* rec; - dberr_t error; - ut_ad(dict_index_is_clust(index)); + ut_ad(index->is_primary()); DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern_latch"); @@ -2456,8 +2468,12 @@ row_ins_index_entry_big_rec( index->set_modified(mtr); } - btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_TREE, - &pcur, &mtr); + dberr_t error = btr_pcur_open(index, entry, PAGE_CUR_LE, + BTR_MODIFY_TREE, &pcur, &mtr); + if (error != 
DB_SUCCESS) { + return error; + } + rec = btr_pcur_get_rec(&pcur); offsets = rec_get_offsets(rec, index, offsets, index->n_core_fields, ULINT_UNDEFINED, heap); @@ -2483,7 +2499,7 @@ extern "C" int thd_is_slave(const MYSQL_THD thd); /* Avoid GCC 4.8.5 internal compiler error due to srw_mutex::wr_unlock(). We would only need this for row_ins_clust_index_entry_low(), but GCC 4.8.5 does not support pop_options. */ -# pragma GCC optimize ("no-expensive-optimizations") +# pragma GCC optimize ("O0") #endif /***************************************************************//** @@ -2827,7 +2843,7 @@ row_ins_sec_index_entry_low( btr_cur_t cursor; ulint search_mode = mode; - dberr_t err = DB_SUCCESS; + dberr_t err; ulint n_unique; mtr_t mtr; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; @@ -2870,7 +2886,8 @@ row_ins_sec_index_entry_low( search_mode, &cursor, 0, &mtr); - if (search_mode == BTR_MODIFY_LEAF && rtr_info.mbr_adj) { + if (err == DB_SUCCESS && search_mode == BTR_MODIFY_LEAF + && rtr_info.mbr_adj) { mtr_commit(&mtr); search_mode = mode = BTR_MODIFY_TREE; rtr_clean_rtr_info(&rtr_info, true); @@ -2906,13 +2923,7 @@ row_ins_sec_index_entry_low( if (err != DB_SUCCESS) { if (err == DB_DECRYPTION_FAILED) { - ib_push_warning(thr_get_trx(thr)->mysql_thd, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. " - " Can't continue reading table.", - index->table->name.m_name); - index->table->file_unreadable = true; + btr_decryption_failed(*index); } goto func_exit; } @@ -2983,11 +2994,14 @@ row_ins_sec_index_entry_low( prevent any insertion of a duplicate by another transaction. Let us now reposition the cursor and continue the insertion. 
*/ - btr_cur_search_to_nth_level( + err = btr_cur_search_to_nth_level( index, 0, entry, PAGE_CUR_LE, (search_mode & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE)), &cursor, 0, &mtr); + if (err != DB_SUCCESS) { + goto func_exit; + } } if (row_ins_must_modify_rec(&cursor)) { @@ -3556,8 +3570,7 @@ row_ins( ut_ad(node->state == INS_NODE_INSERT_ENTRIES); - while (node->index != NULL) { - dict_index_t *index = node->index; + while (dict_index_t *index = node->index) { /* We do not insert history rows into FTS_DOC_ID_INDEX because it is unique by FTS_DOC_ID only and we do not want to add @@ -3566,7 +3579,7 @@ row_ins( FTS_DOC_ID for history is enough. */ const unsigned type = index->type; - if (index->type & DICT_FTS + if (index->type & (DICT_FTS | DICT_CORRUPT) || !index->is_committed()) { } else if (!(type & DICT_UNIQUE) || index->n_uniq > 1 || !node->vers_history_row()) { @@ -3587,12 +3600,6 @@ row_ins( node->index = dict_table_get_next_index(node->index); ++node->entry; - - /* Skip corrupted secondary index and its entry */ - while (node->index && node->index->is_corrupted()) { - node->index = dict_table_get_next_index(node->index); - ++node->entry; - } } ut_ad(node->entry == node->entry_list.end()); diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 2a570bd1d0d..f2e2511a116 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -405,9 +405,6 @@ start_log: buf, byte_offset, srv_sort_buf_size) != DB_SUCCESS) { write_failed: - /* We set the flag directly instead of invoking - dict_set_corrupted_index_cache_only(index) here, - because the index is not "public" yet. 
*/ index->type |= DICT_CORRUPT; } @@ -1681,12 +1678,14 @@ row_log_table_apply_delete_low( btr_cur_pessimistic_delete(&error, FALSE, btr_pcur_get_btr_cur(pcur), BTR_CREATE_FLAG, false, mtr); - mtr_commit(mtr); - if (error != DB_SUCCESS) { - return(error); +err_exit: + mtr->commit(); + return error; } + mtr->commit(); + while ((index = dict_table_get_next_index(index)) != NULL) { if (index->type & DICT_FTS) { continue; @@ -1696,9 +1695,12 @@ row_log_table_apply_delete_low( row, ext, index, heap); mtr->start(); index->set_modified(*mtr); - btr_pcur_open(index, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, - pcur, mtr); + error = btr_pcur_open(index, entry, PAGE_CUR_LE, + BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, + pcur, mtr); + if (error) { + goto err_exit; + } #ifdef UNIV_DEBUG switch (btr_pcur_get_btr_cur(pcur)->flag) { case BTR_CUR_DELETE_REF: @@ -1777,9 +1779,12 @@ row_log_table_apply_delete( mtr_start(&mtr); index->set_modified(mtr); - btr_pcur_open(index, old_pk, PAGE_CUR_LE, - BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, - &pcur, &mtr); + dberr_t err = btr_pcur_open(index, old_pk, PAGE_CUR_LE, + BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, + &pcur, &mtr); + if (err != DB_SUCCESS) { + goto all_done; + } #ifdef UNIV_DEBUG switch (btr_pcur_get_btr_cur(&pcur)->flag) { case BTR_CUR_DELETE_REF: @@ -1806,7 +1811,7 @@ all_done: ROW_T_INSERT was skipped or ROW_T_UPDATE was interpreted as ROW_T_DELETE due to BLOBs having been freed by rollback. 
*/ - return(DB_SUCCESS); + return err; } offsets = rec_get_offsets(btr_pcur_get_rec(&pcur), index, nullptr, @@ -1908,10 +1913,24 @@ row_log_table_apply_update( return(error); } - mtr_start(&mtr); + mtr.start(); index->set_modified(mtr); - btr_pcur_open(index, old_pk, PAGE_CUR_LE, - BTR_MODIFY_TREE, &pcur, &mtr); + error = btr_pcur_open(index, old_pk, PAGE_CUR_LE, + BTR_MODIFY_TREE, &pcur, &mtr); + if (error != DB_SUCCESS) { +func_exit: + mtr.commit(); +func_exit_committed: + ut_ad(mtr.has_committed()); + + if (error != DB_SUCCESS) { + /* Report the erroneous row using the new + version of the table. */ + innobase_row_to_mysql(dup->table, log->table, row); + } + + return error; + } #ifdef UNIV_DEBUG switch (btr_pcur_get_btr_cur(&pcur)->flag) { case BTR_CUR_DELETE_REF: @@ -1961,18 +1980,7 @@ row_log_table_apply_update( index, entry, btr_pcur_get_rec(&pcur), cur_offsets, false, NULL, heap, dup->table, &error); if (error != DB_SUCCESS || !update->n_fields) { -func_exit: - mtr.commit(); -func_exit_committed: - ut_ad(mtr.has_committed()); - - if (error != DB_SUCCESS) { - /* Report the erroneous row using the new - version of the table. 
*/ - innobase_row_to_mysql(dup->table, log->table, row); - } - - return error; + goto func_exit; } const bool pk_updated @@ -2069,7 +2077,7 @@ func_exit_committed: dtuple_copy_v_fields(old_row, old_pk); } - mtr_commit(&mtr); + mtr.commit(); entry = row_build_index_entry(old_row, old_ext, index, heap); if (!entry) { @@ -2077,7 +2085,7 @@ func_exit_committed: return(DB_CORRUPTION); } - mtr_start(&mtr); + mtr.start(); index->set_modified(mtr); if (ROW_FOUND != row_search_index_entry( @@ -2095,7 +2103,7 @@ func_exit_committed: break; } - mtr_commit(&mtr); + mtr.commit(); entry = row_build_index_entry(row, NULL, index, heap); error = row_ins_sec_index_entry_low( @@ -2109,7 +2117,7 @@ func_exit_committed: thr_get_trx(thr)->error_key_num = n_index; } - mtr_start(&mtr); + mtr.start(); index->set_modified(mtr); } @@ -3067,11 +3075,14 @@ row_log_apply_op_low( record. The operation may already have been performed, depending on when the row in the clustered index was scanned. */ - btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, - has_index_lock - ? BTR_MODIFY_TREE - : BTR_MODIFY_LEAF, - &cursor, 0, &mtr); + *error = btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, + has_index_lock + ? 
BTR_MODIFY_TREE + : BTR_MODIFY_LEAF, + &cursor, 0, &mtr); + if (UNIV_UNLIKELY(*error != DB_SUCCESS)) { + goto func_exit; + } ut_ad(dict_index_get_n_unique(index) > 0); /* This test is somewhat similar to row_ins_must_modify_rec(), @@ -3105,9 +3116,10 @@ row_log_apply_op_low( goto func_exit; } - if (btr_cur_optimistic_delete( - &cursor, BTR_CREATE_FLAG, &mtr)) { - *error = DB_SUCCESS; + *error = btr_cur_optimistic_delete( + &cursor, BTR_CREATE_FLAG, &mtr); + + if (*error != DB_FAIL) { break; } @@ -3117,10 +3129,12 @@ row_log_apply_op_low( mtr_commit(&mtr); mtr_start(&mtr); index->set_modified(mtr); - btr_cur_search_to_nth_level( + *error = btr_cur_search_to_nth_level( index, 0, entry, PAGE_CUR_LE, BTR_MODIFY_TREE, &cursor, 0, &mtr); - + if (UNIV_UNLIKELY(*error != DB_SUCCESS)) { + goto func_exit; + } /* No other thread than the current one is allowed to modify the index tree. Thus, the record should still exist. */ @@ -3219,9 +3233,12 @@ insert_the_rec: mtr_commit(&mtr); mtr_start(&mtr); index->set_modified(mtr); - btr_cur_search_to_nth_level( + *error = btr_cur_search_to_nth_level( index, 0, entry, PAGE_CUR_LE, BTR_MODIFY_TREE, &cursor, 0, &mtr); + if (*error != DB_SUCCESS) { + break; + } } /* We already determined that the @@ -3692,9 +3709,6 @@ func_exit: } /* fall through */ default: - /* We set the flag directly instead of invoking - dict_set_corrupted_index_cache_only(index) here, - because the index is not "public" yet. */ index->type |= DICT_CORRUPT; } @@ -3739,8 +3753,7 @@ row_log_apply( index->lock.x_lock(SRW_LOCK_CALL); - if (!dict_table_is_corrupted(index->table) - && index->online_log) { + if (index->online_log && !index->table->corrupted) { error = row_log_apply_ops(trx, index, &dup, stage); } else { error = DB_SUCCESS; @@ -3748,9 +3761,6 @@ row_log_apply( if (error != DB_SUCCESS) { ut_ad(index->table->space); - /* We set the flag directly instead of invoking - dict_set_corrupted_index_cache_only(index) here, - because the index is not "public" yet. 
*/ index->type |= DICT_CORRUPT; index->table->drop_aborted = TRUE; @@ -3779,8 +3789,9 @@ dberr_t row_log_get_error(const dict_index_t *index) return index->online_log->error; } -void dict_table_t::clear(que_thr_t *thr) +dberr_t dict_table_t::clear(que_thr_t *thr) { + dberr_t err= DB_SUCCESS; for (dict_index_t *index= UT_LIST_GET_FIRST(indexes); index; index= UT_LIST_GET_NEXT(indexes, index)) { @@ -3798,8 +3809,10 @@ void dict_table_t::clear(que_thr_t *thr) MY_ASSERT_UNREACHABLE(); break; } - index->clear(thr); + if (dberr_t err_index= index->clear(thr)) + err= err_index; } + return err; } const rec_t * diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index a845baa39d3..f01231fda16 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -156,14 +156,13 @@ public: false); rtr_info_update_btr(&ins_cur, &rtr_info); - btr_cur_search_to_nth_level(m_index, 0, dtuple, - PAGE_CUR_RTREE_INSERT, - BTR_MODIFY_LEAF, &ins_cur, - 0, &mtr); + error = btr_cur_search_to_nth_level( + m_index, 0, dtuple, PAGE_CUR_RTREE_INSERT, + BTR_MODIFY_LEAF, &ins_cur, 0, &mtr); /* It need to update MBR in parent entry, so change search mode to BTR_MODIFY_TREE */ - if (rtr_info.mbr_adj) { + if (error == DB_SUCCESS && rtr_info.mbr_adj) { mtr_commit(&mtr); rtr_clean_rtr_info(&rtr_info, true); rtr_init_rtr_info(&rtr_info, false, &ins_cur, @@ -171,18 +170,22 @@ public: rtr_info_update_btr(&ins_cur, &rtr_info); mtr_start(&mtr); m_index->set_modified(mtr); - btr_cur_search_to_nth_level( + error = btr_cur_search_to_nth_level( m_index, 0, dtuple, PAGE_CUR_RTREE_INSERT, BTR_MODIFY_TREE, &ins_cur, 0, &mtr); } - error = btr_cur_optimistic_insert( - flag, &ins_cur, &ins_offsets, &row_heap, - dtuple, &rec, &big_rec, 0, NULL, &mtr); + if (error == DB_SUCCESS) { + error = btr_cur_optimistic_insert( + flag, &ins_cur, &ins_offsets, + &row_heap, dtuple, &rec, &big_rec, + 0, NULL, &mtr); + } + + ut_ad(!big_rec); if (error == DB_FAIL) { - 
ut_ad(!big_rec); mtr.commit(); mtr.start(); m_index->set_modified(mtr); @@ -192,18 +195,22 @@ public: &ins_cur, m_index, false); rtr_info_update_btr(&ins_cur, &rtr_info); - btr_cur_search_to_nth_level( + error = btr_cur_search_to_nth_level( m_index, 0, dtuple, PAGE_CUR_RTREE_INSERT, BTR_MODIFY_TREE, &ins_cur, 0, &mtr); - error = btr_cur_pessimistic_insert( + if (error == DB_SUCCESS) { + error = btr_cur_pessimistic_insert( flag, &ins_cur, &ins_offsets, &row_heap, dtuple, &rec, &big_rec, 0, NULL, &mtr); + } } + ut_ad(!big_rec); + DBUG_EXECUTE_IF( "row_merge_ins_spatial_fail", error = DB_FAIL; @@ -1824,10 +1831,15 @@ row_merge_read_clustered_index( == (DATA_ROLL_PTR | DATA_NOT_NULL)); const ulint new_trx_id_col = col_map ? col_map[old_trx_id_col] : old_trx_id_col; + uint64_t n_rows = 0; - btr_pcur_open_at_index_side( - true, clust_index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); - mtr_started = true; + err = btr_pcur_open_at_index_side(true, clust_index, BTR_SEARCH_LEAF, + &pcur, true, 0, &mtr); + if (err != DB_SUCCESS) { +err_exit: + trx->error_key_num = 0; + goto func_exit; + } btr_pcur_move_to_next_user_rec(&pcur, &mtr); if (rec_is_metadata(btr_pcur_get_rec(&pcur), *clust_index)) { ut_ad(btr_pcur_is_on_user_rec(&pcur)); @@ -1837,8 +1849,6 @@ row_merge_read_clustered_index( btr_pcur_move_to_prev_on_page(&pcur); } - uint64_t n_rows = 0; - /* Check if the table is supposed to be empty for our read view. 
If we read bulk_trx_id as an older transaction ID, it is not @@ -1921,8 +1931,7 @@ row_merge_read_clustered_index( /* Do not continue if table pages are still encrypted */ if (!old_table->is_readable() || !new_table->is_readable()) { err = DB_DECRYPTION_FAILED; - trx->error_key_num = 0; - goto func_exit; + goto err_exit; } const rec_t* rec; @@ -1944,15 +1953,13 @@ row_merge_read_clustered_index( if (UNIV_UNLIKELY(trx_is_interrupted(trx))) { err = DB_INTERRUPTED; - trx->error_key_num = 0; - goto func_exit; + goto err_exit; } if (online && old_table != new_table) { err = row_log_table_get_error(clust_index); if (err != DB_SUCCESS) { - trx->error_key_num = 0; - goto func_exit; + goto err_exit; } } @@ -2001,8 +2008,13 @@ scan_next: /* Restore position on the record, or its predecessor if the record was purged meanwhile. */ - pcur.restore_position( - BTR_SEARCH_LEAF, &mtr); + if (pcur.restore_position(BTR_SEARCH_LEAF, + &mtr) + == btr_pcur_t::CORRUPTED) { +corrupted_index: + err = DB_CORRUPTION; + goto func_exit; + } /* Move to the successor of the original record. 
*/ if (!btr_pcur_move_to_next_user_rec( @@ -2025,10 +2037,15 @@ end_of_index: goto end_of_index; } - buf_block_t* block = btr_block_get( - *clust_index, next_page_no, - RW_S_LATCH, false, &mtr); - + buf_block_t* block = buf_page_get_gen( + page_id_t(old_table->space->id, + next_page_no), + old_table->space->zip_size(), + RW_S_LATCH, nullptr, BUF_GET, &mtr, + &err, false); + if (!block) { + goto err_exit; + } btr_leaf_page_release(page_cur_get_block(cur), BTR_SEARCH_LEAF, &mtr); page_cur_set_before_first(block, cur); @@ -2181,8 +2198,7 @@ end_of_index: if (!allow_not_null) { err = DB_INVALID_NULL; - trx->error_key_num = 0; - goto func_exit; + goto err_exit; } const dfield_t& default_field @@ -2263,13 +2279,10 @@ end_of_index: byte* b = static_cast(dfield_get_data(dfield)); if (sequence.eof()) { - err = DB_ERROR; - trx->error_key_num = 0; - ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR, ER_AUTOINC_READ_FAILED, "[NULL]"); - - goto func_exit; + err = DB_ERROR; + goto err_exit; } ulonglong value = sequence++; @@ -2545,8 +2558,11 @@ write_buffers: overflow). */ mtr.start(); mtr_started = true; - pcur.restore_position( - BTR_SEARCH_LEAF, &mtr); + if (pcur.restore_position( + BTR_SEARCH_LEAF, &mtr) + == btr_pcur_t::CORRUPTED) { + goto corrupted_index; + } buf = row_merge_buf_empty(buf); merge_buf[i] = buf; /* Restart the outer loop on the diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 143365f02a7..84f2b54f517 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -701,6 +701,8 @@ handle_new_error: trx->rollback(); break; + case DB_IO_ERROR: + case DB_TABLE_CORRUPT: case DB_CORRUPTION: case DB_PAGE_CORRUPTED: ib::error() << "We detected index corruption in an InnoDB type" @@ -1199,10 +1201,10 @@ row_mysql_get_table_status( // to decrypt if (push_warning) { ib_push_warning(trx, DB_DECRYPTION_FAILED, - "Table %s in tablespace %lu encrypted." + "Table %s is encrypted." 
"However key management plugin or used key_id is not found or" " used encryption algorithm or method does not match.", - table->name.m_name, table->space); + table->name.m_name); } err = DB_DECRYPTION_FAILED; @@ -1250,30 +1252,19 @@ row_insert_for_mysql( ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED); ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED); - if (!prebuilt->table->space) { - - ib::error() << "The table " << prebuilt->table->name + if (!table->space) { + ib::error() << "The table " << table->name << " doesn't have a corresponding tablespace, it was" " discarded."; return(DB_TABLESPACE_DELETED); - - } else if (!prebuilt->table->is_readable()) { - return(row_mysql_get_table_status(prebuilt->table, trx, true)); + } else if (!table->is_readable()) { + return row_mysql_get_table_status(table, trx, true); } else if (high_level_read_only) { return(DB_READ_ONLY); - } - - DBUG_EXECUTE_IF("mark_table_corrupted", { - /* Mark the table corrupted for the clustered index */ - dict_index_t* index = dict_table_get_first_index(table); - ut_ad(dict_index_is_clust(index)); - dict_set_corrupted(index, "INSERT TABLE", false); }); - - if (dict_table_is_corrupted(table)) { - - ib::error() << "Table " << table->name << " is corrupt."; - return(DB_TABLE_CORRUPT); + } else if (UNIV_UNLIKELY(table->corrupted) + || dict_table_get_first_index(table)->is_corrupted()) { + return DB_TABLE_CORRUPT; } trx->op_info = "inserting"; @@ -1792,57 +1783,30 @@ row_unlock_for_mysql( row_prebuilt_t* prebuilt, ibool has_latches_on_recs) { - btr_pcur_t* pcur = prebuilt->pcur; - btr_pcur_t* clust_pcur = prebuilt->clust_pcur; - trx_t* trx = prebuilt->trx; - - ut_ad(prebuilt != NULL); - ut_ad(trx != NULL); - ut_ad(trx->isolation_level <= TRX_ISO_READ_COMMITTED); - - if (dict_index_is_spatial(prebuilt->index)) { - return; - } - - trx->op_info = "unlock_row"; - - if (prebuilt->new_rec_locks >= 1) { + if (prebuilt->new_rec_locks == 1 && prebuilt->index->is_clust()) { + trx_t* trx = prebuilt->trx; + 
ut_ad(trx->isolation_level <= TRX_ISO_READ_COMMITTED); + trx->op_info = "unlock_row"; const rec_t* rec; dict_index_t* index; trx_id_t rec_trx_id; mtr_t mtr; + btr_pcur_t* pcur = prebuilt->pcur; mtr_start(&mtr); /* Restore the cursor position and find the record */ - if (!has_latches_on_recs) { - pcur->restore_position(BTR_SEARCH_LEAF, &mtr); + if (!has_latches_on_recs + && pcur->restore_position(BTR_SEARCH_LEAF, &mtr) + != btr_pcur_t::SAME_ALL) { + goto no_unlock; } rec = btr_pcur_get_rec(pcur); index = btr_pcur_get_btr_cur(pcur)->index; - if (prebuilt->new_rec_locks >= 2) { - /* Restore the cursor position and find the record - in the clustered index. */ - - if (!has_latches_on_recs) { - clust_pcur->restore_position(BTR_SEARCH_LEAF, - &mtr); - } - - rec = btr_pcur_get_rec(clust_pcur); - index = btr_pcur_get_btr_cur(clust_pcur)->index; - } - - if (!dict_index_is_clust(index)) { - /* This is not a clustered index record. We - do not know how to unlock the record. */ - goto no_unlock; - } - /* If the record has been modified by this transaction, do not unlock it. */ @@ -1877,24 +1841,11 @@ row_unlock_for_mysql( rec, static_cast( prebuilt->select_lock_type)); - - if (prebuilt->new_rec_locks >= 2) { - rec = btr_pcur_get_rec(clust_pcur); - - lock_rec_unlock( - trx, - btr_pcur_get_block(clust_pcur) - ->page.id(), - rec, - static_cast( - prebuilt->select_lock_type)); - } } no_unlock: mtr_commit(&mtr); + trx->op_info = ""; } - - trx->op_info = ""; } /** Write query start time as SQL field data to a buffer. Needed by InnoDB. 
@@ -2297,6 +2248,13 @@ row_mysql_table_id_reassign( trx_t* trx, table_id_t* new_id) { + if (!dict_sys.sys_tables || dict_sys.sys_tables->corrupted || + !dict_sys.sys_columns || dict_sys.sys_columns->corrupted || + !dict_sys.sys_indexes || dict_sys.sys_indexes->corrupted || + !dict_sys.sys_virtual || dict_sys.sys_virtual->corrupted) { + return DB_CORRUPTION; + } + dberr_t err; pars_info_t* info = pars_info_create(); @@ -2447,9 +2405,6 @@ row_discard_tablespace( /* All persistent operations successful, update the data dictionary memory cache. */ - table->file_unreadable = true; - table->space = NULL; - table->flags2 |= DICT_TF2_DISCARDED; dict_table_change_id_in_cache(table, new_id); dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); @@ -2521,6 +2476,9 @@ rollback: It would be better to remove the integrity-breaking ALTER TABLE...DISCARD TABLESPACE operation altogether. */ + table->file_unreadable= true; + table->space= nullptr; + table->flags2|= DICT_TF2_DISCARDED; err= row_discard_tablespace(trx, table); DBUG_EXECUTE_IF("ib_discard_before_commit_crash", log_buffer_flush_to_disk(); DBUG_SUICIDE();); @@ -2529,7 +2487,6 @@ rollback: trx->commit(deleted); const auto space_id= table->space_id; pfs_os_file_t d= fil_delete_tablespace(space_id); - table->space= nullptr; DBUG_EXECUTE_IF("ib_discard_after_commit_crash", DBUG_SUICIDE();); row_mysql_unlock_data_dictionary(trx); @@ -2895,7 +2852,8 @@ row_rename_table_for_mysql( DEBUG_SYNC_C("innodb_rename_in_cache"); /* The following call will also rename the .ibd file */ err = dict_table_rename_in_cache( - table, new_name, !new_is_tmp); + table, span{new_name,strlen(new_name)}, + false); if (err != DB_SUCCESS) { goto rollback_and_exit; } diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index f6d1ee48ea9..8a81a45ca12 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -212,7 +212,7 @@ close_and_exit: ut_ad(row_get_rec_trx_id(rec, index, offsets)); if 
(mode == BTR_MODIFY_LEAF) { - success = btr_cur_optimistic_delete( + success = DB_FAIL != btr_cur_optimistic_delete( btr_pcur_get_btr_cur(&node->pcur), 0, &mtr); } else { dberr_t err; @@ -220,16 +220,7 @@ close_and_exit: btr_cur_pessimistic_delete( &err, FALSE, btr_pcur_get_btr_cur(&node->pcur), 0, false, &mtr); - - switch (err) { - case DB_SUCCESS: - break; - case DB_OUT_OF_FILE_SPACE: - success = false; - break; - default: - ut_error; - } + success = err == DB_SUCCESS; } func_exit: @@ -503,8 +494,9 @@ row_purge_remove_sec_if_poss_leaf( << rec_index_print( btr_cur_get_rec(btr_cur), index); - ut_ad(0); - goto func_exit; + mtr.commit(); + dict_set_corrupted(index, "purge"); + goto cleanup; } if (index->is_spatial()) { @@ -533,11 +525,8 @@ row_purge_remove_sec_if_poss_leaf( } } - if (!btr_cur_optimistic_delete(btr_cur, 0, &mtr)) { - - /* The index entry could not be deleted. */ - success = false; - } + success = btr_cur_optimistic_delete(btr_cur, 0, &mtr) + != DB_FAIL; } /* (The index entry is still needed, @@ -551,6 +540,7 @@ row_purge_remove_sec_if_poss_leaf( /* The index entry does not exist, nothing to do. */ func_exit: mtr.commit(); +cleanup: btr_pcur_close(&pcur); // FIXME: do we need these? when is btr_cur->rtr_info set? return(success); } @@ -603,25 +593,6 @@ retry: ut_a(success); } -/** Skip uncommitted virtual indexes on newly added virtual column. -@param[in,out] index dict index object */ -static -inline -void -row_purge_skip_uncommitted_virtual_index( - dict_index_t*& index) -{ - /* We need to skip virtual indexes which is not - committed yet. 
It's safe because these indexes are - newly created by alter table, and because we do - not support LOCK=NONE when adding an index on newly - added virtual column.*/ - while (index != NULL && dict_index_has_virtual(index) - && !index->is_committed() && index->has_new_v_col()) { - index = dict_table_get_next_index(index); - } -} - /***********************************************************//** Purges a delete marking of a record. @retval true if the row was not found, or it was successfully removed @@ -633,34 +604,30 @@ row_purge_del_mark( /*===============*/ purge_node_t* node) /*!< in/out: row purge node */ { - mem_heap_t* heap; - - heap = mem_heap_create(1024); - - while (node->index != NULL) { - /* skip corrupted secondary index */ - dict_table_skip_corrupt_index(node->index); - - row_purge_skip_uncommitted_virtual_index(node->index); - - if (!node->index) { - break; - } - - if (node->index->type != DICT_FTS) { - dtuple_t* entry = row_build_index_entry_low( - node->row, NULL, node->index, - heap, ROW_BUILD_FOR_PURGE); - row_purge_remove_sec_if_poss(node, node->index, entry); - mem_heap_empty(heap); - } - - node->index = dict_table_get_next_index(node->index); - } - - mem_heap_free(heap); - - return(row_purge_remove_clust_if_poss(node)); + if (node->index) + { + mem_heap_t *heap= mem_heap_create(1024); + + do + { + const auto type= node->index->type; + if (type & (DICT_FTS | DICT_CORRUPT)) + continue; + if (UNIV_UNLIKELY(DICT_VIRTUAL & type) && !node->index->is_committed() && + node->index->has_new_v_col()) + continue; + dtuple_t* entry= row_build_index_entry_low(node->row, nullptr, + node->index, heap, + ROW_BUILD_FOR_PURGE); + row_purge_remove_sec_if_poss(node, node->index, entry); + mem_heap_empty(heap); + } + while ((node->index= dict_table_get_next_index(node->index))); + + mem_heap_free(heap); + } + + return row_purge_remove_clust_if_poss(node); } void purge_sys_t::wait_SYS() @@ -777,20 +744,25 @@ row_purge_upd_exist_or_extern_func( 
ut_ad(!node->table->skip_alter_undo); if (node->rec_type == TRX_UNDO_UPD_DEL_REC - || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { + || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) + || !node->index) { goto skip_secondaries; } heap = mem_heap_create(1024); - while (node->index != NULL) { - dict_table_skip_corrupt_index(node->index); + do { + const auto type = node->index->type; - row_purge_skip_uncommitted_virtual_index(node->index); + if (type & (DICT_FTS | DICT_CORRUPT)) { + continue; + } - if (!node->index) { - break; + if (UNIV_UNLIKELY(DICT_VIRTUAL & type) + && !node->index->is_committed() + && node->index->has_new_v_col()) { + continue; } if (row_upd_changes_ord_field_binary(node->index, node->update, @@ -805,9 +777,7 @@ row_purge_upd_exist_or_extern_func( mem_heap_empty(heap); } - - node->index = dict_table_get_next_index(node->index); - } + } while ((node->index = dict_table_get_next_index(node->index))); mem_heap_free(heap); @@ -821,8 +791,6 @@ skip_secondaries: = upd_get_nth_field(node->update, i); if (dfield_is_ext(&ufield->new_val)) { - buf_block_t* block; - byte* data_field; bool is_insert; ulint rseg_id; uint32_t page_no; @@ -866,22 +834,26 @@ skip_secondaries: latching order if we would only later latch the root page of such a tree! 
*/ - btr_root_block_get(index, RW_SX_LATCH, &mtr); - - block = buf_page_get( - page_id_t(rseg.space->id, page_no), - 0, RW_X_LATCH, &mtr); - - data_field = buf_block_get_frame(block) - + offset + internal_offset; + dberr_t err; + if (!btr_root_block_get(index, RW_SX_LATCH, &mtr, + &err)) { + } else if (buf_block_t* block = + buf_page_get(page_id_t(rseg.space->id, + page_no), + 0, RW_X_LATCH, &mtr)) { + byte* data_field = block->page.frame + + offset + internal_offset; + + ut_a(dfield_get_len(&ufield->new_val) + >= BTR_EXTERN_FIELD_REF_SIZE); + btr_free_externally_stored_field( + index, + data_field + + dfield_get_len(&ufield->new_val) + - BTR_EXTERN_FIELD_REF_SIZE, + NULL, NULL, block, 0, false, &mtr); + } - ut_a(dfield_get_len(&ufield->new_val) - >= BTR_EXTERN_FIELD_REF_SIZE); - btr_free_externally_stored_field( - index, - data_field + dfield_get_len(&ufield->new_val) - - BTR_EXTERN_FIELD_REF_SIZE, - NULL, NULL, block, 0, false, &mtr); mtr.commit(); } } @@ -897,6 +869,7 @@ skip_secondaries: row_purge_upd_exist_or_extern_func(node,undo_rec) #endif /* UNIV_DEBUG */ +MY_ATTRIBUTE((nonnull,warn_unused_result)) /** Parses the row reference and other info in a modify undo log record. 
@param[in] node row undo node @param[in] undo_rec record to purge @@ -913,17 +886,13 @@ row_purge_parse_undo_rec( bool* updated_extern) { dict_index_t* clust_index; - byte* ptr; undo_no_t undo_no; table_id_t table_id; roll_ptr_t roll_ptr; byte info_bits; ulint type; - ut_ad(node != NULL); - ut_ad(thr != NULL); - - ptr = trx_undo_rec_get_pars( + const byte* ptr = trx_undo_rec_get_pars( undo_rec, &type, &node->cmpl_info, updated_extern, &undo_no, &table_id); @@ -1077,16 +1046,14 @@ row_purge_record_func( #endif /* UNIV_DEBUG || WITH_WSREP */ bool updated_extern) { - dict_index_t* clust_index; - bool purged = true; - ut_ad(!node->found_clust); ut_ad(!node->table->skip_alter_undo); + ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr)); - clust_index = dict_table_get_first_index(node->table); + node->index = dict_table_get_next_index( + dict_table_get_first_index(node->table)); - node->index = dict_table_get_next_index(clust_index); - ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr)); + bool purged = true; switch (node->rec_type) { case TRX_UNDO_EMPTY: @@ -1120,8 +1087,8 @@ row_purge_record_func( } if (node->found_clust) { + node->found_clust = false; btr_pcur_close(&node->pcur); - node->found_clust = FALSE; } return(purged); diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc index 9f9c81a4b5b..9218e739e96 100644 --- a/storage/innobase/row/row0row.cc +++ b/storage/innobase/row/row0row.cc @@ -1296,12 +1296,15 @@ row_search_index_entry( ut_ad(dtuple_check_typed(entry)); - if (dict_index_is_spatial(index)) { - ut_ad(mode & (BTR_MODIFY_LEAF | BTR_MODIFY_TREE)); - rtr_pcur_open(index, entry, PAGE_CUR_RTREE_LOCATE, - mode, pcur, mtr); + if (index->is_spatial()) { + if (rtr_pcur_open(index, entry, mode, pcur, mtr)) { + return ROW_NOT_FOUND; + } } else { - btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr); + if (btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr) + != DB_SUCCESS) { + return ROW_NOT_FOUND; + } } switch 
(btr_pcur_get_btr_cur(pcur)->flag) { diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 09d90cc3e53..47c4b87a7c9 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -988,7 +988,6 @@ row_sel_get_clust_rec( dict_index_t* index; rec_t* clust_rec; rec_t* old_vers; - dberr_t err = DB_SUCCESS; mem_heap_t* heap = NULL; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; rec_offs* offsets = offsets_; @@ -1006,9 +1005,12 @@ row_sel_get_clust_rec( index = dict_table_get_first_index(plan->table); - btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE, - BTR_SEARCH_LEAF, &plan->clust_pcur, - 0, mtr); + dberr_t err = btr_pcur_open_with_no_init(index, plan->clust_ref, + PAGE_CUR_LE, BTR_SEARCH_LEAF, + &plan->clust_pcur, 0, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + goto err_exit; + } clust_rec = btr_pcur_get_rec(&(plan->clust_pcur)); @@ -1019,9 +1021,10 @@ row_sel_get_clust_rec( || btr_pcur_get_low_match(&(plan->clust_pcur)) < dict_index_get_n_unique(index)) { - ut_a(rec_get_deleted_flag(rec, - dict_table_is_comp(plan->table))); - ut_a(node->read_view); + if (!node->read_view || + !rec_get_deleted_flag(rec, plan->table->not_redundant())) { + err = DB_CORRUPTION; + } /* In a rare case it is possible that no clust rec is found for a delete-marked secondary index record: if in row0umod.cc @@ -1201,13 +1204,13 @@ re_scan: /* MDEV-14059 FIXME: why re-latch the block? pcur is already positioned on it! 
*/ - uint32_t page_no = page_get_page_no( - btr_pcur_get_page(pcur)); - cur_block = buf_page_get_gen( - page_id_t(index->table->space_id, page_no), - index->table->space->zip_size(), + btr_pcur_get_block(pcur)->page.id(), + btr_pcur_get_block(pcur)->zip_size(), RW_X_LATCH, NULL, BUF_GET, mtr, &err); + if (!cur_block) { + goto func_end; + } } else { mtr->start(); goto func_end; @@ -1224,6 +1227,7 @@ re_scan: } match->matched_recs->clear(); + // FIXME: check for !cur_block rtr_cur_search_with_match( cur_block, index, @@ -1355,8 +1359,9 @@ sel_set_rec_lock( /*********************************************************************//** Opens a pcur to a table index. */ +MY_ATTRIBUTE((warn_unused_result, nonnull)) static -void +dberr_t row_sel_open_pcur( /*==============*/ plan_t* plan, /*!< in: table plan */ @@ -1368,6 +1373,10 @@ row_sel_open_pcur( ulint n_fields; ulint i; + ut_ad(!plan->n_rows_prefetched); + ut_ad(!plan->n_rows_fetched); + ut_ad(!plan->cursor_at_end); + index = plan->index; /* Calculate the value of the search tuple: the exact match columns @@ -1382,6 +1391,8 @@ row_sel_open_pcur( cond = UT_LIST_GET_NEXT(cond_list, cond); } + dberr_t err; + if (plan->tuple) { n_fields = dtuple_get_n_fields(plan->tuple); @@ -1399,24 +1410,17 @@ row_sel_open_pcur( que_node_get_val(exp)); } - /* Open pcur to the index */ - - btr_pcur_open_with_no_init(index, plan->tuple, plan->mode, - BTR_SEARCH_LEAF, &plan->pcur, - NULL, mtr); + err = btr_pcur_open_with_no_init(index, plan->tuple, + plan->mode, BTR_SEARCH_LEAF, + &plan->pcur, nullptr, mtr); } else { - /* Open the cursor to the start or the end of the index - (FALSE: no init) */ - - btr_pcur_open_at_index_side(plan->asc, index, BTR_SEARCH_LEAF, - &(plan->pcur), false, 0, mtr); + err = btr_pcur_open_at_index_side(plan->asc, index, + BTR_SEARCH_LEAF, &plan->pcur, + false, 0, mtr); } - ut_ad(plan->n_rows_prefetched == 0); - ut_ad(plan->n_rows_fetched == 0); - ut_ad(plan->cursor_at_end == FALSE); - - plan->pcur_is_open = TRUE; 
+ plan->pcur_is_open = err == DB_SUCCESS; + return err; } /*********************************************************************//** @@ -1551,7 +1555,9 @@ row_sel_try_search_shortcut( ut_ad(plan->unique_search); ut_ad(!plan->must_get_clust); - row_sel_open_pcur(plan, mtr); + if (row_sel_open_pcur(plan, mtr) != DB_SUCCESS) { + return SEL_RETRY; + } const rec_t* rec = btr_pcur_get_rec(&(plan->pcur)); @@ -1732,7 +1738,11 @@ table_loop: if (!plan->pcur_is_open) { /* Evaluate the expressions to build the search tuple and open the cursor */ - row_sel_open_pcur(plan, &mtr); + err = row_sel_open_pcur(plan, &mtr); + + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + goto mtr_commit_exit; + } cursor_just_opened = TRUE; @@ -2263,10 +2273,8 @@ stop_for_a_while: plan->stored_cursor_rec_processed = FALSE; btr_pcur_store_position(&(plan->pcur), &mtr); - mtr.commit(); - err = DB_SUCCESS; - goto func_exit; + goto mtr_commit_exit; commit_mtr_for_a_while: /* Stores the cursor position and commits &mtr; this is used if @@ -2290,7 +2298,7 @@ lock_wait_or_error: plan->stored_cursor_rec_processed = FALSE; btr_pcur_store_position(&(plan->pcur), &mtr); - +mtr_commit_exit: mtr.commit(); func_exit: @@ -3332,7 +3340,6 @@ Row_sel_get_clust_rec_for_mysql::operator()( dict_index_t* clust_index; const rec_t* clust_rec; rec_t* old_vers; - dberr_t err; trx_t* trx; *out_rec = NULL; @@ -3346,9 +3353,13 @@ Row_sel_get_clust_rec_for_mysql::operator()( clust_index = dict_table_get_first_index(sec_index->table); - btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref, - PAGE_CUR_LE, BTR_SEARCH_LEAF, - prebuilt->clust_pcur, 0, mtr); + dberr_t err = btr_pcur_open_with_no_init(clust_index, + prebuilt->clust_ref, + PAGE_CUR_LE, BTR_SEARCH_LEAF, + prebuilt->clust_pcur, 0, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; + } clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur); @@ -3408,6 +3419,7 @@ Row_sel_get_clust_rec_for_mysql::operator()( btr_pcur_get_block(prebuilt->pcur)->page.id(), 
btr_pcur_get_block(prebuilt->pcur)->zip_size(), RW_NO_LATCH, NULL, BUF_GET, mtr, &err); + ut_ad(block); // FIXME: avoid crash mem_heap_t* heap = mem_heap_create(256); dtuple_t* tuple = dict_index_build_data_tuple( rec, sec_index, true, @@ -3427,7 +3439,7 @@ Row_sel_get_clust_rec_for_mysql::operator()( #endif /* UNIV_DEBUG */ } else if (!rec_get_deleted_flag(rec, dict_table_is_comp(sec_index->table)) - || prebuilt->select_lock_type != LOCK_NONE) { + || prebuilt->select_lock_type != LOCK_NONE) { /* In a rare case it is possible that no clust rec is found for a delete-marked secondary index record: if in row0umod.cc in @@ -3446,17 +3458,13 @@ Row_sel_get_clust_rec_for_mysql::operator()( fputs("\n" "InnoDB: clust index record ", stderr); rec_print(stderr, clust_rec, clust_index); - putc('\n', stderr); - trx_print(stderr, trx, 600); - fputs("\n" - "InnoDB: Submit a detailed bug report" - " to https://jira.mariadb.org/\n", stderr); - ut_ad(0); + err = DB_CORRUPTION; + clust_rec = NULL; + goto func_exit; } - clust_rec = NULL; - err = DB_SUCCESS; + clust_rec = NULL; goto func_exit; } @@ -3481,7 +3489,7 @@ Row_sel_get_clust_rec_for_mysql::operator()( case DB_SUCCESS_LOCKED_REC: break; default: - goto err_exit; + return err; } } else { /* This is a non-locking consistent read: if necessary, fetch @@ -3512,9 +3520,8 @@ Row_sel_get_clust_rec_for_mysql::operator()( clust_rec, offsets, offset_heap, &old_vers, vrow, mtr); - if (err != DB_SUCCESS) { - - goto err_exit; + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + return err; } cached_lsn = lsn; cached_page_id = bpage.id(); @@ -3538,7 +3545,7 @@ Row_sel_get_clust_rec_for_mysql::operator()( } if (old_vers == NULL) { - goto err_exit; + return err; } clust_rec = old_vers; @@ -3574,7 +3581,7 @@ Row_sel_get_clust_rec_for_mysql::operator()( case DB_SUCCESS_LOCKED_REC: break; default: - goto err_exit; + return err; } } @@ -3591,8 +3598,7 @@ func_exit: btr_pcur_store_position(prebuilt->clust_pcur, mtr); } -err_exit: - return(err); + return 
err; } /** Restores cursor position after it has been stored. We have to take into @@ -3653,7 +3659,9 @@ prev: if (btr_pcur_is_on_user_rec(pcur) && !moves_up && !rec_is_metadata(btr_pcur_get_rec(pcur), *pcur->btr_cur.index)) { - btr_pcur_move_to_prev(pcur, mtr); + if (!btr_pcur_move_to_prev(pcur, mtr)) { + return true; + } } return true; case BTR_PCUR_BEFORE: @@ -3933,8 +3941,12 @@ row_sel_try_search_shortcut_for_mysql( srw_spin_lock* ahi_latch = btr_search_sys.get_latch(*index); ahi_latch->rd_lock(SRW_LOCK_CALL); - btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, pcur, ahi_latch, mtr); + if (btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, pcur, ahi_latch, mtr) + != DB_SUCCESS) { + goto retry; + } + rec = btr_pcur_get_rec(pcur); if (!page_rec_is_user_rec(rec) || rec_is_metadata(rec, *index)) { @@ -4350,9 +4362,13 @@ row_search_mvcc( if (!prebuilt->table->space) { DBUG_RETURN(DB_TABLESPACE_DELETED); } else if (!prebuilt->table->is_readable()) { - DBUG_RETURN(prebuilt->table->space - ? DB_DECRYPTION_FAILED - : DB_TABLESPACE_NOT_FOUND); + if (fil_space_crypt_t* crypt_data = + prebuilt->table->space->crypt_data) { + if (crypt_data->should_encrypt()) { + DBUG_RETURN(DB_DECRYPTION_FAILED); + } + } + DBUG_RETURN(DB_CORRUPTION); } else if (!prebuilt->index_usable) { DBUG_RETURN(DB_MISSING_HISTORY); } else if (prebuilt->index->is_corrupted()) { @@ -4788,13 +4804,7 @@ wait_table_again: if (err != DB_SUCCESS) { if (err == DB_DECRYPTION_FAILED) { - ib_push_warning(trx->mysql_thd, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. 
" - " Can't continue reading table.", - prebuilt->table->name.m_name); - index->table->file_unreadable = true; + btr_decryption_failed(*index); } rec = NULL; goto page_read_error; @@ -5760,9 +5770,7 @@ next_rec: goto rec_loop; } } else { - const buf_block_t* block = btr_pcur_get_block(pcur); - /* This is based on btr_pcur_move_to_next(), - but avoids infinite read loop of a corrupted page. */ + /* This is based on btr_pcur_move_to_next() */ ut_ad(pcur->pos_state == BTR_PCUR_IS_POSITIONED); ut_ad(pcur->latch_mode != BTR_NO_LATCHES); pcur->old_stored = false; @@ -5770,10 +5778,8 @@ next_rec: if (btr_pcur_is_after_last_in_tree(pcur)) { goto not_moved; } - btr_pcur_move_to_next_page(pcur, &mtr); - if (UNIV_UNLIKELY(btr_pcur_get_block(pcur) - == block)) { - err = DB_CORRUPTION; + err = btr_pcur_move_to_next_page(pcur, &mtr); + if (err != DB_SUCCESS) { goto lock_wait_or_error; } } else { @@ -6103,8 +6109,10 @@ row_search_get_max_rec( btr_pcur_t pcur; const rec_t* rec; /* Open at the high/right end (false), and init cursor */ - btr_pcur_open_at_index_side( - false, index, BTR_SEARCH_LEAF, &pcur, true, 0, mtr); + if (btr_pcur_open_at_index_side(false, index, BTR_SEARCH_LEAF, &pcur, + true, 0, mtr) != DB_SUCCESS) { + return nullptr; + } do { const page_t* page; diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 825e7442746..f730637c8d2 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -192,8 +192,9 @@ restart: BTR_MODIFY_LEAF, &mtr) == btr_pcur_t::SAME_ALL); } - if (btr_cur_optimistic_delete(&node->pcur.btr_cur, 0, &mtr)) { - err = DB_SUCCESS; + err = btr_cur_optimistic_delete(&node->pcur.btr_cur, 0, &mtr); + + if (err != DB_FAIL) { goto func_exit; } @@ -233,7 +234,7 @@ func_exit: if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_INSERT_METADATA) { /* When rolling back the very first instant ADD COLUMN operation, reset the root page to the basic state. 
*/ - btr_reset_instant(*index, true, &mtr); + err = btr_reset_instant(*index, true, &mtr); } btr_pcur_commit_specify_mtr(&node->pcur, &mtr); @@ -305,8 +306,7 @@ row_undo_ins_remove_sec_low( btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); if (modify_leaf) { - err = btr_cur_optimistic_delete(btr_cur, 0, &mtr) - ? DB_SUCCESS : DB_FAIL; + err = btr_cur_optimistic_delete(btr_cur, 0, &mtr); } else { /* Passing rollback=false here, because we are deleting a secondary index record: the distinction @@ -375,7 +375,7 @@ retry: static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked) { dict_index_t* clust_index; - byte* ptr; + const byte* ptr; undo_no_t undo_no; table_id_t table_id; ulint dummy; @@ -421,16 +421,14 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked) == !is_system_tablespace(table->space_id)); size_t len = mach_read_from_2(node->undo_rec) + size_t(node->undo_rec - ptr) - 2; - ptr[len] = 0; - const char* name = reinterpret_cast(ptr); - if (strcmp(table->name.m_name, name)) { - dict_table_rename_in_cache( - table, name, - !dict_table_t::is_temporary_name(name), - true); + const span name(reinterpret_cast(ptr), + len); + if (strlen(table->name.m_name) != len + || memcmp(table->name.m_name, ptr, len)) { + dict_table_rename_in_cache(table, name, true); } else if (table->space && table->space->id) { const auto s = table->space->name(); - if (len != s.size() || memcmp(name, s.data(), len)) { + if (len != s.size() || memcmp(ptr, s.data(), len)) { table->rename_tablespace(name, true); } } @@ -509,16 +507,15 @@ row_undo_ins_remove_sec_rec( que_thr_t* thr) /*!< in: query thread */ { dberr_t err = DB_SUCCESS; - dict_index_t* index = node->index; + dict_index_t* index; mem_heap_t* heap; heap = mem_heap_create(1024); - while (index != NULL) { - dtuple_t* entry; - - if (index->type & DICT_FTS || !index->is_committed()) { - dict_table_next_uncorrupted_index(index); + for (index = node->index; index; + index = 
dict_table_get_next_index(index)) { + if (index->type & (DICT_FTS | DICT_CORRUPT) + || !index->is_committed()) { continue; } @@ -526,7 +523,7 @@ row_undo_ins_remove_sec_rec( always contain all fields of the index. It does not matter if any indexes were created afterwards; all index entries can be reconstructed from the row. */ - entry = row_build_index_entry( + dtuple_t* entry = row_build_index_entry( node->row, node->ext, index, heap); if (UNIV_UNLIKELY(!entry)) { /* The database must have crashed after @@ -549,7 +546,6 @@ row_undo_ins_remove_sec_rec( } mem_heap_empty(heap); - dict_table_next_uncorrupted_index(index); } func_exit: @@ -594,8 +590,6 @@ row_undo_ins( /* Skip the clustered index (the first index) */ node->index = dict_table_get_next_index(node->index); - dict_table_skip_corrupt_index(node->index); - err = row_undo_ins_remove_sec_rec(node, thr); if (err != DB_SUCCESS) { @@ -640,8 +634,7 @@ row_undo_ins( err = row_undo_ins_remove_clust_rec(node); break; case TRX_UNDO_EMPTY: - node->table->clear(thr); - err = DB_SUCCESS; + err = node->table->clear(thr); break; } diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index 0f96e021d9f..91925219ea8 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -93,10 +93,10 @@ row_undo_mod_clust_low( pcur = &node->pcur; btr_cur = btr_pcur_get_btr_cur(pcur); - ut_d(auto pcur_restore_result =) - pcur->restore_position(mode, mtr); + if (pcur->restore_position(mode, mtr) != btr_pcur_t::SAME_ALL) { + return DB_CORRUPTION; + } - ut_ad(pcur_restore_result == btr_pcur_t::SAME_ALL); ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur)) == thr_get_trx(thr)->id @@ -133,8 +133,8 @@ row_undo_mod_clust_low( && node->ref == &trx_undo_metadata && btr_cur_get_index(btr_cur)->table->instant && node->update->info_bits == REC_INFO_METADATA_ADD) { - btr_reset_instant(*btr_cur_get_index(btr_cur), false, - mtr); + err = 
btr_reset_instant(*btr_cur_get_index(btr_cur), + false, mtr); } } @@ -338,10 +338,12 @@ row_undo_mod_clust( if (index->table->is_temporary()) { mtr.set_log_mode(MTR_LOG_NO_REDO); - if (btr_cur_optimistic_delete(&pcur->btr_cur, 0, - &mtr)) { + err = btr_cur_optimistic_delete(&pcur->btr_cur, 0, + &mtr); + if (err != DB_FAIL) { goto mtr_commit_exit; } + err = DB_SUCCESS; btr_pcur_commit_specify_mtr(pcur, &mtr); } else { index->set_modified(mtr); @@ -350,10 +352,12 @@ row_undo_mod_clust( if (!row_undo_mod_must_purge(node, &mtr)) { goto mtr_commit_exit; } - if (btr_cur_optimistic_delete(&pcur->btr_cur, 0, - &mtr)) { + err = btr_cur_optimistic_delete(&pcur->btr_cur, 0, + &mtr); + if (err != DB_FAIL) { goto mtr_commit_exit; } + err = DB_SUCCESS; purge_sys.latch.rd_unlock(); btr_pcur_commit_specify_mtr(pcur, &mtr); have_latch = false; @@ -596,8 +600,7 @@ row_undo_mod_del_mark_or_remove_sec_low( } if (modify_leaf) { - err = btr_cur_optimistic_delete(btr_cur, 0, &mtr) - ? DB_SUCCESS : DB_FAIL; + err = btr_cur_optimistic_delete(btr_cur, 0, &mtr); } else { /* Passing rollback=false, because we are deleting a secondary index record: @@ -853,12 +856,11 @@ row_undo_mod_upd_del_sec( heap = mem_heap_create(1024); - while (node->index != NULL) { - dict_index_t* index = node->index; - dtuple_t* entry; + do { + dict_index_t* index = node->index; - if (index->type & DICT_FTS || !index->is_committed()) { - dict_table_next_uncorrupted_index(node->index); + if (index->type & (DICT_FTS | DICT_CORRUPT) + || !index->is_committed()) { continue; } @@ -869,7 +871,7 @@ row_undo_mod_upd_del_sec( time when the undo log record was written. When we get to roll back an undo log entry TRX_UNDO_DEL_MARK_REC, it should always cover all affected indexes. 
*/ - entry = row_build_index_entry( + dtuple_t* entry = row_build_index_entry( node->row, node->ext, index, heap); if (UNIV_UNLIKELY(!entry)) { @@ -894,8 +896,7 @@ row_undo_mod_upd_del_sec( } mem_heap_empty(heap); - dict_table_next_uncorrupted_index(node->index); - } + } while ((node->index = dict_table_get_next_index(node->index))); mem_heap_free(heap); @@ -919,12 +920,11 @@ row_undo_mod_del_mark_sec( heap = mem_heap_create(1024); - while (node->index != NULL) { - dict_index_t* index = node->index; - dtuple_t* entry; + do { + dict_index_t* index = node->index; - if (index->type == DICT_FTS || !index->is_committed()) { - dict_table_next_uncorrupted_index(node->index); + if (index->type & (DICT_FTS | DICT_CORRUPT) + || !index->is_committed()) { continue; } @@ -935,7 +935,7 @@ row_undo_mod_del_mark_sec( time when the undo log record was written. When we get to roll back an undo log entry TRX_UNDO_DEL_MARK_REC, it should always cover all affected indexes. */ - entry = row_build_index_entry( + dtuple_t* entry = row_build_index_entry( node->row, node->ext, index, heap); ut_a(entry); @@ -961,8 +961,7 @@ row_undo_mod_del_mark_sec( } mem_heap_empty(heap); - dict_table_next_uncorrupted_index(node->index); - } + } while ((node->index = dict_table_get_next_index(node->index))); mem_heap_free(heap); @@ -979,54 +978,33 @@ row_undo_mod_upd_exist_sec( undo_node_t* node, /*!< in: row undo node */ que_thr_t* thr) /*!< in: query thread */ { - mem_heap_t* heap; - dberr_t err = DB_SUCCESS; - - if (node->index == NULL - || ((node->cmpl_info & UPD_NODE_NO_ORD_CHANGE))) { - /* No change in secondary indexes */ - - return(err); + if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) { + return DB_SUCCESS; } - heap = mem_heap_create(1024); + mem_heap_t* heap = mem_heap_create(1024); + dberr_t err = DB_SUCCESS; + do { + dict_index_t* index = node->index; - while (node->index != NULL) { - - if (!node->index->is_committed()) { - dict_table_next_uncorrupted_index(node->index); + if (index->type & 
(DICT_FTS | DICT_CORRUPT) + || !index->is_committed()) { continue; } - dict_index_t* index = node->index; - dtuple_t* entry; - - if (dict_index_is_spatial(index)) { - if (!row_upd_changes_ord_field_binary_func( - index, node->update, + if (!row_upd_changes_ord_field_binary_func( + index, node->update, #ifdef UNIV_DEBUG - thr, + thr, #endif /* UNIV_DEBUG */ - node->row, - node->ext, ROW_BUILD_FOR_UNDO)) { - dict_table_next_uncorrupted_index(node->index); - continue; - } - } else { - if (index->type == DICT_FTS - || !row_upd_changes_ord_field_binary(index, - node->update, - thr, node->row, - node->ext)) { - dict_table_next_uncorrupted_index(node->index); - continue; - } + node->row, node->ext, ROW_BUILD_FOR_UNDO)) { + continue; } /* Build the newest version of the index entry */ - entry = row_build_index_entry(node->row, node->ext, - index, heap); + dtuple_t* entry = row_build_index_entry( + node->row, node->ext, index, heap); if (UNIV_UNLIKELY(!entry)) { /* The server must have crashed in row_upd_clust_rec_by_insert() before @@ -1078,17 +1056,10 @@ row_undo_mod_upd_exist_sec( the secondary index record if we updated its fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. 
*/ - if (dict_index_is_spatial(index)) { - entry = row_build_index_entry_low(node->undo_row, - node->undo_ext, - index, heap, - ROW_BUILD_FOR_UNDO); - } else { - entry = row_build_index_entry(node->undo_row, - node->undo_ext, - index, heap); - } - + entry = row_build_index_entry_low(node->undo_row, + node->undo_ext, + index, heap, + ROW_BUILD_FOR_UNDO); ut_a(entry); err = row_undo_mod_del_unmark_sec_and_undo_update( @@ -1106,8 +1077,7 @@ row_undo_mod_upd_exist_sec( } mem_heap_empty(heap); - dict_table_next_uncorrupted_index(node->index); - } + } while ((node->index = dict_table_get_next_index(node->index))); mem_heap_free(heap); @@ -1120,7 +1090,6 @@ row_undo_mod_upd_exist_sec( static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked) { dict_index_t* clust_index; - byte* ptr; undo_no_t undo_no; table_id_t table_id; trx_id_t trx_id; @@ -1135,8 +1104,9 @@ static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked) ut_ad(node->trx->in_rollback); ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr)); - ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info, - &dummy_extern, &undo_no, &table_id); + const byte *ptr = trx_undo_rec_get_pars( + node->undo_rec, &type, &cmpl_info, + &dummy_extern, &undo_no, &table_id); node->rec_type = type; if (node->state == UNDO_UPDATE_PERSISTENT) { @@ -1253,7 +1223,7 @@ row_undo_mod( undo_node_t* node, /*!< in: row undo node */ que_thr_t* thr) /*!< in: query thread */ { - dberr_t err; + dberr_t err = DB_SUCCESS; ut_ad(thr_get_trx(thr) == node->trx); const bool dict_locked = node->trx->dict_operation_lock_mode; @@ -1273,23 +1243,20 @@ row_undo_mod( /* Skip the clustered index (the first index) */ node->index = dict_table_get_next_index(node->index); - - /* Skip all corrupted secondary index */ - dict_table_skip_corrupt_index(node->index); - - switch (node->rec_type) { - case TRX_UNDO_UPD_EXIST_REC: - err = row_undo_mod_upd_exist_sec(node, thr); - break; - case TRX_UNDO_DEL_MARK_REC: - err = 
row_undo_mod_del_mark_sec(node, thr); - break; - case TRX_UNDO_UPD_DEL_REC: - err = row_undo_mod_upd_del_sec(node, thr); - break; - default: - ut_error; - err = DB_ERROR; + if (node->index) { + switch (node->rec_type) { + case TRX_UNDO_UPD_EXIST_REC: + err = row_undo_mod_upd_exist_sec(node, thr); + break; + case TRX_UNDO_DEL_MARK_REC: + err = row_undo_mod_del_mark_sec(node, thr); + break; + case TRX_UNDO_UPD_DEL_REC: + err = row_undo_mod_upd_del_sec(node, thr); + break; + default: + MY_ASSERT_UNREACHABLE(); + } } if (err == DB_SUCCESS) { diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc index 0d8d49efc6f..9c41862e0b4 100644 --- a/storage/innobase/row/row0undo.cc +++ b/storage/innobase/row/row0undo.cc @@ -256,21 +256,6 @@ func_exit: return(found); } -/** Try to truncate the undo logs. -@param[in,out] trx transaction */ -static void row_undo_try_truncate(trx_t* trx) -{ - if (trx_undo_t* undo = trx->rsegs.m_redo.undo) { - ut_ad(undo->rseg == trx->rsegs.m_redo.rseg); - trx_undo_truncate_end(*undo, trx->undo_no, false); - } - - if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) { - ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg); - trx_undo_truncate_end(*undo, trx->undo_no, true); - } -} - /** Get the latest undo log record for rollback. @param[in,out] node rollback context @return whether an undo log record was fetched */ @@ -280,7 +265,7 @@ static bool row_undo_rec_get(undo_node_t* node) if (trx->pages_undone) { trx->pages_undone = 0; - row_undo_try_truncate(trx); + trx_undo_try_truncate(*trx); } trx_undo_t* undo = NULL; @@ -308,7 +293,7 @@ static bool row_undo_rec_get(undo_node_t* node) } if (undo == NULL) { - row_undo_try_truncate(trx); + trx_undo_try_truncate(*trx); /* Mark any ROLLBACK TO SAVEPOINT completed, so that if the transaction object is committed and reused later, we will default to a full ROLLBACK. 
*/ @@ -327,8 +312,12 @@ static bool row_undo_rec_get(undo_node_t* node) mtr_t mtr; mtr.start(); - buf_block_t* undo_page = trx_undo_page_get_s_latched( - page_id_t(undo->rseg->space->id, undo->top_page_no), &mtr); + buf_block_t* undo_page = buf_page_get( + page_id_t(undo->rseg->space->id, undo->top_page_no), + 0, RW_S_LATCH, &mtr); + if (!undo_page) { + return false; + } uint16_t offset = undo->top_offset; diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index b51133aaa0d..b53416e2976 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -1887,9 +1887,7 @@ row_upd_sec_index_entry( ut_ad(trx->id != 0); index = node->index; - if (!index->is_committed()) { - return DB_SUCCESS; - } + ut_ad(index->is_committed()); /* For secondary indexes, index->online_status==ONLINE_INDEX_COMPLETE if index->is_committed(). */ @@ -2594,6 +2592,10 @@ row_upd_clust_step( index = dict_table_get_first_index(node->table); + if (index->is_corrupted()) { + return DB_TABLE_CORRUPT; + } + const bool referenced = row_upd_index_is_referenced(index, trx); #ifdef WITH_WSREP const bool foreign = wsrep_row_upd_index_is_foreign(index, trx); @@ -2808,14 +2810,11 @@ row_upd( DBUG_EXECUTE_IF("row_upd_skip_sec", node->index = NULL;); do { - /* Skip corrupted index */ - dict_table_skip_corrupt_index(node->index); - if (!node->index) { break; } - if (!(node->index->type & DICT_FTS) + if (!(node->index->type & (DICT_FTS | DICT_CORRUPT)) && node->index->is_committed()) { err = row_upd_sec_step(node, thr); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index ae3c39f9be2..0ffb665c26d 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -751,10 +751,12 @@ srv_undo_tablespaces_init(bool create_new_db) mtr_t mtr; for (ulint i= 0; i < srv_undo_tablespaces; ++i) { - mtr.start(); - fsp_header_init(fil_space_get(srv_undo_space_id_start + i), - SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); 
- mtr.commit(); + mtr.start(); + dberr_t err= fsp_header_init(fil_space_get(srv_undo_space_id_start + i), + SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); + mtr.commit(); + if (err) + return err; } } @@ -800,10 +802,13 @@ srv_open_tmp_tablespace(bool create_new_db) mtr_t mtr; mtr.start(); mtr.set_log_mode(MTR_LOG_NO_REDO); - fsp_header_init(fil_system.temp_space, - srv_tmp_space.get_sum_of_sizes(), - &mtr); + err = fsp_header_init(fil_system.temp_space, + srv_tmp_space.get_sum_of_sizes(), + &mtr); mtr.commit(); + if (err == DB_SUCCESS) { + err = trx_temp_rseg_create(&mtr); + } } else { /* This file was just opened in the code above! */ ib::error() << "The innodb_temporary" @@ -1377,17 +1382,18 @@ file_checked: ut_ad(fil_system.sys_space->id == 0); compile_time_assert(TRX_SYS_SPACE == 0); compile_time_assert(IBUF_SPACE_ID == 0); - fsp_header_init(fil_system.sys_space, - uint32_t(sum_of_new_sizes), &mtr); + ut_a(fsp_header_init(fil_system.sys_space, + uint32_t(sum_of_new_sizes), &mtr) + == DB_SUCCESS); ulint ibuf_root = btr_create( DICT_CLUSTERED | DICT_IBUF, fil_system.sys_space, - DICT_IBUF_ID_MIN, nullptr, &mtr); + DICT_IBUF_ID_MIN, nullptr, &mtr, &err); mtr_commit(&mtr); if (ibuf_root == FIL_NULL) { - return(srv_init_abort(DB_ERROR)); + return srv_init_abort(err); } ut_ad(ibuf_root == IBUF_TREE_ROOT_PAGE_NO); @@ -1396,8 +1402,7 @@ file_checked: the first rollback segment before the double write buffer. All the remaining rollback segments will be created later, after the double write buffer has been created. */ - trx_sys_create_sys_pages(); - err = trx_lists_init_at_db_start(); + err = trx_sys_create_sys_pages(&mtr); if (err != DB_SUCCESS) { return(srv_init_abort(err)); @@ -1509,6 +1514,10 @@ file_checked: buf_block_t* block = buf_page_get( page_id_t(0, 0), 0, RW_SX_LATCH, &mtr); + /* The first page of the system tablespace + should already have been successfully + accessed earlier during startup. 
*/ + ut_a(block); ulint size = mach_read_from_4( FSP_HEADER_OFFSET + FSP_SIZE + block->page.frame); @@ -1712,6 +1721,11 @@ file_checked: page_id_t(IBUF_SPACE_ID, FSP_IBUF_HEADER_PAGE_NO), 0, RW_X_LATCH, &mtr); + if (UNIV_UNLIKELY(!block)) { + corrupted_old_page: + mtr.commit(); + return srv_init_abort(DB_CORRUPTION); + } fil_block_check_type(*block, FIL_PAGE_TYPE_SYS, &mtr); /* Already MySQL 3.23.53 initialized FSP_IBUF_TREE_ROOT_PAGE_NO to @@ -1719,16 +1733,25 @@ file_checked: block = buf_page_get( page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), 0, RW_X_LATCH, &mtr); + if (UNIV_UNLIKELY(!block)) { + goto corrupted_old_page; + } fil_block_check_type(*block, FIL_PAGE_TYPE_TRX_SYS, &mtr); block = buf_page_get( page_id_t(TRX_SYS_SPACE, FSP_FIRST_RSEG_PAGE_NO), 0, RW_X_LATCH, &mtr); + if (UNIV_UNLIKELY(!block)) { + goto corrupted_old_page; + } fil_block_check_type(*block, FIL_PAGE_TYPE_SYS, &mtr); block = buf_page_get( page_id_t(TRX_SYS_SPACE, FSP_DICT_HDR_PAGE_NO), 0, RW_X_LATCH, &mtr); + if (UNIV_UNLIKELY(!block)) { + goto corrupted_old_page; + } fil_block_check_type(*block, FIL_PAGE_TYPE_SYS, &mtr); mtr.commit(); } @@ -1828,8 +1851,6 @@ skip_monitors: return(srv_init_abort(err)); } - trx_temp_rseg_create(); - if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { srv_start_periodic_timer(srv_master_timer, srv_master_callback, 1000); } diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index f5d8c66353a..84d7d785490 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -26,7 +26,6 @@ Created 3/26/1996 Heikki Tuuri #include "trx0purge.h" #include "fsp0fsp.h" -#include "fut0fut.h" #include "mach0data.h" #include "mtr0log.h" #include "que0que.h" @@ -230,8 +229,12 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) ut_ad(undo == trx->rsegs.m_redo.undo); trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; ut_ad(undo->rseg == rseg); - buf_block_t* rseg_header = trx_rsegf_get( - rseg->space, 
rseg->page_no, mtr); + buf_block_t* rseg_header = rseg->get(mtr, nullptr); + /* We are in transaction commit; we cannot return an error. If the + database is corrupted, it is better to crash it than to + intentionally violate ACID by committing something that is known to + be corrupted. */ + ut_ad(rseg_header); buf_block_t* undo_page = trx_undo_set_state_at_finish( undo, mtr); trx_ulogf_t* undo_header = undo_page->page.frame @@ -311,9 +314,15 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) } /* Add the log as the first in the history list */ - flst_add_first(rseg_header, TRX_RSEG + TRX_RSEG_HISTORY, undo_page, - static_cast(undo->hdr_offset - + TRX_UNDO_HISTORY_NODE), mtr); + + /* We are in transaction commit; we cannot return an error + when detecting corruption. It is better to crash the server + than to intentionally violate ACID by committing something + that is known to be corrupted. */ + ut_a(flst_add_first(rseg_header, TRX_RSEG + TRX_RSEG_HISTORY, undo_page, + static_cast(undo->hdr_offset + + TRX_UNDO_HISTORY_NODE), + mtr) == DB_SUCCESS); mtr->write<8,mtr_t::MAYBE_NOP>(*undo_page, undo_header + TRX_UNDO_TRX_NO, @@ -341,116 +350,126 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) undo = NULL; } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Remove undo log header from the history list. 
@param[in,out] rseg rollback segment header page @param[in] log undo log segment header page @param[in] offset byte offset in the undo log segment header page @param[in,out] mtr mini-transaction */ -static void trx_purge_remove_log_hdr(buf_block_t *rseg, buf_block_t* log, - uint16_t offset, mtr_t *mtr) +static dberr_t trx_purge_remove_log_hdr(buf_block_t *rseg, buf_block_t* log, + uint16_t offset, mtr_t *mtr) { - flst_remove(rseg, TRX_RSEG + TRX_RSEG_HISTORY, - log, static_cast(offset + TRX_UNDO_HISTORY_NODE), mtr); + return flst_remove(rseg, TRX_RSEG + TRX_RSEG_HISTORY, log, + uint16_t(offset + TRX_UNDO_HISTORY_NODE), mtr); } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Free an undo log segment, and remove the header from the history list. @param[in,out] rseg rollback segment -@param[in] hdr_addr file address of log_hdr */ -static void trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr) +@param[in] hdr_addr file address of log_hdr +@return error code */ +static dberr_t trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr) { - mtr_t mtr; - - mtr.start(); - const page_id_t hdr_page_id(rseg->space->id, hdr_addr.page); - - /* We only need the latch to maintain rseg->curr_size. To follow the - latching order, we must acquire it before acquiring any related - page latch. */ - rseg->latch.wr_lock(SRW_LOCK_CALL); - - buf_block_t* rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr); - buf_block_t* block = trx_undo_page_get(hdr_page_id, &mtr); - - /* Mark the last undo log totally purged, so that if the - system crashes, the tail of the undo log will not get accessed - again. The list of pages in the undo log tail gets - inconsistent during the freeing of the segment, and therefore - purge should not try to access them again. 
*/ - mtr.write<2,mtr_t::MAYBE_NOP>(*block, block->page.frame - + hdr_addr.boffset - + TRX_UNDO_NEEDS_PURGE, 0U); - - while (!fseg_free_step_not_header( - TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER - + block->page.frame, &mtr)) { - rseg->latch.wr_unlock(); - mtr.commit(); - mtr.start(); - rseg->latch.wr_lock(SRW_LOCK_CALL); - - rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr); - - block = trx_undo_page_get(hdr_page_id, &mtr); - } - - /* The page list may now be inconsistent, but the length field - stored in the list base node tells us how big it was before we - started the freeing. */ - - const uint32_t seg_size = flst_get_len( - TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->page.frame); - - /* We may free the undo log segment header page; it must be freed - within the same mtr as the undo log header is removed from the - history list: otherwise, in case of a database crash, the segment - could become inaccessible garbage in the file space. */ - - trx_purge_remove_log_hdr(rseg_hdr, block, hdr_addr.boffset, &mtr); - - do { - - /* Here we assume that a file segment with just the header - page can be freed in a few steps, so that the buffer pool - is not flooded with bufferfixed pages: see the note in - fsp0fsp.cc. */ - - } while (!fseg_free_step(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER - + block->page.frame, &mtr)); - - byte* hist = TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_hdr->page.frame; - ut_ad(mach_read_from_4(hist) >= seg_size); + const page_id_t hdr_page_id{rseg->space->id, hdr_addr.page}; + mtr_t mtr; + mtr.start(); + + /* We only need the latch to maintain rseg->curr_size. To follow the + latching order, we must acquire it before acquiring any related + page latch. 
*/ + rseg->latch.wr_lock(SRW_LOCK_CALL); + + dberr_t err; + buf_block_t *rseg_hdr= rseg->get(&mtr, &err); + if (!rseg_hdr) + goto func_exit; + if (buf_block_t *block= buf_page_get_gen(hdr_page_id, 0, RW_X_LATCH, + nullptr, BUF_GET_POSSIBLY_FREED, + &mtr, &err)) + { + /* Mark the last undo log totally purged, so that if the system + crashes, the tail of the undo log will not get accessed again. The + list of pages in the undo log tail gets inconsistent during the + freeing of the segment, and therefore purge should not try to + access them again. */ + mtr.write<2,mtr_t::MAYBE_NOP>(*block, block->page.frame + + hdr_addr.boffset + TRX_UNDO_NEEDS_PURGE, 0U); + while (!fseg_free_step_not_header(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER + + block->page.frame, &mtr)) + { + rseg->latch.wr_unlock(); + rseg_hdr->fix(); + block->fix(); + mtr.commit(); + mtr.start(); + mtr.memo_push(rseg_hdr, MTR_MEMO_PAGE_X_FIX); + mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX); + rseg->latch.wr_lock(SRW_LOCK_CALL); + rseg_hdr->page.lock.x_lock(); + block->page.lock.x_lock(); + } - mtr.write<4>(*rseg_hdr, hist, mach_read_from_4(hist) - seg_size); + /* The page list may now be inconsistent, but the length field + stored in the list base node tells us how big it was before we + started the freeing. */ + const uint32_t seg_size= + flst_get_len(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->page.frame); + + /* We may free the undo log segment header page; it must be freed + within the same mtr as the undo log header is removed from the + history list: otherwise, in case of a database crash, the segment + could become inaccessible garbage in the file space. 
*/ + err= trx_purge_remove_log_hdr(rseg_hdr, block, hdr_addr.boffset, &mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) + goto func_exit; + byte *hist= TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_hdr->page.frame; + if (UNIV_UNLIKELY(mach_read_from_4(hist) < seg_size)) + { + err= DB_CORRUPTION; + goto func_exit; + } + mtr.write<4>(*rseg_hdr, hist, mach_read_from_4(hist) - seg_size); - ut_ad(rseg->curr_size >= seg_size); + /* Here we assume that a file segment with just the header page + can be freed in a few steps, so that the buffer pool is not + flooded with bufferfixed pages: see the note in fsp0fsp.cc. */ + while (!fseg_free_step(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER + + block->page.frame, &mtr)); - rseg->history_size--; - rseg->curr_size -= seg_size; + ut_ad(rseg->curr_size >= seg_size); - rseg->latch.wr_unlock(); + rseg->history_size--; + rseg->curr_size -= seg_size; + } - mtr.commit(); +func_exit: + rseg->latch.wr_unlock(); + mtr.commit(); + return err; } /** Remove unnecessary history data from a rollback segment. 
@param[in,out] rseg rollback segment -@param[in] limit truncate anything before this */ +@param[in] limit truncate anything before this +@return error code */ static -void +dberr_t trx_purge_truncate_rseg_history( trx_rseg_t& rseg, const purge_sys_t::iterator& limit) { fil_addr_t hdr_addr; - fil_addr_t prev_hdr_addr; mtr_t mtr; - trx_id_t undo_trx_no; mtr.start(); ut_ad(rseg.is_persistent()); rseg.latch.wr_lock(SRW_LOCK_CALL); - buf_block_t* rseg_hdr = trx_rsegf_get(rseg.space, rseg.page_no, &mtr); + dberr_t err; + buf_block_t* rseg_hdr = rseg.get(&mtr, &err); + if (!rseg_hdr) { + goto func_exit; + } hdr_addr = flst_get_last(TRX_RSEG + TRX_RSEG_HISTORY + rseg_hdr->page.frame); @@ -462,18 +481,24 @@ loop: func_exit: rseg.latch.wr_unlock(); mtr.commit(); - return; + return err; + } + + buf_block_t* block = buf_page_get_gen(page_id_t(rseg.space->id, + hdr_addr.page), + 0, RW_X_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, + &mtr, &err); + if (!block) { + goto func_exit; } - buf_block_t* block = trx_undo_page_get(page_id_t(rseg.space->id, - hdr_addr.page), - &mtr); - undo_trx_no = mach_read_from_8(block->page.frame + hdr_addr.boffset - + TRX_UNDO_TRX_NO); + const trx_id_t undo_trx_no = mach_read_from_8( + block->page.frame + hdr_addr.boffset + TRX_UNDO_TRX_NO); if (undo_trx_no >= limit.trx_no) { if (undo_trx_no == limit.trx_no) { - trx_undo_truncate_start( + err = trx_undo_truncate_start( &rseg, hdr_addr.page, hdr_addr.boffset, limit.undo_no); } @@ -481,8 +506,8 @@ func_exit: goto func_exit; } - prev_hdr_addr = flst_get_prev_addr(block->page.frame + hdr_addr.boffset - + TRX_UNDO_HISTORY_NODE); + fil_addr_t prev_hdr_addr = flst_get_prev_addr( + block->page.frame + hdr_addr.boffset + TRX_UNDO_HISTORY_NODE); prev_hdr_addr.boffset = static_cast(prev_hdr_addr.boffset - TRX_UNDO_HISTORY_NODE); @@ -499,11 +524,18 @@ func_exit: /* calls the trx_purge_remove_log_hdr() inside trx_purge_free_segment(). 
*/ - trx_purge_free_segment(&rseg, hdr_addr); + err = trx_purge_free_segment(&rseg, hdr_addr); + if (err != DB_SUCCESS) { + return err; + } } else { /* Remove the log hdr from the rseg history. */ - trx_purge_remove_log_hdr(rseg_hdr, block, hdr_addr.boffset, - &mtr); + err = trx_purge_remove_log_hdr(rseg_hdr, block, + hdr_addr.boffset, &mtr); + if (err != DB_SUCCESS) { + goto func_exit; + } + rseg.history_size--; rseg.latch.wr_unlock(); mtr.commit(); @@ -512,10 +544,13 @@ func_exit: mtr.start(); rseg.latch.wr_lock(SRW_LOCK_CALL); - rseg_hdr = trx_rsegf_get(rseg.space, rseg.page_no, &mtr); - hdr_addr = prev_hdr_addr; + rseg_hdr = rseg.get(&mtr, &err); + if (!rseg_hdr) { + goto func_exit; + } + goto loop; } @@ -580,11 +615,13 @@ TRANSACTIONAL_TARGET static void trx_purge_truncate_history() head.undo_no= 0; } + dberr_t err= DB_SUCCESS; for (auto &rseg : trx_sys.rseg_array) if (rseg.space) - trx_purge_truncate_rseg_history(rseg, head); + if (dberr_t e= trx_purge_truncate_rseg_history(rseg, head)) + err= e; - if (srv_undo_tablespaces_active < 2) + if (err != DB_SUCCESS || srv_undo_tablespaces_active < 2) return; while (srv_undo_log_truncate) @@ -778,29 +815,28 @@ not_free: tablespace object to clear all freed ranges */ mtr.set_named_space(&space); mtr.trim_pages(page_id_t(space.id, size)); - fsp_header_init(&space, size, &mtr); + ut_a(fsp_header_init(&space, size, &mtr) == DB_SUCCESS); mysql_mutex_lock(&fil_system.mutex); space.size= file->size= size; mysql_mutex_unlock(&fil_system.mutex); - buf_block_t *sys_header= trx_sysf_get(&mtr); - for (auto &rseg : trx_sys.rseg_array) { if (rseg.space != &space) continue; + dberr_t err; buf_block_t *rblock= trx_rseg_header_create(&space, &rseg - trx_sys.rseg_array, trx_sys.get_max_trx_id(), - sys_header, &mtr); - ut_ad(rblock); + &mtr, &err); + ut_a(rblock); /* These were written by trx_rseg_header_create(). 
*/ - ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT - + rblock->page.frame)); - ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE - + rblock->page.frame)); - rseg.reinit(rblock ? rblock->page.id().page_no() : FIL_NULL); + ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + + rblock->page.frame)); + ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE + + rblock->page.frame)); + rseg.reinit(rblock->page.id().page_no()); } mtr.commit_shrink(space); @@ -842,87 +878,88 @@ static void trx_purge_rseg_get_next_history_log( ulint* n_pages_handled)/*!< in/out: number of UNDO pages handled */ { - fil_addr_t prev_log_addr; - trx_id_t trx_no; - mtr_t mtr; - - mtr.start(); + fil_addr_t prev_log_addr; + mtr_t mtr; - purge_sys.rseg->latch.wr_lock(SRW_LOCK_CALL); + mtr.start(); - ut_a(purge_sys.rseg->last_page_no != FIL_NULL); + purge_sys.rseg->latch.wr_lock(SRW_LOCK_CALL); - purge_sys.tail.trx_no = purge_sys.rseg->last_trx_no() + 1; - purge_sys.tail.undo_no = 0; - purge_sys.next_stored = false; + ut_a(purge_sys.rseg->last_page_no != FIL_NULL); - const buf_block_t* undo_page = trx_undo_page_get_s_latched( - page_id_t(purge_sys.rseg->space->id, - purge_sys.rseg->last_page_no), &mtr); + purge_sys.tail.trx_no= purge_sys.rseg->last_trx_no() + 1; + purge_sys.tail.undo_no= 0; + purge_sys.next_stored= false; - const trx_ulogf_t* log_hdr = undo_page->page.frame - + purge_sys.rseg->last_offset(); - - /* Increase the purge page count by one for every handled log */ - - (*n_pages_handled)++; - - prev_log_addr = flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE); - prev_log_addr.boffset = static_cast(prev_log_addr.boffset - - TRX_UNDO_HISTORY_NODE); - - - const bool empty = prev_log_addr.page == FIL_NULL; - - if (empty) { - /* No logs left in the history list */ - purge_sys.rseg->last_page_no = FIL_NULL; - } + if (const buf_block_t* undo_page= + buf_page_get_gen(page_id_t(purge_sys.rseg->space->id, + purge_sys.rseg->last_page_no), + 0, RW_S_LATCH, nullptr, + 
BUF_GET_POSSIBLY_FREED, &mtr)) + { + const trx_ulogf_t *log_hdr= + undo_page->page.frame + purge_sys.rseg->last_offset(); + /* Increase the purge page count by one for every handled log */ + ++*n_pages_handled; + prev_log_addr= flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE); + prev_log_addr.boffset = static_cast(prev_log_addr.boffset - + TRX_UNDO_HISTORY_NODE); + } + else + prev_log_addr.page= FIL_NULL; - purge_sys.rseg->latch.wr_unlock(); - mtr.commit(); + const bool empty= prev_log_addr.page == FIL_NULL; - if (empty) { - return; - } + if (empty) + /* No logs left in the history list */ + purge_sys.rseg->last_page_no= FIL_NULL; - /* Read the previous log header. */ - mtr.start(); + purge_sys.rseg->latch.wr_unlock(); + mtr.commit(); - log_hdr = trx_undo_page_get_s_latched( - page_id_t(purge_sys.rseg->space->id, prev_log_addr.page), - &mtr)->page.frame - + prev_log_addr.boffset; + if (empty) + return; - trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO); - ut_ad(mach_read_from_2(log_hdr + TRX_UNDO_NEEDS_PURGE) <= 1); - const byte needs_purge = log_hdr[TRX_UNDO_NEEDS_PURGE + 1]; + /* Read the previous log header. 
*/ + mtr.start(); - mtr.commit(); + byte needs_purge= 0; + trx_id_t trx_no= 0; - purge_sys.rseg->latch.wr_lock(SRW_LOCK_CALL); + if (const buf_block_t* undo_page= + buf_page_get_gen(page_id_t(purge_sys.rseg->space->id, prev_log_addr.page), + 0, RW_S_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, &mtr)) + { + const byte *log_hdr= undo_page->page.frame + prev_log_addr.boffset; - purge_sys.rseg->last_page_no = prev_log_addr.page; - purge_sys.rseg->set_last_commit(prev_log_addr.boffset, trx_no); + trx_no= mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO); + ut_ad(mach_read_from_2(log_hdr + TRX_UNDO_NEEDS_PURGE) <= 1); + needs_purge= log_hdr[TRX_UNDO_NEEDS_PURGE + 1]; + } - if (needs_purge) { - purge_sys.rseg->set_needs_purge(); - } else { - purge_sys.rseg->clear_needs_purge(); - } + mtr.commit(); - /* Purge can also produce events, however these are already ordered - in the rollback segment and any user generated event will be greater - than the events that Purge produces. ie. Purge can never produce - events from an empty rollback segment. */ + if (UNIV_UNLIKELY(!trx_no)) + return; - mysql_mutex_lock(&purge_sys.pq_mutex); + purge_sys.rseg->latch.wr_lock(SRW_LOCK_CALL); + purge_sys.rseg->last_page_no= prev_log_addr.page; + purge_sys.rseg->set_last_commit(prev_log_addr.boffset, trx_no); - purge_sys.purge_queue.push(*purge_sys.rseg); + if (needs_purge) + purge_sys.rseg->set_needs_purge(); + else + purge_sys.rseg->clear_needs_purge(); - mysql_mutex_unlock(&purge_sys.pq_mutex); + /* Purge can also produce events, however these are already ordered + in the rollback segment and any user generated event will be greater + than the events that Purge produces. ie. Purge can never produce + events from an empty rollback segment. 
*/ - purge_sys.rseg->latch.wr_unlock(); + mysql_mutex_lock(&purge_sys.pq_mutex); + purge_sys.purge_queue.push(*purge_sys.rseg); + mysql_mutex_unlock(&purge_sys.pq_mutex); + purge_sys.rseg->latch.wr_unlock(); } /** Position the purge sys "iterator" on the undo record to use for purging. */ @@ -938,11 +975,11 @@ static void trx_purge_read_undo_rec() if (purge_sys.rseg->needs_purge()) { mtr_t mtr; mtr.start(); - buf_block_t* undo_page; + const buf_block_t* undo_page; if (trx_undo_rec_t* undo_rec = trx_undo_get_first_rec( *purge_sys.rseg->space, purge_sys.hdr_page_no, purge_sys.hdr_offset, RW_S_LATCH, - undo_page, &mtr)) { + undo_page, &mtr, nullptr)) { offset = page_offset(undo_rec); undo_no = trx_undo_rec_get_undo_no(undo_rec); @@ -998,8 +1035,7 @@ trx_purge_get_next_rec( ut_ad(purge_sys.next_stored); ut_ad(purge_sys.tail.trx_no < purge_sys.low_limit_no()); - const ulint space = purge_sys.rseg->space->id; - const uint32_t page_no = purge_sys.page_no; + const page_id_t page_id{purge_sys.rseg->space->id, purge_sys.page_no}; const uint16_t offset = purge_sys.offset; if (offset == 0) { @@ -1017,9 +1053,16 @@ trx_purge_get_next_rec( mtr_start(&mtr); - buf_block_t* undo_page = trx_undo_page_get_s_latched( - page_id_t(space, page_no), &mtr); - buf_block_t* rec2_page = undo_page; + const buf_block_t* undo_page + = buf_page_get_gen(page_id, 0, RW_S_LATCH, nullptr, + BUF_GET_POSSIBLY_FREED, &mtr); + if (UNIV_UNLIKELY(!undo_page)) { +corrupted: + mtr.commit(); + return &trx_purge_dummy_rec; + } + + const buf_block_t* rec2_page = undo_page; const trx_undo_rec_t* rec2 = trx_undo_page_get_next_rec( undo_page, offset, purge_sys.hdr_page_no, purge_sys.hdr_offset); @@ -1041,8 +1084,12 @@ trx_purge_get_next_rec( mtr_start(&mtr); - undo_page = trx_undo_page_get_s_latched( - page_id_t(space, page_no), &mtr); + undo_page = buf_page_get_gen(page_id, 0, RW_S_LATCH, + nullptr, BUF_GET_POSSIBLY_FREED, + &mtr); + if (UNIV_UNLIKELY(!undo_page)) { + goto corrupted; + } } else { purge_sys.offset 
= page_offset(rec2); purge_sys.page_no = rec2_page->page.id().page_no(); diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc index 324a87414a3..59c84e9119d 100644 --- a/storage/innobase/trx/trx0rec.cc +++ b/storage/innobase/trx/trx0rec.cc @@ -479,10 +479,10 @@ done: /**********************************************************************//** Reads from an undo log record the general parameters. @return remaining part of undo log record after reading these values */ -byte* +const byte* trx_undo_rec_get_pars( /*==================*/ - trx_undo_rec_t* undo_rec, /*!< in: undo log record */ + const trx_undo_rec_t* undo_rec, /*!< in: undo log record */ ulint* type, /*!< out: undo record type: TRX_UNDO_INSERT_REC, ... */ ulint* cmpl_info, /*!< out: compiler info, relevant only @@ -492,13 +492,10 @@ trx_undo_rec_get_pars( undo_no_t* undo_no, /*!< out: undo log record number */ table_id_t* table_id) /*!< out: table id */ { - const byte* ptr; ulint type_cmpl; - ptr = undo_rec + 2; - - type_cmpl = mach_read_from_1(ptr); - ptr++; + type_cmpl = undo_rec[2]; + const byte *ptr = undo_rec + 3; *updated_extern = !!(type_cmpl & TRX_UNDO_UPD_EXTERN); type_cmpl &= ~TRX_UNDO_UPD_EXTERN; @@ -511,22 +508,18 @@ trx_undo_rec_get_pars( *table_id = mach_read_next_much_compressed(&ptr); ut_ad(*table_id); - return(const_cast(ptr)); + return ptr; } /** Read from an undo log record a non-virtual column value. 
-@param[in,out] ptr pointer to remaining part of the undo record -@param[in,out] field stored field -@param[in,out] len length of the field, or UNIV_SQL_NULL -@param[in,out] orig_len original length of the locally stored part +@param ptr pointer to remaining part of the undo record +@param field stored field +@param len length of the field, or UNIV_SQL_NULL +@param orig_len original length of the locally stored part of an externally stored column, or 0 @return remaining part of undo log record after reading these values */ -byte* -trx_undo_rec_get_col_val( - const byte* ptr, - const byte** field, - uint32_t* len, - uint32_t* orig_len) +const byte *trx_undo_rec_get_col_val(const byte *ptr, const byte **field, + uint32_t *len, uint32_t *orig_len) { *len = mach_read_next_compressed(&ptr); *orig_len = 0; @@ -564,16 +557,16 @@ trx_undo_rec_get_col_val( } } - return(const_cast(ptr)); + return ptr; } /*******************************************************************//** Builds a row reference from an undo log record. 
@return pointer to remaining part of undo record */ -byte* +const byte* trx_undo_rec_get_row_ref( /*=====================*/ - byte* ptr, /*!< in: remaining part of a copy of an undo log + const byte* ptr, /*!< in: remaining part of a copy of an undo log record, at the start of the row reference; NOTE that this copy of the undo log record must be preserved as long as the row reference is @@ -584,20 +577,16 @@ trx_undo_rec_get_row_ref( mem_heap_t* heap) /*!< in: memory heap from which the memory needed is allocated */ { - ulint ref_len; - ulint i; - - ut_ad(index && ptr && ref && heap); - ut_a(dict_index_is_clust(index)); + ut_ad(index->is_primary()); - ref_len = dict_index_get_n_unique(index); + const ulint ref_len = dict_index_get_n_unique(index); dtuple_t* tuple = dtuple_create(heap, ref_len); *ref = tuple; dict_index_copy_types(tuple, index, ref_len); - for (i = 0; i < ref_len; i++) { + for (ulint i = 0; i < ref_len; i++) { const byte* field; uint32_t len, orig_len; @@ -608,29 +597,21 @@ trx_undo_rec_get_row_ref( dfield_set_data(dfield, field, len); } - return(ptr); + return ptr; } -/*******************************************************************//** -Skips a row reference from an undo log record. +/** Skip a row reference from an undo log record. 
+@param ptr part of an update undo log record +@param index clustered index @return pointer to remaining part of undo record */ -static -byte* -trx_undo_rec_skip_row_ref( -/*======================*/ - byte* ptr, /*!< in: remaining part in update undo log - record, at the start of the row reference */ - dict_index_t* index) /*!< in: clustered index */ +static const byte *trx_undo_rec_skip_row_ref(const byte *ptr, + const dict_index_t *index) { - ulint ref_len; - ulint i; - - ut_ad(index && ptr); - ut_a(dict_index_is_clust(index)); + ut_ad(index->is_primary()); - ref_len = dict_index_get_n_unique(index); + ulint ref_len = dict_index_get_n_unique(index); - for (i = 0; i < ref_len; i++) { + for (ulint i = 0; i < ref_len; i++) { const byte* field; uint32_t len, orig_len; @@ -1938,9 +1919,8 @@ dberr_t trx_undo_report_rename(trx_t* trx, const dict_table_t* table) } else { mtr.commit(); mtr.start(); - block = trx_undo_add_page(undo, &mtr); + block = trx_undo_add_page(undo, &mtr, &err); if (!block) { - err = DB_OUT_OF_FILE_SPACE; break; } } @@ -2134,7 +2114,7 @@ err_exit: } rseg->latch.wr_lock(SRW_LOCK_CALL); - trx_undo_free_last_page(undo, &mtr); + err = trx_undo_free_last_page(undo, &mtr); rseg->latch.wr_unlock(); if (m.second) { @@ -2143,7 +2123,9 @@ err_exit: trx->mod_tables.erase(m.first); } - err = DB_UNDO_RECORD_TOO_BIG; + if (err == DB_SUCCESS) { + err = DB_UNDO_RECORD_TOO_BIG; + } goto err_exit; } else { /* Write log for clearing the unused @@ -2205,12 +2187,16 @@ err_exit: mtr.set_log_mode(MTR_LOG_NO_REDO); } - undo_block = trx_undo_add_page(undo, &mtr); + undo_block = trx_undo_add_page(undo, &mtr, &err); DBUG_EXECUTE_IF("ib_err_ins_undo_page_add_failure", undo_block = NULL;); } while (UNIV_LIKELY(undo_block != NULL)); + if (err != DB_OUT_OF_FILE_SPACE) { + goto err_exit; + } + ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR, DB_OUT_OF_FILE_SPACE, //ER_INNODB_UNDO_LOG_FULL, @@ -2221,8 +2207,6 @@ err_exit: undo->rseg->space == fil_system.sys_space ? "system" : is_temp ? 
"temporary" : "undo"); - /* Did not succeed: out of space */ - err = DB_OUT_OF_FILE_SPACE; goto err_exit; } @@ -2237,31 +2221,29 @@ trx_undo_get_undo_rec_low( roll_ptr_t roll_ptr, mem_heap_t* heap) { - trx_undo_rec_t* undo_rec; - ulint rseg_id; - uint32_t page_no; - uint16_t offset; - bool is_insert; - mtr_t mtr; + ulint rseg_id; + uint32_t page_no; + uint16_t offset; + bool is_insert; + mtr_t mtr; - trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no, - &offset); - ut_ad(page_no > FSP_FIRST_INODE_PAGE_NO); - ut_ad(offset >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); - trx_rseg_t* rseg = &trx_sys.rseg_array[rseg_id]; - ut_ad(rseg->is_persistent()); + trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no, &offset); + ut_ad(page_no > FSP_FIRST_INODE_PAGE_NO); + ut_ad(offset >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); + trx_rseg_t *rseg= &trx_sys.rseg_array[rseg_id]; + ut_ad(rseg->is_persistent()); - mtr.start(); + mtr.start(); - buf_block_t *undo_page = trx_undo_page_get_s_latched( - page_id_t(rseg->space->id, page_no), &mtr); + const buf_block_t* undo_page= + buf_page_get(page_id_t(rseg->space->id, page_no), 0, RW_S_LATCH, &mtr); - undo_rec = trx_undo_rec_copy( - undo_page->page.frame + offset, heap); + trx_undo_rec_t *undo_rec= undo_page + ? trx_undo_rec_copy(undo_page->page.frame + offset, heap) + : nullptr; - mtr.commit(); - - return(undo_rec); + mtr.commit(); + return undo_rec; } /** Copy an undo record to heap. 
@@ -2290,11 +2272,12 @@ trx_undo_get_undo_rec( bool missing_history = purge_sys.changes_visible(trx_id, name); if (!missing_history) { *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap); + missing_history = !*undo_rec; } purge_sys.latch.rd_unlock(); - return(missing_history); + return missing_history; } #ifdef UNIV_DEBUG @@ -2354,7 +2337,6 @@ trx_undo_prev_version_build( trx_id_t trx_id; roll_ptr_t roll_ptr; upd_t* update; - byte* ptr; byte info_bits; ulint cmpl_info; bool dummy_extern; @@ -2386,6 +2368,9 @@ trx_undo_prev_version_build( if (v_status & TRX_UNDO_PREV_IN_PURGE) { /* We are fetching the record being purged */ undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap); + if (!undo_rec) { + return false; + } } else { /* The undo record may already have been purged, during purge or semi-consistent read. */ @@ -2393,8 +2378,9 @@ trx_undo_prev_version_build( } } - ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, - &dummy_extern, &undo_no, &table_id); + const byte *ptr = + trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, + &dummy_extern, &undo_no, &table_id); if (table_id != index->table->id) { /* The table should have been rebuilt, but purge has diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc index 449282634c3..34ec21d6013 100644 --- a/storage/innobase/trx/trx0rseg.cc +++ b/storage/innobase/trx/trx0rseg.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -32,12 +32,24 @@ Created 3/26/1996 Heikki Tuuri #include "srv0mon.h" #ifdef WITH_WSREP -#include +# include -#ifdef UNIV_DEBUG +/** The offset to WSREP XID headers, after TRX_RSEG */ +# define TRX_RSEG_WSREP_XID_INFO TRX_RSEG_MAX_TRX_ID + 16 + 512 + +/** WSREP XID format (1 if present and valid, 0 if not present) */ +# define TRX_RSEG_WSREP_XID_FORMAT TRX_RSEG_WSREP_XID_INFO +/** WSREP XID GTRID length */ +# define TRX_RSEG_WSREP_XID_GTRID_LEN TRX_RSEG_WSREP_XID_INFO + 4 +/** WSREP XID bqual length */ +# define TRX_RSEG_WSREP_XID_BQUAL_LEN TRX_RSEG_WSREP_XID_INFO + 8 +/** WSREP XID data (XIDDATASIZE bytes) */ +# define TRX_RSEG_WSREP_XID_DATA TRX_RSEG_WSREP_XID_INFO + 12 + +# ifdef UNIV_DEBUG /** The latest known WSREP XID sequence number */ static long long wsrep_seqno = -1; -#endif /* UNIV_DEBUG */ +# endif /* UNIV_DEBUG */ /** The latest known WSREP XID UUID */ static unsigned char wsrep_uuid[16]; @@ -114,52 +126,37 @@ trx_rseg_update_wsrep_checkpoint( trx_rseg_write_wsrep_checkpoint(rseg_header, xid, mtr); } -/** Clear the WSREP XID information from rollback segment header. -@param[in,out] block rollback segment header -@param[in,out] mtr mini-transaction */ -static void trx_rseg_clear_wsrep_checkpoint(buf_block_t *block, mtr_t *mtr) +static dberr_t trx_rseg_update_wsrep_checkpoint(const XID* xid, mtr_t* mtr) { - mtr->memset(block, TRX_RSEG + TRX_RSEG_WSREP_XID_INFO, - TRX_RSEG_WSREP_XID_DATA + XIDDATASIZE - TRX_RSEG_WSREP_XID_INFO, - 0); -} - -static void -trx_rseg_update_wsrep_checkpoint(const XID* xid, mtr_t* mtr) -{ - const byte* xid_uuid = wsrep_xid_uuid(xid); - /* We must make check against wsrep_uuid here, the - trx_rseg_update_wsrep_checkpoint() writes over wsrep_uuid with - xid contents in debug mode and the memcmp() will never give nonzero - result. 
*/ - const bool must_clear_rsegs = memcmp(wsrep_uuid, xid_uuid, - sizeof wsrep_uuid); - const trx_rseg_t* rseg = &trx_sys.rseg_array[0]; - - buf_block_t* rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, - mtr); - if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT - + rseg_header->page.frame))) { - trx_rseg_format_upgrade(rseg_header, mtr); - } - - trx_rseg_update_wsrep_checkpoint(rseg_header, xid, mtr); - - if (must_clear_rsegs) { - /* Because the UUID part of the WSREP XID differed - from current_xid_uuid, the WSREP group UUID was - changed, and we must reset the XID in all rollback - segment headers. */ - for (ulint rseg_id = 1; rseg_id < TRX_SYS_N_RSEGS; ++rseg_id) { - const trx_rseg_t &rseg = trx_sys.rseg_array[rseg_id]; - if (rseg.space) { - trx_rseg_clear_wsrep_checkpoint( - trx_rsegf_get(rseg.space, rseg.page_no, - mtr), - mtr); - } - } - } + dberr_t err; + buf_block_t *rseg_header = trx_sys.rseg_array[0].get(mtr, &err); + + if (UNIV_UNLIKELY(!rseg_header)) + return err; + + /* We must make check against wsrep_uuid here, the + trx_rseg_update_wsrep_checkpoint() writes over wsrep_uuid with xid + contents in debug mode and the memcmp() will never give nonzero + result. */ + const bool must_clear_rsegs= + memcmp(wsrep_uuid, wsrep_xid_uuid(xid), sizeof wsrep_uuid); + + if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + + rseg_header->page.frame))) + trx_rseg_format_upgrade(rseg_header, mtr); + + trx_rseg_update_wsrep_checkpoint(rseg_header, xid, mtr); + + if (must_clear_rsegs) + /* Because the UUID part of the WSREP XID differed from + current_xid_uuid, the WSREP group UUID was changed, and we must + reset the XID in all rollback segment headers. 
*/ + for (ulint rseg_id= 1; rseg_id < TRX_SYS_N_RSEGS; ++rseg_id) + if (buf_block_t* block= trx_sys.rseg_array[rseg_id].get(mtr, &err)) + mtr->memset(block, TRX_RSEG + TRX_RSEG_WSREP_XID_INFO, + TRX_RSEG_WSREP_XID_DATA + XIDDATASIZE - + TRX_RSEG_WSREP_XID_INFO, 0); + return err; } /** Update WSREP checkpoint XID in first rollback segment header @@ -250,6 +247,9 @@ bool trx_rseg_read_wsrep_checkpoint(XID& xid) rseg_id++, mtr.commit()) { mtr.start(); const buf_block_t* sys = trx_sysf_get(&mtr, false); + if (UNIV_UNLIKELY(!sys)) { + continue; + } const uint32_t page_no = trx_sysf_rseg_get_page_no( sys, rseg_id); @@ -257,8 +257,14 @@ bool trx_rseg_read_wsrep_checkpoint(XID& xid) continue; } - const buf_block_t* rseg_header = trx_rsegf_get_new( - trx_sysf_rseg_get_space(sys, rseg_id), page_no, &mtr); + const buf_block_t* rseg_header = buf_page_get_gen( + page_id_t(trx_sysf_rseg_get_space(sys, rseg_id), + page_no), + 0, RW_S_LATCH, nullptr, BUF_GET, &mtr); + + if (!rseg_header) { + continue; + } if (mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + rseg_header->page.frame)) { @@ -282,6 +288,12 @@ bool trx_rseg_read_wsrep_checkpoint(XID& xid) } #endif /* WITH_WSREP */ +buf_block_t *trx_rseg_t::get(mtr_t *mtr, dberr_t *err) const +{ + return buf_page_get_gen(page_id(), 0, RW_X_LATCH, nullptr, + BUF_GET, mtr, err); +} + /** Upgrade a rollback segment header page to MariaDB 10.3 format. @param[in,out] rseg_header rollback segment header page @param[in,out] mtr mini-transaction */ @@ -297,69 +309,40 @@ void trx_rseg_format_upgrade(buf_block_t *rseg_header, mtr_t *mtr) } /** Create a rollback segment header. 
-@param[in,out] space system, undo, or temporary tablespace -@param[in] rseg_id rollback segment identifier -@param[in] max_trx_id new value of TRX_RSEG_MAX_TRX_ID -@param[in,out] sys_header the TRX_SYS page (NULL for temporary rseg) -@param[in,out] mtr mini-transaction +@param[in,out] space system, undo, or temporary tablespace +@param[in] rseg_id rollback segment identifier +@param[in] max_trx_id new value of TRX_RSEG_MAX_TRX_ID +@param[in,out] mtr mini-transaction +@param[out] err error code @return the created rollback segment -@retval NULL on failure */ -buf_block_t* -trx_rseg_header_create( - fil_space_t* space, - ulint rseg_id, - trx_id_t max_trx_id, - buf_block_t* sys_header, - mtr_t* mtr) +@retval nullptr on failure */ +buf_block_t *trx_rseg_header_create(fil_space_t *space, ulint rseg_id, + trx_id_t max_trx_id, mtr_t *mtr, + dberr_t *err) { - buf_block_t* block; - - ut_ad(mtr->memo_contains(*space)); - ut_ad(!sys_header == (space == fil_system.temp_space)); - - /* Allocate a new file segment for the rollback segment */ - block = fseg_create(space, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr); - - if (block == NULL) { - /* No space left */ - return block; - } - - ut_ad(0 == mach_read_from_4(TRX_RSEG_FORMAT + TRX_RSEG - + block->page.frame)); - ut_ad(0 == mach_read_from_4(TRX_RSEG_HISTORY_SIZE + TRX_RSEG - + block->page.frame)); - ut_ad(0 == mach_read_from_4(TRX_RSEG_MAX_TRX_ID + TRX_RSEG - + block->page.frame)); - - /* Initialize the history list */ - flst_init(block, TRX_RSEG_HISTORY + TRX_RSEG, mtr); - - mtr->write<8,mtr_t::MAYBE_NOP>(*block, - TRX_RSEG + TRX_RSEG_MAX_TRX_ID - + block->page.frame, max_trx_id); - - /* Reset the undo log slots */ - mtr->memset(block, TRX_RSEG_UNDO_SLOTS + TRX_RSEG, - TRX_RSEG_N_SLOTS * 4, 0xff); - - if (sys_header) { - /* Add the rollback segment info to the free slot in - the trx system header */ - - mtr->write<4,mtr_t::MAYBE_NOP>( - *sys_header, - TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_SPACE - + rseg_id * TRX_SYS_RSEG_SLOT_SIZE 
- + sys_header->page.frame, space->id); - mtr->write<4,mtr_t::MAYBE_NOP>( - *sys_header, - TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_PAGE_NO - + rseg_id * TRX_SYS_RSEG_SLOT_SIZE - + sys_header->page.frame, block->page.id().page_no()); - } - - return block; + ut_ad(mtr->memo_contains(*space)); + buf_block_t *block= + fseg_create(space, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr, err); + if (block) + { + ut_ad(0 == mach_read_from_4(TRX_RSEG_FORMAT + TRX_RSEG + + block->page.frame)); + ut_ad(0 == mach_read_from_4(TRX_RSEG_HISTORY_SIZE + TRX_RSEG + + block->page.frame)); + ut_ad(0 == mach_read_from_4(TRX_RSEG_MAX_TRX_ID + TRX_RSEG + + block->page.frame)); + + /* Initialize the history list */ + flst_init(block, TRX_RSEG_HISTORY + TRX_RSEG, mtr); + + mtr->write<8,mtr_t::MAYBE_NOP>(*block, TRX_RSEG + TRX_RSEG_MAX_TRX_ID + + block->page.frame, max_trx_id); + + /* Reset the undo log slots */ + mtr->memset(block, TRX_RSEG_UNDO_SLOTS + TRX_RSEG, TRX_RSEG_N_SLOTS * 4, + 0xff); + } + return block; } void trx_rseg_t::destroy() @@ -452,107 +435,97 @@ static dberr_t trx_undo_lists_init(trx_rseg_t *rseg, trx_id_t &max_trx_id, static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, trx_id_t &max_trx_id, mtr_t *mtr) { - buf_block_t* rseg_hdr = trx_rsegf_get_new( - rseg->space->id, rseg->page_no, mtr); - - if (!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT - + rseg_hdr->page.frame)) { - trx_id_t id = mach_read_from_8(TRX_RSEG + TRX_RSEG_MAX_TRX_ID - + rseg_hdr->page.frame); + dberr_t err; + const buf_block_t *rseg_hdr= + buf_page_get_gen(rseg->page_id(), 0, RW_S_LATCH, nullptr, BUF_GET, mtr, + &err); + if (!rseg_hdr) + return err; + + if (!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + rseg_hdr->page.frame)) + { + trx_id_t id= mach_read_from_8(TRX_RSEG + TRX_RSEG_MAX_TRX_ID + + rseg_hdr->page.frame); - if (id > max_trx_id) { - max_trx_id = id; - } + if (id > max_trx_id) + max_trx_id= id; - const byte* binlog_name = TRX_RSEG + TRX_RSEG_BINLOG_NAME - + rseg_hdr->page.frame; - if (*binlog_name) { - 
lsn_t lsn = mach_read_from_8(my_assume_aligned<8>( - FIL_PAGE_LSN - + rseg_hdr - ->page.frame)); - compile_time_assert(TRX_RSEG_BINLOG_NAME_LEN == sizeof - trx_sys.recovered_binlog_filename); - if (lsn > trx_sys.recovered_binlog_lsn) { - trx_sys.recovered_binlog_lsn = lsn; - trx_sys.recovered_binlog_offset - = mach_read_from_8( - TRX_RSEG - + TRX_RSEG_BINLOG_OFFSET - + rseg_hdr->page.frame); - memcpy(trx_sys.recovered_binlog_filename, - binlog_name, - TRX_RSEG_BINLOG_NAME_LEN); - } + const byte *binlog_name= + TRX_RSEG + TRX_RSEG_BINLOG_NAME + rseg_hdr->page.frame; + if (*binlog_name) + { + lsn_t lsn= mach_read_from_8(my_assume_aligned<8> + (FIL_PAGE_LSN + rseg_hdr->page.frame)); + static_assert(TRX_RSEG_BINLOG_NAME_LEN == + sizeof trx_sys.recovered_binlog_filename, "compatibility"); + if (lsn > trx_sys.recovered_binlog_lsn) + { + trx_sys.recovered_binlog_lsn= lsn; + trx_sys.recovered_binlog_offset= + mach_read_from_8(TRX_RSEG + TRX_RSEG_BINLOG_OFFSET + + rseg_hdr->page.frame); + memcpy(trx_sys.recovered_binlog_filename, binlog_name, + TRX_RSEG_BINLOG_NAME_LEN); + } #ifdef WITH_WSREP - trx_rseg_read_wsrep_checkpoint( - rseg_hdr, trx_sys.recovered_wsrep_xid); + trx_rseg_read_wsrep_checkpoint(rseg_hdr, trx_sys.recovered_wsrep_xid); #endif - } - } - - if (srv_operation == SRV_OPERATION_RESTORE) { - /* mariabackup --prepare only deals with - the redo log and the data files, not with - transactions or the data dictionary. 
*/ - return DB_SUCCESS; - } - - /* Initialize the undo log lists according to the rseg header */ - - rseg->curr_size = mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE - + rseg_hdr->page.frame) - + 1; - if (dberr_t err = trx_undo_lists_init(rseg, max_trx_id, rseg_hdr)) { - return err; - } - - if (auto len = flst_get_len(TRX_RSEG + TRX_RSEG_HISTORY - + rseg_hdr->page.frame)) { - rseg->history_size += len; - - fil_addr_t node_addr = flst_get_last(TRX_RSEG - + TRX_RSEG_HISTORY - + rseg_hdr->page.frame); - node_addr.boffset = static_cast( - node_addr.boffset - TRX_UNDO_HISTORY_NODE); - - rseg->last_page_no = node_addr.page; - - const buf_block_t* block = trx_undo_page_get( - page_id_t(rseg->space->id, node_addr.page), mtr); - - trx_id_t id = mach_read_from_8(block->page.frame - + node_addr.boffset - + TRX_UNDO_TRX_ID); - if (id > max_trx_id) { - max_trx_id = id; - } - id = mach_read_from_8(block->page.frame + node_addr.boffset - + TRX_UNDO_TRX_NO); - if (id > max_trx_id) { - max_trx_id = id; - } + } + } - rseg->set_last_commit(node_addr.boffset, id); - unsigned purge = mach_read_from_2(block->page.frame - + node_addr.boffset - + TRX_UNDO_NEEDS_PURGE); - ut_ad(purge <= 1); - if (purge != 0) { - rseg->set_needs_purge(); - } + if (srv_operation == SRV_OPERATION_RESTORE) + /* mariabackup --prepare only deals with + the redo log and the data files, not with + transactions or the data dictionary. */ + return DB_SUCCESS; - if (rseg->last_page_no != FIL_NULL) { + /* Initialize the undo log lists according to the rseg header */ - /* There is no need to cover this operation by the purge - mutex because we are still bootstrapping. 
*/ - purge_sys.purge_queue.push(*rseg); - } - } + rseg->curr_size = mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE + + rseg_hdr->page.frame) + 1; + err= trx_undo_lists_init(rseg, max_trx_id, rseg_hdr); + if (err != DB_SUCCESS); + else if (auto len= flst_get_len(TRX_RSEG + TRX_RSEG_HISTORY + + rseg_hdr->page.frame)) + { + rseg->history_size+= len; + + fil_addr_t node_addr= flst_get_last(TRX_RSEG + TRX_RSEG_HISTORY + + rseg_hdr->page.frame); + node_addr.boffset= static_cast(node_addr.boffset - + TRX_UNDO_HISTORY_NODE); + rseg->last_page_no= node_addr.page; + + const buf_block_t* block= + buf_page_get_gen(page_id_t(rseg->space->id, node_addr.page), + 0, RW_S_LATCH, nullptr, BUF_GET, mtr, &err); + if (!block) + return err; + + trx_id_t id= mach_read_from_8(block->page.frame + node_addr.boffset + + TRX_UNDO_TRX_ID); + if (id > max_trx_id) + max_trx_id= id; + id= mach_read_from_8(block->page.frame + node_addr.boffset + + TRX_UNDO_TRX_NO); + if (id > max_trx_id) + max_trx_id= id; + + rseg->set_last_commit(node_addr.boffset, id); + unsigned purge= mach_read_from_2(block->page.frame + node_addr.boffset + + TRX_UNDO_NEEDS_PURGE); + ut_ad(purge <= 1); + if (purge != 0) + rseg->set_needs_purge(); + + if (rseg->last_page_no != FIL_NULL) + /* There is no need to cover this operation by the purge + mutex because we are still bootstrapping. */ + purge_sys.purge_queue.push(*rseg); + } - return DB_SUCCESS; + return err; } /** Read binlog metadata from the TRX_SYS page, in case we are upgrading @@ -678,21 +651,26 @@ dberr_t trx_rseg_array_init() } /** Create the temporary rollback segments. 
*/ -void trx_temp_rseg_create() +dberr_t trx_temp_rseg_create(mtr_t *mtr) { - mtr_t mtr; - - for (ulong i = 0; i < array_elements(trx_sys.temp_rsegs); i++) { - mtr.start(); - mtr.set_log_mode(MTR_LOG_NO_REDO); - mtr.x_lock_space(fil_system.temp_space); - - buf_block_t* rblock = trx_rseg_header_create( - fil_system.temp_space, i, 0, NULL, &mtr); - trx_sys.temp_rsegs[i].init(fil_system.temp_space, - rblock->page.id().page_no()); - mtr.commit(); - } + for (ulong i= 0; i < array_elements(trx_sys.temp_rsegs); i++) + { + mtr->start(); + mtr->set_log_mode(MTR_LOG_NO_REDO); + mtr->x_lock_space(fil_system.temp_space); + dberr_t err; + buf_block_t *rblock= + trx_rseg_header_create(fil_system.temp_space, i, 0, mtr, &err); + if (UNIV_UNLIKELY(!rblock)) + { + mtr->commit(); + return err; + } + trx_sys.temp_rsegs[i].init(fil_system.temp_space, + rblock->page.id().page_no()); + mtr->commit(); + } + return DB_SUCCESS; } /** Update the offset information about the end of the binlog entry diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc index e24cb059a9b..2479e5a4cc1 100644 --- a/storage/innobase/trx/trx0sys.cc +++ b/storage/innobase/trx/trx0sys.cc @@ -136,65 +136,47 @@ trx_sysf_get_n_rseg_slots() mtr.commit(); } -/*****************************************************************//** -Creates the file page for the transaction system. This function is called only -at the database creation, before trx_sys_init. */ -static -void -trx_sysf_create( -/*============*/ - mtr_t* mtr) /*!< in: mtr */ +/** Initialize the transaction system when creating the database. */ +dberr_t trx_sys_create_sys_pages(mtr_t *mtr) { - ulint slot_no; - buf_block_t* block; - - ut_ad(mtr); - - /* Note that below we first reserve the file space x-latch, and - then enter the kernel: we must do it in this order to conform - to the latching order rules. 
*/ - - mtr->x_lock_space(fil_system.sys_space); - compile_time_assert(TRX_SYS_SPACE == 0); - - /* Create the trx sys file block in a new allocated file segment */ - block = fseg_create(fil_system.sys_space, - TRX_SYS + TRX_SYS_FSEG_HEADER, - mtr); - - ut_a(block->page.id() == page_id_t(0, TRX_SYS_PAGE_NO)); - - mtr->write<2>(*block, FIL_PAGE_TYPE + block->page.frame, - FIL_PAGE_TYPE_TRX_SYS); - - ut_ad(!mach_read_from_4(block->page.frame - + TRX_SYS_DOUBLEWRITE - + TRX_SYS_DOUBLEWRITE_MAGIC)); - - /* Reset the rollback segment slots. Old versions of InnoDB - (before MySQL 5.5) define TRX_SYS_N_RSEGS as 256 and expect - that the whole array is initialized. */ - compile_time_assert(256 >= TRX_SYS_N_RSEGS); - compile_time_assert(TRX_SYS + TRX_SYS_RSEGS - + 256 * TRX_SYS_RSEG_SLOT_SIZE - <= UNIV_PAGE_SIZE_MIN - FIL_PAGE_DATA_END); - mtr->memset(block, TRX_SYS + TRX_SYS_RSEGS, - 256 * TRX_SYS_RSEG_SLOT_SIZE, 0xff); - /* Initialize all of the page. This part used to be uninitialized. */ - mtr->memset(block, TRX_SYS + TRX_SYS_RSEGS - + 256 * TRX_SYS_RSEG_SLOT_SIZE, - srv_page_size - - (FIL_PAGE_DATA_END + TRX_SYS + TRX_SYS_RSEGS - + 256 * TRX_SYS_RSEG_SLOT_SIZE), - 0); - - /* Create the first rollback segment in the SYSTEM tablespace */ - slot_no = trx_sys_rseg_find_free(block); - buf_block_t* rblock = trx_rseg_header_create(fil_system.sys_space, - slot_no, 0, block, mtr); - - ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID); - ut_a(rblock->page.id() == page_id_t(0, FSP_FIRST_RSEG_PAGE_NO)); + mtr->start(); + mtr->x_lock_space(fil_system.sys_space); + static_assert(TRX_SYS_SPACE == 0, "compatibility"); + + /* Create the trx sys file block in a new allocated file segment */ + dberr_t err; + buf_block_t *block= fseg_create(fil_system.sys_space, + TRX_SYS + TRX_SYS_FSEG_HEADER, mtr, &err); + if (UNIV_UNLIKELY(!block)) + { + error: + mtr->commit(); + return err; + } + ut_a(block->page.id() == page_id_t(0, TRX_SYS_PAGE_NO)); + + mtr->write<2>(*block, FIL_PAGE_TYPE + block->page.frame, 
+ FIL_PAGE_TYPE_TRX_SYS); + + /* Reset the rollback segment slots. Old versions of InnoDB + (before MySQL 5.5) define TRX_SYS_N_RSEGS as 256 and expect + that the whole array is initialized. */ + static_assert(256 >= TRX_SYS_N_RSEGS, ""); + static_assert(TRX_SYS + TRX_SYS_RSEGS + 256 * TRX_SYS_RSEG_SLOT_SIZE <= + UNIV_PAGE_SIZE_MIN - FIL_PAGE_DATA_END, ""); + mtr->write<4>(*block, TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_PAGE_NO + + block->page.frame, FSP_FIRST_RSEG_PAGE_NO); + mtr->memset(block, TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_SLOT_SIZE, + 255 * TRX_SYS_RSEG_SLOT_SIZE, 0xff); + + buf_block_t *r= trx_rseg_header_create(fil_system.sys_space, 0, 0, + mtr, &err); + if (UNIV_UNLIKELY(!r)) + goto error; + ut_a(r->page.id() == page_id_t(0, FSP_FIRST_RSEG_PAGE_NO)); + mtr->commit(); + + return trx_lists_init_at_db_start(); } void trx_sys_t::create() @@ -260,21 +242,6 @@ TPOOL_SUPPRESS_TSAN size_t trx_sys_t::history_size_approx() const return size; } -/*****************************************************************//** -Creates and initializes the transaction system at the database creation. */ -void -trx_sys_create_sys_pages(void) -/*==========================*/ -{ - mtr_t mtr; - - mtr_start(&mtr); - - trx_sysf_create(&mtr); - - mtr_commit(&mtr); -} - /** Create a persistent rollback segment. @param space_id system or undo tablespace id @return pointer to new rollback segment @@ -292,14 +259,21 @@ static trx_rseg_t *trx_rseg_create(ulint space_id) if (buf_block_t *sys_header= trx_sysf_get(&mtr)) { ulint rseg_id= trx_sys_rseg_find_free(sys_header); + dberr_t err; if (buf_block_t *rblock= rseg_id == ULINT_UNDEFINED - ? nullptr : trx_rseg_header_create(space, rseg_id, 0, sys_header, - &mtr)) + ? 
nullptr : trx_rseg_header_create(space, rseg_id, 0, &mtr, &err)) { - ut_ad(trx_sysf_rseg_get_space(sys_header, rseg_id) == space_id); rseg= &trx_sys.rseg_array[rseg_id]; rseg->init(space, rblock->page.id().page_no()); ut_ad(rseg->is_persistent()); + mtr.write<4,mtr_t::MAYBE_NOP> + (*sys_header, TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_SPACE + + rseg_id * TRX_SYS_RSEG_SLOT_SIZE + sys_header->page.frame, + space_id); + mtr.write<4,mtr_t::MAYBE_NOP> + (*sys_header, TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_PAGE_NO + + rseg_id * TRX_SYS_RSEG_SLOT_SIZE + sys_header->page.frame, + rseg->page_no); } } } diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index ba0809dc444..95bc6273e48 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -47,6 +47,7 @@ Created 3/26/1996 Heikki Tuuri #include "trx0xa.h" #include "ut0pool.h" #include "ut0vec.h" +#include "log.h" #include #include @@ -554,96 +555,95 @@ void trx_disconnect_prepared(trx_t *trx) trx_sys.rw_trx_hash.put_pins(trx); } -/****************************************************************//** -Resurrect the table locks for a resurrected transaction. */ -static -void -trx_resurrect_table_locks( -/*======================*/ - trx_t* trx, /*!< in/out: transaction */ - const trx_undo_t* undo) /*!< in: undo log */ +MY_ATTRIBUTE((nonnull, warn_unused_result)) +/** Resurrect the table locks for a resurrected transaction. */ +static dberr_t trx_resurrect_table_locks(trx_t *trx, const trx_undo_t &undo) { - ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) || - trx_state_eq(trx, TRX_STATE_PREPARED)); - ut_ad(undo->rseg == trx->rsegs.m_redo.rseg); - - if (undo->empty()) { - return; - } - - mtr_t mtr; - std::map tables; - mtr.start(); - - /* trx_rseg_mem_create() may have acquired an X-latch on this - page, so we cannot acquire an S-latch. 
*/ - buf_block_t* block = trx_undo_page_get( - page_id_t(trx->rsegs.m_redo.rseg->space->id, - undo->top_page_no), &mtr); - buf_block_t* undo_block = block; - trx_undo_rec_t* undo_rec = block->page.frame + undo->top_offset; + ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) || + trx_state_eq(trx, TRX_STATE_PREPARED)); + ut_ad(undo.rseg == trx->rsegs.m_redo.rseg); + + if (undo.empty()) + return DB_SUCCESS; + + mtr_t mtr; + std::map tables; + mtr.start(); + + dberr_t err; + if (buf_block_t *block= + buf_page_get_gen(page_id_t(trx->rsegs.m_redo.rseg->space->id, + undo.top_page_no), 0, RW_S_LATCH, nullptr, + BUF_GET, &mtr, &err)) + { + buf_block_t *undo_block= block; + const trx_undo_rec_t *undo_rec= block->page.frame + undo.top_offset; - do { - ulint type; - undo_no_t undo_no; - table_id_t table_id; - ulint cmpl_info; - bool updated_extern; - - if (undo_block != block) { - mtr.memo_release(undo_block, MTR_MEMO_PAGE_X_FIX); - undo_block = block; - } + do + { + ulint type; + undo_no_t undo_no; + table_id_t table_id; + ulint cmpl_info; + bool updated_extern; - trx_undo_rec_get_pars( - undo_rec, &type, &cmpl_info, - &updated_extern, &undo_no, &table_id); + if (undo_block != block) + { + mtr.memo_release(undo_block, MTR_MEMO_PAGE_S_FIX); + undo_block= block; + } + trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, + &updated_extern, &undo_no, &table_id); + tables.emplace(table_id, type == TRX_UNDO_EMPTY); + undo_rec= trx_undo_get_prev_rec(block, page_offset(undo_rec), + undo.hdr_page_no, undo.hdr_offset, + true, &mtr); + } + while (undo_rec); + } - tables.emplace(table_id, type == TRX_UNDO_EMPTY); + mtr.commit(); - undo_rec = trx_undo_get_prev_rec( - block, page_offset(undo_rec), undo->hdr_page_no, - undo->hdr_offset, false, &mtr); - } while (undo_rec); + if (err != DB_SUCCESS) + return err; - mtr.commit(); - - for (auto p : tables) { - if (dict_table_t* table = dict_table_open_on_id( - p.first, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) { - if (!table->is_readable()) { - 
dict_sys.lock(SRW_LOCK_CALL); - table->release(); - dict_sys.remove(table); - dict_sys.unlock(); - continue; - } + for (auto p : tables) + { + if (dict_table_t *table= + dict_table_open_on_id(p.first, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) + { + if (!table->is_readable()) + { + dict_sys.lock(SRW_LOCK_CALL); + table->release(); + dict_sys.remove(table); + dict_sys.unlock(); + continue; + } - if (trx->state == TRX_STATE_PREPARED) { - trx->mod_tables.emplace(table, 0); - } + if (trx->state == TRX_STATE_PREPARED) + trx->mod_tables.emplace(table, 0); - lock_table_resurrect(table, trx, - p.second ? LOCK_X : LOCK_IX); + lock_table_resurrect(table, trx, p.second ? LOCK_X : LOCK_IX); - DBUG_LOG("ib_trx", - "resurrect " << ib::hex(trx->id) - << " lock on " << table->name); + DBUG_LOG("ib_trx", + "resurrect " << ib::hex(trx->id) << " lock on " << table->name); + table->release(); + } + } - table->release(); - } - } + return DB_SUCCESS; } +MY_ATTRIBUTE((nonnull, warn_unused_result)) /** Resurrect the transactions that were doing inserts/updates the time of the crash, they need to be undone. 
*/ - -static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg, - time_t start_time, ulonglong start_time_micro, - uint64_t *rows_to_undo) +static dberr_t trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg, + time_t start_time, ulonglong start_time_micro, + uint64_t *rows_to_undo) { trx_state_t state; /* @@ -660,13 +660,12 @@ static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg, Prepared transactions are left in the prepared state waiting for a commit or abort decision from MySQL */ - ib::info() << "Transaction " << undo->trx_id - << " was in the XA prepared state."; - state= TRX_STATE_PREPARED; + sql_print_information("InnoDB: Transaction " TRX_ID_FMT + " was in the XA prepared state.", undo->trx_id); break; default: - return; + return DB_SUCCESS; } trx_t *trx= trx_create(); @@ -692,9 +691,9 @@ static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg, trx_sys.rw_trx_hash.insert(trx); trx_sys.rw_trx_hash.put_pins(trx); - trx_resurrect_table_locks(trx, undo); if (trx_state_eq(trx, TRX_STATE_ACTIVE)) *rows_to_undo+= trx->undo_no; + return trx_resurrect_table_locks(trx, *undo); } @@ -716,7 +715,10 @@ dberr_t trx_lists_init_at_db_start() } purge_sys.create(); - if (dberr_t err = trx_rseg_array_init()) { + dberr_t err = trx_rseg_array_init(); + + if (err != DB_SUCCESS) { +corrupted: ib::info() << "Retry with innodb_force_recovery=5"; return err; } @@ -736,14 +738,15 @@ dberr_t trx_lists_init_at_db_start() if (!rseg.space) { continue; } - /* Ressurrect other transactions. */ + /* Resurrect other transactions. 
*/ for (undo = UT_LIST_GET_FIRST(rseg.undo_list); undo != NULL; undo = UT_LIST_GET_NEXT(undo_list, undo)) { trx_t *trx = trx_sys.find(0, undo->trx_id, false); if (!trx) { - trx_resurrect(undo, &rseg, start_time, - start_time_micro, &rows_to_undo); + err = trx_resurrect(undo, &rseg, start_time, + start_time_micro, + &rows_to_undo); } else { ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) || trx_state_eq(trx, TRX_STATE_PREPARED)); @@ -763,7 +766,11 @@ dberr_t trx_lists_init_at_db_start() trx->undo_no = undo->top_undo_no + 1; } - trx_resurrect_table_locks(trx, undo); + err = trx_resurrect_table_locks(trx, *undo); + } + + if (err != DB_SUCCESS) { + goto corrupted; } } } diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index 90372f490c5..353d82c89cf 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -179,7 +179,7 @@ trx_undo_get_prev_rec_from_prev_page(buf_block_t *&block, uint16_t rec, block= buf_page_get(page_id_t(block->page.id().space(), prev_page_no), 0, shared ? RW_S_LATCH : RW_X_LATCH, mtr); - return trx_undo_page_get_last_rec(block, page_no, offset); + return block ? trx_undo_page_get_last_rec(block, page_no, offset) : nullptr; } /** Get the previous undo log record. 
@@ -233,8 +233,9 @@ trx_undo_get_prev_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no, @param[in,out] mtr mini-transaction @return undo log record, the page latched, NULL if none */ static trx_undo_rec_t* -trx_undo_get_next_rec_from_next_page(buf_block_t *&block, uint32_t page_no, - uint16_t offset, ulint mode, mtr_t *mtr) +trx_undo_get_next_rec_from_next_page(const buf_block_t *&block, + uint32_t page_no, uint16_t offset, + ulint mode, mtr_t *mtr) { if (page_no == block->page.id().page_no() && mach_read_from_2(block->page.frame + offset + TRX_UNDO_NEXT_LOG)) @@ -246,9 +247,10 @@ trx_undo_get_next_rec_from_next_page(buf_block_t *&block, uint32_t page_no, if (next == FIL_NULL) return nullptr; - block= buf_page_get(page_id_t(block->page.id().space(), next), 0, mode, mtr); + block= buf_page_get_gen(page_id_t(block->page.id().space(), next), 0, mode, + nullptr, BUF_GET_POSSIBLY_FREED, mtr); - return trx_undo_page_get_first_rec(block, page_no, offset); + return block ? trx_undo_page_get_first_rec(block, page_no, offset) : nullptr; } /** Get the next record in an undo log. 
@@ -259,8 +261,8 @@ trx_undo_get_next_rec_from_next_page(buf_block_t *&block, uint32_t page_no, @param[in,out] mtr mini-transaction @return undo log record, the page latched, NULL if none */ trx_undo_rec_t* -trx_undo_get_next_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no, - uint16_t offset, mtr_t *mtr) +trx_undo_get_next_rec(const buf_block_t *&block, uint16_t rec, + uint32_t page_no, uint16_t offset, mtr_t *mtr) { if (trx_undo_rec_t *next= trx_undo_page_get_next_rec(block, rec, page_no, offset)) @@ -277,13 +279,18 @@ trx_undo_get_next_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no, @param[in] mode latching mode: RW_S_LATCH or RW_X_LATCH @param[out] block undo log page @param[in,out] mtr mini-transaction -@return undo log record, the page latched, NULL if none */ +@param[out] err error code +@return undo log record, the page latched +@retval nullptr if none */ trx_undo_rec_t* trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no, - uint16_t offset, ulint mode, buf_block_t*& block, - mtr_t *mtr) + uint16_t offset, ulint mode, const buf_block_t*& block, + mtr_t *mtr, dberr_t *err) { - block = buf_page_get(page_id_t(space.id, page_no), 0, mode, mtr); + block= buf_page_get_gen(page_id_t{space.id, page_no}, 0, mode, + nullptr, BUF_GET, mtr, err); + if (!block) + return nullptr; if (trx_undo_rec_t *rec= trx_undo_page_get_first_rec(block, page_no, offset)) return rec; @@ -361,7 +368,11 @@ ATTRIBUTE_COLD void trx_t::apply_log() mtr_t mtr; mtr.start(); buf_block_t *block= buf_page_get(page_id, 0, RW_S_LATCH, &mtr); - ut_ad(block); + if (UNIV_UNLIKELY(!block)) + { + mtr.commit(); + return; + } UndorecApplier log_applier(page_id, id); @@ -376,6 +387,8 @@ ATTRIBUTE_COLD void trx_t::apply_log() log_applier.apply_undo_rec(); mtr.start(); block= buf_page_get(log_applier.get_page_id(), 0, RW_S_LATCH, &mtr); + if (UNIV_UNLIKELY(!block)) + goto func_exit; rec= trx_undo_page_get_next_rec(block, log_applier.get_offset(), page_id.page_no(), 
undo->hdr_offset); } @@ -389,9 +402,11 @@ ATTRIBUTE_COLD void trx_t::apply_log() mtr.commit(); mtr.start(); block= buf_page_get_gen(next_page_id, 0, RW_S_LATCH, block, BUF_GET, &mtr); + if (UNIV_UNLIKELY(!block)) + break; log_applier.assign_next(next_page_id); - ut_ad(block); } +func_exit: mtr.commit(); apply_online_log= false; } @@ -468,7 +483,6 @@ trx_undo_seg_create(fil_space_t *space, buf_block_t *rseg_hdr, ulint *id, { buf_block_t* block; uint32_t n_reserved; - bool success; const ulint slot_no = trx_rsegf_undo_find_free(rseg_hdr); @@ -483,22 +497,20 @@ trx_undo_seg_create(fil_space_t *space, buf_block_t *rseg_hdr, ulint *id, ut_ad(slot_no < TRX_RSEG_N_SLOTS); - success = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO, + *err = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO, mtr); - if (!success) { - *err = DB_OUT_OF_FILE_SPACE; + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { return NULL; } /* Allocate a new file segment for the undo log */ block = fseg_create(space, TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, - mtr, true); + mtr, err, true); space->release_free_extents(n_reserved); - if (block == NULL) { - *err = DB_OUT_OF_FILE_SPACE; - return NULL; + if (!block) { + return block; } mtr->undo_create(*block); @@ -514,8 +526,9 @@ trx_undo_seg_create(fil_space_t *space, buf_block_t *rseg_hdr, ulint *id, flst_init(*block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->page.frame, mtr); - flst_add_last(block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, - block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); + *err = flst_add_last(block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, + block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, + mtr); *id = slot_no; mtr->write<4>(*rseg_hdr, TRX_RSEG + TRX_RSEG_UNDO_SLOTS @@ -656,52 +669,58 @@ trx_undo_read_xid(const trx_ulogf_t* log_hdr, XID* xid) /** Allocate an undo log page. 
@param[in,out] undo undo log @param[in,out] mtr mini-transaction that does not hold any page latch +@param[out] err error code @return X-latched block if success -@retval NULL on failure */ -buf_block_t* trx_undo_add_page(trx_undo_t* undo, mtr_t* mtr) +@retval nullptr on failure */ +buf_block_t *trx_undo_add_page(trx_undo_t *undo, mtr_t *mtr, dberr_t *err) { - trx_rseg_t* rseg = undo->rseg; - buf_block_t* new_block = NULL; - uint32_t n_reserved; - - /* When we add a page to an undo log, this is analogous to - a pessimistic insert in a B-tree, and we must reserve the - counterpart of the tree latch, which is the rseg mutex. */ - - rseg->latch.wr_lock(SRW_LOCK_CALL); - - buf_block_t* header_block = trx_undo_page_get( - page_id_t(undo->rseg->space->id, undo->hdr_page_no), mtr); - - if (!fsp_reserve_free_extents(&n_reserved, undo->rseg->space, 1, - FSP_UNDO, mtr)) { - goto func_exit; - } - - new_block = fseg_alloc_free_page_general( - TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER - + header_block->page.frame, - undo->top_page_no + 1, FSP_UP, true, mtr, mtr); - - rseg->space->release_free_extents(n_reserved); - - if (!new_block) { - goto func_exit; - } - - undo->last_page_no = new_block->page.id().page_no(); - - mtr->undo_create(*new_block); - trx_undo_page_init(*new_block); - - flst_add_last(header_block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, - new_block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); - undo->size++; - rseg->curr_size++; + buf_block_t *new_block= nullptr; + uint32_t n_reserved; + + /* When we add a page to an undo log, this is analogous to + a pessimistic insert in a B-tree, and we must reserve the + counterpart of the tree latch, which is the rseg mutex. 
*/ + + trx_rseg_t *rseg= undo->rseg; + rseg->latch.wr_lock(SRW_LOCK_CALL); + + buf_block_t *header_block= + buf_page_get_gen(page_id_t{rseg->space->id, undo->hdr_page_no}, + 0, RW_X_LATCH, nullptr, BUF_GET, mtr, err); + if (!header_block) + goto func_exit; + *err= fsp_reserve_free_extents(&n_reserved, rseg->space, 1, FSP_UNDO, mtr); + + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) + goto func_exit; + + new_block= + fseg_alloc_free_page_general(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER + + header_block->page.frame, + undo->top_page_no + 1, FSP_UP, true, + mtr, mtr, err); + rseg->space->release_free_extents(n_reserved); + + if (!new_block) + goto func_exit; + + undo->last_page_no= new_block->page.id().page_no(); + + mtr->undo_create(*new_block); + trx_undo_page_init(*new_block); + *err= flst_add_last(header_block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, + new_block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) + new_block= nullptr; + else + { + undo->size++; + rseg->curr_size++; + } func_exit: - rseg->latch.wr_unlock(); - return(new_block); + rseg->latch.wr_unlock(); + return new_block; } /********************************************************************//** @@ -717,26 +736,42 @@ trx_undo_free_page( uint32_t hdr_page_no, /*!< in: header page number */ uint32_t page_no, /*!< in: page number to free: must not be the header page */ - mtr_t* mtr) /*!< in: mtr which does not have a latch to any + mtr_t* mtr, /*!< in: mtr which does not have a latch to any undo log page; the caller must have reserved the rollback segment mutex */ + dberr_t* err) /*!< out: error code */ { - const ulint space = rseg->space->id; - ut_a(hdr_page_no != page_no); - buf_block_t* undo_block = trx_undo_page_get(page_id_t(space, page_no), - mtr); - buf_block_t* header_block = trx_undo_page_get(page_id_t(space, - hdr_page_no), - mtr); + buf_block_t* undo_block = buf_page_get_gen(page_id_t(rseg->space->id, + page_no), + 0, RW_X_LATCH, nullptr, + BUF_GET, mtr, err); 
+ if (UNIV_UNLIKELY(!undo_block)) { + return FIL_NULL; + } + buf_block_t* header_block = buf_page_get_gen(page_id_t(rseg->space->id, + hdr_page_no), + 0, RW_X_LATCH, nullptr, + BUF_GET, mtr, err); + if (UNIV_UNLIKELY(!header_block)) { + return FIL_NULL; + } + + *err = flst_remove(header_block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, + undo_block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, + mtr); - flst_remove(header_block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, - undo_block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return FIL_NULL; + } - fseg_free_page(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER - + header_block->page.frame, - rseg->space, page_no, mtr); + *err = fseg_free_page(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER + + header_block->page.frame, + rseg->space, page_no, mtr); + if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { + return FIL_NULL; + } buf_page_free(rseg->space, page_no, mtr); const fil_addr_t last_addr = flst_get_last( @@ -744,14 +779,15 @@ trx_undo_free_page( + header_block->page.frame); rseg->curr_size--; - if (in_history) { - buf_block_t* rseg_header = trx_rsegf_get( - rseg->space, rseg->page_no, mtr); + if (!in_history) { + } else if (buf_block_t* rseg_header = rseg->get(mtr, err)) { byte* rseg_hist_size = TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_header->page.frame; uint32_t hist_size = mach_read_from_4(rseg_hist_size); ut_ad(hist_size > 0); mtr->write<4>(*rseg_header, rseg_hist_size, hist_size - 1); + } else { + return FIL_NULL; } return(last_addr.page); @@ -760,74 +796,101 @@ trx_undo_free_page( /** Free the last undo log page. The caller must hold the rseg mutex. 
@param[in,out] undo undo log @param[in,out] mtr mini-transaction that does not hold any undo log page - or that has allocated the undo log page */ -void -trx_undo_free_last_page(trx_undo_t* undo, mtr_t* mtr) + or that has allocated the undo log page +@return error code */ +dberr_t trx_undo_free_last_page(trx_undo_t *undo, mtr_t *mtr) { - ut_ad(undo->hdr_page_no != undo->last_page_no); - ut_ad(undo->size > 0); - - undo->last_page_no = trx_undo_free_page( - undo->rseg, false, undo->hdr_page_no, undo->last_page_no, mtr); - - undo->size--; + ut_ad(undo->hdr_page_no != undo->last_page_no); + ut_ad(undo->size > 0); + undo->size--; + + dberr_t err; + undo->last_page_no= trx_undo_free_page(undo->rseg, false, undo->hdr_page_no, + undo->last_page_no, mtr, &err); + return err; } /** Truncate the tail of an undo log during rollback. @param[in,out] undo undo log @param[in] limit all undo logs after this limit will be discarded -@param[in] is_temp whether this is temporary undo log */ -void trx_undo_truncate_end(trx_undo_t& undo, undo_no_t limit, bool is_temp) +@param[in] is_temp whether this is temporary undo log +@return error code */ +static dberr_t trx_undo_truncate_end(trx_undo_t &undo, undo_no_t limit, + bool is_temp) { - mtr_t mtr; - ut_ad(is_temp == !undo.rseg->is_persistent()); + ut_ad(is_temp == !undo.rseg->is_persistent()); - for (;;) { - mtr.start(); - if (is_temp) { - mtr.set_log_mode(MTR_LOG_NO_REDO); - } - - trx_undo_rec_t* trunc_here = NULL; - undo.rseg->latch.wr_lock(SRW_LOCK_CALL); - buf_block_t* undo_block = trx_undo_page_get( - page_id_t(undo.rseg->space->id, undo.last_page_no), - &mtr); - trx_undo_rec_t* rec = trx_undo_page_get_last_rec( - undo_block, undo.hdr_page_no, undo.hdr_offset); - while (rec) { - if (trx_undo_rec_get_undo_no(rec) < limit) { - goto func_exit; - } - /* Truncate at least this record off, maybe more */ - trunc_here = rec; - - rec = trx_undo_page_get_prev_rec(undo_block, rec, - undo.hdr_page_no, - undo.hdr_offset); - } + for (mtr_t mtr;;) 
+ { + mtr.start(); + if (is_temp) + mtr.set_log_mode(MTR_LOG_NO_REDO); + + trx_undo_rec_t *trunc_here= nullptr; + undo.rseg->latch.wr_lock(SRW_LOCK_CALL); + dberr_t err; + buf_block_t *undo_block= + buf_page_get_gen(page_id_t{undo.rseg->space->id, undo.last_page_no}, + 0, RW_X_LATCH, nullptr, BUF_GET, &mtr, &err); + if (UNIV_UNLIKELY(!undo_block)) + goto func_exit; + + for (trx_undo_rec_t *rec= + trx_undo_page_get_last_rec(undo_block, + undo.hdr_page_no, undo.hdr_offset); + rec; ) + { + if (trx_undo_rec_get_undo_no(rec) < limit) + goto func_exit; + /* Truncate at least this record off, maybe more */ + trunc_here= rec; + rec= trx_undo_page_get_prev_rec(undo_block, rec, + undo.hdr_page_no, undo.hdr_offset); + } - if (undo.last_page_no != undo.hdr_page_no) { - trx_undo_free_last_page(&undo, &mtr); - undo.rseg->latch.wr_unlock(); - mtr.commit(); - continue; - } + if (undo.last_page_no != undo.hdr_page_no) + { + err= trx_undo_free_last_page(&undo, &mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) + goto func_exit; + undo.rseg->latch.wr_unlock(); + mtr.commit(); + continue; + } func_exit: - undo.rseg->latch.wr_unlock(); - - if (trunc_here) { - mtr.write<2>(*undo_block, - TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE - + undo_block->page.frame, - ulint(trunc_here - - undo_block->page.frame)); - } + undo.rseg->latch.wr_unlock(); - mtr.commit(); - return; - } + if (trunc_here && err == DB_SUCCESS) + mtr.write<2>(*undo_block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + + undo_block->page.frame, + ulint(trunc_here - undo_block->page.frame)); + + mtr.commit(); + return err; + } +} + +/** Try to truncate the undo logs. 
+@param trx transaction +@return error code */ +dberr_t trx_undo_try_truncate(const trx_t &trx) +{ + if (trx_undo_t *undo= trx.rsegs.m_redo.undo) + { + ut_ad(undo->rseg == trx.rsegs.m_redo.rseg); + if (dberr_t err= trx_undo_truncate_end(*undo, trx.undo_no, false)) + return err; + } + + if (trx_undo_t *undo = trx.rsegs.m_noredo.undo) + { + ut_ad(undo->rseg == trx.rsegs.m_noredo.rseg); + if (dberr_t err= trx_undo_truncate_end(*undo, trx.undo_no, true)) + return err; + } + + return DB_SUCCESS; } /** Truncate the head of an undo log. @@ -837,8 +900,9 @@ freed, but emptied, if all the records there are below the limit. @param[in] hdr_page_no header page number @param[in] hdr_offset header offset on the page @param[in] limit first undo number to preserve -(everything below the limit will be truncated) */ -void +(everything below the limit will be truncated) +@return error code */ +dberr_t trx_undo_truncate_start( trx_rseg_t* rseg, uint32_t hdr_page_no, @@ -850,7 +914,7 @@ trx_undo_truncate_start( mtr_t mtr; if (!limit) { - return; + return DB_SUCCESS; } loop: mtr_start(&mtr); @@ -859,14 +923,15 @@ loop: mtr.set_log_mode(MTR_LOG_NO_REDO); } - buf_block_t* undo_page; + dberr_t err; + const buf_block_t* undo_page; rec = trx_undo_get_first_rec(*rseg->space, hdr_page_no, hdr_offset, - RW_X_LATCH, undo_page, &mtr); + RW_X_LATCH, undo_page, &mtr, &err); if (rec == NULL) { /* Already empty */ done: mtr.commit(); - return; + return err; } last_rec = trx_undo_page_get_last_rec(undo_page, hdr_page_no, @@ -888,11 +953,13 @@ done: + TRX_UNDO_LOG_START, end); } else { trx_undo_free_page(rseg, true, hdr_page_no, - undo_page->page.id().page_no(), &mtr); + undo_page->page.id().page_no(), &mtr, &err); + if (err != DB_SUCCESS) { + goto done; + } } - mtr_commit(&mtr); - + mtr.commit(); goto loop; } @@ -900,38 +967,42 @@ done: @param undo temporary undo log */ static void trx_undo_seg_free(const trx_undo_t *undo) { - ut_ad(undo->id < TRX_RSEG_N_SLOTS); - - trx_rseg_t* const rseg = 
undo->rseg; - bool finished; - mtr_t mtr; - ut_ad(rseg->space == fil_system.temp_space); - - do { - mtr.start(); - mtr.set_log_mode(MTR_LOG_NO_REDO); + ut_ad(undo->id < TRX_RSEG_N_SLOTS); - buf_block_t* block = trx_undo_page_get( - page_id_t(SRV_TMP_SPACE_ID, undo->hdr_page_no), &mtr); + trx_rseg_t *const rseg= undo->rseg; + bool finished; + mtr_t mtr; + ut_ad(rseg->space == fil_system.temp_space); - fseg_header_t* file_seg = TRX_UNDO_SEG_HDR - + TRX_UNDO_FSEG_HEADER + block->page.frame; + do + { + mtr.start(); + mtr.set_log_mode(MTR_LOG_NO_REDO); - finished = fseg_free_step(file_seg, &mtr); + finished= true; - if (finished) { - /* Update the rseg header */ - buf_block_t* rseg_header = trx_rsegf_get( - rseg->space, rseg->page_no, &mtr); - compile_time_assert(FIL_NULL == 0xffffffff); - memset(TRX_RSEG + TRX_RSEG_UNDO_SLOTS - + undo->id * TRX_RSEG_SLOT_SIZE + - rseg_header->page.frame, 0xff, 4); - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED); - } + if (buf_block_t *block= + buf_page_get(page_id_t(SRV_TMP_SPACE_ID, undo->hdr_page_no), 0, + RW_X_LATCH, &mtr)) + { + fseg_header_t *file_seg= TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER + + block->page.frame; + + finished= fseg_free_step(file_seg, &mtr); + + if (!finished); + else if (buf_block_t* rseg_header = rseg->get(&mtr, nullptr)) + { + static_assert(FIL_NULL == 0xffffffff, "compatibility"); + memset(TRX_RSEG + TRX_RSEG_UNDO_SLOTS + undo->id * TRX_RSEG_SLOT_SIZE + + rseg_header->page.frame, 0xff, 4); + MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED); + } + } - mtr.commit(); - } while (!finished); + mtr.commit(); + } + while (!finished); } /*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/ @@ -953,8 +1024,14 @@ trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no, ut_ad(id < TRX_RSEG_N_SLOTS); mtr.start(); - const buf_block_t* block = trx_undo_page_get( - page_id_t(rseg->space->id, page_no), &mtr); + const buf_block_t* block = buf_page_get( + page_id_t(rseg->space->id, page_no), 0, 
RW_X_LATCH, &mtr); + if (UNIV_UNLIKELY(!block)) { +corrupted: + mtr.commit(); + return nullptr; + } + const uint16_t type = mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE + block->page.frame); @@ -962,9 +1039,7 @@ trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no, corrupted_type: sql_print_error("InnoDB: unsupported undo header type %u", type); -corrupted: - mtr.commit(); - return nullptr; + goto corrupted; } uint16_t offset = mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_LAST_LOG @@ -1049,8 +1124,14 @@ corrupted: undo->last_page_no = last_addr.page; undo->top_page_no = last_addr.page; - const buf_block_t* last = trx_undo_page_get( - page_id_t(rseg->space->id, undo->last_page_no), &mtr); + const buf_block_t* last = buf_page_get( + page_id_t(rseg->space->id, undo->last_page_no), 0, + RW_X_LATCH, &mtr); + + if (UNIV_UNLIKELY(!last)) { + ut_free(undo); + goto corrupted; + } if (const trx_undo_rec_t* rec = trx_undo_page_get_last_rec( last, page_no, offset)) { @@ -1163,9 +1244,11 @@ trx_undo_create(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo, dberr_t* err, mtr_t* mtr) { ulint id; - buf_block_t* block = trx_undo_seg_create( - rseg->space, - trx_rsegf_get(rseg->space, rseg->page_no, mtr), &id, err, mtr); + buf_block_t* block = rseg->get(mtr, err); + + if (block) { + block = trx_undo_seg_create(rseg->space, block, &id, err, mtr); + } if (!block) { return NULL; @@ -1363,22 +1446,25 @@ trx_undo_set_state_at_finish( trx_undo_t* undo, /*!< in: undo log memory copy */ mtr_t* mtr) /*!< in: mtr */ { - ut_a(undo->id < TRX_RSEG_N_SLOTS); - - buf_block_t* block = trx_undo_page_get( - page_id_t(undo->rseg->space->id, undo->hdr_page_no), mtr); - - const uint16_t state = undo->size == 1 - && TRX_UNDO_PAGE_REUSE_LIMIT - > mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE - + block->page.frame) - ? 
TRX_UNDO_CACHED - : TRX_UNDO_TO_PURGE; - - undo->state = state; - mtr->write<2>(*block, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE - + block->page.frame, state); - return block; + ut_ad(undo->id < TRX_RSEG_N_SLOTS); + + buf_block_t *block= + buf_page_get(page_id_t(undo->rseg->space->id, undo->hdr_page_no), 0, + RW_X_LATCH, mtr); + /* This function is invoked during transaction commit, which is not + allowed to fail. If we get a corrupted undo header, we will crash here. */ + ut_a(block); + const uint16_t state = undo->size == 1 && + TRX_UNDO_PAGE_REUSE_LIMIT > + mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE + + block->page.frame) + ? TRX_UNDO_CACHED + : TRX_UNDO_TO_PURGE; + + undo->state= state; + mtr->write<2>(*block, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + block->page.frame, + state); + return block; } /** Set the state of the undo log segment at a XA PREPARE or XA ROLLBACK. @@ -1392,8 +1478,16 @@ void trx_undo_set_state_at_prepare(trx_t *trx, trx_undo_t *undo, bool rollback, { ut_a(undo->id < TRX_RSEG_N_SLOTS); - buf_block_t* block = trx_undo_page_get( - page_id_t(undo->rseg->space->id, undo->hdr_page_no), mtr); + buf_block_t* block = buf_page_get( + page_id_t(undo->rseg->space->id, undo->hdr_page_no), 0, + RW_X_LATCH, mtr); + if (UNIV_UNLIKELY(!block)) { + /* In case of !rollback the undo header page + corruption would leave the transaction object in an + unexpected (active) state. */ + ut_a(rollback); + return; + } if (rollback) { ut_ad(undo->state == TRX_UNDO_PREPARED); -- cgit v1.2.1 From cc4eabc7b276fd27044ed42bd32c4f58c45b924d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 6 Jun 2022 14:04:45 +0300 Subject: MDEV-13542: Implement page read fault injection --debug-dbug=d,intermittent_read_failure is effective after the database has been started up. --debug-dbug=d,intermittent_recovery_failure is always effective, including during recovery. 
--- storage/innobase/fil/fil0fil.cc | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index f91d127215f..703638f2b7c 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2834,6 +2834,14 @@ fil_io_t fil_space_t::io(const IORequest &type, os_offset_t offset, size_t len, goto release; } + DBUG_EXECUTE_IF("intermittent_recovery_failure", + if (type.is_read() && !(~get_rnd_value() & 0x3ff0)) + goto io_error;); + + DBUG_EXECUTE_IF("intermittent_read_failure", + if (srv_was_started && type.is_read() && + !(~get_rnd_value() & 0x3ff0)) goto io_error;); + if (UNIV_LIKELY_NULL(UT_LIST_GET_NEXT(chain, node))) { ut_ad(this == fil_system.sys_space || this == fil_system.temp_space); @@ -2850,7 +2858,9 @@ fail: offset, len, type.is_read()); } - +#ifndef DBUG_OFF +io_error: +#endif set_corrupted(); err = DB_IO_ERROR; node = nullptr; -- cgit v1.2.1 From 4179f93d28035ea2798cb1c16feeaaef87ab4775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 6 Jun 2022 14:05:01 +0300 Subject: MDEV-18976 Implement OPT_PAGE_CHECKSUM log record for improved validation We will introduce an optional log record OPT_PAGE_CHECKSUM for recording page checksums, so that more inconsistencies on crash recovery may be caught. mtr_t::page_checksum(const buf_page_t&): Write OPT_PAGE_CHECKSUM (currently not for ROW_FORMAT=COMPRESSED pages). mtr_t::do_write(): Write OPT_PAGE_CHECKSUM records for all pages (currently, in debug builds only). mtr_t::is_logged(): Return whether log should be written. mtr_t::set_log_mode_sub(const mtr_t&): Set the logging mode of a sub-minitransaction when another mini-transaction is holding latches on some modified pages. When creating or freeing BLOB pages, we may only write OPT_PAGE_CHECKSUM records in the main mini-transaction, after all changes have been written to the log. MTR_LOG_SUB: Log mode for a sub-mini-transaction. 
mtr_t::free(): Define non-inline, and invoke MarkFreed. MarkFreed: For any matching page in the mini-transaction log, change the first entry to say MTR_MEMO_PAGE_X_MODIFY and any subsequent entries to MTR_MEMO_PAGE_X_FIX. FindModified: Simplify a condition. MTR_MEMO_MODIFY can only be set if MTR_MEMO_PAGE_X_FIX or MTR_MEMO_PAGE_SX_FIX are set. FindBlockX: Consider also MTR_MEMO_PAGE_X_MODIFY. recv_sys_t::parse(): Store OPT_PAGE_CHECKSUM records. log_phys_t::apply(): Validate OPT_PAGE_CHECKSUM records. log_phys_t::page_checksum(): Validate an OPT_PAGE_CHECKSUM record. Tested by: Matthias Leich --- storage/innobase/btr/btr0cur.cc | 4 +- storage/innobase/fil/fil0fil.cc | 2 +- storage/innobase/fsp/fsp0fsp.cc | 34 +++---- storage/innobase/include/buf0buf.h | 5 +- storage/innobase/include/fil0fil.h | 5 +- storage/innobase/include/mtr0log.h | 41 +++----- storage/innobase/include/mtr0mtr.h | 28 +++++- storage/innobase/include/mtr0types.h | 26 ++++-- storage/innobase/log/log0recv.cc | 65 ++++++++++--- storage/innobase/mtr/mtr0mtr.cc | 175 ++++++++++++++++++++++++++++------- storage/innobase/page/page0cur.cc | 8 +- storage/innobase/page/page0zip.cc | 6 +- 12 files changed, 275 insertions(+), 124 deletions(-) diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index abab6ab876b..adce2ed2b6f 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -6943,7 +6943,7 @@ btr_store_big_rec_extern_fields( mtr.start(); index->set_modified(mtr); - mtr.set_log_mode(btr_mtr->get_log_mode()); + mtr.set_log_mode_sub(*btr_mtr); mtr.memo_push(rec_block, MTR_MEMO_PAGE_X_FIX); rec_block->page.fix(); @@ -7287,7 +7287,7 @@ btr_free_externally_stored_field( mtr.start(); mtr.set_spaces(*local_mtr); - mtr.set_log_mode(local_mtr->get_log_mode()); + mtr.set_log_mode_sub(*local_mtr); ut_ad(!index->table->is_temporary() || local_mtr->get_log_mode() == MTR_LOG_NO_REDO); diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc 
index 703638f2b7c..07f77add744 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1483,7 +1483,7 @@ inline void mtr_t::log_file_op(mfile_type_t type, ulint space_id, ut_ad(!strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD)); flag_modified(); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; m_last= nullptr; diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index b1cf62fc160..54a43b920bb 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -475,26 +475,20 @@ updating an allocation bitmap page. @param[in] mtr mini-transaction */ void fil_space_t::modify_check(const mtr_t& mtr) const { - switch (mtr.get_log_mode()) { - case MTR_LOG_NONE: - /* These modes are only allowed within a non-bitmap page - when there is a higher-level redo log record written. */ - ut_ad(purpose == FIL_TYPE_TABLESPACE - || purpose == FIL_TYPE_TEMPORARY); - break; - case MTR_LOG_NO_REDO: - ut_ad(purpose == FIL_TYPE_TEMPORARY - || purpose == FIL_TYPE_IMPORT); - return; - case MTR_LOG_ALL: - /* We may only write redo log for a persistent - tablespace. */ - ut_ad(purpose == FIL_TYPE_TABLESPACE); - ut_ad(mtr.is_named_space(id)); - return; - } - - ut_ad("invalid log mode" == 0); + switch (mtr.get_log_mode()) { + case MTR_LOG_NONE: + /* These modes are only allowed within a non-bitmap page + when there is a higher-level redo log record written. */ + ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_TEMPORARY); + break; + case MTR_LOG_NO_REDO: + ut_ad(purpose == FIL_TYPE_TEMPORARY || purpose == FIL_TYPE_IMPORT); + break; + default: + /* We may only write redo log for a persistent tablespace. 
*/ + ut_ad(purpose == FIL_TYPE_TABLESPACE); + ut_ad(mtr.is_named_space(id)); + } } #endif diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 22a07e8d86a..9440672aba1 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -24,8 +24,7 @@ The database buffer pool high-level routines Created 11/5/1995 Heikki Tuuri *******************************************************/ -#ifndef buf0buf_h -#define buf0buf_h +#pragma once /** Magic value to use instead of checksums when they are disabled */ #define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL @@ -2201,5 +2200,3 @@ struct CheckUnzipLRUAndLRUList { #include "buf0buf.inl" #endif /* !UNIV_INNOCHECKSUM */ - -#endif diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index aff5109300a..8889604a919 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1278,8 +1278,9 @@ struct fil_addr_t { /** For the first page in a system tablespace data file(ibdata*, not *.ibd): the file has been flushed to disk at least up to this lsn -For other pages: 32-bit key version used to encrypt the page + 32-bit checksum -or 64 bites of zero if no encryption */ +For other pages of tablespaces not in innodb_checksum_algorithm=full_crc32 +format: 32-bit key version used to encrypt the page + 32-bit checksum +or 64 bits of zero if no encryption */ #define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26U /** This overloads FIL_PAGE_FILE_FLUSH_LSN for RTREE Split Sequence Number */ diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index 8192c93a8f9..093b706c1de 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -196,7 +196,7 @@ inline bool mtr_t::write(const buf_block_t &block, void *ptr, V val) } byte *p= static_cast(ptr); const byte *const end= p + l; - if (w != FORCED && m_log_mode == MTR_LOG_ALL) + if (w != FORCED && is_logged()) { const byte 
*b= buf; while (*p++ == *b++) @@ -224,7 +224,7 @@ inline void mtr_t::memset(const buf_block_t &b, ulint ofs, ulint len, byte val) { ut_ad(len); set_modified(b); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; static_assert(MIN_4BYTE > UNIV_PAGE_SIZE_MAX, "consistency"); @@ -261,7 +261,7 @@ inline void mtr_t::memset(const buf_block_t &b, ulint ofs, size_t len, ut_ad(size); ut_ad(len > size); /* use mtr_t::memcpy() for shorter writes */ set_modified(b); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; static_assert(MIN_4BYTE > UNIV_PAGE_SIZE_MAX, "consistency"); @@ -319,7 +319,7 @@ inline void mtr_t::memcpy_low(const buf_block_t &block, uint16_t offset, { ut_ad(len); set_modified(block); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; if (len < mtr_buf_t::MAX_DATA_SIZE - (1 + 3 + 3 + 5 + 5)) { @@ -354,7 +354,7 @@ inline void mtr_t::memmove(const buf_block_t &b, ulint d, ulint s, ulint len) ut_ad(d + len <= ulint(srv_page_size)); set_modified(b); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; static_assert(MIN_4BYTE > UNIV_PAGE_SIZE_MAX, "consistency"); size_t lenlen= (len < MIN_2BYTE ? 1 : len < MIN_3BYTE ? 
2 : 3); @@ -387,7 +387,7 @@ template inline byte *mtr_t::log_write(const page_id_t id, const buf_page_t *bpage, size_t len, bool alloc, size_t offset) { - static_assert(!(type & 15) && type != RESERVED && type != OPTION && + static_assert(!(type & 15) && type != RESERVED && type <= FILE_CHECKPOINT, "invalid type"); ut_ad(type >= FILE_CREATE || is_named_space(id.space())); ut_ad(!bpage || bpage->id() == id); @@ -491,7 +491,7 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str, ut_ad(ut_align_down(dest, srv_page_size) == b.page.frame); char *d= static_cast(dest); const char *s= static_cast(str); - if (w != FORCED && m_log_mode == MTR_LOG_ALL) + if (w != FORCED && is_logged()) { ut_ad(len); const char *const end= d + len; @@ -531,35 +531,20 @@ inline void mtr_t::init(buf_block_t *b) b->page.set_reinit(b->page.state() & buf_page_t::LRU_MASK); - if (m_log_mode != MTR_LOG_ALL) - { - ut_ad(m_log_mode == MTR_LOG_NONE || m_log_mode == MTR_LOG_NO_REDO); + if (!is_logged()) return; - } m_log.close(log_write(b->page.id(), &b->page)); m_last_offset= FIL_PAGE_TYPE; } -/** Free a page. -@param[in] space tablespace contains page to be freed -@param[in] offset page offset to be freed */ -inline void mtr_t::free(fil_space_t &space, uint32_t offset) -{ - ut_ad(is_named_space(&space)); - ut_ad(!m_freed_space || m_freed_space == &space); - - if (m_log_mode == MTR_LOG_ALL) - m_log.close(log_write({space.id, offset}, nullptr)); -} - /** Write an EXTENDED log record. 
@param block buffer pool page @param type extended record subtype; @see mrec_ext_t */ inline void mtr_t::log_write_extended(const buf_block_t &block, byte type) { set_modified(block); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; byte *l= log_write(block.page.id(), &block.page, 1, true); *l++= type; @@ -586,7 +571,7 @@ inline void mtr_t::page_delete(const buf_block_t &block, ulint prev_rec) ut_ad(!block.zip_size()); ut_ad(prev_rec < block.physical_size()); set_modified(block); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; size_t len= (prev_rec < MIN_2BYTE ? 2 : prev_rec < MIN_3BYTE ? 3 : 4); byte *l= log_write(block.page.id(), &block.page, len, true); @@ -613,7 +598,7 @@ inline void mtr_t::page_delete(const buf_block_t &block, ulint prev_rec, ut_ad(hdr_size < MIN_3BYTE); ut_ad(prev_rec < block.physical_size()); ut_ad(data_size < block.physical_size()); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; size_t len= prev_rec < MIN_2BYTE ? 2 : prev_rec < MIN_3BYTE ? 3 : 4; len+= hdr_size < MIN_2BYTE ? 
1 : 2; @@ -645,7 +630,7 @@ inline void mtr_t::undo_append(const buf_block_t &block, { ut_ad(len > 2); set_modified(block); - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; const bool small= len + 1 < mtr_buf_t::MAX_DATA_SIZE - (1 + 3 + 3 + 5 + 5); byte *end= log_write(block.page.id(), &block.page, len + 1, small); @@ -668,7 +653,7 @@ inline void mtr_t::undo_append(const buf_block_t &block, @param id first page identifier that will not be in the file */ inline void mtr_t::trim_pages(const page_id_t id) { - if (m_log_mode != MTR_LOG_ALL) + if (!is_logged()) return; byte *l= log_write(id, nullptr, 1, true); *l++= TRIM_PAGES; diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 3208e492c2f..02f469e3a53 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -136,10 +136,18 @@ struct mtr_t { mtr_log_t get_log_mode() const { static_assert(MTR_LOG_ALL == 0, "efficiency"); - ut_ad(m_log_mode <= MTR_LOG_NO_REDO); return static_cast(m_log_mode); } + /** @return whether log is to be written for changes */ + bool is_logged() const + { + static_assert(MTR_LOG_ALL == 0, "efficiency"); + static_assert(MTR_LOG_NONE & MTR_LOG_NO_REDO, "efficiency"); + static_assert(!(MTR_LOG_NONE & MTR_LOG_SUB), "efficiency"); + return !(m_log_mode & MTR_LOG_NONE); + } + /** Change the logging mode. 
@param mode logging mode @return old mode */ @@ -150,6 +158,15 @@ struct mtr_t { return old_mode; } + /** Set the log mode of a sub-minitransaction + @param mtr parent mini-transaction */ + void set_log_mode_sub(const mtr_t &mtr) + { + ut_ad(mtr.m_log_mode == MTR_LOG_ALL || mtr.m_log_mode == MTR_LOG_NO_REDO); + m_log_mode= mtr.m_log_mode | MTR_LOG_SUB; + static_assert((MTR_LOG_SUB | MTR_LOG_NO_REDO) == MTR_LOG_NO_REDO, ""); + } + /** Check if we are holding a block latch in exclusive mode @param block buffer pool block to search for */ bool have_x_latch(const buf_block_t &block) const; @@ -372,6 +389,9 @@ public: /** @return whether the log and memo are empty */ bool is_empty() const { return m_memo.size() == 0 && m_log.size() == 0; } + /** Write an OPT_PAGE_CHECKSUM record. */ + inline void page_checksum(const buf_page_t &bpage); + /** Write request types */ enum write_type { @@ -470,9 +490,9 @@ public: @param[in,out] b buffer page */ void init(buf_block_t *b); /** Free a page. - @param[in] space tablespace contains page to be freed - @param[in] offset page offset to be freed */ - inline void free(fil_space_t &space, uint32_t offset); + @param space tablespace + @param offset offset of the page to be freed */ + void free(const fil_space_t &space, uint32_t offset); /** Write log for partly initializing a B-tree or R-tree page. @param block B-tree or R-tree page @param comp false=ROW_FORMAT=REDUNDANT, true=COMPACT or DYNAMIC */ diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h index 9ee7810fa7b..7acc255da36 100644 --- a/storage/innobase/include/mtr0types.h +++ b/storage/innobase/include/mtr0types.h @@ -41,6 +41,11 @@ enum mtr_log_t { Set for attempting modification of a ROW_FORMAT=COMPRESSED page. */ MTR_LOG_NONE, + /** Log all operations, but do not write any OPT_PAGE_CHECKSUM + records because some of the modified pages were also modified + by another mini-transaction that did not write its log yet. 
*/ + MTR_LOG_SUB, + /** Don't generate REDO log but add dirty pages to flush list */ MTR_LOG_NO_REDO }; @@ -77,12 +82,8 @@ type. The following record types refer to data pages: RESERVED (6): reserved for future use; a subtype code (encoded immediately after the length) would be written to reserve code space for further extensions - OPTION (7): optional record that may be ignored; a subtype code - (encoded immediately after the length) would distinguish actual - usage, such as: - * MDEV-18976 page checksum record - * binlog record - * SQL statement (at the start of statement) + OPTION (7): optional record that may be ignored; a subtype @see mrec_opt + (encoded immediately after the length) would distinguish actual usage Bits 3..0 indicate the redo log record length, excluding the first byte, but including additional length bytes and any other bytes, @@ -229,9 +230,7 @@ enum mrec_type_t /** Reserved for future use. */ RESERVED= 0x60, /** Optional record that may be ignored in crash recovery. - A subtype code will be encoded immediately after the length. - Possible subtypes would include a MDEV-18976 page checksum record, - a binlog record, or an SQL statement. */ + A subtype (@see mrec_opt) will be encoded after the page identifier. */ OPTION= 0x70 }; @@ -283,6 +282,15 @@ enum mrec_ext_t }; +/** Recognized OPTION record subtypes. */ +enum mrec_opt +{ + /** page checksum at the end of the mini-transaction */ + OPT_PAGE_CHECKSUM= 0 + /* Other possible subtypes: a binlog record, or an SQL statement. */ +}; + + /** Redo log record types for file-level operations. These bit patterns will be written to redo log files, so the existing codes or their interpretation on crash recovery must not be changed. 
*/ diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 6f120d2c7fd..377c08b9290 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -54,6 +54,7 @@ Created 9/20/1997 Heikki Tuuri #include "srv0srv.h" #include "srv0start.h" #include "fil0pagecompress.h" +#include "log.h" /** The recovery system */ recv_sys_t recv_sys; @@ -86,7 +87,7 @@ is bigger than the lsn we are able to scan up to, that is an indication that the recovery failed and the database may be corrupt. */ static lsn_t recv_max_page_lsn; -/** Stored physical log record with logical LSN (@see log_t::FORMAT_10_5) */ +/** Stored physical log record */ struct log_phys_t : public log_rec_t { /** start LSN of the mini-transaction (not necessarily of this record) */ @@ -178,6 +179,35 @@ public: return false; } + /** Check an OPT_PAGE_CHECKSUM record. + @see mtr_t::page_checksum() + @param block buffer page + @param l pointer to checksum + @return whether an unrecoverable mismatch was found */ + static bool page_checksum(const buf_block_t &block, const byte *l) + { + size_t size; + const byte *page= block.page.zip.data; + if (UNIV_LIKELY_NULL(page)) + size= (UNIV_ZIP_SIZE_MIN >> 1) << block.page.zip.ssize; + else + { + page= block.page.frame; + size= srv_page_size; + } + if (UNIV_LIKELY(my_crc32c(my_crc32c(my_crc32c(0, page + FIL_PAGE_OFFSET, + FIL_PAGE_LSN - + FIL_PAGE_OFFSET), + page + FIL_PAGE_TYPE, 2), + page + FIL_PAGE_SPACE_ID, + size - (FIL_PAGE_SPACE_ID + 8)) == + mach_read_from_4(l))) + return false; + + ib::error() << "OPT_PAGE_CHECKSUM mismatch on " << block.page.id(); + return !srv_force_recovery; + } + /** The status of apply() */ enum apply_status { /** The page was not affected */ @@ -262,9 +292,21 @@ public: next_not_same_page: last_offset= 1; /* the next record must not be same_page */ } - next: l+= rlen; continue; + case OPTION: + ut_ad(rlen == 5); + ut_ad(*l == OPT_PAGE_CHECKSUM); + if (page_checksum(block, l + 1)) + { + 
applied= APPLIED_YES; +page_corrupted: + sql_print_error("InnoDB: Set innodb_force_recovery=1" + " to ignore corruption."); + recv_sys.set_corrupt_log(); + return applied; + } + goto next_after_applying; } ut_ad(mach_read_from_4(frame + FIL_PAGE_OFFSET) == @@ -275,8 +317,6 @@ public: ut_ad(last_offset <= size); switch (b & 0x70) { - case OPTION: - goto next; case EXTENDED: if (UNIV_UNLIKELY(block.page.id().page_no() < 3 || block.page.zip.ssize)) @@ -305,12 +345,7 @@ public: if (UNIV_UNLIKELY(rlen <= 3)) goto record_corrupted; if (undo_append(block, ++l, --rlen) && !srv_force_recovery) - { -page_corrupted: - ib::error() << "Set innodb_force_recovery=1 to ignore corruption."; - recv_sys.set_corrupt_log(); - return applied; - } + goto page_corrupted; break; case INSERT_HEAP_REDUNDANT: case INSERT_REUSE_REDUNDANT: @@ -2334,7 +2369,8 @@ same_page: if (got_page_op) { const page_id_t id(space_id, page_no); - ut_d(if ((b & 0x70) == INIT_PAGE) freed.erase(id)); + ut_d(if ((b & 0x70) == INIT_PAGE || (b & 0x70) == OPTION) + freed.erase(id)); ut_ad(freed.find(id) == freed.end()); switch (b & 0x70) { case FREE_PAGE: @@ -2370,8 +2406,11 @@ same_page: } last_offset= FIL_PAGE_TYPE; break; - case RESERVED: case OPTION: + if (rlen == 5 && *l == OPT_PAGE_CHECKSUM) + break; + /* fall through */ + case RESERVED: continue; case WRITE: case MEMMOVE: @@ -2463,9 +2502,9 @@ same_page: #if 0 && defined UNIV_DEBUG switch (b & 0x70) { case RESERVED: - case OPTION: ut_ad(0); /* we did "continue" earlier */ break; + case OPTION: case FREE_PAGE: break; default: diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index af3ac0c626c..4a5b5f7124a 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -375,8 +375,8 @@ struct ReleaseBlocks return true; } - buf_flush_note_modification(static_cast(slot->object), - start, end); + buf_block_t *block= static_cast(slot->object); + buf_flush_note_modification(block, start, end); return true; } }; @@ 
-436,7 +436,7 @@ void mtr_t::commit() std::pair lsns; - if (UNIV_LIKELY(m_log_mode == MTR_LOG_ALL)) + if (UNIV_LIKELY(is_logged())) { lsns= do_write(); @@ -577,6 +577,7 @@ void mtr_t::commit_shrink(fil_space_t &space) log_write_and_flush_prepare(); const lsn_t start_lsn= do_write().first; + ut_d(m_log.erase()); mysql_mutex_lock(&log_sys.flush_order_mutex); /* Durably write the reduced FSP_SIZE before truncating the data file. */ @@ -673,19 +674,9 @@ void mtr_t::commit_files(lsn_t checkpoint_lsn) bool mtr_t::is_named_space(ulint space) const { - ut_ad(!m_user_space || m_user_space->id != TRX_SYS_SPACE); - - switch (m_log_mode) { - case MTR_LOG_NONE: - case MTR_LOG_NO_REDO: - return(true); - case MTR_LOG_ALL: - return(m_user_space_id == space - || is_predefined_tablespace(space)); - } - - ut_error; - return(false); + ut_ad(!m_user_space || m_user_space->id != TRX_SYS_SPACE); + return !is_logged() || m_user_space_id == space || + is_predefined_tablespace(space); } /** Check if a tablespace is associated with the mini-transaction (needed for generating a FILE_MODIFY record) @@ -695,16 +686,8 @@ bool mtr_t::is_named_space(const fil_space_t* space) const { ut_ad(!m_user_space || m_user_space->id != TRX_SYS_SPACE); - switch (m_log_mode) { - case MTR_LOG_NONE: - case MTR_LOG_NO_REDO: - return true; - case MTR_LOG_ALL: - return m_user_space == space || is_predefined_tablespace(space->id); - } - - ut_error; - return false; + return !is_logged() || m_user_space == space || + is_predefined_tablespace(space->id); } #endif /* UNIV_DEBUG */ @@ -978,6 +961,68 @@ static mtr_t::page_flush_ahead log_close(lsn_t lsn) return mtr_t::PAGE_FLUSH_SYNC; } +inline void mtr_t::page_checksum(const buf_page_t &bpage) +{ + const byte *page= bpage.frame; + size_t size= srv_page_size; + + if (UNIV_LIKELY_NULL(bpage.zip.data)) + { + size= (UNIV_ZIP_SIZE_MIN >> 1) << bpage.zip.ssize; + switch (fil_page_get_type(bpage.zip.data)) { + case FIL_PAGE_TYPE_ALLOCATED: + case FIL_PAGE_INODE: + case 
FIL_PAGE_IBUF_BITMAP: + case FIL_PAGE_TYPE_FSP_HDR: + case FIL_PAGE_TYPE_XDES: + /* These are essentially uncompressed pages. */ + break; + default: + page= bpage.zip.data; + } + } + + /* We have to exclude from the checksum the normal + page checksum that is written by buf_flush_init_for_writing() + and FIL_PAGE_LSN which would be updated once we have actually + allocated the LSN. + + Unfortunately, we cannot access fil_space_t easily here. In order to + be compatible with encrypted tablespaces in the pre-full_crc32 + format we will unconditionally exclude the 8 bytes at + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + a.k.a. FIL_RTREE_SPLIT_SEQ_NUM. */ + const uint32_t checksum= + my_crc32c(my_crc32c(my_crc32c(0, page + FIL_PAGE_OFFSET, + FIL_PAGE_LSN - FIL_PAGE_OFFSET), + page + FIL_PAGE_TYPE, 2), + page + FIL_PAGE_SPACE_ID, size - (FIL_PAGE_SPACE_ID + 8)); + + byte *l= log_write