author     Marko Mäkelä <marko.makela@mariadb.com>  2020-03-19 17:30:26 +0200
committer  Marko Mäkelä <marko.makela@mariadb.com>  2020-03-19 17:30:26 +0200
commit     601807bfbde573254258e5d3e31dde1ac0601fd7 (patch)
tree       7bb49f334406a9d254b94490a78d6c725d1d1477
parent     6297a1026db4032ab7b7e28788d0b5b9a1f32aa3 (diff)
download   mariadb-git-10.5-MDEV-15053.tar.gz
WIP (crashes/hangs!) MDEV-15053 Reduce buf_pool_t::mutex contention (10.5-MDEV-15053)
TODO: Carefully review and document the changed latching rules, make ASAN happy, possibly make more use of std::atomic, and make more things private in buf_pool_t to control access.
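
In outline: the old buf_pool_t::mutex is narrowed to the LRU list and buf_page_t::state, while the free/withdraw lists, the flush state, the buddy allocator's zip_free[] lists, and the zip_hash table each get a mutex of their own (see the buf0buf.cc hunk documenting the new rules below). A minimal sketch of the resulting layout, with std::mutex standing in for InnoDB's latch types; the member names follow the patch, everything else is illustrative:

// Sketch only: models the mutex split introduced by this patch.
// The real buf_pool_t uses InnoDB latches (ib_mutex_t), not std::mutex.
#include <mutex>

struct buf_pool_model {
  std::mutex mutex;             // protects the LRU list and buf_page_t::state
  std::mutex free_list_mutex;   // protects the free and withdraw lists
  std::mutex flush_state_mutex; // protects the flush-state data structures
  std::mutex zip_free_mutex;    // protects the buddy allocator's zip_free[]
  std::mutex zip_hash_mutex;    // protects zip_hash and the in_zip_hash flag
  std::mutex zip_mutex;         // pre-existing: compressed-page descriptors
};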
-rw-r--r--  mysql-test/suite/encryption/r/innodb-bad-key-change2.result     |  12
-rw-r--r--  mysql-test/suite/encryption/r/innodb-bad-key-change4.result     |   6
-rw-r--r--  mysql-test/suite/encryption/r/innodb-encryption-disable.result  |   4
-rw-r--r--  mysql-test/suite/encryption/r/innodb-redo-badkey.result         |   1
-rw-r--r--  mysql-test/suite/encryption/r/innodb-remove-encryption.result   |   4
-rw-r--r--  mysql-test/suite/encryption/t/innodb-bad-key-change2.test       |  12
-rw-r--r--  mysql-test/suite/encryption/t/innodb-bad-key-change3.test       |   6
-rw-r--r--  mysql-test/suite/encryption/t/innodb-bad-key-change4.test       |   6
-rw-r--r--  mysql-test/suite/encryption/t/innodb-encryption-disable.test    |   4
-rw-r--r--  mysql-test/suite/encryption/t/innodb-redo-badkey.test           |   1
-rw-r--r--  mysql-test/suite/encryption/t/innodb-remove-encryption.test     |   4
-rw-r--r--  mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test    |   2
-rw-r--r--  mysql-test/suite/innodb/t/innodb_bug14147491.test               |   1
-rw-r--r--  mysql-test/suite/mariabackup/unencrypted_page_compressed.result |   1
-rw-r--r--  mysql-test/suite/mariabackup/unencrypted_page_compressed.test   |   1
-rw-r--r--  storage/innobase/btr/btr0cur.cc                                 |  42
-rw-r--r--  storage/innobase/btr/btr0sea.cc                                 |  30
-rw-r--r--  storage/innobase/buf/buf0buddy.cc                               | 132
-rw-r--r--  storage/innobase/buf/buf0buf.cc                                 | 693
-rw-r--r--  storage/innobase/buf/buf0dblwr.cc                               |  12
-rw-r--r--  storage/innobase/buf/buf0dump.cc                                |  14
-rw-r--r--  storage/innobase/buf/buf0flu.cc                                 | 389
-rw-r--r--  storage/innobase/buf/buf0lru.cc                                 | 459
-rw-r--r--  storage/innobase/buf/buf0rea.cc                                 |  81
-rw-r--r--  storage/innobase/dict/dict0dict.cc                              |  36
-rw-r--r--  storage/innobase/handler/ha_innodb.cc                           |  39
-rw-r--r--  storage/innobase/handler/i_s.cc                                 |  45
-rw-r--r--  storage/innobase/ibuf/ibuf0ibuf.cc                              |  14
-rw-r--r--  storage/innobase/include/buf0buddy.h                            |  13
-rw-r--r--  storage/innobase/include/buf0buf.h                              | 279
-rw-r--r--  storage/innobase/include/buf0buf.ic                             | 167
-rw-r--r--  storage/innobase/include/buf0dblwr.h                            |  14
-rw-r--r--  storage/innobase/include/buf0flu.h                              |  23
-rw-r--r--  storage/innobase/include/buf0flu.ic                             |   2
-rw-r--r--  storage/innobase/include/buf0lru.h                              |  55
-rw-r--r--  storage/innobase/include/buf0types.h                            |   4
-rw-r--r--  storage/innobase/include/srv0srv.h                              |   8
-rw-r--r--  storage/innobase/include/sync0sync.h                            |   5
-rw-r--r--  storage/innobase/include/sync0types.h                           |  18
-rw-r--r--  storage/innobase/srv/srv0srv.cc                                 |   8
-rw-r--r--  storage/innobase/sync/sync0debug.cc                             |  47
-rw-r--r--  storage/innobase/sync/sync0sync.cc                              |   4
42 files changed, 1460 insertions(+), 1238 deletions(-)
diff --git a/mysql-test/suite/encryption/r/innodb-bad-key-change2.result b/mysql-test/suite/encryption/r/innodb-bad-key-change2.result
index 543c3bc29b2..af1028f1331 100644
--- a/mysql-test/suite/encryption/r/innodb-bad-key-change2.result
+++ b/mysql-test/suite/encryption/r/innodb-bad-key-change2.result
@@ -7,12 +7,12 @@ call mtr.add_suppression("InnoDB: Table `test`\\.`t1` is corrupted");
call mtr.add_suppression("InnoDB: Cannot delete tablespace .* because it is not found in the tablespace memory cache");
call mtr.add_suppression("InnoDB: ALTER TABLE `test`\\.`t1` DISCARD TABLESPACE failed to find tablespace");
call mtr.add_suppression("\\[ERROR\\] InnoDB: Cannot decrypt \\[page id: space=");
-# restart: --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
+# restart: --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
SET GLOBAL innodb_file_per_table = ON;
CREATE TABLE t1 (pk INT PRIMARY KEY, f VARCHAR(8)) ENGINE=InnoDB
ENCRYPTED=YES ENCRYPTION_KEY_ID=4;
INSERT INTO t1 VALUES (1,'foo'),(2,'bar');
-# restart: --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys3.txt
+# restart: --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys3.txt
SELECT * FROM t1;
ERROR 42S02: Table 'test.t1' doesn't exist in engine
SHOW WARNINGS;
@@ -35,11 +35,11 @@ test.t1 check Error Table 'test.t1' doesn't exist in engine
test.t1 check status Operation failed
SHOW WARNINGS;
Level Code Message
-# restart: --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
+# restart: --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
FLUSH TABLES t1 FOR EXPORT;
backup: t1
UNLOCK TABLES;
-# restart: --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys3.txt
+# restart: --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys3.txt
ALTER TABLE t1 DISCARD TABLESPACE;
ERROR 42S02: Table 'test.t1' doesn't exist in engine
DROP TABLE t1;
@@ -47,7 +47,7 @@ CREATE TABLE t1 (pk INT PRIMARY KEY, f VARCHAR(8)) ENGINE=InnoDB
ENCRYPTED=YES ENCRYPTION_KEY_ID=4;
ALTER TABLE t1 DISCARD TABLESPACE;
restore: t1 .ibd and .cfg files
-# restart: --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
+# restart: --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
ALTER TABLE t1 DISCARD TABLESPACE;
Warnings:
Warning 1814 Tablespace has been discarded for table `t1`
@@ -61,7 +61,7 @@ t1 CREATE TABLE `t1` (
`f` varchar(8) DEFAULT NULL,
PRIMARY KEY (`pk`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 `ENCRYPTED`=YES `ENCRYPTION_KEY_ID`=4
-# restart: --innodb-encrypt-tables --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys3.txt
+# restart: --innodb-encrypt-tables --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys3.txt
RENAME TABLE t1 TO t1new;
ERROR HY000: Error on rename of './test/t1' to './test/t1new' (errno: 155 "The table does not exist in the storage engine")
ALTER TABLE t1 RENAME TO t1new;
diff --git a/mysql-test/suite/encryption/r/innodb-bad-key-change4.result b/mysql-test/suite/encryption/r/innodb-bad-key-change4.result
index e37ee8eb8cd..ad218457068 100644
--- a/mysql-test/suite/encryption/r/innodb-bad-key-change4.result
+++ b/mysql-test/suite/encryption/r/innodb-bad-key-change4.result
@@ -4,12 +4,12 @@ call mtr.add_suppression("failed to read or decrypt \\[page id: space=[1-9][0-9]
call mtr.add_suppression("Couldn't load plugins from 'file_key_management");
call mtr.add_suppression("InnoDB: Table `test`\\.`t1` is corrupted");
call mtr.add_suppression("\\[ERROR\\] InnoDB: Cannot decrypt \\[page id: space=");
-# restart: --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
+# restart: --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
SET GLOBAL innodb_file_per_table = ON;
CREATE TABLE t1 (pk INT PRIMARY KEY, f VARCHAR(8)) ENGINE=InnoDB
ENCRYPTED=YES ENCRYPTION_KEY_ID=4;
INSERT INTO t1 VALUES (1,'foo'),(2,'bar');
-# restart: --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys3.txt
+# restart: --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys3.txt
OPTIMIZE TABLE t1;
Table Op Msg_type Msg_text
test.t1 optimize Error Table 'test.t1' doesn't exist in engine
@@ -22,5 +22,5 @@ test.t1 check Error Table 'test.t1' doesn't exist in engine
test.t1 check status Operation failed
SHOW WARNINGS;
Level Code Message
-# restart: --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
+# restart: --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
DROP TABLE t1;
diff --git a/mysql-test/suite/encryption/r/innodb-encryption-disable.result b/mysql-test/suite/encryption/r/innodb-encryption-disable.result
index e49a6b759e9..bb4f02b9c39 100644
--- a/mysql-test/suite/encryption/r/innodb-encryption-disable.result
+++ b/mysql-test/suite/encryption/r/innodb-encryption-disable.result
@@ -4,7 +4,7 @@ call mtr.add_suppression("failed to read or decrypt \\[page id: space=[1-9][0-9]
call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=3\\] in file .*test.t[15].ibd looks corrupted; key_version=1");
call mtr.add_suppression("InnoDB: Table `test`\\.`t[15]` is corrupted");
call mtr.add_suppression("Couldn't load plugins from 'file_key_management");
-# restart: --innodb-encrypt-tables=ON --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
+# restart: --innodb-encrypt-tables=ON --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
create table t5 (
`intcol1` int(32) DEFAULT NULL,
`intcol2` int(32) DEFAULT NULL,
@@ -27,6 +27,6 @@ select * from t1;
ERROR 42S02: Table 'test.t1' doesn't exist in engine
select * from t5;
ERROR 42S02: Table 'test.t5' doesn't exist in engine
-# restart: --innodb-encrypt-tables=ON --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
+# restart: --innodb-encrypt-tables=ON --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
drop table t1;
drop table t5;
diff --git a/mysql-test/suite/encryption/r/innodb-redo-badkey.result b/mysql-test/suite/encryption/r/innodb-redo-badkey.result
index f90e7aeb780..5d75b89dfd6 100644
--- a/mysql-test/suite/encryption/r/innodb-redo-badkey.result
+++ b/mysql-test/suite/encryption/r/innodb-redo-badkey.result
@@ -7,6 +7,7 @@ call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed f
call mtr.add_suppression("InnoDB: Failed to read file '.*' at offset .*");
call mtr.add_suppression("InnoDB: Plugin initialization aborted");
call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
+call mtr.add_suppression("The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*' cannot be decrypted");
call mtr.add_suppression("\\[ERROR\\] InnoDB: Cannot decrypt \\[page id: space=");
# restart: --file-key-management-filename=MYSQL_TEST_DIR/std_data/keys2.txt
# Wait max 10 min for key encryption threads to encrypt all spaces
diff --git a/mysql-test/suite/encryption/r/innodb-remove-encryption.result b/mysql-test/suite/encryption/r/innodb-remove-encryption.result
index 08b31cb568d..af905ee7a0b 100644
--- a/mysql-test/suite/encryption/r/innodb-remove-encryption.result
+++ b/mysql-test/suite/encryption/r/innodb-remove-encryption.result
@@ -1,12 +1,12 @@
set global innodb_file_per_table=OFF;
-call mtr.add_suppression("mysqld: file-key-management-filename is not set");
+call mtr.add_suppression("file-key-management-filename is not set");
call mtr.add_suppression("Plugin 'file_key_management' init function returned error.");
call mtr.add_suppression("Plugin 'file_key_management' registration as a ENCRYPTION failed.");
flush tables;
create table t1(a int not null primary key, b char(200)) engine=innodb;
# Restart server with encryption
-# restart: --plugin-load-add=file_key_management.so --loose-file-key-management --loose-file-key-management-filename=MYSQL_TEST_DIR/std_data/keys.txt --file-key-management-encryption-algorithm=aes_cbc --innodb-encrypt-tables=ON --innodb-encryption-threads=4 --innodb-tablespaces-encryption --innodb-encryption-rotate-key-age=15
+# restart: --plugin-load-add=file_key_management --loose-file-key-management --loose-file-key-management-filename=MYSQL_TEST_DIR/std_data/keys.txt --file-key-management-encryption-algorithm=aes_cbc --innodb-encrypt-tables=ON --innodb-encryption-threads=4 --innodb-tablespaces-encryption --innodb-encryption-rotate-key-age=15
# Wait until encryption threads have encrypted all tablespaces
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
NAME
diff --git a/mysql-test/suite/encryption/t/innodb-bad-key-change2.test b/mysql-test/suite/encryption/t/innodb-bad-key-change2.test
index bdbf2327e5d..19399b1e891 100644
--- a/mysql-test/suite/encryption/t/innodb-bad-key-change2.test
+++ b/mysql-test/suite/encryption/t/innodb-bad-key-change2.test
@@ -20,7 +20,7 @@ call mtr.add_suppression("InnoDB: ALTER TABLE `test`\\.`t1` DISCARD TABLESPACE f
# for innodb_checksum_algorithm=full_crc32 only
call mtr.add_suppression("\\[ERROR\\] InnoDB: Cannot decrypt \\[page id: space=");
---let $restart_parameters=--plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
+--let $restart_parameters=--plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
--source include/restart_mysqld.inc
SET GLOBAL innodb_file_per_table = ON;
@@ -29,7 +29,7 @@ CREATE TABLE t1 (pk INT PRIMARY KEY, f VARCHAR(8)) ENGINE=InnoDB
ENCRYPTED=YES ENCRYPTION_KEY_ID=4;
INSERT INTO t1 VALUES (1,'foo'),(2,'bar');
---let $restart_parameters=--plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys3.txt
+--let $restart_parameters=--plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys3.txt
--source include/restart_mysqld.inc
--error ER_NO_SUCH_TABLE_IN_ENGINE
@@ -48,7 +48,7 @@ CHECK TABLE t1;
--replace_regex /key_id [1-9][0-9]*/\1 /
SHOW WARNINGS;
---let $restart_parameters=--plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
+--let $restart_parameters=--plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
--source include/restart_mysqld.inc
let MYSQLD_DATADIR =`SELECT @@datadir`;
@@ -60,7 +60,7 @@ ib_backup_tablespaces("test", "t1");
EOF
UNLOCK TABLES;
---let $restart_parameters=--plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys3.txt
+--let $restart_parameters=--plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys3.txt
--source include/restart_mysqld.inc
--error ER_NO_SUCH_TABLE_IN_ENGINE
@@ -78,7 +78,7 @@ ib_discard_tablespaces("test", "t1");
ib_restore_tablespaces("test", "t1");
EOF
---let $restart_parameters=--plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
+--let $restart_parameters=--plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
--source include/restart_mysqld.inc
ALTER TABLE t1 DISCARD TABLESPACE;
@@ -92,7 +92,7 @@ EOF
ALTER TABLE t1 IMPORT TABLESPACE;
SHOW CREATE TABLE t1;
---let $restart_parameters= --innodb-encrypt-tables --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys3.txt
+--let $restart_parameters= --innodb-encrypt-tables --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys3.txt
--source include/restart_mysqld.inc
--error ER_ERROR_ON_RENAME
diff --git a/mysql-test/suite/encryption/t/innodb-bad-key-change3.test b/mysql-test/suite/encryption/t/innodb-bad-key-change3.test
index dbd04748143..9c2918f3118 100644
--- a/mysql-test/suite/encryption/t/innodb-bad-key-change3.test
+++ b/mysql-test/suite/encryption/t/innodb-bad-key-change3.test
@@ -25,7 +25,7 @@ call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .* becau
4;770A8A65DA156D24EE2A093277530143
EOF
---exec echo "restart:--innodb-encrypt-tables --innodb-stats-persistent --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQLTEST_VARDIR/keys1.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--exec echo "restart:--innodb-encrypt-tables --innodb-stats-persistent --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQLTEST_VARDIR/keys1.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
--enable_reconnect
--source include/wait_until_connected_again.inc
@@ -62,7 +62,7 @@ ib_discard_tablespaces("test", "t1");
ib_restore_tablespaces("test", "t1");
EOF
---exec echo "restart:--innodb-encrypt-tables --innodb-stats-persistent --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQLTEST_VARDIR/keys2.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--exec echo "restart:--innodb-encrypt-tables --innodb-stats-persistent --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQLTEST_VARDIR/keys2.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
--enable_reconnect
--source include/wait_until_connected_again.inc
--source include/restart_mysqld.inc
@@ -89,7 +89,7 @@ SELECT * FROM t1;
4;770A8A65DA156D24EE2A093277530143
EOF
---exec echo "restart:--innodb-encrypt-tables --innodb-stats-persistent --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQLTEST_VARDIR/keys1.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--exec echo "restart:--innodb-encrypt-tables --innodb-stats-persistent --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQLTEST_VARDIR/keys1.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
--enable_reconnect
--source include/wait_until_connected_again.inc
DROP TABLE t1;
diff --git a/mysql-test/suite/encryption/t/innodb-bad-key-change4.test b/mysql-test/suite/encryption/t/innodb-bad-key-change4.test
index b341fc81d39..58517f14978 100644
--- a/mysql-test/suite/encryption/t/innodb-bad-key-change4.test
+++ b/mysql-test/suite/encryption/t/innodb-bad-key-change4.test
@@ -16,7 +16,7 @@ call mtr.add_suppression("InnoDB: Table `test`\\.`t1` is corrupted");
# for innodb_checksum_algorithm=full_crc32 only
call mtr.add_suppression("\\[ERROR\\] InnoDB: Cannot decrypt \\[page id: space=");
---let $restart_parameters=--plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
+--let $restart_parameters=--plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
--source include/restart_mysqld.inc
SET GLOBAL innodb_file_per_table = ON;
@@ -25,7 +25,7 @@ CREATE TABLE t1 (pk INT PRIMARY KEY, f VARCHAR(8)) ENGINE=InnoDB
ENCRYPTED=YES ENCRYPTION_KEY_ID=4;
INSERT INTO t1 VALUES (1,'foo'),(2,'bar');
---let $restart_parameters=--plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys3.txt
+--let $restart_parameters=--plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys3.txt
--source include/restart_mysqld.inc
--replace_regex /key_id [1-9][0-9]*/\1 /
@@ -38,7 +38,7 @@ CHECK TABLE t1;
--replace_regex /key_id [1-9][0-9]*/\1 /
SHOW WARNINGS;
---let $restart_parameters=--plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
+--let $restart_parameters=--plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
--source include/restart_mysqld.inc
DROP TABLE t1;
diff --git a/mysql-test/suite/encryption/t/innodb-encryption-disable.test b/mysql-test/suite/encryption/t/innodb-encryption-disable.test
index 4d0aa04bc56..2097a4ad184 100644
--- a/mysql-test/suite/encryption/t/innodb-encryption-disable.test
+++ b/mysql-test/suite/encryption/t/innodb-encryption-disable.test
@@ -16,7 +16,7 @@ call mtr.add_suppression("InnoDB: Table `test`\\.`t[15]` is corrupted");
# Suppression for builds where file_key_management plugin is linked statically
call mtr.add_suppression("Couldn't load plugins from 'file_key_management");
---let $restart_parameters=--innodb-encrypt-tables=ON --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
+--let $restart_parameters=--innodb-encrypt-tables=ON --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
--source include/restart_mysqld.inc
create table t5 (
@@ -48,7 +48,7 @@ select * from t1;
--error ER_NO_SUCH_TABLE_IN_ENGINE
select * from t5;
---let $restart_parameters=--innodb-encrypt-tables=ON --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
+--let $restart_parameters=--innodb-encrypt-tables=ON --plugin-load-add=file_key_management --file-key-management --file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt
--source include/restart_mysqld.inc
drop table t1;
diff --git a/mysql-test/suite/encryption/t/innodb-redo-badkey.test b/mysql-test/suite/encryption/t/innodb-redo-badkey.test
index 50f81deb462..0817abcf1f6 100644
--- a/mysql-test/suite/encryption/t/innodb-redo-badkey.test
+++ b/mysql-test/suite/encryption/t/innodb-redo-badkey.test
@@ -16,6 +16,7 @@ call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed f
call mtr.add_suppression("InnoDB: Failed to read file '.*' at offset .*");
call mtr.add_suppression("InnoDB: Plugin initialization aborted");
call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
+call mtr.add_suppression("The page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] in file '.*' cannot be decrypted");
# for innodb_checksum_algorithm=full_crc32 only
call mtr.add_suppression("\\[ERROR\\] InnoDB: Cannot decrypt \\[page id: space=");
diff --git a/mysql-test/suite/encryption/t/innodb-remove-encryption.test b/mysql-test/suite/encryption/t/innodb-remove-encryption.test
index 24e00a00a02..980d61b81ae 100644
--- a/mysql-test/suite/encryption/t/innodb-remove-encryption.test
+++ b/mysql-test/suite/encryption/t/innodb-remove-encryption.test
@@ -9,7 +9,7 @@
set global innodb_file_per_table=OFF;
-call mtr.add_suppression("mysqld: file-key-management-filename is not set");
+call mtr.add_suppression("file-key-management-filename is not set");
call mtr.add_suppression("Plugin 'file_key_management' init function returned error.");
call mtr.add_suppression("Plugin 'file_key_management' registration as a ENCRYPTION failed.");
flush tables;
@@ -18,7 +18,7 @@ create table t1(a int not null primary key, b char(200)) engine=innodb;
--echo
--echo # Restart server with encryption
--- let $restart_parameters=--plugin-load-add=$FILE_KEY_MANAGEMENT_SO --loose-file-key-management --loose-file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys.txt --file-key-management-encryption-algorithm=aes_cbc --innodb-encrypt-tables=ON --innodb-encryption-threads=4 --innodb-tablespaces-encryption --innodb-encryption-rotate-key-age=15
+-- let $restart_parameters=--plugin-load-add=file_key_management --loose-file-key-management --loose-file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys.txt --file-key-management-encryption-algorithm=aes_cbc --innodb-encrypt-tables=ON --innodb-encryption-threads=4 --innodb-tablespaces-encryption --innodb-encryption-rotate-key-age=15
-- source include/restart_mysqld.inc
--echo # Wait until encryption threads have encrypted all tablespaces
diff --git a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test b/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test
index 9c5acedb620..46aa6df2ee6 100644
--- a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test
+++ b/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test
@@ -56,7 +56,7 @@ DELETE FROM t1 WHERE a=1;
# This should be buffered, if innodb_change_buffering_debug = 1 is in effect.
INSERT INTO t1 VALUES(1,'X',1);
-SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace';
+SET GLOBAL DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace';
--exec echo "wait" > $_expect_file_name
--error 2013
# This should force a change buffer merge
diff --git a/mysql-test/suite/innodb/t/innodb_bug14147491.test b/mysql-test/suite/innodb/t/innodb_bug14147491.test
index 44b9d16ca78..b1822015a32 100644
--- a/mysql-test/suite/innodb/t/innodb_bug14147491.test
+++ b/mysql-test/suite/innodb/t/innodb_bug14147491.test
@@ -11,6 +11,7 @@
call mtr.add_suppression("InnoDB: Table `test`\\.`t1` is corrupted\\. Please drop the table and recreate\\.");
call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace test/t1 page");
call mtr.add_suppression("InnoDB: We detected index corruption in an InnoDB type table");
+call mtr.add_suppression("InnoDB: Failed to read file '.*test.t1\\.ibd' at offset 4: Page read from tablespace is corrupted");
call mtr.add_suppression("Index for table 't1' is corrupt; try to repair it");
--enable_query_log
diff --git a/mysql-test/suite/mariabackup/unencrypted_page_compressed.result b/mysql-test/suite/mariabackup/unencrypted_page_compressed.result
index 7edf66b027a..c008e6bf965 100644
--- a/mysql-test/suite/mariabackup/unencrypted_page_compressed.result
+++ b/mysql-test/suite/mariabackup/unencrypted_page_compressed.result
@@ -1,4 +1,5 @@
call mtr.add_suppression("InnoDB: Table `test`.`t1` has an unreadable root page");
+call mtr.add_suppression("InnoDB: Failed to read file '.*test.t1\\.ibd' at offset 3");
SET GLOBAL innodb_purge_rseg_truncate_frequency = 1;
CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b TEXT, c char(200)) ENGINE=InnoDB page_compressed=yes;
insert into t1(b, c) values("mariadb", "mariabackup");
diff --git a/mysql-test/suite/mariabackup/unencrypted_page_compressed.test b/mysql-test/suite/mariabackup/unencrypted_page_compressed.test
index ce5c94a1c57..9b9f05ab41b 100644
--- a/mysql-test/suite/mariabackup/unencrypted_page_compressed.test
+++ b/mysql-test/suite/mariabackup/unencrypted_page_compressed.test
@@ -1,4 +1,5 @@
call mtr.add_suppression("InnoDB: Table `test`.`t1` has an unreadable root page");
+call mtr.add_suppression("InnoDB: Failed to read file '.*test.t1\\.ibd' at offset 3");
SET GLOBAL innodb_purge_rseg_truncate_frequency = 1;
CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b TEXT, c char(200)) ENGINE=InnoDB page_compressed=yes;
insert into t1(b, c) values("mariadb", "mariabackup");
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 23e5d477d9c..158157d9184 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -7044,44 +7044,50 @@ btr_blob_get_next_page_no(
return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO));
}
-/*******************************************************************//**
-Deallocate a buffer block that was reserved for a BLOB part. */
-static
+/** Deallocate a buffer block that was reserved for a BLOB part.
+@param[in] block buffer block
+@param[in] all flag whether remove the compressed page
+ if there is one
+@param[in] mtr mini-transaction to commit */
void
btr_blob_free(
-/*==========*/
- buf_block_t* block, /*!< in: buffer block */
- ibool all, /*!< in: TRUE=remove also the compressed page
- if there is one */
- mtr_t* mtr) /*!< in: mini-transaction to commit */
+ buf_block_t* block,
+ bool all,
+ mtr_t* mtr)
{
- ulint space = block->page.id.space();
- ulint page_no = block->page.id.page_no();
+ page_id_t page_id(block->page.id.space(), block->page.id.page_no());
+ bool freed = false;
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
mtr_commit(mtr);
mutex_enter(&buf_pool.mutex);
+ buf_page_mutex_enter(block);
/* Only free the block if it is still allocated to
the same file page. */
- if (buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE
- && block->page.id.space() == space
- && block->page.id.page_no() == page_no) {
+ if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
+ && page_id == block->page.id) {
+
+ freed = buf_LRU_free_page(&block->page, all);
+
+ if (!freed && all && block->page.zip.data
+ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
+ && page_id == block->page.id) {
- if (!buf_LRU_free_page(&block->page, all)
- && all && block->page.zip.data) {
/* Attempt to deallocate the uncompressed page
		if the whole block cannot be deallocated. */
- buf_LRU_free_page(&block->page, false);
+ freed = buf_LRU_free_page(&block->page, false);
}
}
- mutex_exit(&buf_pool.mutex);
+ if (!freed) {
+ mutex_exit(&buf_pool.mutex);
+ buf_page_mutex_exit(block);
+ }
}
/** Helper class used while writing blob pages, during insert or update. */
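
A reading aid for the btr_blob_free() hunk above: the function now takes both buf_pool.mutex and the block mutex, re-checks under them that the block still maps to the same page id, and unlocks only on the failure path; this WIP code evidently expects a successful buf_LRU_free_page() to release both latches itself. A self-contained sketch of that contract, with std::mutex stand-ins and a hypothetical free_page() in place of buf_LRU_free_page():

// Sketch: the "unlock only if not freed" contract seen above.
#include <mutex>

// Stand-in for buf_LRU_free_page(); assumed to release both latches
// when (and only when) it returns true.
static bool free_page(std::mutex&, std::mutex&) { return false; }

static void blob_free_model(std::mutex& pool_mutex, std::mutex& block_mutex)
{
  pool_mutex.lock();
  block_mutex.lock();
  const bool freed = free_page(pool_mutex, block_mutex);
  if (!freed) {
    // on success the callee already unlocked; only the failure path does
    pool_mutex.unlock();
    block_mutex.unlock();
  }
}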
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc
index c01752a0051..d576b3911f3 100644
--- a/storage/innobase/btr/btr0sea.cc
+++ b/storage/innobase/btr/btr0sea.cc
@@ -408,12 +408,8 @@ void btr_search_disable(bool need_mutex)
/** Enable the adaptive hash search system. */
void btr_search_enable()
{
- mutex_enter(&buf_pool.mutex);
- if (srv_buf_pool_old_size != srv_buf_pool_size) {
- mutex_exit(&buf_pool.mutex);
+ if (srv_buf_pool_size_changing.load(std::memory_order_relaxed))
return;
- }
- mutex_exit(&buf_pool.mutex);
btr_search_x_lock_all();
btr_search_enabled = true;
@@ -2064,7 +2060,6 @@ btr_search_hash_table_validate(ulint hash_table_id)
rec_offs_init(offsets_);
btr_search_x_lock_all();
- mutex_enter(&buf_pool.mutex);
cell_count = hash_get_n_cells(
btr_search_sys->hash_tables[hash_table_id]);
@@ -2074,13 +2069,9 @@ btr_search_hash_table_validate(ulint hash_table_id)
give other queries a chance to run. */
if ((i != 0) && ((i % chunk_size) == 0)) {
- mutex_exit(&buf_pool.mutex);
btr_search_x_unlock_all();
-
os_thread_yield();
-
btr_search_x_lock_all();
- mutex_enter(&buf_pool.mutex);
ulint curr_cell_count = hash_get_n_cells(
btr_search_sys->hash_tables[hash_table_id]);
@@ -2099,11 +2090,14 @@ btr_search_hash_table_validate(ulint hash_table_id)
btr_search_sys->hash_tables[hash_table_id], i)->node;
for (; node != NULL; node = node->next) {
- const buf_block_t* block
+ buf_block_t* block
= buf_pool.block_from_ahi((byte*) node->data);
const buf_block_t* hash_block;
index_id_t page_index_id;
+ /* Prevent BUF_BLOCK_FILE_PAGE -> BUF_BLOCK_REMOVE_HASH
+ transition until we lock the block mutex */
+ mutex_enter(&buf_pool.mutex);
if (UNIV_LIKELY(buf_block_get_state(block)
== BUF_BLOCK_FILE_PAGE)) {
@@ -2122,12 +2116,12 @@ btr_search_hash_table_validate(ulint hash_table_id)
ut_a(hash_block == block);
} else {
/* When a block is being freed,
- buf_LRU_search_and_free_block() first
+ buf_LRU_free_page() first
removes the block from
buf_pool.page_hash by calling
buf_LRU_block_remove_hashed_page().
After that, it invokes
- btr_search_drop_page_hash_index() to
+ buf_LRU_block_remove_hashed() to
remove the block from
btr_search_sys->hash_tables[i]. */
@@ -2135,6 +2129,9 @@ btr_search_hash_table_validate(ulint hash_table_id)
== BUF_BLOCK_REMOVE_HASH);
}
+ mutex_enter(&block->mutex);
+ mutex_exit(&buf_pool.mutex);
+
ut_a(!dict_index_is_ibuf(block->index));
ut_ad(block->page.id.space()
== block->index->table->space_id);
@@ -2179,6 +2176,8 @@ btr_search_hash_table_validate(ulint hash_table_id)
(ulong) block->curr_left_side);
ut_ad(0);
}
+
+ mutex_exit(&block->mutex);
}
}
@@ -2186,13 +2185,9 @@ btr_search_hash_table_validate(ulint hash_table_id)
/* We release search latches every once in a while to
give other queries a chance to run. */
if (i != 0) {
- mutex_exit(&buf_pool.mutex);
btr_search_x_unlock_all();
-
os_thread_yield();
-
btr_search_x_lock_all();
- mutex_enter(&buf_pool.mutex);
ulint curr_cell_count = hash_get_n_cells(
btr_search_sys->hash_tables[hash_table_id]);
@@ -2215,7 +2210,6 @@ btr_search_hash_table_validate(ulint hash_table_id)
}
}
- mutex_exit(&buf_pool.mutex);
btr_search_x_unlock_all();
if (UNIV_LIKELY_NULL(heap)) {
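
The validation loop above no longer pins buf_pool.mutex across the whole hash-table scan; instead it uses hand-over-hand latching per node: the pool mutex freezes the BUF_BLOCK_FILE_PAGE -> BUF_BLOCK_REMOVE_HASH transition just long enough to take the block's own mutex, and is released before the block is inspected. A minimal sketch (std::mutex stand-ins, names illustrative):

// Sketch: hand-over-hand latching as in btr_search_hash_table_validate().
#include <mutex>

static void inspect_block_model(std::mutex& pool_mutex,
                                std::mutex& block_mutex)
{
  pool_mutex.lock();   // block state cannot change while this is held
  block_mutex.lock();  // pin this one block
  pool_mutex.unlock(); // other blocks may change from here on
  // ... validate the block's adaptive hash index fields ...
  block_mutex.unlock();
}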
diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc
index a78974992b0..a567565f819 100644
--- a/storage/innobase/buf/buf0buddy.cc
+++ b/storage/innobase/buf/buf0buddy.cc
@@ -185,19 +185,20 @@ struct CheckZipFree {
static void buf_buddy_list_validate(ulint i)
{
ut_list_validate(buf_pool.zip_free[i], CheckZipFree(i));
+ ut_ad(mutex_own(&buf_pool.zip_free_mutex));
}
-/**********************************************************************//**
-Debug function to validate that a buffer is indeed free i.e.: in the
+/** Debug function to validate that a buffer is indeed free i.e.: in the
zip_free[].
@param[in] buf block to check
@param[in] i index of buf_pool.zip_free[]
@return true if free */
static bool buf_buddy_check_free(const buf_buddy_free_t* buf, ulint i)
{
const ulint size = BUF_BUDDY_LOW << i;
- ut_ad(mutex_own(&buf_pool.mutex));
+ ut_ad(mutex_own(&buf_pool.zip_free_mutex));
ut_ad(!ut_align_offset(buf, size));
ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
@@ -266,7 +267,7 @@ UNIV_INLINE
void
buf_buddy_add_to_free(buf_buddy_free_t* buf, ulint i)
{
- ut_ad(mutex_own(&buf_pool.mutex));
+ ut_ad(mutex_own(&buf_pool.zip_free_mutex));
ut_ad(buf_pool.zip_free[i].start != buf);
buf_buddy_stamp_free(buf, i);
@@ -281,7 +282,7 @@ UNIV_INLINE
void
buf_buddy_remove_from_free(buf_buddy_free_t* buf, ulint i)
{
- ut_ad(mutex_own(&buf_pool.mutex));
+ ut_ad(mutex_own(&buf_pool.zip_free_mutex));
ut_ad(buf_buddy_check_free(buf, i));
UT_LIST_REMOVE(buf_pool.zip_free[i], buf);
@@ -295,20 +296,16 @@ static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i)
{
buf_buddy_free_t* buf;
- ut_ad(mutex_own(&buf_pool.mutex));
ut_a(i < BUF_BUDDY_SIZES);
ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
+ mutex_enter(&buf_pool.zip_free_mutex);
ut_d(buf_buddy_list_validate(i));
buf = UT_LIST_GET_FIRST(buf_pool.zip_free[i]);
- if (buf_pool.curr_size < buf_pool.old_size
- && UT_LIST_GET_LEN(buf_pool.withdraw)
- < buf_pool.withdraw_target) {
-
- while (buf != NULL
- && buf_pool.will_be_withdrawn(
+ if (buf_pool.withdraw_depth()) {
+ while (buf && buf_pool.will_be_withdrawn(
reinterpret_cast<byte*>(buf))) {
/* This should be withdrawn, not to be allocated */
buf = UT_LIST_GET_NEXT(list, buf);
@@ -318,22 +315,29 @@ static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i)
if (buf) {
buf_buddy_remove_from_free(buf, i);
} else if (i + 1 < BUF_BUDDY_SIZES) {
+ mutex_exit(&buf_pool.zip_free_mutex);
/* Attempt to split. */
buf = buf_buddy_alloc_zip(i + 1);
- if (buf) {
- buf_buddy_free_t* buddy =
- reinterpret_cast<buf_buddy_free_t*>(
- buf->stamp.bytes
- + (BUF_BUDDY_LOW << i));
- ut_ad(!buf_pool.contains_zip(buddy));
- buf_buddy_add_to_free(buddy, i);
+ if (!buf) {
+ return nullptr;
}
+
+ buf_buddy_free_t* buddy = reinterpret_cast<buf_buddy_free_t*>(
+ buf->stamp.bytes + (BUF_BUDDY_LOW << i));
+ mutex_enter(&buf_pool.zip_free_mutex);
+ ut_ad(!buf_pool.contains_zip(buddy));
+ buf_buddy_add_to_free(buddy, i);
+ mutex_exit(&buf_pool.zip_free_mutex);
+ goto trash;
}
+ mutex_exit(&buf_pool.zip_free_mutex);
+
if (buf) {
+trash:
/* Trash the page other than the BUF_BUDDY_STAMP_NONFREE. */
- UNIV_MEM_TRASH((void*) buf, (int) ~i, BUF_BUDDY_STAMP_OFFSET);
+ UNIV_MEM_TRASH((void*) buf, (int) ~i, BUF_BUDDY_STAMP_OFFSET);
UNIV_MEM_TRASH(BUF_BUDDY_STAMP_OFFSET + 4
+ buf->stamp.bytes, (int) ~i,
(BUF_BUDDY_LOW << i)
@@ -354,12 +358,12 @@ buf_buddy_block_free(void* buf)
{
const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
buf_page_t* bpage;
- buf_block_t* block;
- ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(!mutex_own(&buf_pool.zip_mutex));
ut_a(!ut_align_offset(buf, srv_page_size));
+ mutex_enter(&buf_pool.zip_hash_mutex);
+
HASH_SEARCH(hash, buf_pool.zip_hash, fold, buf_page_t*, bpage,
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
&& bpage->in_zip_hash && !bpage->in_page_hash),
@@ -371,28 +375,23 @@ buf_buddy_block_free(void* buf)
ut_d(bpage->in_zip_hash = FALSE);
HASH_DELETE(buf_page_t, hash, buf_pool.zip_hash, fold, bpage);
- ut_d(memset(buf, 0, srv_page_size));
- UNIV_MEM_INVALID(buf, srv_page_size);
-
- block = (buf_block_t*) bpage;
- buf_page_mutex_enter(block);
- buf_LRU_block_free_non_file_page(block);
- buf_page_mutex_exit(block);
-
ut_ad(buf_pool.buddy_n_frames > 0);
ut_d(buf_pool.buddy_n_frames--);
+
+ mutex_exit(&buf_pool.zip_hash_mutex);
+
+ buf_LRU_block_free_non_file_page(reinterpret_cast<buf_block_t *>(bpage));
}
-/**********************************************************************//**
-Allocate a buffer block to the buddy allocator. */
+/** Allocate a buffer block to the buddy allocator.
+@param[in] block buffer frame to allocate */
static
void
buf_buddy_block_register(
-/*=====================*/
- buf_block_t* block) /*!< in: buffer frame to allocate */
+ buf_block_t* block)
{
const ulint fold = BUF_POOL_ZIP_FOLD(block);
- ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(!mutex_own(&buf_pool.zip_mutex));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
@@ -404,9 +403,12 @@ buf_buddy_block_register(
ut_ad(!block->page.in_page_hash);
ut_ad(!block->page.in_zip_hash);
ut_d(block->page.in_zip_hash = TRUE);
+
+ mutex_enter(&buf_pool.zip_hash_mutex);
HASH_INSERT(buf_page_t, hash, buf_pool.zip_hash, fold, &block->page);
ut_d(buf_pool.buddy_n_frames++);
+ mutex_exit(&buf_pool.zip_hash_mutex);
}
/** Allocate a block from a bigger object.
@@ -419,6 +421,7 @@ void*
buf_buddy_alloc_from(void* buf, ulint i, ulint j)
{
ulint offs = BUF_BUDDY_LOW << j;
+ ut_ad(mutex_own(&buf_pool.zip_free_mutex));
ut_ad(j <= BUF_BUDDY_SIZES);
ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
ut_ad(j >= i);
@@ -440,15 +443,14 @@ buf_buddy_alloc_from(void* buf, ulint i, ulint j)
return(buf);
}
+#include <buf0buf.h>
/** Allocate a block.
@param[in] i index of buf_pool.zip_free[] or BUF_BUDDY_SIZES
-@param[out] lru whether buf_pool.mutex was temporarily released
@return allocated block, never NULL */
-byte *buf_buddy_alloc_low(ulint i, bool *lru)
+byte *buf_buddy_alloc_low(ulint i)
{
buf_block_t* block;
- ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(!mutex_own(&buf_pool.zip_mutex));
ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
@@ -469,18 +471,16 @@ byte *buf_buddy_alloc_low(ulint i, bool *lru)
}
/* Try replacing an uncompressed page in the buffer pool. */
- mutex_exit(&buf_pool.mutex);
block = buf_LRU_get_free_block();
- mutex_enter(&buf_pool.mutex);
- if (lru) {
- *lru = true;
- }
+
alloc_big:
buf_buddy_block_register(block);
+ mutex_enter(&buf_pool.zip_free_mutex);
block = (buf_block_t*) buf_buddy_alloc_from(
block->frame, i, BUF_BUDDY_SIZES);
+ mutex_exit(&buf_pool.zip_free_mutex);
func_exit:
buf_pool.buddy_stat[i].used++;
@@ -501,7 +501,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
ulint space;
ulint offset;
- ut_ad(mutex_own(&buf_pool.mutex));
+ ut_ad(mutex_own(&buf_pool.zip_free_mutex));
ut_ad(!mutex_own(&buf_pool.zip_mutex));
ut_ad(!ut_align_offset(src, size));
ut_ad(!ut_align_offset(dst, size));
@@ -523,6 +523,8 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
const page_id_t page_id(space, offset);
rw_lock_t* hash_lock = buf_page_hash_lock_get(page_id);
+ mutex_exit(&buf_pool.zip_free_mutex);
+
rw_lock_x_lock(hash_lock);
@@ -537,6 +539,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
rw_lock_x_unlock(hash_lock);
if (!force || space != 0 || offset != 0) {
+ mutex_enter(&buf_pool.zip_free_mutex);
return(false);
}
@@ -554,6 +557,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
}
if (bpage == NULL) {
+ mutex_enter(&buf_pool.zip_free_mutex);
return(false);
}
}
@@ -566,6 +570,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
rw_lock_x_unlock(hash_lock);
+ mutex_enter(&buf_pool.zip_free_mutex);
return(false);
}
@@ -577,6 +582,8 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
mutex_enter(block_mutex);
+ mutex_enter(&buf_pool.zip_free_mutex);
+
if (buf_page_can_relocate(bpage)) {
/* Relocate the compressed page. */
const ulonglong ns = my_interval_timer();
@@ -588,14 +595,16 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
rw_lock_x_unlock(hash_lock);
+ buf_buddy_stat_t* buddy_stat = &buf_pool.buddy_stat[i];
+ buddy_stat->relocated++;
+ buddy_stat->relocated_usec += (my_interval_timer() - ns) / 1000;
+
mutex_exit(block_mutex);
buf_buddy_mem_invalid(
reinterpret_cast<buf_buddy_free_t*>(src), i);
- buf_buddy_stat_t* buddy_stat = &buf_pool.buddy_stat[i];
- buddy_stat->relocated++;
- buddy_stat->relocated_usec+= (my_interval_timer() - ns) / 1000;
+
return(true);
}
@@ -607,23 +616,25 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
/** Deallocate a block.
@param[in] buf block to be freed, must not be pointed to
- by the buffer pool
+ by the buffer pool
@param[in] i index of buf_pool.zip_free[], or BUF_BUDDY_SIZES */
void buf_buddy_free_low(void* buf, ulint i)
{
buf_buddy_free_t* buddy;
- ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(!mutex_own(&buf_pool.zip_mutex));
ut_ad(i <= BUF_BUDDY_SIZES);
ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
- ut_ad(buf_pool.buddy_stat[i].used > 0);
+ mutex_enter(&buf_pool.zip_free_mutex);
+
+ ut_ad(buf_pool.buddy_stat[i].used > 0);
buf_pool.buddy_stat[i].used--;
recombine:
UNIV_MEM_ALLOC(buf, BUF_BUDDY_LOW << i);
if (i == BUF_BUDDY_SIZES) {
+ mutex_exit(&buf_pool.zip_free_mutex);
buf_buddy_block_free(buf);
return;
}
@@ -688,6 +699,7 @@ buddy_is_free:
func_exit:
/* Free the block to the buddy list. */
buf_buddy_add_to_free(reinterpret_cast<buf_buddy_free_t*>(buf), i);
+ mutex_exit(&buf_pool.zip_free_mutex);
}
/** Try to reallocate a block.
@@ -700,7 +712,6 @@ buf_buddy_realloc(void* buf, ulint size)
buf_block_t* block = NULL;
ulint i = buf_buddy_get_slot(size);
- ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(!mutex_own(&buf_pool.zip_mutex));
ut_ad(i <= BUF_BUDDY_SIZES);
ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
@@ -720,29 +731,35 @@ buf_buddy_realloc(void* buf, ulint size)
buf_buddy_block_register(block);
+ mutex_enter(&buf_pool.zip_free_mutex);
block = reinterpret_cast<buf_block_t*>(
buf_buddy_alloc_from(
block->frame, i, BUF_BUDDY_SIZES));
+ } else {
+ mutex_enter(&buf_pool.zip_free_mutex);
}
-
+ ut_ad(mutex_own(&buf_pool.zip_free_mutex));
buf_pool.buddy_stat[i].used++;
/* Try to relocate the buddy of buf to the free block. */
if (buf_buddy_relocate(buf, block, i, true)) {
+ mutex_exit(&buf_pool.zip_free_mutex);
/* succeeded */
buf_buddy_free_low(buf, i);
- } else {
- /* failed */
- buf_buddy_free_low(block, i);
+ return(true);
}
- return(true); /* free_list was enough */
+ /* failed */
+ mutex_exit(&buf_pool.zip_free_mutex);
+ buf_buddy_free_low(block, i);
+
+ return(false);
}
/** Combine all pairs of free buddies. */
void buf_buddy_condense_free()
{
- ut_ad(mutex_own(&buf_pool.mutex));
+ mutex_enter(&buf_pool.zip_free_mutex);
ut_ad(buf_pool.curr_size < buf_pool.old_size);
for (ulint i = 0; i < UT_ARR_SIZE(buf_pool.zip_free); ++i) {
@@ -794,4 +811,5 @@ void buf_buddy_condense_free()
buf = next;
}
}
+ mutex_exit(&buf_pool.zip_free_mutex);
}
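
The buf0buddy.cc hunks move the zip_free[] lists under the new buf_pool.zip_free_mutex, and buf_buddy_alloc_zip() now drops that mutex around its recursive split so the latch is never held re-entrantly, re-taking it only to publish the split-off buddy. A toy model of that locking shape, with plain vectors in place of the intrusive buf_buddy_free_t lists (everything here is illustrative):

// Toy model: the release-recurse-relock split in buf_buddy_alloc_zip().
#include <cstddef>
#include <mutex>
#include <vector>

static std::mutex zip_free_mutex;                    // stand-in member
static std::vector<std::vector<char*>> zip_free(16); // toy free lists

static char* alloc_zip_model(std::size_t i)
{
  std::unique_lock<std::mutex> lk(zip_free_mutex);
  if (!zip_free[i].empty()) {              // fast path: free-list hit
    char* buf = zip_free[i].back();
    zip_free[i].pop_back();
    return buf;
  }
  if (i + 1 >= zip_free.size())
    return nullptr;
  lk.unlock();                             // never recurse while locked
  char* buf = alloc_zip_model(i + 1);      // split a bigger block
  if (!buf)
    return nullptr;
  lk.lock();                               // publish the buddy half
  zip_free[i].push_back(buf + (std::size_t(1) << i));
  return buf;
}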
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 1e181872e87..fb911fc29f5 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -126,24 +126,15 @@ in the file along with the file page, resides in the control block.
Buffer pool struct
------------------
-The buffer buf_pool contains a single mutex which protects all the
+The buffer buf_pool contains several mutexes which protect all the
control data structures of the buf_pool. The content of a buffer frame is
protected by a separate read-write lock in its control block, though.
-These locks can be locked and unlocked without owning the buf_pool.mutex.
-The OS events in the buf_pool struct can be waited for without owning the
-buf_pool.mutex.
-
-The buf_pool.mutex is a hot-spot in main memory, causing a lot of
-memory bus traffic on multiprocessor systems when processors
-alternately access the mutex. On our Pentium, the mutex is accessed
-maybe every 10 microseconds. We gave up the solution to have mutexes
-for each control block, for instance, because it seemed to be
-complicated.
-
-A solution to reduce mutex contention of the buf_pool.mutex is to
-create a separate mutex for the page hash table. On Pentium,
-accessing the hash table takes 2 microseconds, about half
-of the total buf_pool.mutex hold time.
+
+buf_pool.mutex protects the buf_pool.LRU list and buf_page_t::state;
+buf_pool.free_list_mutex protects the free_list and withdraw list;
+buf_pool.flush_state_mutex protects the flush-state data structures;
+buf_pool.zip_free_mutex protects the zip_free arrays;
+buf_pool.zip_hash_mutex protects the zip_hash hash table and the in_zip_hash flag.
Control blocks
--------------
@@ -158,16 +149,6 @@ The buffer frames have to be aligned so that the start memory
address of a frame is divisible by the universal page size, which
is a power of two.
-We intend to make the buffer buf_pool size on-line reconfigurable,
-that is, the buf_pool size can be changed without closing the database.
-Then the database administarator may adjust it to be bigger
-at night, for example. The control block array must
-contain enough control blocks for the maximum buffer buf_pool size
-which is used in the particular database.
-If the buf_pool size is cut, we exploit the virtual memory mechanism of
-the OS, and just refrain from using frames at high addresses. Then the OS
-can swap them to disk.
-
The control blocks containing file pages are put to a hash table
according to the file address of the page.
We could speed up the access to an individual page by using
@@ -1522,8 +1503,7 @@ bool buf_pool_t::create()
n_chunks= srv_buf_pool_size / srv_buf_pool_chunk_unit;
const size_t chunk_size= srv_buf_pool_chunk_unit;
- chunks= static_cast<buf_pool_t::chunk_t*>(ut_zalloc_nokey(n_chunks *
- sizeof *chunks));
+ chunks= static_cast<chunk_t*>(ut_zalloc_nokey(n_chunks * sizeof *chunks));
UT_LIST_INIT(free, &buf_page_t::list);
curr_size= 0;
auto chunk= chunks;
@@ -1555,8 +1535,12 @@ bool buf_pool_t::create()
while (++chunk < chunks + n_chunks);
ut_ad(is_initialised());
- mutex_create(LATCH_ID_BUF_POOL, &mutex);
+ mutex_create(LATCH_ID_BUF_POOL_LRU_LIST, &mutex);
+ mutex_create(LATCH_ID_BUF_POOL_FREE_LIST, &free_list_mutex);
+ mutex_create(LATCH_ID_BUF_POOL_ZIP_FREE, &zip_free_mutex);
+ mutex_create(LATCH_ID_BUF_POOL_ZIP_HASH, &zip_hash_mutex);
mutex_create(LATCH_ID_BUF_POOL_ZIP, &zip_mutex);
+ mutex_create(LATCH_ID_BUF_POOL_FLUSH_STATE, &flush_state_mutex);
UT_LIST_INIT(LRU, &buf_page_t::LRU);
UT_LIST_INIT(withdraw, &buf_page_t::list);
@@ -1610,14 +1594,9 @@ bool buf_pool_t::create()
io_buf.create((srv_n_read_io_threads + srv_n_write_io_threads) *
OS_AIO_N_PENDING_IOS_PER_THREAD);
- /* FIXME: remove some of these variables */
- srv_buf_pool_curr_size= curr_pool_size;
- srv_buf_pool_old_size= srv_buf_pool_size;
- srv_buf_pool_base_size= srv_buf_pool_size;
-
chunk_t::map_ref= chunk_t::map_reg;
buf_LRU_old_ratio_update(100 * 3 / 8, false);
- btr_search_sys_create(srv_buf_pool_curr_size / sizeof(void*) / 64);
+ btr_search_sys_create(curr_pool_size / sizeof(void*) / 64);
ut_ad(is_initialised());
return false;
}
@@ -1630,6 +1609,10 @@ void buf_pool_t::close()
return;
mutex_free(&mutex);
+ mutex_free(&free_list_mutex);
+ mutex_free(&zip_free_mutex);
+ mutex_free(&zip_hash_mutex);
+ mutex_free(&flush_state_mutex);
mutex_free(&zip_mutex);
mutex_free(&flush_list_mutex);
@@ -1807,21 +1790,18 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
new_block->page.id.page_no()));
rw_lock_x_unlock(hash_lock);
+ mutex_exit(&block->mutex);
mutex_exit(&new_block->mutex);
/* free block */
buf_block_set_state(block, BUF_BLOCK_MEMORY);
buf_LRU_block_free_non_file_page(block);
-
- mutex_exit(&block->mutex);
} else {
rw_lock_x_unlock(hash_lock);
mutex_exit(&block->mutex);
/* free new_block */
- mutex_enter(&new_block->mutex);
buf_LRU_block_free_non_file_page(new_block);
- mutex_exit(&new_block->mutex);
}
return(true); /* free_list was enough */
@@ -1858,21 +1838,24 @@ inline bool buf_pool_t::withdraw_blocks()
{
buf_block_t* block;
ulint loop_count = 0;
+ ulint lru_len;
ib::info() << "start to withdraw the last "
<< withdraw_target << " blocks";
/* Minimize zip_free[i] lists */
- mutex_enter(&mutex);
buf_buddy_condense_free();
+
+ mutex_enter(&mutex);
+ lru_len = UT_LIST_GET_LEN(LRU);
mutex_exit(&mutex);
+ mutex_enter(&free_list_mutex);
while (UT_LIST_GET_LEN(withdraw) < withdraw_target) {
/* try to withdraw from free_list */
ulint count1 = 0;
- mutex_enter(&mutex);
block = reinterpret_cast<buf_block_t*>(
UT_LIST_GET_FIRST(free));
while (block != NULL
@@ -1887,7 +1870,7 @@ inline bool buf_pool_t::withdraw_blocks()
UT_LIST_GET_NEXT(
list, &block->page));
- if (buf_pool.will_be_withdrawn(block->page)) {
+ if (will_be_withdrawn(block->page)) {
/* This should be withdrawn */
UT_LIST_REMOVE(free, &block->page);
UT_LIST_ADD_LAST(withdraw, &block->page);
@@ -1897,7 +1880,6 @@ inline bool buf_pool_t::withdraw_blocks()
block = next_block;
}
- mutex_exit(&mutex);
/* reserve free_list length */
if (UT_LIST_GET_LEN(withdraw) < withdraw_target) {
@@ -1905,15 +1887,12 @@ inline bool buf_pool_t::withdraw_blocks()
flush_counters_t n;
/* cap scan_depth with current LRU size. */
- mutex_enter(&mutex);
- scan_depth = UT_LIST_GET_LEN(LRU);
- mutex_exit(&mutex);
-
scan_depth = ut_min(
ut_max(withdraw_target
- UT_LIST_GET_LEN(withdraw),
static_cast<ulint>(srv_LRU_scan_depth)),
- scan_depth);
+ lru_len);
+ mutex_exit(&free_list_mutex);
buf_flush_do_batch(BUF_FLUSH_LRU, scan_depth, 0, &n);
buf_flush_wait_batch_end(BUF_FLUSH_LRU);
@@ -1925,6 +1904,9 @@ inline bool buf_pool_t::withdraw_blocks()
MONITOR_LRU_BATCH_FLUSH_PAGES,
n.flushed);
}
+ } else {
+
+ mutex_exit(&free_list_mutex);
}
/* relocate blocks/buddies in withdrawn area */
@@ -1946,33 +1928,27 @@ inline bool buf_pool_t::withdraw_blocks()
&& will_be_withdrawn(bpage->zip.data)
&& buf_page_can_relocate(bpage)) {
mutex_exit(block_mutex);
- buf_pool_mutex_exit_forbid();
if (!buf_buddy_realloc(
bpage->zip.data,
page_zip_get_size(&bpage->zip))) {
/* failed to allocate block */
- buf_pool_mutex_exit_allow();
break;
}
- buf_pool_mutex_exit_allow();
mutex_enter(block_mutex);
count2++;
}
if (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE
- && buf_pool.will_be_withdrawn(*bpage)) {
+ && will_be_withdrawn(*bpage)) {
if (buf_page_can_relocate(bpage)) {
mutex_exit(block_mutex);
- buf_pool_mutex_exit_forbid();
if (!realloc(
reinterpret_cast<buf_block_t*>(
bpage))) {
/* failed to allocate block */
- buf_pool_mutex_exit_allow();
break;
}
- buf_pool_mutex_exit_allow();
count2++;
} else {
mutex_exit(block_mutex);
@@ -1985,8 +1961,16 @@ inline bool buf_pool_t::withdraw_blocks()
bpage = next_bpage;
}
+
mutex_exit(&mutex);
+ if (++loop_count >= 10) {
+ ib::info() << "will retry to withdraw later";
+ return true;
+ }
+
+ mutex_enter(&free_list_mutex);
+
buf_resize_status(
"withdrawing blocks. (" ULINTPF "/" ULINTPF ")",
UT_LIST_GET_LEN(withdraw),
@@ -1997,17 +1981,8 @@ inline bool buf_pool_t::withdraw_blocks()
<< " Tried to relocate " << count2 << " pages ("
<< UT_LIST_GET_LEN(withdraw) << "/"
<< withdraw_target << ")";
-
- if (++loop_count >= 10) {
- /* give up for now.
- retried after user threads paused. */
-
- ib::info() << "will retry to withdraw later";
-
- /* need retry later */
- return(true);
- }
}
+ mutex_exit(&free_list_mutex);
/* confirm withdrawn enough */
for (const chunk_t* chunk = chunks + n_chunks_new,
@@ -2019,9 +1994,13 @@ inline bool buf_pool_t::withdraw_blocks()
}
}
+ mutex_enter(&free_list_mutex);
+
ib::info() << "withdrawn target: " << UT_LIST_GET_LEN(withdraw)
<< " blocks";
+ mutex_exit(&free_list_mutex);
+
/* retry is not needed */
++withdraw_clock_;
@@ -2033,6 +2012,7 @@ static void buf_pool_resize_hash()
{
hash_table_t* new_hash_table;
+ ut_ad(mutex_own(&buf_pool.zip_hash_mutex));
ut_ad(buf_pool.page_hash_old == NULL);
/* recreate page_hash */
@@ -2118,21 +2098,27 @@ inline void buf_pool_t::resize()
ulint new_instance_size = srv_buf_pool_size >> srv_page_size_shift;
- buf_resize_status("Resizing buffer pool from " ULINTPF " to "
+ buf_resize_status("Resizing buffer pool to "
ULINTPF " (unit=" ULINTPF ").",
- srv_buf_pool_old_size, srv_buf_pool_size,
+ srv_buf_pool_size,
srv_buf_pool_chunk_unit);
- mutex_enter(&mutex);
+	// No locking needed to read these: the same thread updated them
ut_ad(curr_size == old_size);
ut_ad(n_chunks_new == n_chunks);
+#ifdef UNIV_DEBUG
+ mutex_enter(&free_list_mutex);
ut_ad(UT_LIST_GET_LEN(withdraw) == 0);
+ mutex_exit(&free_list_mutex);
+
+ mutex_enter(&flush_list_mutex);
ut_ad(flush_rbt == NULL);
+ mutex_exit(&flush_list_mutex);
+#endif
n_chunks_new = (new_instance_size << srv_page_size_shift)
/ srv_buf_pool_chunk_unit;
curr_size = n_chunks_new * chunks->size;
- mutex_exit(&mutex);
#ifdef BTR_CUR_HASH_ADAPT
/* disable AHI if needed */
@@ -2267,8 +2253,18 @@ withdraw_retry:
/* Indicate critical path */
resizing.store(true, std::memory_order_relaxed);
+ /* Acquire all buffer pool mutexes and hash table locks */
+	/* TODO: while we certainly lock a lot here, it does not necessarily
+	buy us enough correctness. Exploit the fact that freed pages must
+	have no pointers to them from the buffer pool nor from any other thread
+	except for the freeing one, and remove the redundant locking. The same
+	applies to freshly allocated pages before any pointers to them are
+	published. */
mutex_enter(&mutex);
hash_lock_x_all(page_hash);
+ mutex_enter(&zip_free_mutex);
+ mutex_enter(&free_list_mutex);
+ mutex_enter(&zip_hash_mutex);
+ mutex_enter(&flush_state_mutex);
chunk_t::map_reg = UT_NEW_NOKEY(chunk_t::map());
/* add/delete chunks */
@@ -2399,14 +2395,14 @@ calc_buf_pool_size:
read_ahead_area = ut_min(
BUF_READ_AHEAD_PAGES,
ut_2_power_up(curr_size / BUF_READ_AHEAD_PORTION));
+ ulint old_pool_size = curr_pool_size;
curr_pool_size = n_chunks * srv_buf_pool_chunk_unit;
- srv_buf_pool_curr_size = curr_pool_size;/* FIXME: remove*/
old_size = curr_size;
- innodb_set_buf_pool_size(buf_pool_size_align(srv_buf_pool_curr_size));
+ innodb_set_buf_pool_size(buf_pool_size_align(curr_pool_size));
const bool new_size_too_diff
- = srv_buf_pool_base_size > srv_buf_pool_size * 2
- || srv_buf_pool_base_size * 2 < srv_buf_pool_size;
+ = old_pool_size/2 > curr_pool_size
+ || old_pool_size < curr_pool_size/2;
/* Normalize page_hash and zip_hash,
if the new size is too different */
@@ -2416,8 +2412,12 @@ calc_buf_pool_size:
ib::info() << "hash tables were resized";
}
- hash_unlock_x_all(page_hash);
mutex_exit(&mutex);
+ hash_unlock_x_all(page_hash);
+ mutex_exit(&zip_free_mutex);
+ mutex_exit(&free_list_mutex);
+ mutex_exit(&zip_hash_mutex);
+ mutex_exit(&flush_state_mutex);
if (page_hash_old != NULL) {
hash_table_free(page_hash_old);
@@ -2430,8 +2430,6 @@ calc_buf_pool_size:
/* Normalize other components, if the new size is too different */
if (!warning && new_size_too_diff) {
- srv_buf_pool_base_size = srv_buf_pool_size;
-
buf_resize_status("Resizing also other hash tables.");
/* normalize lock_sys */
@@ -2440,8 +2438,7 @@ calc_buf_pool_size:
lock_sys.resize(srv_lock_table_size);
/* normalize btr_search_sys */
- btr_search_sys_resize(
- buf_pool_get_curr_size() / sizeof(void*) / 64);
+ btr_search_sys_resize(curr_pool_size / sizeof(void*) / 64);
dict_sys.resize();
@@ -2455,13 +2452,8 @@ calc_buf_pool_size:
/* normalize ibuf.max_size */
ibuf_max_size_update(srv_change_buffer_max_size);
- if (srv_buf_pool_old_size != srv_buf_pool_size) {
-
- ib::info() << "Completed to resize buffer pool from "
- << srv_buf_pool_old_size
- << " to " << srv_buf_pool_size << ".";
- srv_buf_pool_old_size = srv_buf_pool_size;
- }
+ ib::info() << "Completed to resize buffer pool"
+ " to " << srv_buf_pool_size << ".";
#ifdef BTR_CUR_HASH_ADAPT
/* enable AHI if needed */
@@ -2494,19 +2486,9 @@ static void buf_resize_callback(void *)
{
DBUG_ENTER("buf_resize_callback");
ut_a(srv_shutdown_state == SRV_SHUTDOWN_NONE);
- mutex_enter(&buf_pool.mutex);
- const auto size= srv_buf_pool_size;
- const bool work= srv_buf_pool_old_size != size;
- mutex_exit(&buf_pool.mutex);
-
- if (work)
- buf_pool.resize();
- else
- {
- std::ostringstream sout;
- sout << "Size did not change: old size = new size = " << size;
- buf_resize_status(sout.str().c_str());
- }
+ ut_a(srv_buf_pool_size_changing);
+ buf_pool.resize();
+ srv_buf_pool_size_changing= false;
DBUG_VOID_RETURN;
}
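
The simplified callback above relies on a flag set by whoever requested the resize. A rough sketch of that handshake, assuming the semantics of srv_buf_pool_size_changing (names here are illustrative, not the real declarations): the requester wins the flag with a compare-exchange before posting the work, and the callback clears it when done, so no mutex is needed to decide whether work is pending.

#include <atomic>

std::atomic<bool> size_changing{false};

// Called from the thread that updates the pool size setting.
bool request_resize() {
	bool expected = false;
	// Only one requester wins; a concurrent resize stays in progress.
	return size_changing.compare_exchange_strong(expected, true);
}

// Called from the background task, mirroring buf_resize_callback().
void resize_callback() {
	// ... perform the resize ...
	size_changing.store(false, std::memory_order_release);
}

int main() {
	if (request_resize())
		resize_callback();
}
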
@@ -2526,18 +2508,17 @@ void buf_resize_shutdown()
}
-/********************************************************************//**
-Relocate a buffer control block. Relocates the block on the LRU list
+/** Relocate a buffer control block. Relocates the block on the LRU list
and in buf_pool.page_hash. Does not relocate bpage->list.
-The caller must take care of relocating bpage->list. */
+The caller must take care of relocating bpage->list.
+@param[in,out] bpage control block being relocated, buf_page_get_state()
+ must be BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE
+@param[in,out] dpage destination control block */
static
void
buf_relocate(
-/*=========*/
- buf_page_t* bpage, /*!< in/out: control block being relocated;
- buf_page_get_state(bpage) must be
- BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
- buf_page_t* dpage) /*!< in/out: destination control block */
+ buf_page_t* bpage,
+ buf_page_t* dpage)
{
buf_page_t* b;
@@ -2637,8 +2618,9 @@ bool buf_pool_watch_is_sentinel(const buf_page_t* bpage)
}
/** Add watch for the given page to be read in. Caller must have
-appropriate hash_lock for the bpage. This function may release the
-hash_lock and reacquire it.
+appropriate hash_lock for the bpage and hold the LRU list mutex to avoid a race
+condition with buf_LRU_free_page inserting the same page into the page hash.
+This function may release the hash_lock and reacquire it.
@param[in] page_id page id
@param[in,out] hash_lock hash_lock currently latched
@return NULL if watch set, block if the page is in the buffer pool */
@@ -2670,9 +2652,7 @@ page_found:
}
/* From this point this function becomes fairly heavy in terms
- of latching. We acquire the buf_pool mutex as well as all the
- hash_locks. buf_pool mutex is needed because any changes to
- the page_hash must be covered by it and hash_locks are needed
+ of latching. We acquire all the hash_locks. They are needed
because we don't want to read any stale information in
buf_pool.watch[]. However, it is not in the critical code path
as this function will be called only by the purge thread. */
@@ -2680,20 +2660,16 @@ page_found:
/* To obey latching order first release the hash_lock. */
rw_lock_x_unlock(*hash_lock);
- mutex_enter(&buf_pool.mutex);
hash_lock_x_all(buf_pool.page_hash);
/* We have to recheck that the page
was not loaded or a watch set by some other
purge thread. This is because of the small
time window between when we release the
- hash_lock to acquire buf_pool.mutex above. */
-
+	hash_lock and when we lock all the hash_locks. */
*hash_lock = buf_page_hash_lock_get(page_id);
-
bpage = buf_page_hash_get_low(page_id);
- if (UNIV_LIKELY_NULL(bpage)) {
- mutex_exit(&buf_pool.mutex);
+ if (bpage) {
hash_unlock_x_all_but(buf_pool.page_hash, *hash_lock);
goto page_found;
}
@@ -2714,11 +2690,6 @@ page_found:
ut_ad(!bpage->in_page_hash);
ut_ad(bpage->buf_fix_count == 0);
- /* bpage is pointing to buf_pool.watch[],
- which is protected by buf_pool.mutex.
- Normally, buf_page_t objects are protected by
- buf_block_t::mutex or buf_pool.zip_mutex or both. */
-
bpage->state = BUF_BLOCK_ZIP_PAGE;
bpage->id = page_id;
bpage->buf_fix_count = 1;
@@ -2727,7 +2698,6 @@ page_found:
HASH_INSERT(buf_page_t, hash, buf_pool.page_hash,
page_id.fold(), bpage);
- mutex_exit(&buf_pool.mutex);
/* Once the sentinel is in the page_hash we can
safely release all locks except just the
relevant hash_lock */
@@ -2755,27 +2725,19 @@ page_found:
}
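
buf_pool_watch_set() above drops its single bucket lock, takes all bucket locks, and must then repeat the lookup because of the unlocked window. A compact sketch of that re-check pattern, with a plain map and a small lock array standing in for page_hash (not InnoDB's types):

#include <array>
#include <mutex>
#include <unordered_map>

std::array<std::mutex, 16> bucket_locks;
std::unordered_map<unsigned, int> page_hash;	// page id -> payload

std::mutex& lock_for(unsigned id) { return bucket_locks[id % 16]; }

// Returns the existing entry, or installs a -1 "watch" sentinel.
int* watch_set(unsigned id) {
	{
		std::lock_guard<std::mutex> g(lock_for(id));
		auto it = page_hash.find(id);
		if (it != page_hash.end())
			return &it->second;	// page already present
	}	// single bucket lock released here
	for (auto& m : bucket_locks) m.lock();	// global order: array order
	int* found = nullptr;
	auto it = page_hash.find(id);	// re-check: we were unlocked above
	if (it != page_hash.end())
		found = &it->second;	// another thread raced us in
	else
		page_hash.emplace(id, -1);	// install the watch sentinel
	for (auto& m : bucket_locks) m.unlock();
	return found;
}

int main() {
	watch_set(7);			// installs the sentinel
	return watch_set(7) ? 0 : 1;	// second call finds it
}
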
/** Remove the sentinel block for the watch before replacing it with a
-real block. buf_page_watch_clear() or buf_page_watch_occurred() will notice
+real block. buf_pool_watch_unset() or buf_pool_watch_occurred() will notice
that the block has been replaced with the real block.
@param[in,out] watch sentinel for watch
@return reference count, to be added to the replacement block */
-static
-void
-buf_pool_watch_remove(buf_page_t* watch)
+static void buf_pool_watch_remove(buf_page_t *watch)
{
-#ifdef UNIV_DEBUG
- /* We must also own the appropriate hash_bucket mutex. */
- rw_lock_t* hash_lock = buf_page_hash_lock_get(watch->id);
- ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
-#endif /* UNIV_DEBUG */
-
- ut_ad(mutex_own(&buf_pool.mutex));
-
- HASH_DELETE(buf_page_t, hash, buf_pool.page_hash, watch->id.fold(),
- watch);
- ut_d(watch->in_page_hash = FALSE);
- watch->buf_fix_count = 0;
- watch->state = BUF_BLOCK_POOL_WATCH;
+ ut_ad(rw_lock_own(buf_page_hash_lock_get(watch->id), RW_LOCK_X));
+ ut_ad(watch->state == BUF_BLOCK_ZIP_PAGE);
+ ut_ad(watch->in_page_hash);
+ HASH_DELETE(buf_page_t, hash, buf_pool.page_hash, watch->id.fold(), watch);
+ ut_d(watch->in_page_hash= FALSE);
+ watch->buf_fix_count= 0;
+ watch->state= BUF_BLOCK_POOL_WATCH;
}
/** Stop watching if the page has been read in.
@@ -2783,27 +2745,17 @@ buf_pool_watch_set(same_page_id) must have returned NULL before.
@param[in] page_id page id */
void buf_pool_watch_unset(const page_id_t page_id)
{
- buf_page_t* bpage;
- /* We only need to have buf_pool.mutex in case where we end
- up calling buf_pool_watch_remove but to obey latching order
- we acquire it here before acquiring hash_lock. This should
- not cause too much grief as this function is only ever
- called from the purge thread. */
- mutex_enter(&buf_pool.mutex);
-
- rw_lock_t* hash_lock = buf_page_hash_lock_get(page_id);
- rw_lock_x_lock(hash_lock);
+ rw_lock_t *hash_lock= buf_page_hash_lock_get(page_id);
+ rw_lock_x_lock(hash_lock);
- /* The page must exist because buf_pool_watch_set()
- increments buf_fix_count. */
- bpage = buf_page_hash_get_low(page_id);
+ /* The page must exist because buf_pool_watch_set()
+ increments buf_fix_count. */
+ buf_page_t *bpage= buf_page_hash_get_low(page_id);
- if (bpage->unfix() == 0 && buf_pool_watch_is_sentinel(bpage)) {
- buf_pool_watch_remove(bpage);
- }
+ if (bpage->unfix() == 0 && buf_pool_watch_is_sentinel(bpage))
+ buf_pool_watch_remove(bpage);
- mutex_exit(&buf_pool.mutex);
- rw_lock_x_unlock(hash_lock);
+ rw_lock_x_unlock(hash_lock);
}
/** Check if the page has been read in.
@@ -2832,8 +2784,7 @@ bool buf_pool_watch_occurred(const page_id_t page_id)
return(ret);
}
-/********************************************************************//**
-Moves a page to the start of the buffer pool LRU list. This high-level
+/** Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
the buffer pool.
@param[in,out] bpage buffer block of a file page */
@@ -2914,18 +2865,27 @@ static void buf_block_try_discard_uncompressed(const page_id_t page_id)
{
buf_page_t* bpage;
- /* Since we need to acquire buf_pool mutex to discard
- the uncompressed frame and because page_hash mutex resides
- below buf_pool mutex in sync ordering therefore we must
- first release the page_hash mutex. This means that the
- block in question can move out of page_hash. Therefore
- we need to check again if the block is still in page_hash. */
+ /* Since we need to acquire buf_pool.mutex to discard
+ the uncompressed frame and because page_hash mutex resides below
+ buf_pool.mutex in sync ordering therefore we must first
+ release the page_hash mutex. This means that the block in question
+ can move out of page_hash. Therefore we need to check again if the
+ block is still in page_hash. */
mutex_enter(&buf_pool.mutex);
bpage = buf_page_hash_get(page_id);
if (bpage) {
- buf_LRU_free_page(bpage, false);
+
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(block_mutex);
+
+ if (buf_LRU_free_page(bpage, false)) {
+
+ return;
+ }
+ mutex_exit(block_mutex);
}
mutex_exit(&buf_pool.mutex);
@@ -3212,22 +3172,12 @@ buf_wait_for_read(
access the block (and check for IO state) after the block has been
added to the page hashtable. */
- if (buf_block_get_io_fix(block) == BUF_IO_READ) {
+ if (buf_block_get_io_fix_unlocked(block) == BUF_IO_READ) {
/* Wait until the read operation completes */
-
- BPageMutex* mutex = buf_page_get_mutex(&block->page);
-
for (;;) {
- buf_io_fix io_fix;
-
- mutex_enter(mutex);
-
- io_fix = buf_block_get_io_fix(block);
-
- mutex_exit(mutex);
-
- if (io_fix == BUF_IO_READ) {
+ if (buf_block_get_io_fix_unlocked(block)
+ == BUF_IO_READ) {
				/* Wait by temporarily s-latching */
rw_lock_s_lock(&block->lock);
rw_lock_s_unlock(&block->lock);
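
The loop above waits for a pending read by momentarily s-latching the frame, since the thread that issued the read holds the x-latch until I/O completes. A self-contained sketch of the same trick, with std::shared_mutex and an atomic flag standing in for the block latch and the io_fix state:

#include <atomic>
#include <shared_mutex>
#include <thread>

std::shared_mutex page_latch;		// x-held while the "read" runs
std::atomic<bool> io_in_progress{true};

void wait_for_read() {
	while (io_in_progress.load(std::memory_order_acquire)) {
		page_latch.lock_shared();	// blocks until the x-latch is gone
		page_latch.unlock_shared();
	}
}

int main() {
	page_latch.lock();			// "reader" x-latches the frame
	std::thread waiter(wait_for_read);
	io_in_progress.store(false, std::memory_order_release);
	page_latch.unlock();			// read complete: waiter proceeds
	waiter.join();
}
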
@@ -3271,6 +3221,7 @@ buf_page_get_gen(
unsigned access_time;
rw_lock_t* hash_lock;
buf_block_t* fix_block;
+ BPageMutex* fix_mutex = NULL;
ulint retries = 0;
ut_ad((mtr == NULL) == (mode == BUF_EVICT_IF_IN_POOL));
@@ -3362,8 +3313,7 @@ loop:
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
rw_lock_x_lock(hash_lock);
- /* If not own buf_pool_mutex,
- page_hash can be changed. */
+ /* page_hash can be changed. */
hash_lock = buf_page_hash_lock_x_confirm(
hash_lock, page_id);
@@ -3385,7 +3335,7 @@ loop:
buf_flush_page() for the flush
thread counterpart. */
- BPageMutex* fix_mutex
+ fix_mutex
= buf_page_get_mutex(
&fix_block->page);
mutex_enter(fix_mutex);
@@ -3501,7 +3451,7 @@ loop:
	for synchronization between user thread and flush thread,
instead of block->lock. See buf_flush_page() for the flush
thread counterpart. */
- BPageMutex* fix_mutex = buf_page_get_mutex(
+ fix_mutex = buf_page_get_mutex(
&fix_block->page);
mutex_enter(fix_mutex);
fix_block->fix();
@@ -3522,11 +3472,8 @@ got_block:
case BUF_PEEK_IF_IN_POOL:
case BUF_EVICT_IF_IN_POOL:
buf_page_t* fix_page = &fix_block->page;
- BPageMutex* fix_mutex = buf_page_get_mutex(fix_page);
- mutex_enter(fix_mutex);
const bool must_read
- = (buf_page_get_io_fix(fix_page) == BUF_IO_READ);
- mutex_exit(fix_mutex);
+ = (buf_page_get_io_fix_unlocked(fix_page) == BUF_IO_READ);
if (must_read) {
/* The page is being read to buffer pool,
@@ -3541,8 +3488,9 @@ got_block:
switch (UNIV_EXPECT(buf_block_get_state(fix_block),
BUF_BLOCK_FILE_PAGE)) {
case BUF_BLOCK_FILE_PAGE:
+ ut_ad(fix_mutex != &buf_pool.zip_mutex);
if (fsp_is_system_temporary(page_id.space())
- && buf_block_get_io_fix(block) != BUF_IO_NONE) {
+ && buf_block_get_io_fix_unlocked(block) != BUF_IO_NONE) {
/* This suggests that the page is being flushed.
Avoid returning reference to this page.
Instead wait for the flush action to complete. */
@@ -3555,13 +3503,19 @@ got_block:
evict_from_pool:
ut_ad(!fix_block->page.oldest_modification);
mutex_enter(&buf_pool.mutex);
+ fix_mutex
+ = buf_page_get_mutex(
+ &fix_block->page);
+ mutex_enter(fix_mutex);
fix_block->unfix();
if (!buf_LRU_free_page(&fix_block->page, true)) {
ut_ad(0);
}
+ // buf_LRU_free_page frees the mutexes we locked.
+ ut_ad(!mutex_own(fix_mutex));
+ ut_ad(!mutex_own(&buf_pool.mutex));
- mutex_exit(&buf_pool.mutex);
return(NULL);
}
break;
@@ -3586,10 +3540,13 @@ evict_from_pool:
}
buf_page_t* bpage = &block->page;
+ /* MDEV-15053-TODO innodb.table_flags-16k fails on it
+ ut_ad(fix_mutex == &buf_pool.zip_mutex); */
+ ut_ad(fix_mutex == &buf_pool.zip_mutex || !fix_mutex);
/* Note: We have already buffer fixed this block. */
if (bpage->buf_fix_count > 1
- || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+ || buf_page_get_io_fix_unlocked(bpage) != BUF_IO_NONE) {
/* This condition often occurs when the buffer
is not buffer-fixed, but I/O-fixed by
@@ -3611,8 +3568,6 @@ evict_from_pool:
mutex_enter(&buf_pool.mutex);
- hash_lock = buf_page_hash_lock_get(page_id);
-
rw_lock_x_lock(hash_lock);
/* Buffer-fixing prevents the page_hash from changing. */
@@ -3635,10 +3590,10 @@ evict_from_pool:
This should be extremely unlikely, for example,
if buf_page_get_zip() was invoked. */
- buf_LRU_block_free_non_file_page(block);
mutex_exit(&buf_pool.mutex);
rw_lock_x_unlock(hash_lock);
buf_page_mutex_exit(block);
+ buf_LRU_block_free_non_file_page(block);
/* Try again */
goto loop;
@@ -3681,15 +3636,15 @@ evict_from_pool:
/* Insert at the front of unzip_LRU list */
buf_unzip_LRU_add_block(block, FALSE);
+ mutex_exit(&buf_pool.mutex);
+
buf_block_set_io_fix(block, BUF_IO_READ);
rw_lock_x_lock_inline(&block->lock, 0, file, line);
UNIV_MEM_INVALID(bpage, sizeof *bpage);
rw_lock_x_unlock(hash_lock);
- buf_pool.n_pend_unzip++;
mutex_exit(&buf_pool.zip_mutex);
- mutex_exit(&buf_pool.mutex);
access_time = buf_page_is_accessed(&block->page);
@@ -3703,16 +3658,14 @@ evict_from_pool:
buf_page_free_descriptor(bpage);
/* Decompress the page while not holding
- buf_pool.mutex or block->mutex. */
+ any buf_pool or block->mutex. */
if (!buf_zip_decompress(block, TRUE)) {
- mutex_enter(&buf_pool.mutex);
buf_page_mutex_enter(fix_block);
buf_block_set_io_fix(fix_block, BUF_IO_NONE);
buf_page_mutex_exit(fix_block);
--buf_pool.n_pend_unzip;
- mutex_exit(&buf_pool.mutex);
fix_block->unfix();
rw_lock_x_unlock(&fix_block->lock);
@@ -3722,17 +3675,13 @@ evict_from_pool:
return NULL;
}
- mutex_enter(&buf_pool.mutex);
-
buf_page_mutex_enter(fix_block);
buf_block_set_io_fix(fix_block, BUF_IO_NONE);
buf_page_mutex_exit(fix_block);
- --buf_pool.n_pend_unzip;
-
- mutex_exit(&buf_pool.mutex);
+ buf_pool.n_pend_unzip++;
rw_lock_x_unlock(&block->lock);
@@ -3764,16 +3713,20 @@ evict_from_pool:
relocated or enter or exit the buf_pool while we
are holding the buf_pool.mutex. */
+ fix_mutex = buf_page_get_mutex(&fix_block->page);
+ mutex_enter(fix_mutex);
+
if (buf_LRU_free_page(&fix_block->page, true)) {
- mutex_exit(&buf_pool.mutex);
+ if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
+ /* Hold LRU list mutex, see comment
+ in buf_pool_watch_set(). */
+ mutex_enter(&buf_pool.mutex);
+ }
/* page_hash can be changed. */
hash_lock = buf_page_hash_lock_get(page_id);
rw_lock_x_lock(hash_lock);
-
- /* If not own buf_pool_mutex,
- page_hash can be changed. */
hash_lock = buf_page_hash_lock_x_confirm(
hash_lock, page_id);
@@ -3783,6 +3736,7 @@ evict_from_pool:
buffer pool in the first place. */
block = (buf_block_t*) buf_pool_watch_set(
page_id, &hash_lock);
+ mutex_exit(&buf_pool.mutex);
} else {
block = (buf_block_t*) buf_page_hash_get_low(
page_id);
@@ -3793,7 +3747,7 @@ evict_from_pool:
if (block != NULL) {
/* Either the page has been read in or
a watch was set on that in the window
- where we released the buf_pool::mutex
+ where we released the buf_pool.mutex
and before we acquire the hash_lock
above. Try again. */
guess = block;
@@ -3804,21 +3758,19 @@ evict_from_pool:
return(NULL);
}
- buf_page_mutex_enter(fix_block);
-
if (buf_flush_page_try(fix_block)) {
guess = fix_block;
goto loop;
}
+ mutex_exit(&buf_pool.mutex);
+
buf_page_mutex_exit(fix_block);
fix_block->fix();
/* Failed to evict the page; change it directly */
-
- mutex_exit(&buf_pool.mutex);
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
@@ -4081,16 +4033,16 @@ buf_page_try_get_func(
ut_ad(!buf_pool_watch_is_sentinel(&block->page));
- buf_page_mutex_enter(block);
+ buf_block_buf_fix_inc(block, file, line);
+
rw_lock_s_unlock(hash_lock);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ buf_page_mutex_enter(block);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_a(page_id == block->page.id);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
- buf_block_buf_fix_inc(block, file, line);
buf_page_mutex_exit(block);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
mtr_memo_type_t fix_type = MTR_MEMO_PAGE_S_FIX;
success = rw_lock_s_lock_nowait(&block->lock, file, line);
@@ -4148,7 +4100,8 @@ buf_page_init_low(
HASH_INVALIDATE(bpage, hash);
}
-/** Inits a page to the buffer buf_pool.
+/** Initialize a page in the buffer pool. The block pointer must be private to
+the calling thread at the start of this function.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out] block block to init */
@@ -4157,8 +4110,7 @@ static void buf_page_init(const page_id_t page_id, ulint zip_size,
{
buf_page_t* hash_page;
- ut_ad(mutex_own(&buf_pool.mutex));
- ut_ad(buf_page_mutex_own(block));
+ ut_ad(!mutex_own(buf_page_get_mutex(&block->page)));
ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
ut_ad(rw_lock_own(buf_page_hash_lock_get(page_id), RW_LOCK_X));
@@ -4202,8 +4154,6 @@ static void buf_page_init(const page_id_t page_id, ulint zip_size,
<< " already found in the hash table: "
<< hash_page << ", " << block;
- ut_d(buf_page_mutex_exit(block));
- ut_d(mutex_exit(&buf_pool.mutex));
ut_d(buf_pool.print());
ut_d(buf_LRU_print());
ut_d(buf_LRU_validate());
@@ -4247,12 +4197,9 @@ buf_page_init_for_read(
bool unzip)
{
buf_block_t* block;
- buf_page_t* bpage = NULL;
- buf_page_t* watch_page;
rw_lock_t* hash_lock;
mtr_t mtr;
- bool lru = false;
- void* data;
+ void* data = NULL;
*err = DB_SUCCESS;
@@ -4281,20 +4228,41 @@ buf_page_init_for_read(
ut_ad(block);
}
+ buf_page_t* bpage = NULL;
+ if (block == NULL) {
+ bpage = buf_page_alloc_descriptor();
+ }
+
+ if (!block || zip_size) {
+ data = buf_buddy_alloc(zip_size);
+ }
+
mutex_enter(&buf_pool.mutex);
hash_lock = buf_page_hash_lock_get(page_id);
rw_lock_x_lock(hash_lock);
+ buf_page_t* watch_page;
+
watch_page = buf_page_hash_get_low(page_id);
if (watch_page && !buf_pool_watch_is_sentinel(watch_page)) {
/* The page is already in the buffer pool. */
watch_page = NULL;
+
+ mutex_exit(&buf_pool.mutex);
+
rw_lock_x_unlock(hash_lock);
- if (block) {
- buf_page_mutex_enter(block);
+
+ if (bpage != NULL) {
+ buf_page_free_descriptor(bpage);
+ }
+
+ if (data != NULL) {
+ buf_buddy_free(data, zip_size);
+ }
+
+ if (block != NULL) {
buf_LRU_block_free_non_file_page(block);
- buf_page_mutex_exit(block);
}
bpage = NULL;
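
The rewritten buf_page_init_for_read() allocates the descriptor and the compressed frame before taking any pool mutex, and simply frees them again when the page turns out to exist. A sketch of that allocate-outside-the-lock shape, with malloc and a plain set standing in for buf_buddy_alloc() and page_hash:

#include <cstdlib>
#include <mutex>
#include <unordered_set>

std::mutex pool_mutex;
std::unordered_set<unsigned> page_hash;

void* init_for_read(unsigned id, std::size_t zip_size) {
	// Pre-allocate while holding nothing.
	void* data = zip_size ? std::malloc(zip_size) : nullptr;
	std::unique_lock<std::mutex> lk(pool_mutex);
	if (!page_hash.insert(id).second) {
		// Lost the race: the page appeared meanwhile. Undo.
		lk.unlock();
		std::free(data);	// freeing outside the mutex is fine
		return nullptr;
	}
	// ... initialize the page and hand `data` over to it ...
	return data;
}

int main() {
	void* d = init_for_read(1, 1024);	// installs the page
	void* dup = init_for_read(1, 1024);	// already there: nullptr
	std::free(d);
	return dup == nullptr ? 0 : 1;
}
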
@@ -4302,46 +4270,23 @@ buf_page_init_for_read(
}
if (block) {
+ ut_ad(!bpage);
bpage = &block->page;
- buf_page_mutex_enter(block);
-
buf_page_init(page_id, zip_size, block);
+ buf_page_mutex_enter(block);
+
/* Note: We are using the hash_lock for protection. This is
safe because no other thread can lookup the block from the
page hashtable yet. */
buf_page_set_io_fix(bpage, BUF_IO_READ);
- rw_lock_x_unlock(hash_lock);
-
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
- /* We set a pass-type x-lock on the frame because then
- the same thread which called for the read operation
- (and is running now at this point of code) can wait
- for the read to complete by waiting for the x-lock on
- the frame; if the x-lock were recursive, the same
- thread would illegally get the x-lock before the page
- read is completed. The x-lock is cleared by the
- io-handler thread. */
-
- rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
-
if (zip_size) {
- /* buf_pool.mutex may be released and
- reacquired by buf_buddy_alloc(). Thus, we
- must release block->mutex in order not to
- break the latching order in the reacquisition
- of buf_pool.mutex. We also must defer this
- operation until after the block descriptor has
- been added to buf_pool.LRU and
- buf_pool.page_hash. */
- buf_page_mutex_exit(block);
- data = buf_buddy_alloc(zip_size, &lru);
- buf_page_mutex_enter(block);
block->page.zip.data = (page_zip_t*) data;
/* To maintain the invariant
@@ -4353,41 +4298,27 @@ buf_page_init_for_read(
buf_unzip_LRU_add_block(block, TRUE);
}
- buf_page_mutex_exit(block);
- } else {
- rw_lock_x_unlock(hash_lock);
-
- /* The compressed page must be allocated before the
- control block (bpage), in order to avoid the
- invocation of buf_buddy_relocate_block() on
- uninitialized data. */
- data = buf_buddy_alloc(zip_size, &lru);
-
- rw_lock_x_lock(hash_lock);
-
- /* If buf_buddy_alloc() allocated storage from the LRU list,
- it released and reacquired buf_pool.mutex. Thus, we must
- check the page_hash again, as it may have been modified. */
- if (UNIV_UNLIKELY(lru)) {
+ mutex_exit(&buf_pool.mutex);
watch_page = buf_page_hash_get_low(page_id);
+ /* We set a pass-type x-lock on the frame because then
+ the same thread which called for the read operation
+ (and is running now at this point of code) can wait
+ for the read to complete by waiting for the x-lock on
+ the frame; if the x-lock were recursive, the same
+ thread would illegally get the x-lock before the page
+ read is completed. The x-lock is cleared by the
+ io-handler thread. */
- if (UNIV_UNLIKELY(watch_page
- && !buf_pool_watch_is_sentinel(watch_page))) {
-
- /* The block was added by some other thread. */
- rw_lock_x_unlock(hash_lock);
- watch_page = NULL;
- buf_buddy_free(data, zip_size);
+ rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
- bpage = NULL;
- goto func_exit;
- }
- }
+ rw_lock_x_unlock(hash_lock);
- bpage = buf_page_alloc_descriptor();
+ buf_page_mutex_exit(block);
+ } else {
page_zip_des_init(&bpage->zip);
page_zip_set_size(&bpage->zip, zip_size);
+ ut_ad(data);
bpage->zip.data = (page_zip_t*) data;
mutex_enter(&buf_pool.zip_mutex);
@@ -4441,7 +4372,6 @@ buf_page_init_for_read(
buf_pool.n_pend_reads++;
func_exit:
- mutex_exit(&buf_pool.mutex);
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
@@ -4518,10 +4448,10 @@ buf_page_create(
block = free_block;
- buf_page_mutex_enter(block);
-
buf_page_init(page_id, zip_size, block);
+ buf_page_mutex_enter(block);
+
rw_lock_x_unlock(hash_lock);
/* The block must be put to the LRU list */
@@ -4538,14 +4468,10 @@ buf_page_create(
buf_page_set_io_fix(&block->page, BUF_IO_READ);
rw_lock_x_lock(&block->lock);
+ mutex_exit(&buf_pool.mutex);
buf_page_mutex_exit(block);
- /* buf_pool.mutex may be released and reacquired by
- buf_buddy_alloc(). Thus, we must release block->mutex
- in order not to break the latching order in
- the reacquisition of buf_pool.mutex. We also must
- defer this operation until after the block descriptor
- has been added to buf_pool.LRU and buf_pool.page_hash. */
block->page.zip.data = buf_buddy_alloc(zip_size);
+ mutex_enter(&buf_pool.mutex);
buf_page_mutex_enter(block);
/* To maintain the invariant
@@ -4733,9 +4659,13 @@ buf_corrupt_page_release(buf_page_t* bpage, const fil_space_t* space)
const ibool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
page_id_t old_page_id = bpage->id;
+ rw_lock_t* hash_lock = buf_page_hash_lock_get(bpage->id);
/* First unfix and release lock on the bpage */
mutex_enter(&buf_pool.mutex);
+
+ rw_lock_x_lock(hash_lock);
+
mutex_enter(buf_page_get_mutex(bpage));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
ut_ad(bpage->id.space() == space->id);
@@ -4753,19 +4683,20 @@ buf_corrupt_page_release(buf_page_t* bpage, const fil_space_t* space)
BUF_IO_READ);
}
- mutex_exit(buf_page_get_mutex(bpage));
-
if (!srv_force_recovery) {
buf_mark_space_corrupt(bpage, *space);
}
- /* After this point bpage can't be referenced. */
+ /* The hash lock and block mutex will be released during the "free" */
buf_LRU_free_one_page(bpage, old_page_id);
- ut_ad(buf_pool.n_pend_reads > 0);
- buf_pool.n_pend_reads--;
+ ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X)
+ && !rw_lock_own(hash_lock, RW_LOCK_S));
mutex_exit(&buf_pool.mutex);
+
+ ut_ad(buf_pool.n_pend_reads > 0);
+ buf_pool.n_pend_reads--;
}
/** Check if the encrypted page is corrupted for the full crc32 format.
@@ -4877,6 +4808,8 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
enum buf_io_fix io_type;
const bool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
+ bool have_LRU_mutex = false;
+
ut_a(buf_page_in_file(bpage));
/* We do not need protect io_fix here by mutex to read
@@ -4885,7 +4818,7 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
ensures that this is the only thread that handles the i/o for this
block. */
- io_type = buf_page_get_io_fix(bpage);
+ io_type = buf_page_get_io_fix_unlocked(bpage);
ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
ut_ad(!!bpage->zip.ssize == (bpage->zip.data != NULL));
ut_ad(uncompressed || bpage->zip.data);
@@ -5071,19 +5004,40 @@ release_page:
}
}
- BPageMutex* block_mutex = buf_page_get_mutex(bpage);
+
mutex_enter(&buf_pool.mutex);
- mutex_enter(block_mutex);
+
+ BPageMutex* page_mutex = buf_page_get_mutex(bpage);
+ mutex_enter(page_mutex);
+
+ if (io_type == BUF_IO_WRITE
+ && (
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ /* to keep consistency at buf_LRU_insert_zip_clean() */
+ buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY ||
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+ buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU ||
+ buf_page_get_flush_type(bpage) == BUF_FLUSH_SINGLE_PAGE)) {
+
+ have_LRU_mutex = true; /* optimistic */
+ } else {
+ mutex_exit(&buf_pool.mutex);
+ }
+
/* Because this thread which does the unlocking is not the same that
did the locking, we use a pass value != 0 in unlock, which simply
removes the newest lock debug record, without checking the thread
id. */
- buf_page_set_io_fix(bpage, BUF_IO_NONE);
buf_page_monitor(bpage, io_type);
if (io_type == BUF_IO_READ) {
+
+ ut_ad(!have_LRU_mutex);
+
+ buf_page_set_io_fix(bpage, BUF_IO_NONE);
+
/* NOTE that the call to ibuf may have moved the ownership of
the x-latch to this OS thread: do not let this confuse you in
debugging! */
@@ -5097,7 +5051,7 @@ release_page:
BUF_IO_READ);
}
- mutex_exit(block_mutex);
+ mutex_exit(page_mutex);
} else {
/* Write means a flush operation: call the completion
routine in the flush system */
@@ -5119,19 +5073,22 @@ release_page:
by the caller explicitly. */
if (buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU) {
evict = true;
+ ut_ad(have_LRU_mutex);
}
- mutex_exit(block_mutex);
-
- if (evict) {
- buf_LRU_free_page(bpage, true);
+ if (evict && buf_LRU_free_page(bpage, true)) {
+ have_LRU_mutex = false;
+ } else {
+ mutex_exit(buf_page_get_mutex(bpage));
+ }
+ if (have_LRU_mutex) {
+ mutex_exit(&buf_pool.mutex);
}
}
DBUG_PRINT("ib_buf", ("%s page %u:%u",
io_type == BUF_IO_READ ? "read" : "wrote",
bpage->id.space(), bpage->id.page_no()));
- mutex_exit(&buf_pool.mutex);
return DB_SUCCESS;
}
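
buf_page_io_complete() above now takes buf_pool.mutex "optimistically" and keeps it only for write completions that may need to evict the page; every other path drops it immediately. A stripped-down sketch of that control flow, with plain mutexes and boolean parameters in place of the real state tests:

#include <mutex>

std::mutex lru_mutex;	// stands in for buf_pool.mutex
std::mutex page_mutex;	// stands in for the block mutex

void io_complete(bool is_write, bool may_evict) {
	bool have_lru = false;
	lru_mutex.lock();	// optimistic: we may not need it
	page_mutex.lock();
	if (is_write && may_evict)
		have_lru = true;	// eviction below needs the LRU mutex
	else
		lru_mutex.unlock();	// fast path: give it back at once
	// ... finish the I/O bookkeeping under page_mutex ...
	page_mutex.unlock();
	if (have_lru)
		lru_mutex.unlock();	// real code would try to evict first
}

int main() {
	io_complete(false, false);	// read completion: fast path
	io_complete(true, true);	// LRU write completion: slow path
}
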
@@ -5161,7 +5118,9 @@ void buf_refresh_io_stats()
All pages must be in a replaceable state (not modified or latched). */
void buf_pool_invalidate()
{
- mutex_enter(&buf_pool.mutex);
+ ut_ad(!mutex_own(&buf_pool.mutex));
+
+ mutex_enter(&buf_pool.flush_state_mutex);
for (unsigned i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
@@ -5179,18 +5138,19 @@ void buf_pool_invalidate()
if (buf_pool.n_flush[i] > 0) {
buf_flush_t type = buf_flush_t(i);
- mutex_exit(&buf_pool.mutex);
+ mutex_exit(&buf_pool.flush_state_mutex);
buf_flush_wait_batch_end(type);
- mutex_enter(&buf_pool.mutex);
+ mutex_enter(&buf_pool.flush_state_mutex);
}
}
- ut_d(mutex_exit(&buf_pool.mutex));
+ mutex_exit(&buf_pool.flush_state_mutex);
ut_d(buf_pool.assert_all_freed());
- ut_d(mutex_enter(&buf_pool.mutex));
while (buf_LRU_scan_and_free_block(true));
+ mutex_enter(&buf_pool.mutex);
+
ut_ad(UT_LIST_GET_LEN(buf_pool.LRU) == 0);
ut_ad(UT_LIST_GET_LEN(buf_pool.unzip_LRU) == 0);
@@ -5198,9 +5158,10 @@ void buf_pool_invalidate()
buf_pool.LRU_old = NULL;
buf_pool.LRU_old_len = 0;
+ mutex_exit(&buf_pool.mutex);
+
memset(&buf_pool.stat, 0x00, sizeof(buf_pool.stat));
buf_refresh_io_stats();
- mutex_exit(&buf_pool.mutex);
}
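
The loop above repeatedly drops flush_state_mutex while waiting for each in-flight batch to drain. The same shape, sketched with a condition variable standing in for the no_flush[] events (the real code uses os_event; this shows only the waiting pattern):

#include <condition_variable>
#include <mutex>

std::mutex flush_state_mutex;
std::condition_variable no_flush;
int n_flush[3] = {0, 0, 0};	// per flush type, as in buf_pool.n_flush[]

void wait_for_all_batches() {
	std::unique_lock<std::mutex> lk(flush_state_mutex);
	for (int i = 0; i < 3; i++) {
		// wait() releases the mutex while blocked, re-checks on wake.
		no_flush.wait(lk, [&] { return n_flush[i] == 0; });
	}
}

int main() {
	wait_for_all_batches();	// nothing in flight: returns immediately
}
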
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -5208,8 +5169,6 @@ void buf_pool_invalidate()
void buf_pool_t::validate()
{
buf_page_t* b;
- buf_pool_t::chunk_t* chunk;
- ulint i;
ulint n_lru_flush = 0;
ulint n_page_flush = 0;
ulint n_list_flush = 0;
@@ -5218,22 +5177,23 @@ void buf_pool_t::validate()
ulint n_free = 0;
ulint n_zip = 0;
- mutex_enter(&buf_pool.mutex);
- hash_lock_x_all(buf_pool.page_hash);
+ mutex_enter(&mutex);
+ hash_lock_x_all(page_hash);
+ mutex_enter(&zip_mutex);
+ mutex_enter(&free_list_mutex);
+ mutex_enter(&flush_state_mutex);
- chunk = buf_pool.chunks;
+ chunk_t* chunk = chunks;
/* Check the uncompressed blocks. */
- for (i = buf_pool.n_chunks; i--; chunk++) {
+ for (ulint i = n_chunks; i--; chunk++) {
ulint j;
buf_block_t* block = chunk->blocks;
for (j = chunk->size; j--; block++) {
- buf_page_mutex_enter(block);
-
switch (buf_block_get_state(block)) {
case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
@@ -5247,7 +5207,7 @@ void buf_pool_t::validate()
ut_ad(buf_page_hash_get_low(block->page.id)
== &block->page);
- switch (buf_page_get_io_fix(&block->page)) {
+ switch (buf_page_get_io_fix_unlocked(&block->page)) {
case BUF_IO_NONE:
break;
@@ -5255,20 +5215,8 @@ void buf_pool_t::validate()
switch (buf_page_get_flush_type(
&block->page)) {
case BUF_FLUSH_LRU:
- n_lru_flush++;
- goto assert_s_latched;
case BUF_FLUSH_SINGLE_PAGE:
- n_page_flush++;
-assert_s_latched:
- ut_a(rw_lock_is_locked(
- &block->lock,
- RW_LOCK_S)
- || rw_lock_is_locked(
- &block->lock,
- RW_LOCK_SX));
- break;
case BUF_FLUSH_LIST:
- n_list_flush++;
break;
default:
ut_error;
@@ -5295,16 +5243,12 @@ assert_s_latched:
/* do nothing */
break;
}
-
- buf_page_mutex_exit(block);
}
}
- mutex_enter(&buf_pool.zip_mutex);
-
/* Check clean compressed-only blocks. */
- for (b = UT_LIST_GET_FIRST(buf_pool.zip_clean); b;
+ for (b = UT_LIST_GET_FIRST(zip_clean); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_ad(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
switch (buf_page_get_io_fix(b)) {
@@ -5324,7 +5268,7 @@ assert_s_latched:
}
/* It is OK to read oldest_modification here because
- we have acquired buf_pool.zip_mutex above which acts
+ we have acquired zip_mutex above which acts
as the 'block->mutex' for these bpages. */
ut_ad(!b->oldest_modification);
ut_ad(buf_page_hash_get_low(b->id) == b);
@@ -5334,8 +5278,8 @@ assert_s_latched:
/* Check dirty blocks. */
- mutex_enter(&buf_pool.flush_list_mutex);
- for (b = UT_LIST_GET_FIRST(buf_pool.flush_list); b;
+ mutex_enter(&flush_list_mutex);
+ for (b = UT_LIST_GET_FIRST(flush_list); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_ad(b->in_flush_list);
ut_ad(b->oldest_modification);
@@ -5345,7 +5289,9 @@ assert_s_latched:
case BUF_BLOCK_ZIP_DIRTY:
n_lru++;
n_zip++;
- switch (buf_page_get_io_fix(b)) {
+ /* fall through */
+ case BUF_BLOCK_FILE_PAGE:
+ switch (buf_page_get_io_fix_unlocked(b)) {
case BUF_IO_NONE:
case BUF_IO_READ:
case BUF_IO_PIN:
@@ -5367,51 +5313,50 @@ assert_s_latched:
break;
}
break;
- case BUF_BLOCK_FILE_PAGE:
- /* uncompressed page */
+ case BUF_BLOCK_REMOVE_HASH:
+ /* We do not hold buf_pool.mutex here. */
break;
case BUF_BLOCK_POOL_WATCH:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
ut_error;
break;
}
ut_ad(buf_page_hash_get_low(b->id) == b);
}
- ut_ad(UT_LIST_GET_LEN(buf_pool.flush_list) == n_flush);
-
- hash_unlock_x_all(buf_pool.page_hash);
- mutex_exit(&buf_pool.flush_list_mutex);
+ ut_ad(UT_LIST_GET_LEN(flush_list) == n_flush);
- mutex_exit(&buf_pool.zip_mutex);
+ hash_unlock_x_all(page_hash);
+ mutex_exit(&flush_list_mutex);
+ mutex_exit(&zip_mutex);
- if (buf_pool.curr_size == buf_pool.old_size
- && n_lru + n_free > buf_pool.curr_size + n_zip) {
+ if (curr_size == old_size
+ && n_lru + n_free > curr_size + n_zip) {
ib::fatal() << "n_LRU " << n_lru << ", n_free " << n_free
- << ", pool " << buf_pool.curr_size
+ << ", pool " << curr_size
<< " zip " << n_zip << ". Aborting...";
}
- ut_ad(UT_LIST_GET_LEN(buf_pool.LRU) == n_lru);
+ ut_ad(UT_LIST_GET_LEN(LRU) == n_lru);
- if (buf_pool.curr_size == buf_pool.old_size
- && UT_LIST_GET_LEN(buf_pool.free) != n_free) {
+ mutex_exit(&mutex);
+
+ if (curr_size == old_size
+ && UT_LIST_GET_LEN(free) > n_free) {
ib::fatal() << "Free list len "
- << UT_LIST_GET_LEN(buf_pool.free)
+ << UT_LIST_GET_LEN(free)
<< ", free blocks " << n_free << ". Aborting...";
}
- ut_ad(buf_pool.n_flush[BUF_FLUSH_LIST] == n_list_flush);
- ut_ad(buf_pool.n_flush[BUF_FLUSH_LRU] == n_lru_flush);
- ut_ad(buf_pool.n_flush[BUF_FLUSH_SINGLE_PAGE] == n_page_flush);
+ mutex_exit(&free_list_mutex);
- mutex_exit(&buf_pool.mutex);
+ ut_ad(this->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_page_flush);
+ mutex_exit(&flush_state_mutex);
ut_d(buf_LRU_validate());
ut_d(buf_flush_validate());
@@ -5429,7 +5374,7 @@ void buf_pool_t::print()
ulint j;
index_id_t id;
ulint n_found;
- buf_pool_t::chunk_t* chunk;
+ chunk_t* chunk;
dict_index_t* index;
size = curr_size;
@@ -5549,18 +5494,15 @@ ulint buf_get_latched_pages_number()
continue;
}
- buf_page_mutex_enter(block);
-
if (block->page.buf_fix_count != 0
- || buf_page_get_io_fix(&block->page)
+ || buf_page_get_io_fix_unlocked(&block->page)
!= BUF_IO_NONE) {
fixed_pages_number++;
}
-
- buf_page_mutex_exit(block);
}
}
+ mutex_exit(&buf_pool.mutex);
mutex_enter(&buf_pool.zip_mutex);
/* Traverse the lists of clean and dirty compressed-only blocks. */
@@ -5604,7 +5546,6 @@ ulint buf_get_latched_pages_number()
mutex_exit(&buf_pool.flush_list_mutex);
mutex_exit(&buf_pool.zip_mutex);
- mutex_exit(&buf_pool.mutex);
return(fixed_pages_number);
}
@@ -5618,6 +5559,8 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info)
double time_elapsed;
mutex_enter(&buf_pool.mutex);
+ mutex_enter(&buf_pool.free_list_mutex);
+ mutex_enter(&buf_pool.flush_state_mutex);
mutex_enter(&buf_pool.flush_list_mutex);
pool_info->pool_size = buf_pool.curr_size;
@@ -5647,6 +5590,9 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info)
+ buf_pool.init_flush[BUF_FLUSH_SINGLE_PAGE]);
mutex_exit(&buf_pool.flush_list_mutex);
+ mutex_exit(&buf_pool.flush_state_mutex);
+ mutex_exit(&buf_pool.free_list_mutex);
+ mutex_exit(&buf_pool.mutex);
current_time = time(NULL);
time_elapsed = 0.001 + difftime(current_time,
@@ -5737,7 +5683,6 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info)
pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
buf_refresh_io_stats();
- mutex_exit(&buf_pool.mutex);
}
/*********************************************************************//**
@@ -5872,12 +5817,12 @@ ulint buf_pool_check_no_pending_io()
{
/* FIXME: use atomics, no mutex */
ulint pending_io = buf_pool.n_pend_reads;
- mutex_enter(&buf_pool.mutex);
+ mutex_enter(&buf_pool.flush_state_mutex);
pending_io +=
+ buf_pool.n_flush[BUF_FLUSH_LRU]
+ buf_pool.n_flush[BUF_FLUSH_SINGLE_PAGE]
+ buf_pool.n_flush[BUF_FLUSH_LIST];
- mutex_exit(&buf_pool.mutex);
+ mutex_exit(&buf_pool.flush_state_mutex);
return(pending_io);
}
@@ -5915,5 +5860,9 @@ buf_page_get_trim_length(
ulint write_length)
{
+	ut_ad(mutex_own(&buf_pool.mutex));
+	ut_ad(mutex_own(&buf_pool.free_list_mutex));
+	ut_ad(mutex_own(&buf_pool.flush_state_mutex));
+	ut_ad(mutex_own(&buf_pool.flush_list_mutex));
	return bpage->physical_size() - write_length;
}
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index a4651f53f8d..2f5dae7155b 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -1029,16 +1029,16 @@ flush:
}
}
-/********************************************************************//**
-Posts a buffer page for writing. If the doublewrite memory buffer is
-full, calls buf_dblwr_flush_buffered_writes and waits for for free
-space to appear. */
+/** Posts a buffer page for writing. If the doublewrite memory buffer
+is full, calls buf_dblwr_flush_buffered_writes and waits for free
+space to appear.
+@param[in] bpage buffer block to write */
void
buf_dblwr_add_to_batch(
-/*====================*/
- buf_page_t* bpage) /*!< in: buffer block to write */
+ buf_page_t* bpage)
{
ut_a(buf_page_in_file(bpage));
+ ut_ad(!mutex_own(&buf_pool.mutex));
try_again:
mutex_enter(&buf_dblwr->mutex);
diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc
index f7b1d05854f..bf093f9d9d2 100644
--- a/storage/innobase/buf/buf0dump.cc
+++ b/storage/innobase/buf/buf0dump.cc
@@ -62,8 +62,8 @@ static volatile bool buf_load_should_start;
static bool buf_load_abort_flag;
/* Used to temporarily store dump info in order to avoid IO while holding
-buffer pool mutex during dump and also to sort the contents of the dump
-before reading the pages from disk during load.
+buffer pool LRU list mutex during dump and also to sort the contents of the
+dump before reading the pages from disk during load.
We store the space id in the high 32 bits and page no in low 32 bits. */
typedef ib_uint64_t buf_dump_t;
@@ -234,18 +234,16 @@ buf_dump_generate_path(
}
}
-/*****************************************************************//**
-Perform a buffer pool dump into the file specified by
+/** Perform a buffer pool dump into the file specified by
innodb_buffer_pool_filename. If any errors occur then the value of
innodb_buffer_pool_dump_status will be set accordingly, see buf_dump_status().
The dump filename can be specified by (relative to srv_data_home):
-SET GLOBAL innodb_buffer_pool_filename='filename'; */
+SET GLOBAL innodb_buffer_pool_filename='filename';
+@param[in] obey_shutdown quit if we are in a shutting down state */
static
void
buf_dump(
-/*=====*/
- ibool obey_shutdown) /*!< in: quit if we are in a shutting down
- state */
+ ibool obey_shutdown)
{
#define SHOULD_QUIT() (SHUTTING_DOWN() && obey_shutdown)
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 741cc5d8eb1..6a24a2fdeb7 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -367,9 +367,8 @@ buf_flush_free_flush_rbt(void)
@param[in] lsn oldest modification */
void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
{
- ut_ad(!mutex_own(&buf_pool.mutex));
ut_ad(log_flush_order_mutex_own());
- ut_ad(buf_page_mutex_own(block));
+ ut_ad(mutex_own(buf_page_get_mutex(&block->page)));
ut_ad(lsn);
mutex_enter(&buf_pool.flush_list_mutex);
@@ -384,9 +383,9 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
if (UNIV_LIKELY_NULL(buf_pool.flush_rbt)) {
ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
- /* The field in_LRU_list is protected by buf_pool.mutex, which
- we are not holding. However, while a block is in the flush
- list, it is dirty and cannot be discarded, not from the
+ /* The field in_LRU_list is protected by buf_pool.mutex,
+ which we are not holding. However, while a block is in the
+ flush list, it is dirty and cannot be discarded, not from the
page_hash or from the LRU list. At most, the uncompressed
page frame of a compressed block may be discarded or created
(copying the block->page to or from a buf_page_t that is
@@ -416,15 +415,15 @@ func_exit:
mutex_exit(&buf_pool.flush_list_mutex);
}
-/********************************************************************//**
-Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., the transition FILE_PAGE => NOT_USED allowed.
+/** Returns TRUE if the file page block is immediately suitable for replacement,
+i.e., the transition FILE_PAGE => NOT_USED is allowed. The caller must hold the
+LRU list and block mutexes.
+@param[in] bpage buffer control block, must be buf_page_in_file() and
+ in the LRU list
@return TRUE if can replace immediately */
ibool
buf_flush_ready_for_replace(
-/*========================*/
- buf_page_t* bpage) /*!< in: buffer control block, must be
- buf_page_in_file(bpage) and in the LRU list */
+ buf_page_t* bpage)
{
ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
@@ -443,23 +442,43 @@ buf_flush_ready_for_replace(
return(FALSE);
}
-/********************************************************************//**
-Returns true if the block is modified and ready for flushing.
+/** Check if the block is modified and ready for flushing.
+@param[in] bpage buffer control block, must be buf_page_in_file()
+@param[in] flush_type type of flush
@return true if can flush immediately */
bool
buf_flush_ready_for_flush(
-/*======================*/
- buf_page_t* bpage, /*!< in: buffer control block, must be
- buf_page_in_file(bpage) */
- buf_flush_t flush_type)/*!< in: type of flush */
+ buf_page_t* bpage,
+ buf_flush_t flush_type)
{
- ut_ad(mutex_own(&buf_pool.mutex));
- ut_a(buf_page_in_file(bpage));
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+#ifdef UNIV_DEBUG
+	ut_ad(buf_page_in_file(bpage)
+	      || (buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH
+		  && !mutex_own(&buf_pool.mutex)));
+#else
+	ut_a(buf_page_in_file(bpage)
+	     || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
+#endif /* UNIV_DEBUG */
+
+	ut_ad(mutex_own(buf_page_get_mutex(bpage))
+	      || (flush_type == BUF_FLUSH_LIST
+		  && mutex_own(&buf_pool.flush_list_mutex)));
+
ut_ad(flush_type < BUF_FLUSH_N_TYPES);
if (bpage->oldest_modification == 0
- || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+ || buf_page_get_io_fix_unlocked(bpage) != BUF_IO_NONE) {
return(false);
}
@@ -467,6 +486,7 @@ buf_flush_ready_for_flush(
switch (flush_type) {
case BUF_FLUSH_LIST:
+ return(buf_page_get_state(bpage) != BUF_BLOCK_REMOVE_HASH);
case BUF_FLUSH_LRU:
case BUF_FLUSH_SINGLE_PAGE:
return(true);
@@ -480,7 +500,7 @@ buf_flush_ready_for_flush(
}
/** Remove a block from the flush list of modified blocks.
-@param[in] bpage block to be removed from the flush list */
+@param[in] bpage pointer to the block in question */
void buf_flush_remove(buf_page_t* bpage)
{
#if 0 // FIXME: Rate-limit the output. Move this to the page cleaner?
@@ -492,8 +512,11 @@ void buf_flush_remove(buf_page_t* bpage)
bpage->space, UT_LIST_GET_LEN(buf_pool.flush_list));
}
#endif
- ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_DIRTY
+ || mutex_own(&buf_pool.mutex));
+#endif
ut_ad(bpage->in_flush_list);
mutex_enter(&buf_pool.flush_list_mutex);
@@ -564,18 +587,10 @@ buf_flush_relocate_on_flush_list(
buf_page_t* prev;
buf_page_t* prev_b = NULL;
- ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
mutex_enter(&buf_pool.flush_list_mutex);
- /* FIXME: At this point we have both buf_pool and flush_list
- mutexes. Theoretically removal of a block from flush list is
- only covered by flush_list mutex but currently we do
- have buf_pool mutex in buf_flush_remove() therefore this block
- is guaranteed to be in the flush list. We need to check if
- this will work without the assumption of block removing code
- having the buf_pool mutex. */
ut_ad(bpage->in_flush_list);
ut_ad(dpage->in_flush_list);
@@ -621,15 +636,17 @@ buf_flush_relocate_on_flush_list(
void buf_flush_write_complete(buf_page_t* bpage, bool dblwr)
{
ut_ad(bpage);
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+ const buf_flush_t flush_type = buf_page_get_flush_type(bpage);
+ mutex_enter(&buf_pool.flush_state_mutex);
buf_flush_remove(bpage);
+ buf_page_set_io_fix(bpage, BUF_IO_NONE);
- const buf_flush_t flush_type = buf_page_get_flush_type(bpage);
buf_pool.n_flush[flush_type]--;
ut_ad(buf_pool.n_flush[flush_type] != ULINT_MAX);
- ut_ad(mutex_own(&buf_pool.mutex));
-
if (buf_pool.n_flush[flush_type] == 0
&& buf_pool.init_flush[flush_type] == FALSE) {
@@ -638,6 +655,8 @@ void buf_flush_write_complete(buf_page_t* bpage, bool dblwr)
os_event_set(buf_pool.no_flush[flush_type]);
}
+ mutex_exit(&buf_pool.flush_state_mutex);
+
if (dblwr) {
buf_dblwr_update(bpage, flush_type);
}
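
buf_flush_write_complete() is the signalling side of the accounting that the earlier wait sketch consumes: decrement the in-flight count under the dedicated state mutex and wake waiters only when the whole batch has drained. Again with a condition variable in place of the os_event:

#include <condition_variable>
#include <mutex>

std::mutex flush_state_mutex;
std::condition_variable no_flush;
int n_flush = 1;	// one write in flight

void write_complete() {
	std::lock_guard<std::mutex> lk(flush_state_mutex);
	if (--n_flush == 0)
		no_flush.notify_all();	// batch drained: wake all waiters
}

int main() {
	write_complete();
}
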
@@ -1081,15 +1100,16 @@ static void buf_flush_freed_page(buf_page_t *bpage, fil_space_t *space)
/********************************************************************//**
Does an asynchronous write of a buffer page. NOTE: when the
doublewrite buffer is used, we must call
-buf_dblwr_flush_buffered_writes after we have posted a batch of
-writes! */
+buf_dblwr_flush_buffered_writes after we have posted a batch of writes!
+@param[in] bpage buffer block to write
+@param[in] flush_type type of flush
+@param[in] sync true if sync IO request */
static
void
buf_flush_write_block_low(
-/*======================*/
- buf_page_t* bpage, /*!< in: buffer block to write */
- buf_flush_t flush_type, /*!< in: type of flush */
- bool sync) /*!< in: true if sync IO request */
+ buf_page_t* bpage,
+ buf_flush_t flush_type,
+ bool sync)
{
fil_space_t* space = fil_space_acquire_for_io(bpage->id.space());
if (!space) {
@@ -1110,15 +1130,14 @@ buf_flush_write_block_low(
ut_ad(buf_page_in_file(bpage));
- /* We are not holding buf_pool.mutex or block_mutex here.
- Nevertheless, it is safe to access bpage, because it is
- io_fixed and oldest_modification != 0. Thus, it cannot be
- relocated in the buffer pool or removed from flush_list or
- LRU_list. */
- ut_ad(!mutex_own(&buf_pool.mutex));
+ /* We are not holding block_mutex here. Nevertheless, it is safe to
+ access bpage, because it is io_fixed and oldest_modification != 0.
+ Thus, it cannot be relocated in the buf_pool or removed from
+ buf_pool.flush_list or buf_pool.LRU. */
+
ut_ad(!mutex_own(&buf_pool.flush_list_mutex));
ut_ad(!buf_page_get_mutex(bpage)->is_owned());
- ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
+ ut_ad(buf_page_get_io_fix_unlocked(bpage) == BUF_IO_WRITE);
ut_ad(bpage->oldest_modification != 0);
switch (buf_page_get_state(bpage)) {
@@ -1216,7 +1235,7 @@ buf_flush_write_block_low(
decremented the node->n_pending. However,
buf_page_io_complete() only needs to look up the
tablespace during read requests, not during writes. */
- ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
+ ut_ad(buf_page_get_io_fix_unlocked(bpage) == BUF_IO_WRITE);
#ifdef UNIV_DEBUG
dberr_t err =
#endif
@@ -1234,7 +1253,7 @@ buf_flush_write_block_low(
buf_LRU_stat_inc_io();
}
-/** Write a flushable page asynchronously from the buffer pool to a file.
+/** Writes a flushable page asynchronously from the buffer pool to a file.
NOTE: 1. in simulated aio we must call os_aio_simulated_wake_handler_threads
after we have posted a batch of writes! 2. buf_page_get_mutex(bpage) must be
held upon entering this function. The LRU list mutex must be held if flush_type
@@ -1249,7 +1268,13 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync)
BPageMutex* block_mutex;
ut_ad(flush_type < BUF_FLUSH_N_TYPES);
- ut_ad(mutex_own(&buf_pool.mutex));
+ /* Hold the LRU list mutex iff called for a single page LRU
+ flush. A single page LRU flush is already non-performant, and holding
+ the LRU list mutex allows us to avoid having to store the previous LRU
+ list page or to restart the LRU scan in
+ buf_flush_single_page_from_LRU(). */
+ ut_ad((flush_type == BUF_FLUSH_SINGLE_PAGE)
+ == mutex_own(&buf_pool.mutex));
ut_ad(buf_page_in_file(bpage));
ut_ad(!sync || flush_type == BUF_FLUSH_SINGLE_PAGE);
@@ -1286,6 +1311,8 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync)
/* We are committed to flushing by the time we get here */
+ mutex_enter(&buf_pool.flush_state_mutex);
+
buf_page_set_io_fix(bpage, BUF_IO_WRITE);
buf_page_set_flush_type(bpage, flush_type);
@@ -1296,14 +1323,21 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync)
++buf_pool.n_flush[flush_type];
ut_ad(buf_pool.n_flush[flush_type] != 0);
-
+ mutex_exit(&buf_pool.flush_state_mutex);
mutex_exit(block_mutex);
- mutex_exit(&buf_pool.mutex);
-
- if (flush_type == BUF_FLUSH_LIST
- && is_uncompressed
- && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) {
+ switch (flush_type) {
+ case BUF_FLUSH_SINGLE_PAGE:
+ mutex_exit(&buf_pool.mutex);
+ break;
+ case BUF_FLUSH_LRU:
+ case BUF_FLUSH_N_TYPES:
+ break;
+ case BUF_FLUSH_LIST:
+ if (!is_uncompressed
+ || rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) {
+ break;
+ }
if (!fsp_is_system_temporary(bpage->id.space())) {
/* avoiding deadlock possibility involves
@@ -1320,7 +1354,7 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync)
/* Even though bpage is not protected by any mutex at this
point, it is safe to access bpage, because it is io_fixed and
oldest_modification != 0. Thus, it cannot be relocated in the
- buffer pool or removed from flush_list or LRU_list. */
+ buf_pool or removed from buf_pool.flush_list or buf_pool.LRU. */
buf_flush_write_block_low(bpage, flush_type, sync);
return true;
@@ -1337,13 +1371,13 @@ bool buf_flush_page_try(buf_block_t* block)
{
ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(buf_page_mutex_own(block));
+ ut_ad(mutex_own(buf_page_get_mutex(&block->page)));
if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_SINGLE_PAGE)) {
return false;
}
- /* The following call will release the buf_pool and block mutex. */
+	/* The following call releases the mutexes when it returns true. */
return buf_flush_page(&block->page, BUF_FLUSH_SINGLE_PAGE, true);
}
# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
@@ -1360,20 +1394,25 @@ buf_flush_check_neighbor(
{
buf_page_t* bpage;
bool ret;
+ rw_lock_t* hash_lock;
+ BPageMutex* block_mutex;
ut_ad(flush_type == BUF_FLUSH_LRU
|| flush_type == BUF_FLUSH_LIST);
- mutex_enter(&buf_pool.mutex);
-
- bpage = buf_page_hash_get(page_id);
+ bpage = buf_page_hash_get_s_locked(page_id, &hash_lock);
if (!bpage) {
- mutex_exit(&buf_pool.mutex);
return(false);
}
+ block_mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(block_mutex);
+
+ rw_lock_s_unlock(hash_lock);
+
ut_a(buf_page_in_file(bpage));
/* We avoid flushing 'non-old' blocks in an LRU flush,
@@ -1381,15 +1420,13 @@ buf_flush_check_neighbor(
ret = false;
if (flush_type != BUF_FLUSH_LRU || buf_page_is_old(bpage)) {
- BPageMutex* block_mutex = buf_page_get_mutex(bpage);
- mutex_enter(block_mutex);
if (buf_flush_ready_for_flush(bpage, flush_type)) {
ret = true;
}
- mutex_exit(block_mutex);
}
- mutex_exit(&buf_pool.mutex);
+
+ mutex_exit(block_mutex);
return(ret);
}
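
buf_flush_check_neighbor() now pins the block through its own mutex while the shared hash lock still guarantees the block cannot leave the page hash, and only then releases the hash lock. A sketch of that latch-coupling order with illustrative types (the lookup is passed in rather than being a real page hash):

#include <mutex>
#include <shared_mutex>

struct block {
	std::mutex m;
	bool old = false;	// at the old end of the LRU list
	bool ready = false;	// dirty and flushable
};

std::shared_mutex hash_lock;

bool check_neighbor(block* (*lookup)(), bool lru_flush) {
	hash_lock.lock_shared();
	block* b = lookup();	// stable while the hash lock is held
	if (!b) {
		hash_lock.unlock_shared();
		return false;
	}
	b->m.lock();	// pin the block before letting the hash change
	hash_lock.unlock_shared();
	bool ret = (!lru_flush || b->old) && b->ready;
	b->m.unlock();
	return ret;
}

int main() {
	static block b;
	b.old = b.ready = true;
	return check_neighbor([]() -> block* { return &b; }, true) ? 0 : 1;
}
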
@@ -1414,6 +1451,8 @@ buf_flush_try_neighbors(
ulint count = 0;
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+ ut_ad(!mutex_own(&buf_pool.mutex));
+ ut_ad(!mutex_own(&buf_pool.flush_list_mutex));
fil_space_t* space = fil_space_acquire_for_io(page_id.space());
if (!space) {
return 0;
@@ -1485,6 +1524,8 @@ buf_flush_try_neighbors(
for (ulint i = low; i < high; i++) {
buf_page_t* bpage;
+ rw_lock_t* hash_lock;
+ BPageMutex* block_mutex;
if ((count + n_flushed) >= n_to_flush) {
@@ -1503,15 +1544,20 @@ buf_flush_try_neighbors(
const page_id_t cur_page_id(page_id.space(), i);
- mutex_enter(&buf_pool.mutex);
- bpage = buf_page_hash_get(cur_page_id);
+ bpage = buf_page_hash_get_s_locked(cur_page_id,
+ &hash_lock);
if (bpage == NULL) {
- mutex_exit(&buf_pool.mutex);
continue;
}
+ block_mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(block_mutex);
+
+ rw_lock_s_unlock(hash_lock);
+
ut_a(buf_page_in_file(bpage));
/* We avoid flushing 'non-old' blocks in an LRU flush,
@@ -1521,10 +1567,6 @@ buf_flush_try_neighbors(
|| i == page_id.page_no()
|| buf_page_is_old(bpage)) {
- BPageMutex* block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
if (buf_flush_ready_for_flush(bpage, flush_type)
&& (i == page_id.page_no()
|| bpage->buf_fix_count == 0)) {
@@ -1536,15 +1578,13 @@ buf_flush_try_neighbors(
++count;
} else {
mutex_exit(block_mutex);
- mutex_exit(&buf_pool.mutex);
}
continue;
- } else {
- mutex_exit(block_mutex);
}
}
- mutex_exit(&buf_pool.mutex);
+
+ mutex_exit(block_mutex);
}
space->release_for_io();
@@ -1561,16 +1601,15 @@ buf_flush_try_neighbors(
}
/** Check if the block is modified and ready for flushing.
-If the the block is ready to flush then flush the page and try o flush
-its neighbors.
+If it is ready to flush, flush the page and try to flush its neighbors. The caller
+must hold the buffer pool list mutex corresponding to the type of flush.
@param[in] bpage buffer control block,
-must be buf_page_in_file(bpage)
+ must be buf_page_in_file(bpage)
@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST
@param[in] n_to_flush number of pages to flush
@param[in,out] count number of pages flushed
-@return TRUE if buf_pool mutex was released during this function.
-This does not guarantee that some pages were written as well.
-Number of pages written are incremented to the count. */
+@return TRUE if the list mutex was released during this function. This does
+not guarantee that any pages were written. */
static
bool
buf_flush_page_and_try_neighbors(
@@ -1579,45 +1618,78 @@ buf_flush_page_and_try_neighbors(
ulint n_to_flush,
ulint* count)
{
- ut_ad(mutex_own(&buf_pool.mutex));
-
bool flushed;
- BPageMutex* block_mutex = buf_page_get_mutex(bpage);
+ BPageMutex* block_mutex = NULL;
- mutex_enter(block_mutex);
+ ut_ad(flush_type != BUF_FLUSH_SINGLE_PAGE);
- ut_a(buf_page_in_file(bpage));
+ ut_ad((flush_type == BUF_FLUSH_LRU
+ && mutex_own(&buf_pool.mutex))
+ || (flush_type == BUF_FLUSH_LIST
+ && mutex_own(&buf_pool.flush_list_mutex)));
+
+ if (flush_type == BUF_FLUSH_LRU) {
+ block_mutex = buf_page_get_mutex(bpage);
+ mutex_enter(block_mutex);
+ }
+
+#ifdef UNIV_DEBUG
+ if (!buf_page_in_file(bpage)) {
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
+ ut_ad(!mutex_own(&buf_pool.mutex));
+ }
+#else
+ ut_a(buf_page_in_file(bpage)
+	     || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
+#endif /* UNIV_DEBUG */
if (buf_flush_ready_for_flush(bpage, flush_type)) {
+ if (flush_type == BUF_FLUSH_LRU) {
+ mutex_exit(&buf_pool.mutex);
+ }
+
const page_id_t page_id = bpage->id;
- mutex_exit(block_mutex);
- mutex_exit(&buf_pool.mutex);
+ if (flush_type == BUF_FLUSH_LRU) {
+ mutex_exit(block_mutex);
+ } else {
+ mutex_exit(&buf_pool.flush_list_mutex);
+ }
/* Try to flush also all the neighbors */
*count += buf_flush_try_neighbors(
page_id, flush_type, *count, n_to_flush);
- mutex_enter(&buf_pool.mutex);
+ if (flush_type == BUF_FLUSH_LRU) {
+ mutex_enter(&buf_pool.mutex);
+ } else {
+ mutex_enter(&buf_pool.flush_list_mutex);
+ }
flushed = true;
- } else {
+
+ } else if (flush_type == BUF_FLUSH_LRU) {
+
mutex_exit(block_mutex);
flushed = false;
+ } else {
+ flushed = false;
}
- ut_ad(mutex_own(&buf_pool.mutex));
+ ut_ad((flush_type == BUF_FLUSH_LRU
+ && mutex_own(&buf_pool.mutex))
+ || (flush_type == BUF_FLUSH_LIST
+ && mutex_own(&buf_pool.flush_list_mutex)));
return(flushed);
}
-/*******************************************************************//**
-This utility moves the uncompressed frames of pages to the free list.
+/** This utility moves the uncompressed frames of pages to the free list.
Note that this function does not actually flush any data to disk. It
just detaches the uncompressed frames from the compressed pages at the
tail of the unzip_LRU and puts those freed frames in the free list.
Note that it is a best effort attempt and it is not guaranteed that
after a call to this function there will be 'max' blocks in the free
-list.
+list. The caller must hold the LRU list mutex.
@param[in] max desired number of blocks in the free_list
@return number of blocks moved to the free list. */
static ulint buf_free_from_unzip_LRU_list_batch(ulint max)
@@ -1636,15 +1708,21 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max)
&& free_len < srv_LRU_scan_depth
&& lru_len > UT_LIST_GET_LEN(buf_pool.LRU) / 10) {
+ BPageMutex* block_mutex = buf_page_get_mutex(&block->page);
+
++scanned;
+
+ mutex_enter(block_mutex);
+
if (buf_LRU_free_page(&block->page, false)) {
- /* Block was freed. buf_pool.mutex potentially
- released and reacquired */
+ /* Block was freed, all mutexes released */
++count;
+ mutex_enter(&buf_pool.mutex);
block = UT_LIST_GET_LAST(buf_pool.unzip_LRU);
} else {
+ mutex_exit(block_mutex);
block = UT_LIST_GET_PREV(unzip_LRU, block);
}
@@ -1665,31 +1743,25 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max)
return(count);
}
-/** Flush dirty blocks from the end of the LRU list.
+/** This utility flushes dirty blocks from the end of the LRU list.
The calling thread is not allowed to own any latches on pages!
@param[in] max desired number of blocks to make available
in the free list (best effort; not guaranteed)
-@param[out] n counts of flushed and evicted pages */
-static void buf_flush_LRU_list_batch(ulint max, flush_counters_t* n)
+@param[out]	n		counts of flushed and evicted pages */
+static void
+buf_flush_LRU_list_batch(ulint max, flush_counters_t* n)
{
- buf_page_t* bpage;
ulint scanned = 0;
ulint free_len = UT_LIST_GET_LEN(buf_pool.free);
ulint lru_len = UT_LIST_GET_LEN(buf_pool.LRU);
- ulint withdraw_depth = 0;
- n->flushed = 0;
- n->evicted = 0;
- n->unzip_LRU_evicted = 0;
ut_ad(mutex_own(&buf_pool.mutex));
- if (buf_pool.curr_size < buf_pool.old_size
- && buf_pool.withdraw_target > 0) {
- withdraw_depth = buf_pool.withdraw_target
- - UT_LIST_GET_LEN(buf_pool.withdraw);
- }
+ n->evicted = 0;
+ const ulint withdraw_depth = buf_pool.withdraw_depth();
- for (bpage = UT_LIST_GET_LAST(buf_pool.LRU);
+ for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool.LRU);
bpage != NULL && n->flushed + n->evicted < max
&& free_len < srv_LRU_scan_depth + withdraw_depth
&& lru_len > BUF_LRU_MIN_LEN;
@@ -1701,27 +1773,32 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t* n)
BPageMutex* block_mutex = buf_page_get_mutex(bpage);
- mutex_enter(block_mutex);
+ bool acquired = mutex_enter_nowait(block_mutex) == 0;
- if (buf_flush_ready_for_replace(bpage)) {
+ if (acquired && buf_flush_ready_for_replace(bpage)) {
/* block is ready for eviction i.e., it is
clean and is not IO-fixed or buffer fixed. */
- mutex_exit(block_mutex);
if (buf_LRU_free_page(bpage, true)) {
++n->evicted;
+ mutex_enter(&buf_pool.mutex);
+ } else {
+ mutex_exit(block_mutex);
}
- } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_LRU)) {
+ } else if (acquired
+ && buf_flush_ready_for_flush(bpage, BUF_FLUSH_LRU)) {
/* Block is ready for flush. Dispatch an IO
request. The IO helper thread will put it on
free list in IO completion routine. */
mutex_exit(block_mutex);
buf_flush_page_and_try_neighbors(
bpage, BUF_FLUSH_LRU, max, &n->flushed);
+ } else if (!acquired) {
+ ut_ad(buf_pool.lru_hp.is_hp(prev));
} else {
/* Can't evict or dispatch this block. Go to
previous. */
- ut_ad(buf_pool.lru_hp.is_hp(prev));
mutex_exit(block_mutex);
+ ut_ad(buf_pool.lru_hp.is_hp(prev));
}
ut_ad(!mutex_own(block_mutex));
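
The hunk above replaces the blocking mutex_enter() with mutex_enter_nowait(), so a single contended block mutex no longer stalls the whole LRU batch; when the try-lock fails, the lru_hp hazard pointer guarantees that the saved prev pointer is still a valid resume position. A minimal standalone sketch of the try-lock-and-skip pattern, using std::mutex in place of the InnoDB latches (all names below are illustrative, not from the patch):

#include <cstddef>
#include <list>
#include <mutex>

struct page { std::mutex m; bool clean = true; };

// Scan under list_mutex; pages whose own mutex is busy are skipped
// (like mutex_enter_nowait()) instead of blocking the whole batch.
std::size_t evict_clean_pages(std::list<page*>& lru, std::mutex& list_mutex)
{
  std::size_t evicted = 0;
  std::lock_guard<std::mutex> list_guard(list_mutex);
  for (auto it = lru.begin(); it != lru.end(); ) {
    page* p = *it;
    if (!p->m.try_lock()) { ++it; continue; }  // contended: move on
    const bool evict = p->clean;               // decide under the page mutex
    p->m.unlock();
    if (evict) { it = lru.erase(it); ++evicted; }
    else ++it;
  }
  return evicted;
}
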
@@ -1764,6 +1841,8 @@ Whether LRU or unzip_LRU is used depends on the state of the system.
@param[out] n counts of flushed and evicted pages */
static void buf_do_LRU_batch(ulint max, flush_counters_t* n)
{
+ ut_ad(mutex_own(&buf_pool.mutex));
+
n->unzip_LRU_evicted = buf_LRU_evict_from_unzip_LRU()
? buf_free_from_unzip_LRU_list_batch(max) : 0;
@@ -1793,8 +1872,6 @@ static ulint buf_do_flush_list_batch(ulint min_n, lsn_t lsn_limit)
ulint count = 0;
ulint scanned = 0;
- ut_ad(mutex_own(&buf_pool.mutex));
-
/* Start from the end of the list looking for a suitable
block to be flushed. */
mutex_enter(&buf_pool.flush_list_mutex);
@@ -1818,7 +1895,6 @@ static ulint buf_do_flush_list_batch(ulint min_n, lsn_t lsn_limit)
prev = UT_LIST_GET_PREV(list, bpage);
buf_pool.flush_hp.set(prev);
- mutex_exit(&buf_pool.flush_list_mutex);
#ifdef UNIV_DEBUG
bool flushed =
@@ -1826,8 +1902,6 @@ static ulint buf_do_flush_list_batch(ulint min_n, lsn_t lsn_limit)
buf_flush_page_and_try_neighbors(
bpage, BUF_FLUSH_LIST, min_n, &count);
- mutex_enter(&buf_pool.flush_list_mutex);
-
ut_ad(flushed || buf_pool.flush_hp.is_hp(prev));
--len;
@@ -1852,8 +1926,6 @@ static ulint buf_do_flush_list_batch(ulint min_n, lsn_t lsn_limit)
count);
}
- ut_ad(mutex_own(&buf_pool.mutex));
-
return(count);
}
@@ -1883,13 +1955,13 @@ buf_flush_batch(
ut_ad(flush_type == BUF_FLUSH_LRU
|| !sync_check_iterate(dict_sync_check()));
- mutex_enter(&buf_pool.mutex);
-
- /* Note: The buffer pool mutex is released and reacquired within
+ /* Note: The buffer pool mutexes are released and reacquired within
the flush functions. */
switch (flush_type) {
case BUF_FLUSH_LRU:
+ mutex_enter(&buf_pool.mutex);
buf_do_LRU_batch(min_n, n);
+ mutex_exit(&buf_pool.mutex);
break;
case BUF_FLUSH_LIST:
n->flushed = buf_do_flush_list_batch(min_n, lsn_limit);
@@ -1899,8 +1971,6 @@ buf_flush_batch(
ut_error;
}
- mutex_exit(&buf_pool.mutex);
-
DBUG_LOG("ib_buf", "flush " << flush_type << " completed");
}
@@ -1931,14 +2001,14 @@ bool buf_flush_start(buf_flush_t flush_type)
{
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
- mutex_enter(&buf_pool.mutex);
+ mutex_enter(&buf_pool.flush_state_mutex);
if (buf_pool.n_flush[flush_type] > 0
|| buf_pool.init_flush[flush_type] == TRUE) {
/* There is already a flush batch of the same type running */
- mutex_exit(&buf_pool.mutex);
+ mutex_exit(&buf_pool.flush_state_mutex);
return(false);
}
@@ -1947,16 +2017,17 @@ bool buf_flush_start(buf_flush_t flush_type)
os_event_reset(buf_pool.no_flush[flush_type]);
- mutex_exit(&buf_pool.mutex);
+ mutex_exit(&buf_pool.flush_state_mutex);
return(true);
}
-/** End a buffer flush batch.
+/** End a buffer flush batch for the LRU or flush list
@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST */
void buf_flush_end(buf_flush_t flush_type)
{
- mutex_enter(&buf_pool.mutex);
+ mutex_enter(&buf_pool.flush_state_mutex);
buf_pool.init_flush[flush_type] = FALSE;
@@ -1969,7 +2040,7 @@ void buf_flush_end(buf_flush_t flush_type)
os_event_set(buf_pool.no_flush[flush_type]);
}
- mutex_exit(&buf_pool.mutex);
+ mutex_exit(&buf_pool.flush_state_mutex);
if (!srv_read_only_mode) {
buf_dblwr_flush_buffered_writes();
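
With the two hunks above, buf_flush_start() and buf_flush_end() serialize on the new flush_state_mutex, which protects only n_flush[], init_flush[] and the no_flush[] events; starting or finishing a batch no longer needs buf_pool.mutex at all. A hedged sketch of the same begin/end protocol built from standard primitives (names are illustrative):

#include <condition_variable>
#include <mutex>

struct flush_state {
  std::mutex m;                  // plays the role of flush_state_mutex
  std::condition_variable done;  // plays the role of the no_flush[] event
  int n_running = 0;

  bool start() {                 // like buf_flush_start(): one batch at a time
    std::lock_guard<std::mutex> g(m);
    if (n_running) return false;
    ++n_running;
    return true;
  }
  void end() {                   // like buf_flush_end()
    { std::lock_guard<std::mutex> g(m); --n_running; }
    done.notify_all();
  }
  void wait_idle() {             // like buf_flush_wait_batch_end()
    std::unique_lock<std::mutex> g(m);
    done.wait(g, [this] { return n_running == 0; });
  }
};
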
@@ -2060,14 +2131,16 @@ void buf_flush_wait_flushed(lsn_t new_oldest)
/** This utility flushes dirty blocks from the end of the flush list.
NOTE: The calling thread is not allowed to own any latches on pages!
-@param[in] min_n wished minimum mumber of blocks flushed (it is
-not guaranteed that the actual number is that big, though)
+@param[in]	min_n		wished minimum number of blocks flushed
+ (it is not guaranteed that the actual number
+ is that big, though)
@param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose
-oldest_modification is smaller than this should be flushed (if their number
-does not exceed min_n), otherwise ignored
+ oldest_modification is smaller than this
+ should be flushed (if their number does not
+ exceed min_n), otherwise ignored
@param[out] n_processed the number of pages which were processed is
-passed back to caller. Ignored if NULL.
-@retval true if a batch was queued successfully
+ passed back to caller. Ignored if NULL.
+@retval true if a batch was queued successfully
@retval false if another batch of same type was already running */
bool buf_flush_lists(ulint min_n, lsn_t lsn_limit, ulint *n_processed)
{
@@ -2087,13 +2160,13 @@ bool buf_flush_lists(ulint min_n, lsn_t lsn_limit, ulint *n_processed)
return success;
}
-/******************************************************************//**
-This function picks up a single page from the tail of the LRU
+/** This function picks up a single page from the tail of the LRU
list, flushes it (if it is dirty), removes it from page_hash and LRU
list and puts it on the free list. It is called from user threads when
they are unable to find a replaceable page at the tail of the LRU
list i.e.: when the background LRU flushing in the page_cleaner thread
is not fast enough to keep pace with the workload.
@return true if success. */
bool buf_flush_single_page_from_LRU()
{
@@ -2121,12 +2194,12 @@ bool buf_flush_single_page_from_LRU()
if (buf_flush_ready_for_replace(bpage)) {
/* block is ready for eviction i.e., it is
clean and is not IO-fixed or buffer fixed. */
- mutex_exit(block_mutex);
if (buf_LRU_free_page(bpage, true)) {
- mutex_exit(&buf_pool.mutex);
freed = true;
break;
+ } else {
+ mutex_exit(block_mutex);
}
} else if (buf_flush_ready_for_flush(
@@ -2180,28 +2253,18 @@ config parameter innodb_LRU_scan_depth.
@return total pages flushed */
static ulint buf_flush_LRU_list()
{
- ulint scan_depth, withdraw_depth;
flush_counters_t n;
memset(&n, 0, sizeof(flush_counters_t));
/* srv_LRU_scan_depth can be arbitrarily large value.
We cap it with current LRU size. */
- mutex_enter(&buf_pool.mutex);
- scan_depth = UT_LIST_GET_LEN(buf_pool.LRU);
- if (buf_pool.curr_size < buf_pool.old_size
- && buf_pool.withdraw_target > 0) {
- withdraw_depth = buf_pool.withdraw_target
- - UT_LIST_GET_LEN(buf_pool.withdraw);
+ const ulint withdraw_depth = buf_pool.withdraw_depth();
+ ulint scan_depth = UT_LIST_GET_LEN(buf_pool.LRU);
+	if (withdraw_depth > srv_LRU_scan_depth) {
+ scan_depth = std::min(withdraw_depth, scan_depth);
} else {
- withdraw_depth = 0;
- }
- mutex_exit(&buf_pool.mutex);
- if (withdraw_depth > srv_LRU_scan_depth) {
- scan_depth = ut_min(withdraw_depth, scan_depth);
- } else {
- scan_depth = ut_min(static_cast<ulint>(srv_LRU_scan_depth),
- scan_depth);
+ scan_depth = std::min<ulint>(srv_LRU_scan_depth, scan_depth);
}
/* Currently one of page_cleaners is the only thread
that can trigger an LRU flush at the same time.
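
Several call sites above now use buf_pool.withdraw_depth(), whose definition is not part of this diff. Judging from the open-coded arithmetic it replaces (curr_size < old_size, withdraw_target minus the length of the withdraw list), it plausibly has the following shape; this is a hypothetical reconstruction, not code from the patch:

// Hypothetical accessor mirroring the removed computation: how many
// more blocks must still be withdrawn for a pool shrink, or 0 when
// no shrink is in progress.
ulint buf_pool_t::withdraw_depth() const
{
	if (curr_size >= old_size || withdraw_target == 0)
		return 0;
	const ulint withdrawn = UT_LIST_GET_LEN(withdraw);
	return withdraw_target > withdrawn ? withdraw_target - withdrawn : 0;
}
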
@@ -2215,10 +2278,10 @@ static ulint buf_flush_LRU_list()
/** Wait for any possible LRU flushes to complete. */
void buf_flush_wait_LRU_batch_end()
{
- mutex_enter(&buf_pool.mutex);
+ mutex_enter(&buf_pool.flush_state_mutex);
bool wait = buf_pool.n_flush[BUF_FLUSH_LRU]
|| buf_pool.init_flush[BUF_FLUSH_LRU];
- mutex_exit(&buf_pool.mutex);
+ mutex_exit(&buf_pool.flush_state_mutex);
if (wait) {
buf_flush_wait_batch_end(BUF_FLUSH_LRU);
}
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 2ee0c02d964..3155733fbc3 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -125,8 +125,7 @@ least this many milliseconds ago. Not protected by any mutex or latch. */
uint buf_LRU_old_threshold_ms;
/* @} */
-/******************************************************************//**
-Takes a block out of the LRU list and page hash table.
+/** Takes a block out of the LRU list and page hash table.
If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
the object will be freed.
@@ -135,19 +134,19 @@ and the appropriate hash_lock. This function will release the
buf_page_get_mutex() and the hash_lock.
If a compressed page is freed other compressed pages may be relocated.
+
+@param[in] bpage block, must contain a file page and
+ be in a state where it can be freed; there
+ may or may not be a hash index to the page
+@param[in] zip true if should remove also the
+ compressed page of an uncompressed page
@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The
caller needs to free the page to the free list
@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
this case the block is already returned to the buddy allocator. */
static MY_ATTRIBUTE((warn_unused_result))
-bool
-buf_LRU_block_remove_hashed(
-/*========================*/
- buf_page_t* bpage, /*!< in: block, must contain a file page and
- be in a state where it can be freed; there
- may or may not be a hash index to the page */
- bool zip); /*!< in: true if should remove also the
- compressed page of an uncompressed page */
+bool buf_LRU_block_remove_hashed(buf_page_t *bpage, bool zip);
+
/******************************************************************//**
Puts a file page whose has no hash index to the free list. */
static
@@ -161,7 +160,6 @@ buf_LRU_block_free_hashed_page(
@param[in] bpage control block */
static inline void incr_LRU_size_in_bytes(const buf_page_t* bpage)
{
- /* FIXME: use atomics, not mutex */
ut_ad(mutex_own(&buf_pool.mutex));
buf_pool.stat.LRU_bytes += bpage->physical_size();
@@ -231,8 +229,7 @@ buf_LRU_drop_page_hash_batch(ulint space_id, const ulint* arr, ulint count)
}
}
-/******************************************************************//**
-When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
+/** When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
hash index entries belonging to that table. This function tries to
do that in batch. Note that this is a 'best effort' attempt and does
not guarantee that ALL hash entries will be removed.
@@ -303,7 +300,7 @@ next_page:
goto next_page;
}
- /* Array full. We release the buf_pool.mutex to obey
+ /* Array full. We release the LRU list mutex to obey
the latching order. */
mutex_exit(&buf_pool.mutex);
@@ -313,7 +310,7 @@ next_page:
mutex_enter(&buf_pool.mutex);
- /* Note that we released the buf_pool mutex above
+ /* Note that we released the buf_pool.mutex above
after reading the prev_bpage during processing of a
page_hash_batch (i.e.: when the array was full).
Because prev_bpage could belong to a compressed-only
@@ -327,8 +324,7 @@ next_page:
guarantee that ALL such entries will be dropped. */
/* If, however, bpage has been removed from LRU list
- to the free list then we should restart the scan.
- bpage->state is protected by buf_pool mutex. */
+ to the free list then we should restart the scan. */
if (bpage != NULL
&& buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
@@ -343,9 +339,9 @@ next_page:
ut_free(page_arr);
}
 /** Try to drop the adaptive hash index for a tablespace.
 @param[in,out]	table	table
 @return whether anything was dropped */
bool buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table)
{
for (dict_index_t* index = dict_table_get_first_index(table);
@@ -362,28 +358,23 @@ bool buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table)
}
/**
-While flushing (or removing dirty) pages from a tablespace we don't
-want to hog the CPU and resources. Release the buffer pool and block
-mutex and try to force a context switch. Then reacquire the same mutexes.
+While flushing (or removing dirty) pages from a tablespace we don't
+want to hog the CPU and resources. Release the LRU list and block
+mutexes and try to force a context switch. Then reacquire the same mutexes.
The current page is "fixed" before the release of the mutexes and then
"unfixed" again once we have reacquired the mutexes.
@param[in,out] bpage current page */
static void buf_flush_yield(buf_page_t* bpage)
{
- BPageMutex* block_mutex;
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
+ ut_ad(mutex_own(&buf_pool.mutex));
+ ut_ad(mutex_own(block_mutex));
ut_ad(buf_page_in_file(bpage));
- block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
/* "Fix" the block so that the position cannot be
changed after we release the buffer pool and
block mutexes. */
buf_page_set_sticky(bpage);
- /* Now it is safe to release the buf_pool.mutex. */
+ /* Now it is safe to release the LRU list mutex. */
mutex_exit(&buf_pool.mutex);
mutex_exit(block_mutex);
@@ -394,14 +385,14 @@ static void buf_flush_yield(buf_page_t* bpage)
mutex_enter(block_mutex);
/* "Unfix" the block now that we have both the
- buffer pool and block mutex again. */
+ LRU list and block mutexes again. */
buf_page_unset_sticky(bpage);
mutex_exit(block_mutex);
}
/******************************************************************//**
-If we have hogged the resources for too long then release the buffer
-pool and flush list mutex and do a thread yield. Set the current page
+If we have hogged the resources for too long then release the LRU list and
+flush list mutexes and do a thread yield. Set the current page
to "sticky" so that it is not relocated during the yield.
@return true if yielded */
static MY_ATTRIBUTE((warn_unused_result))
@@ -409,7 +400,9 @@ bool
buf_flush_try_yield(
/*================*/
buf_page_t* bpage, /*!< in/out: bpage to remove */
- ulint processed) /*!< in: number of pages processed */
+ ulint processed, /*!< in: number of pages processed */
+ bool* must_restart) /*!< in/out: if true, we have to
+ restart the flush list scan */
{
/* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the
loop we release buf_pool.mutex to let other threads
@@ -419,11 +412,35 @@ buf_flush_try_yield(
if (bpage != NULL
&& processed >= BUF_LRU_DROP_SEARCH_SIZE
- && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
+ && buf_page_get_io_fix_unlocked(bpage) == BUF_IO_NONE) {
+
+ BPageMutex* block_mutex = buf_page_get_mutex(bpage);
mutex_exit(&buf_pool.flush_list_mutex);
- /* Release the buffer pool and block mutex
+ /* We don't have to worry about bpage becoming a dangling
+ pointer by a compressed page flush list relocation because
+ buf_page_get_gen() won't be called for pages from this
+ tablespace. */
+
+ mutex_enter(block_mutex);
+ /* Recheck the I/O fix and the flush list presence now that we
+ hold the right mutex */
+ if (buf_page_get_io_fix(bpage) != BUF_IO_NONE
+ || bpage->oldest_modification == 0) {
+
+ mutex_exit(block_mutex);
+
+ *must_restart = true;
+
+ mutex_enter(&buf_pool.flush_list_mutex);
+
+ return false;
+ }
+
+ *must_restart = false;
+
+ /* Release the LRU list and buf_page_get_mutex() mutex
to give the other threads a go. */
buf_flush_yield(bpage);
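
The recheck added above is the usual validate-after-relock step: io_fix and flush-list membership were last observed before flush_list_mutex was released, so they must be re-read under the block mutex, and the scan restarted when they changed. A standalone sketch of the pattern, including the sticky fix that buf_flush_yield() uses to pin the page while every latch is down (illustrative names, std primitives):

#include <atomic>
#include <mutex>
#include <thread>

struct node {
  std::mutex m;
  std::atomic<int> fix_count{0};
  bool io_fixed = false;
  bool on_flush_list = true;
};

// Drop the list latch, take the node mutex, revalidate what was seen
// earlier; on success pin the node, yield, then reacquire and unpin.
bool yield_over_node(node& n, std::mutex& list_mutex, bool* must_restart)
{
  list_mutex.unlock();            // obey the latching order
  n.m.lock();
  if (n.io_fixed || !n.on_flush_list) {
    n.m.unlock();                 // state changed while unlocked:
    list_mutex.lock();            // the caller has to rescan
    *must_restart = true;
    return false;
  }
  *must_restart = false;
  n.fix_count.fetch_add(1);       // like buf_page_set_sticky()
  n.m.unlock();
  std::this_thread::yield();      // give other threads a go
  n.m.lock();
  n.fix_count.fetch_sub(1);       // like buf_page_unset_sticky()
  n.m.unlock();
  list_mutex.lock();
  return true;
}
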
@@ -445,18 +462,19 @@ buf_flush_try_yield(
/** Remove a single page from flush_list.
@param[in,out] bpage buffer page to remove
-@param[in] flush whether to flush the page before removing
+@param[in]	flush	if true, flush the page to disk without removing;
+			otherwise remove it without flushing to disk
+@param[in,out] must_restart flag if must restart the flush list scan
@return true if page was removed. */
-static bool buf_flush_or_remove_page(buf_page_t *bpage, bool flush)
+static bool buf_flush_or_remove_page(buf_page_t *bpage, bool flush, bool *must_restart)
{
ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(mutex_own(&buf_pool.flush_list_mutex));
- /* bpage->space and bpage->io_fix are protected by
- buf_pool.mutex and block_mutex. It is safe to check
- them while holding buf_pool.mutex only. */
+ /* It is safe to check bpage->space and bpage->io_fix while holding
+ buf_pool.mutex only. */
- if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+ if (buf_page_get_io_fix_unlocked(bpage) != BUF_IO_NONE) {
/* We cannot remove this page during this scan
yet; maybe the system is currently reading it
@@ -469,20 +487,30 @@ static bool buf_flush_or_remove_page(buf_page_t *bpage, bool flush)
bool processed = false;
block_mutex = buf_page_get_mutex(bpage);
-
- /* We have to release the flush_list_mutex to obey the
- latching order. We are however guaranteed that the page
- will stay in the flush_list and won't be relocated because
- buf_flush_remove() and buf_flush_relocate_on_flush_list()
- need buf_pool.mutex as well. */
+ /* We don't have to worry about bpage becoming a dangling
+ pointer by a compressed page flush list relocation because
+ buf_page_get_gen() won't be called for pages from this
+ tablespace. */
mutex_exit(&buf_pool.flush_list_mutex);
mutex_enter(block_mutex);
- ut_ad(bpage->oldest_modification != 0);
+ /* Recheck the page I/O fix and the flush list presence now
+ that we hold the right mutex. */
+ if (buf_page_get_io_fix(bpage) != BUF_IO_NONE
+ || bpage->oldest_modification == 0) {
+
+ /* The page became I/O-fixed or is not on the flush
+			list anymore; this invalidates any flush-list-page
+ pointers we have. */
- if (!flush) {
+ mutex_exit(block_mutex);
+
+ *must_restart = true;
+ processed = false;
+
+ } else if (!flush) {
buf_flush_remove(bpage);
@@ -492,8 +520,8 @@ static bool buf_flush_or_remove_page(buf_page_t *bpage, bool flush)
} else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) {
- /* The following call will release the buffer pool
- and block mutex. */
+ /* The following call will release the LRU list
+ and block mutexes. */
processed = buf_flush_page(
bpage, BUF_FLUSH_SINGLE_PAGE, false);
@@ -528,8 +556,10 @@ static bool buf_flush_or_remove_pages(ulint id, bool flush, ulint first)
buf_page_t* bpage;
ulint processed = 0;
+ ut_ad(mutex_own(&buf_pool.mutex));
mutex_enter(&buf_pool.flush_list_mutex);
rescan:
+ bool must_restart = false;
bool all_freed = true;
for (bpage = UT_LIST_GET_LAST(buf_pool.flush_list);
@@ -548,15 +578,16 @@ rescan:
different tablespace. */
} else if (bpage->id.page_no() < first) {
/* Skip this block, because it is below the limit. */
- } else if (!buf_flush_or_remove_page(bpage, flush)) {
+ } else if (!buf_flush_or_remove_page(bpage, flush,
+ &must_restart)) {
/* Remove was unsuccessful, we have to try again
by scanning the entire list from the end.
This also means that we never released the
- buf_pool mutex. Therefore we can trust the prev
+			flush list mutex. Therefore we can trust the prev
pointer.
buf_flush_or_remove_page() released the
- flush list mutex but not the buf_pool mutex.
+ flush list mutex but not the LRU list mutex.
Therefore it is possible that a new page was
added to the flush list. For example, in case
where we are at the head of the flush list and
@@ -574,17 +605,23 @@ rescan:
} else if (flush) {
/* The processing was successful. And during the
- processing we have released the buf_pool mutex
+ processing we have released all the buf_pool mutexes
when calling buf_page_flush(). We cannot trust
prev pointer. */
goto rescan;
+ } else if (must_restart) {
+
+ ut_ad(!all_freed);
+ break;
}
#ifdef BTR_CUR_HASH_ADAPT
++processed;
/* Yield if we have hogged the CPU and mutexes for too long. */
- if (buf_flush_try_yield(prev, processed)) {
+ if (buf_flush_try_yield(prev, processed,
+ &must_restart)) {
+ ut_ad(!must_restart);
/* Reset the batch size counter if we had to yield. */
processed = 0;
}
@@ -621,7 +658,11 @@ static void buf_flush_dirty_pages(ulint id, bool flush, ulint first)
for (buf_page_t *bpage= UT_LIST_GET_FIRST(buf_pool.flush_list); bpage;
bpage= UT_LIST_GET_NEXT(list, bpage))
{
- ut_ad(buf_page_in_file(bpage));
+ const auto state= bpage->state;
+ ut_ad(state == BUF_BLOCK_FILE_PAGE ||
+ state == BUF_BLOCK_ZIP_PAGE ||
+ state == BUF_BLOCK_ZIP_DIRTY ||
+ state == BUF_BLOCK_REMOVE_HASH);
ut_ad(bpage->in_flush_list);
ut_ad(bpage->oldest_modification > 0);
ut_ad(id != bpage->id.space());
@@ -630,7 +671,6 @@ static void buf_flush_dirty_pages(ulint id, bool flush, ulint first)
mutex_exit(&buf_pool.flush_list_mutex);
}
#endif
-
mutex_exit(&buf_pool.mutex);
}
@@ -653,14 +693,14 @@ void buf_LRU_flush_or_remove_pages(ulint id, bool flush, ulint first)
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/********************************************************************//**
-Insert a compressed block into buf_pool.zip_clean in the LRU order. */
+/** Insert a compressed block into buf_pool.zip_clean in the LRU order.
+@param[in] bpage pointer to the block in question */
void
buf_LRU_insert_zip_clean(
-/*=====================*/
- buf_page_t* bpage) /*!< in: pointer to the block in question */
+ buf_page_t* bpage)
{
ut_ad(mutex_own(&buf_pool.mutex));
+ ut_ad(mutex_own(&buf_pool.zip_mutex));
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
/* Find the first successor of bpage in the LRU list
@@ -710,12 +750,17 @@ static bool buf_LRU_free_from_unzip_LRU_list(bool scan_all)
prev_block = UT_LIST_GET_PREV(unzip_LRU, block);
+ mutex_enter(&block->mutex);
+
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->in_unzip_LRU_list);
ut_ad(block->page.in_LRU_list);
freed = buf_LRU_free_page(&block->page, false);
+ if (!freed)
+ mutex_exit(&block->mutex);
+
block = prev_block;
}
@@ -752,20 +797,21 @@ static bool buf_LRU_free_from_common_LRU_list(bool scan_all)
buf_pool.lru_scan_itr.set(prev);
- mutex_enter(mutex);
-
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
unsigned accessed = buf_page_is_accessed(bpage);
+ mutex_enter(mutex);
+
if (buf_flush_ready_for_replace(bpage)) {
- mutex_exit(mutex);
freed = buf_LRU_free_page(bpage, true);
- } else {
- mutex_exit(mutex);
}
+ if (!freed)
+ mutex_exit(mutex);
+
if (freed && !accessed) {
/* Keep track of pages that are evicted without
ever being accessed. This gives us a measure of
@@ -773,8 +819,10 @@ static bool buf_LRU_free_from_common_LRU_list(bool scan_all)
++buf_pool.stat.n_ra_pages_evicted;
}
- ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(!mutex_own(mutex));
+
+ if (freed)
+ break;
}
if (scanned) {
@@ -789,15 +837,29 @@ static bool buf_LRU_free_from_common_LRU_list(bool scan_all)
}
/** Try to free a replaceable block.
-@param[in] scan_all true=scan the whole LRU list,
- false=use BUF_LRU_SEARCH_SCAN_THRESHOLD
+@param[in]	scan_all	scan whole LRU list if true, otherwise scan
+ only BUF_LRU_SEARCH_SCAN_THRESHOLD blocks
@return true if found and freed */
bool buf_LRU_scan_and_free_block(bool scan_all)
{
- ut_ad(mutex_own(&buf_pool.mutex));
+ bool freed = false;
+ bool use_unzip_list = UT_LIST_GET_LEN(buf_pool.unzip_LRU) > 0;
- return(buf_LRU_free_from_unzip_LRU_list(scan_all)
- || buf_LRU_free_from_common_LRU_list(scan_all));
+ mutex_enter(&buf_pool.mutex);
+
+ if (use_unzip_list) {
+ freed = buf_LRU_free_from_unzip_LRU_list(scan_all);
+ }
+
+ if (!freed) {
+ freed = buf_LRU_free_from_common_LRU_list(scan_all);
+ }
+
+ if (!freed) {
+ mutex_exit(&buf_pool.mutex);
+ }
+
+ return(freed);
}
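
Note the asymmetric contract this rewrite introduces: buf_LRU_scan_and_free_block() now takes buf_pool.mutex itself, and on success the mutex has already been released inside buf_LRU_free_page(), while on failure the function releases it before returning; either way the caller never sees the mutex held. A compressed model of that contract (illustrative only):

#include <mutex>

std::mutex lru_mutex;                // stands in for buf_pool.mutex

bool free_one_page_locked()          // models buf_LRU_free_page()
{
  // ... on success the callee itself drops lru_mutex ...
  lru_mutex.unlock();
  return true;
}

bool scan_and_free()                 // models buf_LRU_scan_and_free_block()
{
  lru_mutex.lock();
  const bool freed = free_one_page_locked();
  if (!freed)
    lru_mutex.unlock();              // only the failure path unlocks here
  return freed;                      // never returns holding lru_mutex
}
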
/** @return whether less than 1/4 of the buffer pool is available */
@@ -815,7 +877,7 @@ buf_block_t* buf_LRU_get_free_only()
{
buf_block_t* block;
- ut_ad(mutex_own(&buf_pool.mutex));
+ mutex_enter(&buf_pool.free_list_mutex);
block = reinterpret_cast<buf_block_t*>(
UT_LIST_GET_FIRST(buf_pool.free));
@@ -828,13 +890,11 @@ buf_block_t* buf_LRU_get_free_only()
ut_ad(!block->page.in_LRU_list);
ut_a(!buf_page_in_file(&block->page));
UT_LIST_REMOVE(buf_pool.free, &block->page);
+ mutex_exit(&buf_pool.free_list_mutex);
- if (buf_pool.curr_size >= buf_pool.old_size
- || UT_LIST_GET_LEN(buf_pool.withdraw)
- >= buf_pool.withdraw_target
+ if (!buf_pool.withdraw_depth()
|| !buf_pool.will_be_withdrawn(block->page)) {
/* found valid free block */
- buf_page_mutex_enter(block);
/* No adaptive hash index entries may point to
a free block. */
assert_block_ahi_empty(block);
@@ -842,11 +902,11 @@ buf_block_t* buf_LRU_get_free_only()
buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
UNIV_MEM_ALLOC(block->frame, srv_page_size);
- buf_page_mutex_exit(block);
- break;
+ return(block);
}
/* This should be withdrawn */
+ mutex_enter(&buf_pool.free_list_mutex);
UT_LIST_ADD_LAST(
buf_pool.withdraw,
&block->page);
@@ -856,6 +916,8 @@ buf_block_t* buf_LRU_get_free_only()
UT_LIST_GET_FIRST(buf_pool.free));
}
+ mutex_exit(&buf_pool.free_list_mutex);
+
return(block);
}
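
buf_LRU_get_free_only() above no longer asserts buf_pool.mutex; the free list gets its own free_list_mutex, so handing out a free block does not contend with LRU traffic. The core of the change in miniature (illustrative):

#include <list>
#include <mutex>

template <typename Block>
Block* pop_free(std::list<Block*>& free_list, std::mutex& free_list_mutex)
{
  std::lock_guard<std::mutex> g(free_list_mutex);  // narrow, dedicated latch
  if (free_list.empty())
    return nullptr;
  Block* b = free_list.front();
  free_list.pop_front();
  return b;                                        // no global pool mutex held
}
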
@@ -866,10 +928,10 @@ function will either assert or issue a warning and switch on the
status monitor. */
static void buf_LRU_check_size_of_non_data_objects()
{
- ut_ad(mutex_own(&buf_pool.mutex));
-
+ bool buf_pool_resizing =
+		srv_buf_pool_size_changing.load(std::memory_order_relaxed);
if (!recv_recovery_is_on()
- && buf_pool.curr_size == buf_pool.old_size
+ && !buf_pool_resizing
&& UT_LIST_GET_LEN(buf_pool.free)
+ UT_LIST_GET_LEN(buf_pool.LRU) < buf_pool.curr_size / 20) {
@@ -884,7 +946,7 @@ static void buf_LRU_check_size_of_non_data_objects()
<< (buf_pool.curr_size >> (20U - srv_page_size_shift))
<< "M could be bigger.";
} else if (!recv_recovery_is_on()
- && buf_pool.curr_size == buf_pool.old_size
+ && !buf_pool_resizing
&& (UT_LIST_GET_LEN(buf_pool.free)
+ UT_LIST_GET_LEN(buf_pool.LRU))
< buf_pool.curr_size / 3) {
@@ -955,10 +1017,10 @@ buf_block_t* buf_LRU_get_free_block()
ulint n_iterations = 0;
ulint flush_failures = 0;
+ ut_ad(!mutex_own(&buf_pool.mutex));
+
MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH);
loop:
- mutex_enter(&buf_pool.mutex);
-
buf_LRU_check_size_of_non_data_objects();
DBUG_EXECUTE_IF("ib_lru_force_no_free_page",
@@ -970,7 +1032,6 @@ loop:
block = buf_LRU_get_free_only();
if (block != NULL) {
- mutex_exit(&buf_pool.mutex);
memset(&block->page.zip, 0, sizeof block->page.zip);
block->skip_flush_check = false;
return(block);
@@ -1003,14 +1064,12 @@ loop:
not_found:
#endif
- mutex_exit(&buf_pool.mutex);
-
if (freed) {
goto loop;
}
if (n_iterations > 20 && !buf_lru_free_blocks_error_printed
- && srv_buf_pool_old_size == srv_buf_pool_size) {
+ && !srv_buf_pool_size_changing.load(std::memory_order_relaxed)) {
ib::warn() << "Difficult to find free blocks in the buffer pool"
" (" << n_iterations << " search iterations)! "
@@ -1050,8 +1109,8 @@ not_found:
TODO: A more elegant way would have been to return the freed
up block to the caller here but the code that deals with
- removing the block from page_hash and LRU_list is fairly
- involved (particularly in case of compressed pages). We
+ removing the block from page_hash and LRU is fairly
+ involved (particularly in case of ROW_FORMAT=COMPRESSED pages). We
can do that in a separate patch sometime in future. */
if (!buf_flush_single_page_from_LRU()) {
@@ -1132,7 +1191,7 @@ static void buf_LRU_old_adjust_len()
}
/** Initialize the old blocks pointer in the LRU list. This function should be
 called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */
static void buf_LRU_old_init()
{
ut_ad(mutex_own(&buf_pool.mutex));
@@ -1261,14 +1320,14 @@ static inline void buf_LRU_remove_block(buf_page_t* bpage)
buf_LRU_old_adjust_len();
}
-/******************************************************************//**
-Adds a block to the LRU list of decompressed zip pages. */
+/** Adds a block to the LRU list of decompressed zip pages.
+@param[in] block control block
+@param[in] old TRUE if should be put to the end of the list,
+ else put to the start */
void
buf_unzip_LRU_add_block(
-/*====================*/
- buf_block_t* block, /*!< in: control block */
- ibool old) /*!< in: TRUE if should be put to the end
- of the list, else put to the start */
+ buf_block_t* block,
+ ibool old)
{
ut_ad(mutex_own(&buf_pool.mutex));
ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
@@ -1282,19 +1341,19 @@ buf_unzip_LRU_add_block(
}
}
-/******************************************************************//**
-Adds a block to the LRU list. Please make sure that the page_size is
+/** Adds a block to the LRU list. Please make sure that the page_size is
already set when invoking the function, so that we can get correct
-page_size from the buffer page when adding a block into LRU */
+page_size from the buffer page when adding a block into LRU
+@param[in] bpage control block
+@param[in] old TRUE if should be put to the old blocks in the LRU list,
+ else put to the start; if the LRU list is very short,
+ the block is added to the start, regardless of this
+ parameter */
UNIV_INLINE
void
buf_LRU_add_block_low(
-/*==================*/
- buf_page_t* bpage, /*!< in: control block */
- ibool old) /*!< in: TRUE if should be put to the old blocks
- in the LRU list, else put to the start; if the
- LRU list is very short, the block is added to
- the start, regardless of this parameter */
+ buf_page_t* bpage,
+ ibool old)
{
ut_ad(mutex_own(&buf_pool.mutex));
ut_a(buf_page_in_file(bpage));
@@ -1369,12 +1428,11 @@ buf_LRU_add_block(
buf_LRU_add_block_low(bpage, old);
}
-/******************************************************************//**
-Moves a block to the start of the LRU list. */
+/** Moves a block to the start of the LRU list.
+@param[in] bpage control block */
void
buf_LRU_make_block_young(
-/*=====================*/
- buf_page_t* bpage) /*!< in: control block */
+ buf_page_t* bpage)
{
ut_ad(mutex_own(&buf_pool.mutex));
@@ -1386,39 +1444,35 @@ buf_LRU_make_block_young(
buf_LRU_add_block_low(bpage, FALSE);
}
-/******************************************************************//**
-Try to free a block. If bpage is a descriptor of a compressed-only
+/** Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
-
-NOTE: If this function returns true, it will temporarily
-release buf_pool.mutex. Furthermore, the page frame will no longer be
-accessible via bpage.
-
-The caller must hold buf_pool.mutex and must not hold any
-buf_page_get_mutex() when calling this function.
+NOTE: this function may temporarily release and relock the
+buf_page_get_mutex(). Furthermore, the page frame will no longer be
+accessible via bpage. If this function returns true, it will also release
+the LRU list mutex.
+The caller must hold the LRU list and buf_page_get_mutex() mutexes.
+@param[in] bpage block to be freed
+@param[in] zip true if should remove also the compressed page of
+ an uncompressed page
@return true if freed, false otherwise. */
bool
buf_LRU_free_page(
-/*===============*/
- buf_page_t* bpage, /*!< in: block to be freed */
- bool zip) /*!< in: true if should remove also the
- compressed page of an uncompressed page */
+ buf_page_t* bpage,
+ bool zip)
{
buf_page_t* b = NULL;
rw_lock_t* hash_lock = buf_page_hash_lock_get(bpage->id);
BPageMutex* block_mutex = buf_page_get_mutex(bpage);
ut_ad(mutex_own(&buf_pool.mutex));
+ ut_ad(mutex_own(block_mutex));
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
- rw_lock_x_lock(hash_lock);
- mutex_enter(block_mutex);
-
if (!buf_page_can_relocate(bpage)) {
/* Do not free buffer fixed and I/O-fixed blocks. */
- goto func_exit;
+ return(false);
}
if (zip || !bpage->zip.data) {
@@ -1426,25 +1480,20 @@ buf_LRU_free_page(
/* Do not completely free dirty blocks. */
if (bpage->oldest_modification) {
- goto func_exit;
+ return(false);
}
} else if (bpage->oldest_modification > 0
&& buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
-func_exit:
- rw_lock_x_unlock(hash_lock);
- mutex_exit(block_mutex);
return(false);
} else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
b = buf_page_alloc_descriptor();
ut_a(b);
- new (b) buf_page_t(*bpage);
}
- ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
@@ -1452,10 +1501,46 @@ func_exit:
DBUG_PRINT("ib_buf", ("free page %u:%u",
bpage->id.space(), bpage->id.page_no()));
+ mutex_exit(block_mutex);
+
+ rw_lock_x_lock(hash_lock);
+ mutex_enter(block_mutex);
+
+ if (!buf_page_can_relocate(bpage)
+ || ((zip || bpage->zip.data == NULL)
+ && bpage->oldest_modification > 0)) {
+
+not_freed:
+ rw_lock_x_unlock(hash_lock);
+
+ if (b != NULL) {
+ buf_page_free_descriptor(b);
+ }
+
+ return(false);
+
+ } else if (bpage->oldest_modification > 0
+ && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
+
+ goto not_freed;
+
+ } else if (b) {
+ new (b) buf_page_t(*bpage);
+ }
+
ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
ut_ad(buf_page_can_relocate(bpage));
if (!buf_LRU_block_remove_hashed(bpage, zip)) {
+
+ mutex_exit(&buf_pool.mutex);
+
+ if (b != NULL) {
+ buf_page_free_descriptor(b);
+ }
+
return(true);
}
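
Because buf_LRU_free_page() is now entered with the block mutex already held, it must drop that mutex to take the hash lock in the correct order, retake the block mutex, and then repeat every check it made before the gap; the preallocated descriptor is freed on each bail-out path. A standalone sketch of this drop-relock-revalidate shape (illustrative names; the caller is assumed to hold p.m on entry):

#include <memory>
#include <mutex>

struct page { std::mutex m; bool can_relocate = true; bool dirty = false; };

bool relock_and_free(page& p, std::mutex& hash_lock)
{
  // Preallocated before the relock, like buf_page_alloc_descriptor().
  auto desc = std::make_unique<int>(0);

  p.m.unlock();                 // drop, then relock in the required order:
  hash_lock.lock();             // hash lock first,
  p.m.lock();                   // block mutex second

  if (!p.can_relocate || p.dirty) {  // revalidate everything checked earlier
    hash_lock.unlock();
    p.m.unlock();
    return false;               // desc is released automatically here
  }
  // ... remove the page from the hash table and the LRU list ...
  hash_lock.unlock();
  p.m.unlock();
  return true;
}
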
@@ -1551,6 +1636,8 @@ func_exit:
buf_LRU_add_block_low(b, buf_page_is_old(b));
}
+ mutex_enter(&buf_pool.zip_mutex);
+ rw_lock_x_unlock(hash_lock);
if (b->state == BUF_BLOCK_ZIP_PAGE) {
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
buf_LRU_insert_zip_clean(b);
@@ -1564,20 +1651,14 @@ func_exit:
page_zip_set_size(&bpage->zip, 0);
- mutex_exit(block_mutex);
-
/* Prevent buf_page_get_gen() from
- decompressing the block while we release
- buf_pool.mutex and block_mutex. */
- block_mutex = buf_page_get_mutex(b);
-
- mutex_enter(block_mutex);
+ decompressing the block while we release block_mutex. */
buf_page_set_sticky(b);
- mutex_exit(block_mutex);
+ mutex_exit(&buf_pool.zip_mutex);
- rw_lock_x_unlock(hash_lock);
+ mutex_exit(block_mutex);
}
mutex_exit(&buf_pool.mutex);
@@ -1615,14 +1696,12 @@ func_exit:
checksum);
}
- mutex_enter(&buf_pool.mutex);
-
if (b != NULL) {
- mutex_enter(block_mutex);
+ mutex_enter(&buf_pool.zip_mutex);
buf_page_unset_sticky(b);
- mutex_exit(block_mutex);
+ mutex_exit(&buf_pool.zip_mutex);
}
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
@@ -1630,18 +1709,14 @@ func_exit:
return(true);
}
-/******************************************************************//**
-Puts a block back to the free list. */
+/** Puts a block back to the free list.
+@param[in] block block must not contain a file page */
void
buf_LRU_block_free_non_file_page(
-/*=============================*/
- buf_block_t* block) /*!< in: block, must not contain a file page */
+ buf_block_t* block)
{
void* data;
- ut_ad(mutex_own(&buf_pool.mutex));
- ut_ad(buf_page_mutex_own(block));
-
switch (buf_block_get_state(block)) {
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_READY_FOR_USE:
@@ -1655,9 +1730,6 @@ buf_LRU_block_free_non_file_page(
ut_ad(!block->page.in_flush_list);
ut_ad(!block->page.in_LRU_list);
- buf_block_set_state(block, BUF_BLOCK_NOT_USED);
-
- UNIV_MEM_ALLOC(block->frame, srv_page_size);
#ifdef UNIV_DEBUG
/* Wipe contents of page to reveal possible stale pointers to it */
memset(block->frame, '\0', srv_page_size);
@@ -1674,37 +1746,36 @@ buf_LRU_block_free_non_file_page(
if (data != NULL) {
block->page.zip.data = NULL;
- buf_page_mutex_exit(block);
- buf_pool_mutex_exit_forbid();
ut_ad(block->zip_size());
buf_buddy_free(data, block->zip_size());
- buf_pool_mutex_exit_allow();
- buf_page_mutex_enter(block);
-
page_zip_set_size(&block->page.zip, 0);
}
- if (buf_pool.curr_size < buf_pool.old_size
- && UT_LIST_GET_LEN(buf_pool.withdraw) < buf_pool.withdraw_target
+ if (buf_pool.withdraw_depth()
&& buf_pool.will_be_withdrawn(block->page)) {
/* This should be withdrawn */
+ buf_block_set_state(block, BUF_BLOCK_NOT_USED);
+ mutex_enter(&buf_pool.free_list_mutex);
UT_LIST_ADD_LAST(
buf_pool.withdraw,
&block->page);
ut_d(block->in_withdraw_list = TRUE);
+ UNIV_MEM_FREE(block->frame, srv_page_size);
+ mutex_exit(&buf_pool.free_list_mutex);
} else {
+ buf_block_set_state(block, BUF_BLOCK_NOT_USED);
+ mutex_enter(&buf_pool.free_list_mutex);
UT_LIST_ADD_FIRST(buf_pool.free, &block->page);
ut_d(block->page.in_free_list = TRUE);
+ UNIV_MEM_FREE(block->frame, srv_page_size);
+ mutex_exit(&buf_pool.free_list_mutex);
}
-
- UNIV_MEM_FREE(block->frame, srv_page_size);
}
-/******************************************************************//**
-Takes a block out of the LRU list and page hash table.
+/** Takes a block out of the LRU list and page hash table.
If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
the object will be freed.
@@ -1713,19 +1784,17 @@ and the appropriate hash_lock. This function will release the
buf_page_get_mutex() and the hash_lock.
If a compressed page is freed other compressed pages may be relocated.
+
+@param[in] bpage block, must contain a file page and
+ be in a state where it can be freed; there
+ may or may not be a hash index to the page
+@param[in] zip true if should remove also the
+ compressed page of an uncompressed page
@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The
caller needs to free the page to the free list
@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
this case the block is already returned to the buddy allocator. */
-static
-bool
-buf_LRU_block_remove_hashed(
-/*========================*/
- buf_page_t* bpage, /*!< in: block, must contain a file page and
- be in a state where it can be freed; there
- may or may not be a hash index to the page */
- bool zip) /*!< in: true if should remove also the
- compressed page of an uncompressed page */
+static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, bool zip)
{
const buf_page_t* hashed_bpage;
rw_lock_t* hash_lock;
@@ -1867,11 +1936,9 @@ buf_LRU_block_remove_hashed(
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
mutex_exit(&buf_pool.zip_mutex);
rw_lock_x_unlock(hash_lock);
- buf_pool_mutex_exit_forbid();
buf_buddy_free(bpage->zip.data, bpage->zip_size());
- buf_pool_mutex_exit_allow();
buf_page_free_descriptor(bpage);
return(false);
@@ -1899,14 +1966,15 @@ buf_LRU_block_remove_hashed(
page_hash. Only possibility is when while invalidating
a tablespace we buffer fix the prev_page in LRU to
avoid relocation during the scan. But that is not
- possible because we are holding buf_pool mutex.
+ possible because we are holding LRU list mutex.
2) Not possible because in buf_page_init_for_read()
- we do a look up of page_hash while holding buf_pool
- mutex and since we are holding buf_pool mutex here
+ we do a look up of page_hash while holding LRU list
+ mutex and since we are holding LRU list mutex here
and by the time we'll release it in the caller we'd
have inserted the compressed only descriptor in the
page_hash. */
+ ut_ad(mutex_own(&buf_pool.mutex));
rw_lock_x_unlock(hash_lock);
mutex_exit(&((buf_block_t*) bpage)->mutex);
@@ -1918,12 +1986,9 @@ buf_LRU_block_remove_hashed(
ut_ad(!bpage->in_free_list);
ut_ad(!bpage->in_flush_list);
ut_ad(!bpage->in_LRU_list);
- buf_pool_mutex_exit_forbid();
buf_buddy_free(data, bpage->zip_size());
- buf_pool_mutex_exit_allow();
-
page_zip_set_size(&bpage->zip, 0);
}
@@ -1951,9 +2016,6 @@ buf_LRU_block_free_hashed_page(
buf_block_t* block) /*!< in: block, must contain a file page and
be in a state where it can be freed */
{
- ut_ad(mutex_own(&buf_pool.mutex));
-
- buf_page_mutex_enter(block);
if (buf_pool.flush_rbt == NULL) {
block->page.id
@@ -1963,7 +2025,6 @@ buf_LRU_block_free_hashed_page(
buf_block_set_state(block, BUF_BLOCK_MEMORY);
buf_LRU_block_free_non_file_page(block);
- buf_page_mutex_exit(block);
}
/** Remove one page from LRU list and put it to free list.
@@ -1973,19 +2034,19 @@ buf_LRU_block_free_hashed_page(
@param[in] old_page_id page number before bpage->id was invalidated */
void buf_LRU_free_one_page(buf_page_t* bpage, page_id_t old_page_id)
{
- rw_lock_t* hash_lock = buf_page_hash_lock_get(old_page_id);
+#ifdef UNIV_DEBUG
BPageMutex* block_mutex = buf_page_get_mutex(bpage);
+ rw_lock_t* hash_lock = buf_page_hash_lock_get(old_page_id);
ut_ad(mutex_own(&buf_pool.mutex));
-
- rw_lock_x_lock(hash_lock);
-
+ ut_ad(mutex_own(block_mutex));
+ ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
+#endif /* UNIV_DEBUG */
while (bpage->buf_fix_count > 0) {
/* Wait for other threads to release the fix count
before releasing the bpage from LRU list. */
}
- mutex_enter(block_mutex);
bpage->id = old_page_id;
@@ -2140,6 +2201,10 @@ void buf_LRU_validate()
ut_a(buf_pool.LRU_old_len == old_len);
+ mutex_exit(&buf_pool.mutex);
+
+ mutex_enter(&buf_pool.free_list_mutex);
+
CheckInFreeList::validate();
for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool.free);
@@ -2149,6 +2214,10 @@ void buf_LRU_validate()
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
}
+ mutex_exit(&buf_pool.free_list_mutex);
+
+ mutex_enter(&buf_pool.mutex);
+
CheckUnzipLRUAndLRUList::validate();
for (buf_block_t* block = UT_LIST_GET_FIRST(buf_pool.unzip_LRU);
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index 4dceaf8f524..83956fb6c8a 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -58,9 +58,11 @@ buf_read_page_handle_error(
const bool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
const page_id_t old_page_id = bpage->id;
+	rw_lock_t*	hash_lock = buf_page_hash_lock_get(bpage->id);
/* First unfix and release lock on the bpage */
mutex_enter(&buf_pool.mutex);
+ rw_lock_x_lock(hash_lock);
mutex_enter(buf_page_get_mutex(bpage));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
@@ -74,15 +76,16 @@ buf_read_page_handle_error(
BUF_IO_READ);
}
- mutex_exit(buf_page_get_mutex(bpage));
-
- /* remove the block from LRU list */
+ /* The hash lock and block mutex will be released during the "free" */
buf_LRU_free_one_page(bpage, old_page_id);
+ ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X)
+ && !rw_lock_own(hash_lock, RW_LOCK_S));
+
ut_ad(buf_pool.n_pend_reads > 0);
buf_pool.n_pend_reads--;
-
mutex_exit(&buf_pool.mutex);
}
/** Low-level function which reads a page asynchronously from a file to the
@@ -153,6 +156,7 @@ buf_read_page_low(
<< " unzip=" << unzip << ',' << (sync ? "sync" : "async"));
ut_ad(buf_page_in_file(bpage));
+ ut_ad(!mutex_own(&buf_pool.mutex));
if (sync) {
thd_wait_begin(NULL, THD_WAIT_DISKIO);
@@ -282,11 +286,8 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
return(0);
}
- mutex_enter(&buf_pool.mutex);
-
if (buf_pool.n_pend_reads
> buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
- mutex_exit(&buf_pool.mutex);
return(0);
}
@@ -295,19 +296,23 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
that is, reside near the start of the LRU list. */
for (i = low; i < high; i++) {
- if (const buf_page_t* bpage = buf_page_hash_get(
- page_id_t(page_id.space(), i))) {
- if (buf_page_is_accessed(bpage)
- && buf_page_peek_if_young(bpage)
- && ++recent_blocks
- >= 5 + buf_pool.read_ahead_area / 8) {
- mutex_exit(&buf_pool.mutex);
+ rw_lock_t* hash_lock;
+ const buf_page_t* bpage;
+
+		bpage = buf_page_hash_get_s_locked(
+			page_id_t(page_id.space(), i), &hash_lock);
+
+ if (bpage
+ && buf_page_is_accessed(bpage)
+ && buf_page_peek_if_young(bpage)
+ && ++recent_blocks >= 5 + buf_pool.read_ahead_area / 8) {
+ rw_lock_s_unlock(hash_lock);
goto read_ahead;
- }
+ }
+ if (bpage) {
+ rw_lock_s_unlock(hash_lock);
}
}
- mutex_exit(&buf_pool.mutex);
/* Do nothing */
return(0);
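
The random read-ahead probe above no longer holds buf_pool.mutex across the window scan; each lookup takes only the page-hash S-latch and releases it as soon as the accessed/young bits have been read. A standalone model of such a lookup, with std::shared_mutex standing in for the hash latch (illustrative):

#include <shared_mutex>
#include <unordered_map>

struct page_info { bool accessed = false; };

// Models buf_page_hash_get_s_locked(): on a hit the latch is handed
// back to the caller still S-locked; on a miss it is released here.
const page_info* hash_get_s_locked(
    const std::unordered_map<unsigned, page_info>& hash,
    unsigned page_no, std::shared_mutex& hash_latch)
{
  hash_latch.lock_shared();
  auto it = hash.find(page_no);
  if (it == hash.end()) {
    hash_latch.unlock_shared();
    return nullptr;
  }
  return &it->second;  // caller must hash_latch.unlock_shared() when done
}

The loops above follow exactly this shape: read the bits they need, then rw_lock_s_unlock(hash_lock) on every exit path.
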
@@ -362,10 +367,13 @@ read_ahead:
return(count);
}
-/** High-level function which reads a page asynchronously from a file to the
+/** High-level function which reads a page from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
-released by the i/o-handler thread.
+released by the buf_page_io_complete function.
+We use synchronous reads here, because in this case the page is used
+right after reading.
+
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@retval DB_SUCCESS if the page was read and is not corrupted,
@@ -378,12 +386,6 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size)
ulint count;
dberr_t err = DB_SUCCESS;
- /* We do synchronous IO because our AIO completion code
- is sub-optimal. See buf_page_io_complete(), we have to
- acquire the buffer pool mutex before acquiring the block
- mutex, required for updating the page state. The acquire
- of the buffer pool mutex becomes an expensive bottleneck. */
-
count = buf_read_page_low(
&err, true, BUF_READ_ANY_PAGE, page_id, zip_size, false);
@@ -480,6 +482,7 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
buf_page_t* bpage;
buf_frame_t* frame;
buf_page_t* pred_bpage = NULL;
+ unsigned pred_bpage_is_accessed = 0;
ulint pred_offset;
ulint succ_offset;
int asc_or_desc;
@@ -539,11 +542,8 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
return(0);
}
- mutex_enter(&buf_pool.mutex);
-
if (buf_pool.n_pend_reads
> buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
- mutex_exit(&buf_pool.mutex);
return(0);
}
@@ -565,8 +565,12 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
fail_count = 0;
+ rw_lock_t* hash_lock;
+
for (i = low; i < high; i++) {
- bpage = buf_page_hash_get(page_id_t(page_id.space(), i));
+		bpage = buf_page_hash_get_s_locked(
+			page_id_t(page_id.space(), i), &hash_lock);
if (bpage == NULL || !buf_page_is_accessed(bpage)) {
/* Not accessed */
@@ -583,7 +587,7 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
a little against this. */
int res = ut_ulint_cmp(
buf_page_is_accessed(bpage),
- buf_page_is_accessed(pred_bpage));
+ pred_bpage_is_accessed);
/* Accesses not in the right order */
if (res != 0 && res != asc_or_desc) {
fail_count++;
@@ -592,22 +596,29 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
if (fail_count > threshold) {
/* Too many failures: return */
- mutex_exit(&buf_pool.mutex);
+ if (bpage) {
+ rw_lock_s_unlock(hash_lock);
+ }
return(0);
}
- if (bpage && buf_page_is_accessed(bpage)) {
- pred_bpage = bpage;
+ if (bpage) {
+ if (buf_page_is_accessed(bpage)) {
+ pred_bpage = bpage;
+ pred_bpage_is_accessed
+ = buf_page_is_accessed(bpage);
+ }
+
+ rw_lock_s_unlock(hash_lock);
}
}
/* If we got this far, we know that enough pages in the area have
been accessed in the right order: linear read-ahead can be sensible */
- bpage = buf_page_hash_get(page_id);
+	bpage = buf_page_hash_get_s_locked(page_id, &hash_lock);
if (bpage == NULL) {
- mutex_exit(&buf_pool.mutex);
return(0);
}
@@ -633,7 +644,7 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
pred_offset = fil_page_get_prev(frame);
succ_offset = fil_page_get_next(frame);
- mutex_exit(&buf_pool.mutex);
+ rw_lock_s_unlock(hash_lock);
if ((page_id.page_no() == low)
&& (succ_offset == page_id.page_no() + 1)) {
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index d66dd642acb..757bd750bfa 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -1016,7 +1016,8 @@ void dict_sys_t::create()
mutex_create(LATCH_ID_DICT_SYS, &mutex);
- const ulint hash_size = buf_pool_get_curr_size()
+ ut_ad(buf_pool.is_initialised());
+ const ulint hash_size = buf_pool.curr_pool_size
/ (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE);
table_hash= hash_create(hash_size);
@@ -4811,14 +4812,15 @@ void dict_sys_t::resize()
hash_table_free(table_hash);
hash_table_free(table_id_hash);
hash_table_free(temp_id_hash);
+ ut_ad(buf_pool.is_initialised());
- const ulint hash_size = buf_pool_get_curr_size()
- / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE);
- table_hash = hash_create(hash_size);
- table_id_hash = hash_create(hash_size);
- temp_id_hash = hash_create(hash_size);
+ const ulint hash_size= buf_pool.curr_pool_size /
+ (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE);
+ table_hash= hash_create(hash_size);
+ table_id_hash= hash_create(hash_size);
+ temp_id_hash= hash_create(hash_size);
- for (dict_table_t* table= UT_LIST_GET_FIRST(table_LRU); table;
+ for (dict_table_t *table= UT_LIST_GET_FIRST(table_LRU); table;
table= UT_LIST_GET_NEXT(table_LRU, table))
{
ut_ad(!table->is_temporary());
@@ -4829,17 +4831,15 @@ void dict_sys_t::resize()
HASH_INSERT(dict_table_t, id_hash, table_id_hash, id_fold, table);
}
- for (dict_table_t* table = UT_LIST_GET_FIRST(table_non_LRU); table;
- table = UT_LIST_GET_NEXT(table_LRU, table)) {
- ulint fold = ut_fold_string(table->name.m_name);
- ulint id_fold = ut_fold_ull(table->id);
-
- HASH_INSERT(dict_table_t, name_hash, table_hash, fold, table);
-
- hash_table_t* id_hash = table->is_temporary()
- ? temp_id_hash : table_id_hash;
-
- HASH_INSERT(dict_table_t, id_hash, id_hash, id_fold, table);
+ for (dict_table_t *table= UT_LIST_GET_FIRST(table_non_LRU); table;
+ table = UT_LIST_GET_NEXT(table_LRU, table))
+ {
+ ulint fold= ut_fold_string(table->name.m_name);
+ ulint id_fold= ut_fold_ull(table->id);
+ HASH_INSERT(dict_table_t, name_hash, table_hash, fold, table);
+ hash_table_t *id_hash= table->is_temporary()
+ ? temp_id_hash : table_id_hash;
+ HASH_INSERT(dict_table_t, id_hash, id_hash, id_fold, table);
}
mutex_exit(&mutex);
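
Both dict_sys_t::create() and dict_sys_t::resize() size their hash tables from the buffer pool, now reading buf_pool.curr_pool_size directly (with an is_initialised() assertion) instead of calling the removed buf_pool_get_curr_size(). For a feel of the arithmetic (the constant's value is an assumption for illustration; only the formula comes from the code above):

// Assuming DICT_POOL_PER_TABLE_HASH == 512 and 8-byte machine words,
// a 128 MiB pool yields 134217728 / (512 * 8) = 32768 hash cells.
const unsigned long curr_pool_size = 128UL << 20;
const unsigned long hash_size = curr_pool_size / (512 * 8);
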
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 2107153b48e..81e49ca28bd 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -506,7 +506,11 @@ static PSI_mutex_info all_innodb_mutexes[] = {
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
PSI_KEY(buffer_block_mutex),
# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
+ PSI_KEY(buf_pool_flush_state_mutex),
PSI_KEY(buf_pool_mutex),
+ PSI_KEY(buf_pool_free_list_mutex),
+ PSI_KEY(buf_pool_zip_free_mutex),
+ PSI_KEY(buf_pool_zip_hash_mutex),
PSI_KEY(buf_pool_zip_mutex),
PSI_KEY(cache_last_read_mutex),
PSI_KEY(dict_foreign_err_mutex),
@@ -17611,7 +17615,7 @@ static
void
innodb_buffer_pool_size_update(THD*,st_mysql_sys_var*,void*, const void* save)
{
- longlong in_val = *static_cast<const longlong*>(save);
+ longlong in_val = *static_cast<const longlong*>(save);
snprintf(export_vars.innodb_buffer_pool_resize_status,
sizeof(export_vars.innodb_buffer_pool_resize_status),
@@ -18233,8 +18237,7 @@ innodb_srv_buf_dump_filename_validate(
#ifdef UNIV_DEBUG
static char* srv_buffer_pool_evict;
-/****************************************************************//**
-Evict all uncompressed pages of compressed tables from the buffer pool.
+/** Evict all uncompressed pages of compressed tables from the buffer pool.
Keep the compressed pages in the buffer pool.
@return whether all uncompressed pages were evicted */
static MY_ATTRIBUTE((warn_unused_result))
@@ -18253,9 +18256,25 @@ innodb_buffer_pool_evict_uncompressed()
ut_ad(block->page.in_LRU_list);
-		mutex_enter(&block->mutex);
- if (!buf_LRU_free_page(&block->page, false)) {
+ rw_lock_t* hash_lock
+ = buf_page_hash_lock_get(block->page.id);
+ rw_lock_x_lock(hash_lock);
+ mutex_enter(&block->mutex);
+
+ if (!buf_page_can_relocate(&block->page)
+ || block->page.oldest_modification) {
+
+ rw_lock_x_unlock(hash_lock);
+
mutex_exit(&block->mutex);
+
all_evicted = false;
+
+ } else {
+
+ btr_search_drop_page_hash_index(block);
+ auto old_page_id = block->page.id;
+ buf_LRU_free_one_page(&block->page, old_page_id);
}
block = prev_block;
}
@@ -21429,12 +21448,7 @@ innodb_buffer_pool_size_validate(
return(1);
}
#endif /* UNIV_DEBUG */
-
-
- mutex_enter(&buf_pool.mutex);
-
- if (srv_buf_pool_old_size != srv_buf_pool_size) {
- mutex_exit(&buf_pool.mutex);
+ if (srv_buf_pool_size_changing.load(std::memory_order_relaxed)) {
my_printf_error(ER_WRONG_ARGUMENTS,
"Another buffer pool resize is already in progress.", MYF(0));
return(1);
@@ -21445,13 +21459,11 @@ innodb_buffer_pool_size_validate(
*static_cast<ulonglong*>(save) = requested_buf_pool_size;
if (srv_buf_pool_size == ulint(intbuf)) {
- mutex_exit(&buf_pool.mutex);
/* nothing to do */
return(0);
}
if (srv_buf_pool_size == requested_buf_pool_size) {
- mutex_exit(&buf_pool.mutex);
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
ER_WRONG_ARGUMENTS,
"innodb_buffer_pool_size must be at least"
@@ -21460,9 +21472,8 @@ innodb_buffer_pool_size_validate(
/* nothing to do */
return(0);
}
-
+ srv_buf_pool_size_changing = true;
srv_buf_pool_size = requested_buf_pool_size;
- mutex_exit(&buf_pool.mutex);
if (intbuf != static_cast<longlong>(requested_buf_pool_size)) {
char buf[64];
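
innodb_buffer_pool_size_validate() now keys off the atomic srv_buf_pool_size_changing flag instead of comparing sizes under buf_pool.mutex. As written, the relaxed load in the check and the later store of true are separate steps, so two concurrent SET statements could in principle both pass the check; a single compare-exchange would make check-and-set atomic. A hedged sketch of that alternative (the flag name is from the patch; the guard shape is a suggestion, not what the patch does):

#include <atomic>

std::atomic<bool> srv_buf_pool_size_changing{false};

// Win the exclusive right to resize with a single atomic step.
bool try_begin_resize()
{
  bool expected = false;
  return srv_buf_pool_size_changing.compare_exchange_strong(
      expected, true, std::memory_order_acquire);
}

void end_resize()
{
  srv_buf_pool_size_changing.store(false, std::memory_order_release);
}
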
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index da0ca1e85d9..8481de7e6da 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -1608,18 +1608,20 @@ static ST_FIELD_INFO i_s_cmpmem_fields_info[] =
};
} // namespace Show
-/*******************************************************************//**
-Fill the dynamic table information_schema.innodb_cmpmem or
+/** Fill the dynamic table information_schema.innodb_cmpmem or
innodb_cmpmem_reset.
+@param[in] thd thread
+@param[in,out] tables tables to fill
+@param[in] item condition (ignored)
+@param[in] reset TRUE=reset cumulated counts
@return 0 on success, 1 on failure */
static
int
i_s_cmpmem_fill_low(
-/*================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- Item* , /*!< in: condition (ignored) */
- ibool reset) /*!< in: TRUE=reset cumulated counts */
+ THD* thd,
+ TABLE_LIST* tables,
+ Item* item,
+ ibool reset)
{
TABLE* table = (TABLE*) tables->table;
@@ -1636,8 +1638,9 @@ i_s_cmpmem_fill_low(
ulint zip_free_len_local[BUF_BUDDY_SIZES_MAX + 1];
buf_buddy_stat_t buddy_stat_local[BUF_BUDDY_SIZES_MAX + 1];
+ mutex_enter(&buf_pool.zip_free_mutex);
+
/* Save buddy stats for buffer pool in local variables. */
- mutex_enter(&buf_pool.mutex);
for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
zip_free_len_local[x] = (x < BUF_BUDDY_SIZES) ?
@@ -1646,13 +1649,13 @@ i_s_cmpmem_fill_low(
buddy_stat_local[x] = buf_pool.buddy_stat[x];
if (reset) {
- /* This is protected by buf_pool.mutex. */
+ /* This is protected by buf_pool.zip_free_mutex. */
buf_pool.buddy_stat[x].relocated = 0;
buf_pool.buddy_stat[x].relocated_usec = 0;
}
}
- mutex_exit(&buf_pool.mutex);
+ mutex_exit(&buf_pool.zip_free_mutex);
for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
buf_buddy_stat_t* buddy_stat = &buddy_stat_local[x];
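
The hunk above copies the buddy counters into locals while holding only the new zip_free_mutex, then formats the result rows with no latch held at all. The same snapshot idiom in miniature (illustrative):

#include <array>
#include <mutex>

struct buddy_stat { unsigned long used = 0, relocated = 0; };

std::mutex zip_free_mutex;                // protects the live counters
std::array<buddy_stat, 16> live_stats;

// Copy the shared counters under the mutex; all slow formatting work
// then runs on the local copy without any latch held.
std::array<buddy_stat, 16> snapshot_stats()
{
  std::lock_guard<std::mutex> g(zip_free_mutex);
  return live_stats;                      // copied while protected
}
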
@@ -4159,8 +4162,12 @@ i_s_innodb_buffer_page_get_info(
out: structure filled with scanned
info */
{
+ BPageMutex* mutex = buf_page_get_mutex(bpage);
+
page_info->block_id = pos;
+ mutex_enter(mutex);
+
page_info->page_state = buf_page_get_state(bpage) & 7;
/* Only fetch information for buffers that map to a tablespace,
@@ -4197,6 +4204,7 @@ i_s_innodb_buffer_page_get_info(
case BUF_IO_READ:
page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
page_info->newest_mod = 0;
+ mutex_exit(mutex);
return;
}
@@ -4222,6 +4230,8 @@ i_s_innodb_buffer_page_get_info(
} else {
page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
}
+
+ mutex_exit(mutex);
}
/*******************************************************************//**
@@ -4272,16 +4282,10 @@ static int i_s_innodb_buffer_page_fill(THD *thd, TABLE_LIST *tables, Item *)
/* For each chunk, we'll pre-allocate information
structures to cache the page information read from
- the buffer pool. Doing so before obtain any mutex */
+ the buffer pool */
info_buffer = (buf_page_info_t*) mem_heap_zalloc(
heap, mem_size);
- /* Obtain appropriate mutexes. Since this is diagnostic
- buffer pool info printout, we are not required to
- preserve the overall consistency, so we can
- release mutex periodically */
- mutex_enter(&buf_pool.mutex);
-
/* GO through each block in the chunk */
for (n_blocks = num_to_process; n_blocks--; block++) {
i_s_innodb_buffer_page_get_info(
@@ -4291,8 +4295,6 @@ static int i_s_innodb_buffer_page_fill(THD *thd, TABLE_LIST *tables, Item *)
num_page++;
}
- mutex_exit(&buf_pool.mutex);
-
/* Fill in information schema table with information
just collected from the buffer chunk scan */
status = i_s_innodb_buffer_page_fill(
@@ -4593,9 +4595,12 @@ i_s_innodb_buf_page_lru_fill(
DBUG_RETURN(0);
}
-/** Fill the table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU.
+/** Fill the table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU
+by going through the buffer pool's LRU list.
@param[in] thd thread
@param[in,out] tables tables to fill
@return 0 on success, 1 on failure */
static int i_s_innodb_fill_buffer_lru(THD *thd, TABLE_LIST *tables, Item *)
{
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index 4a7dbff9030..77f791a80b8 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -432,7 +432,8 @@ ibuf_init_at_db_start(void)
buffer pool size. Once ibuf struct is initialized this
value is updated with the user supplied size by calling
ibuf_max_size_update(). */
- ibuf.max_size = ((buf_pool_get_curr_size() >> srv_page_size_shift)
+ ut_ad(buf_pool.is_initialised());
+ ibuf.max_size = ((buf_pool.curr_pool_size >> srv_page_size_shift)
* CHANGE_BUFFER_DEFAULT_SIZE) / 100;
mutex_create(LATCH_ID_IBUF, &ibuf_mutex);
@@ -514,7 +515,8 @@ ibuf_max_size_update(
ulint new_val) /*!< in: new value in terms of
percentage of the buffer pool size */
{
- ulint new_size = ((buf_pool_get_curr_size() >> srv_page_size_shift)
+ ut_ad(buf_pool.is_initialised());
+ ulint new_size = ((buf_pool.curr_pool_size >> srv_page_size_shift)
* new_val) / 100;
mutex_enter(&ibuf_mutex);
ibuf.max_size = new_size;
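
For reference, the sizing arithmetic used above, assuming 16 KiB pages, a
128 MiB buffer pool, and CHANGE_BUFFER_DEFAULT_SIZE == 25 (the constant's
value is an assumption here, not shown in this hunk):

	// (128 MiB >> 14) = 8192 pages in the pool;
	// 8192 * 25 / 100 = 2048 pages allowed for the change buffer.
	static_assert((((128u << 20) >> 14) * 25) / 100 == 2048,
		      "change buffer sizing example");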
@@ -2068,8 +2070,7 @@ ibuf_get_merge_page_nos_func(
*n_stored = 0;
- limit = ut_min(IBUF_MAX_N_PAGES_MERGED,
- buf_pool_get_curr_size() / 4);
+ limit = IBUF_MAX_N_PAGES_MERGED;
if (page_rec_is_supremum(rec)) {
@@ -4222,7 +4223,12 @@ ibuf_merge_or_delete_for_page(
ulint dops[IBUF_OP_COUNT];
ut_ad(block == NULL || page_id == block->page.id);
+#if MDEV_15053_FIXED // innodb.ibuf_not_empty fails
+ ut_ad(block == NULL
+ || buf_block_get_io_fix_unlocked(block) == BUF_IO_READ);
+#else
ut_ad(!block || buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#endif
if (trx_sys_hdr_page(page_id)
|| fsp_is_system_temporary(page_id.space())) {
diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
index bd1cc06aca6..6d878c32fea 100644
--- a/storage/innobase/include/buf0buddy.h
+++ b/storage/innobase/include/buf0buddy.h
@@ -53,17 +53,15 @@ buf_buddy_get_slot(ulint size)
@param[in] i index of buf_pool.zip_free[] or BUF_BUDDY_SIZES
-@param[out]	lru	whether buf_pool.mutex was temporarily released
@return allocated block, never NULL */
-byte *buf_buddy_alloc_low(ulint i, bool *lru) MY_ATTRIBUTE((malloc));
+byte *buf_buddy_alloc_low(ulint i) MY_ATTRIBUTE((malloc));
 /** Allocate a ROW_FORMAT=COMPRESSED block.
 The caller must not hold buf_pool.mutex nor buf_pool.zip_mutex nor any
block->mutex.
@param[in] size compressed page size
-@param[out] lru whether buf_pool.mutex was temporarily released
@return allocated block, never NULL */
-inline byte *buf_buddy_alloc(ulint size, bool *lru= nullptr)
+inline byte *buf_buddy_alloc(ulint size)
{
- return buf_buddy_alloc_low(buf_buddy_get_slot(size), lru);
+ return buf_buddy_alloc_low(buf_buddy_get_slot(size));
}
/** Deallocate a block.
@@ -72,7 +70,7 @@ inline byte *buf_buddy_alloc(ulint size, bool *lru= nullptr)
@param[in] i index of buf_pool.zip_free[], or BUF_BUDDY_SIZES */
void buf_buddy_free_low(void* buf, ulint i);
 /** Deallocate a block.
@param[in] buf block to be freed, must not be pointed to
by the buffer pool
@param[in] size block size in bytes */
@@ -85,6 +83,7 @@ inline void buf_buddy_free(void* buf, ulint size)
@param[in] buf block to be reallocated, must be pointed
to by the buffer pool
@param[in] size block size, up to srv_page_size
+@retval true if succeeded or if failed because the block was fixed
@retval false if failed because of no free blocks. */
bool buf_buddy_realloc(void* buf, ulint size);
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 8bf9269418f..d3464d12285 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -187,15 +187,7 @@ std::ostream&
operator<<(
std::ostream& out,
const page_id_t page_id);
-
#ifndef UNIV_INNOCHECKSUM
-/*********************************************************************//**
-Gets the current size of buffer buf_pool in bytes.
-@return size in bytes */
-UNIV_INLINE
-ulint
-buf_pool_get_curr_size(void);
-/*========================*/
/**
@return the smallest oldest_modification lsn for any page.
@retval 0 if all modified persistent pages have been flushed */
@@ -351,14 +343,10 @@ buf_page_release_latch(
buf_block_t* block, /*!< in: buffer block */
ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH,
RW_NO_LATCH */
-/********************************************************************//**
-Moves a page to the start of the buffer pool LRU list. This high-level
-function can be used to prevent an important page from slipping out of
-the buffer pool. */
+/** Move a page to the start of the buffer pool LRU list. This high-level
+function can be used to prevent an important page from slipping out of
+the buffer pool.
+@param[in,out]	bpage	buffer block of a file page */
void
buf_page_make_young(
-/*================*/
- buf_page_t* bpage); /*!< in: buffer block of a file page */
+ buf_page_t* bpage);
/** Mark the page status as FREED for the given tablespace id and
page number. If the page is not in buffer pool then ignore it.
@@ -393,8 +381,6 @@ buf_block_get_freed_page_clock(
/** Determine if a block is still close enough to the MRU end of the LRU list
meaning that it is not in danger of getting evicted and also implying
that it has been accessed recently.
-Note that this is for heuristics only and does not reserve buffer pool
-mutex.
@param[in] bpage buffer pool page
@return whether bpage is close to MRU end of LRU */
inline bool buf_page_peek_if_young(const buf_page_t *bpage);
@@ -421,7 +407,6 @@ on the block. */
UNIV_INLINE
void
buf_block_modify_clock_inc(
-/*=======================*/
buf_block_t* block); /*!< in: block */
/********************************************************************//**
Returns the value of the modify clock. The caller must have an s-lock
@@ -603,6 +588,7 @@ buf_block_get_lock_hash_val(
/*========================*/
const buf_block_t* block) /*!< in: block */
MY_ATTRIBUTE((warn_unused_result));
+
#ifdef UNIV_DEBUG
/** Find a block in the buffer pool that points to a given compressed page.
@param[in] data pointer to compressed page
@@ -649,6 +635,7 @@ buf_print_io(
@param[out] pool_info buffer pool metadata */
void buf_stats_get_pool_info(buf_pool_info_t *pool_info);
/** Refresh the statistics used to print per-second averages. */
void buf_refresh_io_stats();
@@ -656,8 +643,7 @@ void buf_refresh_io_stats();
@return number of pending i/o */
ulint buf_pool_check_no_pending_io();
 /** Invalidate all pages in the buffer pool.
 All pages must be in a replaceable state (not modified or latched). */
void buf_pool_invalidate();
/*========================================================================
@@ -804,20 +790,20 @@ buf_block_set_io_fix(
/*=================*/
buf_block_t* block, /*!< in/out: control block */
enum buf_io_fix io_fix);/*!< in: io_fix state */
-/*********************************************************************//**
-Makes a block sticky. A sticky block implies that even after we release
+/** Makes a block sticky. A sticky block implies that even after we release
the buf_pool.mutex and the block->mutex:
* it cannot be removed from the flush_list
* the block descriptor cannot be relocated
* it cannot be removed from the LRU list
Note that:
* the block can still change its position in the LRU list
-* the next and previous pointers can change. */
+* the next and previous pointers can change.
+@param[in,out] bpage control block */
UNIV_INLINE
void
buf_page_set_sticky(
-/*================*/
- buf_page_t* bpage); /*!< in/out: control block */
+ buf_page_t* bpage);
+
/*********************************************************************//**
Removes stickiness of a block. */
UNIV_INLINE
@@ -835,14 +821,13 @@ buf_page_can_relocate(
const buf_page_t* bpage) /*!< control block being relocated */
MY_ATTRIBUTE((warn_unused_result));
-/*********************************************************************//**
-Determine if a block has been flagged old.
+/** Determine if a block has been flagged old.
+@param[in] bpage control block
@return TRUE if old */
UNIV_INLINE
ibool
buf_page_is_old(
-/*============*/
- const buf_page_t* bpage) /*!< in: control block */
+ const buf_page_t* bpage)
MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Flag a block old. */
@@ -867,18 +852,19 @@ UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
- buf_page_t* bpage) /*!< in/out: control block */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Gets the buf_block_t handle of a buffered file block if an uncompressed
-page frame exists, or NULL. Note: even though bpage is not declared a
-const we don't update its value.
+ buf_page_t* bpage); /*!< in/out: control block */
+
+/** Gets the buf_block_t handle of a buffered file block if an uncompressed
+page frame exists, or NULL. The caller must hold either the appropriate hash
+lock in any mode or the LRU list mutex. Note: even though bpage is not declared
+const, we do not update its value. It is safe to make this function pure.
+@param[in] bpage control block, or NULL
@return control block, or NULL */
UNIV_INLINE
buf_block_t*
buf_page_get_block(
-/*===============*/
- buf_page_t* bpage) /*!< in: control block, or NULL */
+ buf_page_t* bpage)
MY_ATTRIBUTE((warn_unused_result));
#ifdef UNIV_DEBUG
@@ -927,11 +913,10 @@ buf_page_init_for_read(
ulint zip_size,
bool unzip);
-/** Complete a read or write request of a file page to or from the buffer pool.
-@param[in,out] bpage page to complete
+/** Completes an asynchronous read or write request of a file page to or from
+the buffer pool.
@param[in] dblwr whether the doublewrite buffer was used (on write)
+@param[in,out]	bpage	pointer to the block in question
@param[in] evict whether or not to evict the page from LRU list
-@return whether the operation succeeded
@retval DB_SUCCESS always when writing, or if a read page was OK
@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
@@ -1094,6 +1079,28 @@ public:
}
};
+/** Gets the io_fix state of a buffer block. Does not assert that the
+buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
+not to hold it.
+@param[in] block pointer to the buffer block
+@return page io_fix state */
+UNIV_INLINE
+buf_io_fix
+buf_block_get_io_fix_unlocked(
+ const buf_block_t* block)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Gets the io_fix state of a buffer page. Does not assert that the
+buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
+not to hold it.
+@param[in] bpage pointer to the buffer page
+@return page io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_page_get_io_fix_unlocked(
+ const buf_page_t* bpage)
+ MY_ATTRIBUTE((warn_unused_result));
+
/** The common buffer control block structure
for compressed and uncompressed frames */
@@ -1106,11 +1113,10 @@ public:
None of these bit-fields must be modified without holding
buf_page_get_mutex() [buf_block_t::mutex or
buf_pool.zip_mutex], since they can be stored in the same
- machine word. Some of these fields are additionally protected
- by buf_pool.mutex. */
+ machine word. */
/* @{ */
- /** Page id. Protected by buf_pool mutex. */
+ /** Page id. */
page_id_t id;
buf_page_t* hash; /*!< node used in chaining to
buf_pool.page_hash or
@@ -1119,8 +1125,7 @@ public:
/** Count of how manyfold this block is currently bufferfixed. */
Atomic_counter<uint32_t> buf_fix_count;
- /** type of pending I/O operation; also protected by
- buf_pool.mutex for writes only */
+ /** type of pending I/O operation. */
buf_io_fix io_fix;
/** Block state. @see buf_page_in_file */
@@ -1133,7 +1138,7 @@ public:
/* @} */
page_zip_des_t zip; /*!< compressed page; zip.data
(but not the data it points to) is
- also protected by buf_pool.mutex;
+ protected by buf_pool.zip_mutex;
state == BUF_BLOCK_ZIP_PAGE and
zip.data == NULL means an active
buf_pool.watch */
@@ -1163,22 +1168,17 @@ public:
UT_LIST_NODE_T(buf_page_t) list;
/*!< based on state, this is a
- list node, protected either by
- buf_pool.mutex or by
- buf_pool.flush_list_mutex,
- in one of the following lists in
- buf_pool:
+ list node, protected by the
+ corresponding list mutex, in one of the
+ following lists in buf_pool:
- BUF_BLOCK_NOT_USED: free, withdraw
- BUF_BLOCK_FILE_PAGE: flush_list
- BUF_BLOCK_ZIP_DIRTY: flush_list
- BUF_BLOCK_ZIP_PAGE: zip_clean
- If bpage is part of flush_list
- then the node pointers are
- covered by buf_pool.flush_list_mutex.
- Otherwise these pointers are
- protected by buf_pool.mutex.
+ The node pointers are protected by the
+ corresponding list mutex.
The contents of the list node
is undefined if !in_flush_list
@@ -1201,8 +1201,8 @@ public:
reads can happen while holding
any one of the two mutexes */
ibool in_free_list; /*!< TRUE if in buf_pool.free; when
- buf_pool.mutex is free, the following
- should hold: in_free_list
+ buf_pool.free_list_mutex is free, the
+ following should hold: in_free_list
== (state == BUF_BLOCK_NOT_USED) */
#endif /* UNIV_DEBUG */
@@ -1221,8 +1221,8 @@ public:
any one of the two mutexes */
/* @} */
/** @name LRU replacement algorithm fields
- These fields are protected by buf_pool.mutex only (not
- buf_pool.zip_mutex or buf_block_t::mutex). */
+ These fields are protected by both buf_pool.mutex and the
+ block mutex. */
/* @{ */
UT_LIST_NODE_T(buf_page_t) LRU;
@@ -1319,7 +1319,9 @@ struct buf_block_t{
/*!< node of the decompressed LRU list;
a block is in the unzip_LRU list
if page.state == BUF_BLOCK_FILE_PAGE
- and page.zip.data != NULL */
+	and page.zip.data != NULL. Protected by
+	both buf_pool.mutex and the block
+	mutex. */
#ifdef UNIV_DEBUG
ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
decompressed LRU list;
@@ -1329,8 +1331,8 @@ struct buf_block_t{
uint32_t lock_hash_val; /*!< hashed value of the page address
in the record lock hash table;
protected by buf_block_t::lock
- (or buf_block_t::mutex, buf_pool.mutex
- in buf_page_get_gen(),
+ (or buf_block_t::mutex in
+ buf_page_get_gen(),
buf_page_init_for_read()
and buf_page_create()) */
/* @} */
@@ -1344,10 +1346,11 @@ struct buf_block_t{
positioning: if the modify clock has
not changed, we know that the pointer
is still valid; this field may be
- changed if the thread (1) owns the
- pool mutex and the page is not
+ changed if the thread (1) owns the LRU
+ list mutex and the page is not
bufferfixed, or (2) the thread has an
- x-latch on the block */
+ x-latch on the block, or (3) the block
+ must belong to an intrinsic table */
/* @} */
#ifdef BTR_CUR_HASH_ADAPT
/** @name Hash search fields (unprotected)
@@ -1504,7 +1507,8 @@ public:
void set(buf_page_t *bpage)
{
ut_ad(mutex_own(m_mutex));
- ut_ad(!bpage || buf_page_in_file(bpage));
+ ut_ad(!bpage || buf_page_in_file(bpage) ||
+ bpage->state == BUF_BLOCK_REMOVE_HASH);
m_hp= bpage;
}
@@ -1615,25 +1619,31 @@ struct buf_pool_stat_t{
counted as page gets; this field
is NOT protected by the buffer
pool mutex */
- ulint n_pages_read; /*!< number read operations */
- ulint n_pages_written;/*!< number write operations */
+ ulint n_pages_read; /*!< number of read operations. Accessed
+ atomically. */
+ ulint n_pages_written;/*!< number of write operations. Accessed
+ atomically. */
ulint n_pages_created;/*!< number of pages created
- in the pool with no read */
+ in the pool with no read. Accessed
+ atomically. */
ulint n_ra_pages_read_rnd;/*!< number of pages read in
- as part of random read ahead */
+ as part of random read ahead. Not protected. */
ulint n_ra_pages_read;/*!< number of pages read in
- as part of read ahead */
+ as part of read ahead. Not protected. */
ulint n_ra_pages_evicted;/*!< number of read ahead
pages that are evicted without
- being accessed */
+ being accessed. Protected by mutex. */
ulint n_pages_made_young; /*!< number of pages made young, in
- calls to buf_LRU_make_block_young() */
+ calls to buf_LRU_make_block_young(). Protected
+ by mutex. */
ulint n_pages_not_made_young; /*!< number of pages not made
young because the first access
was not long enough ago, in
- buf_page_peek_if_too_old() */
- ulint LRU_bytes; /*!< LRU size in bytes */
- ulint flush_list_bytes;/*!< flush_list size in bytes */
+ buf_page_peek_if_too_old(). Not protected. */
+ ulint LRU_bytes; /*!< LRU size in bytes. Protected by
+ mutex. */
+ ulint flush_list_bytes;/*!< flush_list size in bytes.
+ Protected by flush_list_mutex */
};
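
A minimal sketch (not part of this patch) of what the "accessed atomically"
counters above amount to; std::atomic with relaxed ordering is enough for
counters that need no ordering with other data:

	#include <atomic>
	#include <cstdint>

	struct pool_stat {
		std::atomic<std::uint64_t> n_pages_read{0};
		std::atomic<std::uint64_t> n_pages_written{0};

		void page_read_done()
		{ n_pages_read.fetch_add(1, std::memory_order_relaxed); }

		std::uint64_t reads() const
		{ return n_pages_read.load(std::memory_order_relaxed); }
	};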
/** Statistics of buddy blocks of a given size. */
@@ -1649,6 +1659,9 @@ struct buf_buddy_stat_t {
/** The buffer pool */
class buf_pool_t
{
+ typedef ib_mutex_t BufListMutex;
+ typedef ib_bpmutex_t BufPageMutex;
+
/** A chunk of buffers */
struct chunk_t
{
@@ -1761,6 +1774,18 @@ public:
this->withdraw_clock() != withdraw_clock);
}
+ /** @return how many more pages must be added to the withdraw_list
+ to reach the withdraw target of the currently ongoing resize() */
+ ulint withdraw_depth()
+ {
+ if (curr_size >= old_size)
+ return 0;
+ mutex_enter(&free_list_mutex);
+ const lint w= withdraw_target - UT_LIST_GET_LEN(withdraw);
+ mutex_exit(&free_list_mutex);
+ return w > 0 ? static_cast<ulint>(w) : 0;
+ }
+
/** @return the current size in blocks */
size_t get_n_pages() const
{
@@ -1812,7 +1837,7 @@ public:
@retval nullptr if not found */
const buf_block_t *contains_zip(const void *data) const
{
- ut_ad(mutex_own(&mutex));
+ ut_ad(mutex_own(&zip_free_mutex));
for (const chunk_t *chunk= chunks, * const end= chunks + n_chunks;
chunk != end; chunk++)
if (const buf_block_t *block= chunk->contains_zip(data))
@@ -1859,21 +1884,28 @@ public:
void print();
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
+ /** Buffer pool mutex; protecting the LRU list and buf_page_t::state */
+ BufListMutex mutex;
+ /** free and withdraw list mutex */
+ BufListMutex free_list_mutex;
+ /** buddy allocator mutex */
+ BufListMutex zip_free_mutex;
+ /** zip_hash mutex */
+ BufListMutex zip_hash_mutex;
+  /** mutex protecting compressed-only pages (of type buf_page_t,
+  not buf_block_t) */
+  BufPageMutex zip_mutex;
+ /** flush state protection mutex */
+ ib_mutex_t flush_state_mutex;
+
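
The single buffer pool mutex is split above into per-structure mutexes. A
sketch of the resulting pattern, where an operation spanning two lists must
take both mutexes (names hypothetical; InnoDB enforces a fixed latch order
via LatchDebug rather than relying on std::scoped_lock's deadlock avoidance):

	#include <list>
	#include <mutex>

	struct pool {
		std::mutex lru_mutex;		// cf. buf_pool_t::mutex
		std::mutex free_list_mutex;	// cf. buf_pool_t::free_list_mutex
		std::list<int> lru, free_list;

		// Moving a block between lists touches both, so both mutexes
		// are needed; single-list operations take only their own mutex.
		void move_to_free(std::list<int>::iterator it)
		{
			std::scoped_lock guard(lru_mutex, free_list_mutex);
			free_list.splice(free_list.begin(), lru, it);
		}
	};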
/** @name General fields */
/* @{ */
- BufPoolMutex mutex; /*!< Buffer pool mutex */
- BufPoolZipMutex zip_mutex; /*!< Zip mutex, protects compressed
- only pages (of type buf_page_t, not
- buf_block_t */
ulint curr_pool_size; /*!< Current pool size in bytes */
ulint LRU_old_ratio; /*!< Reserve this much of the buffer
pool for "old" blocks */
#ifdef UNIV_DEBUG
ulint buddy_n_frames; /*!< Number of frames allocated from
- the buffer pool to the buddy system */
-#endif
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ulint mutex_exit_forbidden; /*!< Forbid release mutex */
+ the buffer pool to the buddy system.
+ Protected by zip_hash_mutex. */
#endif
ut_allocator<unsigned char> allocator; /*!< Allocator used for
 					allocating memory for the "chunks"
@@ -1893,29 +1925,27 @@ public:
buf_page_in_file() == TRUE,
indexed by (space_id, offset).
page_hash is protected by an
- array of mutexes.
- Changes in page_hash are protected
- by buf_pool.mutex and the relevant
- page_hash mutex. Lookups can happen
- while holding the buf_pool.mutex or
- the relevant page_hash mutex. */
+ array of mutexes. */
hash_table_t* page_hash_old; /*!< old pointer to page_hash to be
freed after resizing buffer pool */
hash_table_t* zip_hash; /*!< hash table of buf_block_t blocks
whose frames are allocated to the
zip buddy system,
indexed by block->frame */
- ulint n_pend_reads; /*!< number of pending read
- operations */
+ Atomic_counter<ulint> n_pend_reads; /*!< number of pending read
+ operations. Accessed atomically */
Atomic_counter<ulint>
n_pend_unzip; /*!< number of pending decompressions */
time_t last_printout_time;
/*!< when buf_print_io was last time
- called */
+ called. Accesses not protected. */
buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
/*!< Statistics of buddy system,
- indexed by block size */
+ indexed by block size. Protected by
+					zip_free_mutex, except for the used
+ field, which is also accessed
+ atomically */
buf_pool_stat_t stat; /*!< current statistics */
buf_pool_stat_t old_stat; /*!< old statistics */
@@ -1925,7 +1955,7 @@ public:
/* @{ */
- FlushListMutex flush_list_mutex;/*!< mutex protecting the
+ BufListMutex flush_list_mutex;/*!< mutex protecting the
flush list access. This mutex
protects flush_list, flush_rbt
and bpage::list pointers when
@@ -1942,16 +1972,17 @@ public:
list */
ibool init_flush[BUF_FLUSH_N_TYPES];
/*!< this is TRUE when a flush of the
- given type is being initialized */
+ given type is being initialized.
+ Protected by flush_state_mutex. */
ulint n_flush[BUF_FLUSH_N_TYPES];
/*!< this is the number of pending
- writes in the given flush type */
+ writes in the given flush type.
+ Protected by flush_state_mutex. */
os_event_t no_flush[BUF_FLUSH_N_TYPES];
/*!< this is in the set state
when there is no flush batch
- of the given type running;
- os_event_set() and os_event_reset()
- are protected by buf_pool_t::mutex */
+ of the given type running. Protected by
+ flush_state_mutex. */
ib_rbt_t* flush_rbt; /*!< a red-black tree is used
exclusively during recovery to
speed up insertions in the
@@ -1974,7 +2005,8 @@ public:
billion! A thread is allowed
to read this for heuristic
purposes without holding any
- mutex or latch */
+					mutex or latch. For non-heuristic
+					purposes it is protected by mutex */
ibool try_LRU_scan; /*!< Set to FALSE when an LRU
scan for free block fails. This
flag is used to avoid repeated
@@ -1983,8 +2015,8 @@ public:
available in the scan depth for
eviction. Set to TRUE whenever
we flush a batch from the
- buffer pool. Protected by the
- buf_pool.mutex */
+					buffer pool. Access is protected
+					by memory barriers. */
/* @} */
/** @name LRU replacement algorithm fields */
@@ -1998,21 +2030,22 @@ public:
/*!< base node of the withdraw
block list. It is only used during
shrinking buffer pool size, not to
-					reuse the blocks will be removed */
+					reuse the blocks that will be removed.
+					Protected by free_list_mutex */
ulint withdraw_target;/*!< target length of withdraw
block list, when withdrawing */
/** "hazard pointer" used during scan of LRU while doing
- LRU list batch. Protected by buf_pool::mutex */
+ LRU list batch. Protected by buf_pool.mutex */
LRUHp lru_hp;
/** Iterator used to scan the LRU list when searching for
-	replacable victim. Protected by buf_pool::mutex. */
+	replaceable victim. Protected by buf_pool.mutex. */
LRUItr lru_scan_itr;
/** Iterator used to scan the LRU list when searching for
- single page flushing victim. Protected by buf_pool::mutex. */
+ single page flushing victim. Protected by buf_pool.mutex. */
LRUItr single_scan_itr;
UT_LIST_BASE_NODE_T(buf_page_t) LRU;
@@ -2035,7 +2068,8 @@ public:
UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
/*!< base node of the
- unzip_LRU list */
+ unzip_LRU list. The list is protected
+ by mutex. */
/* @} */
/** @name Buddy allocator fields
@@ -2056,8 +2090,12 @@ public:
buf_page_t* watch;
/*!< Sentinel records for buffer
- pool watches. Protected by
- buf_pool.mutex. */
+ pool watches. Scanning the array is
+ protected by taking all page_hash
+ latches in X. Updating or reading an
+ individual watch page is protected by
+ a corresponding individual page_hash
+ latch. */
/** Reserve a buffer. */
@@ -2127,7 +2165,7 @@ Use these instead of accessing buffer pool mutexes directly. */
mutex_enter(&(b)->mutex); \
} while (0)
-/** Release the trx->mutex. */
+/** Release the block->mutex. */
#define buf_page_mutex_exit(b) do { \
(b)->mutex.exit(); \
} while (0)
@@ -2179,23 +2217,6 @@ buf_page_hash_lock_get(const page_id_t& page_id)
# define buf_block_hash_lock_held_s_or_x(p) (TRUE)
#endif /* UNIV_DEBUG */
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/** Forbid the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_forbid() do { \
- ut_ad(mutex_own(&buf_pool.mutex)); \
- buf_pool.mutex_exit_forbidden++; \
-} while (0)
-/** Allow the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_allow() do { \
- ut_ad(mutex_own(&buf_pool.mutex)); \
- ut_ad(buf_pool.mutex_exit_forbidden--); \
-} while (0)
-#else
-/** Forbid the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_forbid() ((void) 0)
-/** Allow the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_allow() ((void) 0)
-#endif
/* @} */
/**********************************************************************
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index 7b74705d5c7..f939b2160f0 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -37,17 +37,6 @@ Created 11/5/1995 Heikki Tuuri
#include "buf0rea.h"
#include "fsp0types.h"
-/*********************************************************************//**
-Gets the current size of buffer buf_pool in bytes.
-@return size in bytes */
-UNIV_INLINE
-ulint
-buf_pool_get_curr_size(void)
-/*========================*/
-{
- return(srv_buf_pool_curr_size);
-}
-
/********************************************************************//**
Reads the freed_page_clock of a buffer block.
@return freed_page_clock */
@@ -57,7 +46,7 @@ buf_page_get_freed_page_clock(
/*==========================*/
const buf_page_t* bpage) /*!< in: block */
{
- /* This is sometimes read without holding buf_pool.mutex. */
+ /* This is sometimes read without holding any buffer pool mutex. */
return(bpage->freed_page_clock);
}
@@ -74,13 +63,15 @@ buf_block_get_freed_page_clock(
}
/** Determine if a block is still close enough to the MRU end of the LRU list
-meaning that it is not in danger of getting evicted and also implying
-that it has been accessed recently.
-The page must be either buffer-fixed, or its page hash must be locked.
-@param[in] bpage buffer pool page
+implying that it has been accessed recently.
+The page must be either buffer-fixed, or its page hash must be locked.
+@param[in]	bpage	buffer pool page
@return whether bpage is close to MRU end of LRU */
inline bool buf_page_peek_if_young(const buf_page_t *bpage)
{
+ ut_ad(bpage->buf_fix_count > 0
+ || buf_page_hash_lock_held_s_or_x(bpage));
+
/* FIXME: bpage->freed_page_clock is 31 bits */
return((buf_pool.freed_page_clock & ((1UL << 31) - 1))
< (bpage->freed_page_clock
@@ -163,14 +154,38 @@ buf_block_get_state(
return(buf_page_get_state(&block->page));
}
-/*********************************************************************//**
-Sets the state of a block. */
+#ifdef UNIV_DEBUG
+/** Assert that a given buffer pool page is private to the caller: no pointers
+to it exist in any buffer pool list or hash table. Accessing pages by iterating
+over buffer pool chunks is not considered here. Furthermore, assert that no
+buffer pool locks except for LRU list mutex and page hash are held.
+@param[in] bpage pointer to a buffer pool page */
+UNIV_INLINE
+bool
+buf_page_is_private(
+ const buf_page_t* bpage)
+{
+ ut_a(!bpage->in_page_hash);
+ ut_a(!bpage->in_zip_hash);
+ ut_a(!bpage->in_flush_list);
+ ut_a(!bpage->in_free_list);
+ ut_a(!bpage->in_LRU_list);
+ ut_a(!mutex_own(buf_page_get_mutex(bpage)));
+ ut_a(!mutex_own(&buf_pool.free_list_mutex));
+ ut_a(!mutex_own(&buf_pool.zip_free_mutex));
+ ut_a(!mutex_own(&buf_pool.zip_hash_mutex));
+ return(true);
+}
+#endif
+
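
buf_page_is_private() returns a bool so it can live entirely inside a debug
assertion. A sketch of that idiom (hypothetical names, standard assert()
standing in for ut_a()/ut_ad()):

	#include <cassert>

	#ifndef NDEBUG
	static bool invariants_hold(unsigned fix_count, bool in_any_list)
	{
		assert(fix_count == 0);	// each sub-check fails on its own line
		assert(!in_any_list);
		return true;
	}
	#endif

	static void reuse_block(unsigned fix_count, bool in_any_list)
	{
		// The whole call compiles away when NDEBUG is defined,
		// much as ut_ad() does in non-debug builds.
		assert(invariants_hold(fix_count, in_any_list));
	}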
+/** Sets the state of a block.
+@param[in,out] bpage pointer to control block
+@param[in] state state */
UNIV_INLINE
void
buf_page_set_state(
-/*===============*/
- buf_page_t* bpage, /*!< in/out: pointer to control block */
- enum buf_page_state state) /*!< in: state */
+ buf_page_t* bpage,
+ enum buf_page_state state)
{
#ifdef UNIV_DEBUG
enum buf_page_state old_state = buf_page_get_state(bpage);
@@ -184,17 +199,23 @@ buf_page_set_state(
break;
case BUF_BLOCK_ZIP_DIRTY:
ut_ad(state == BUF_BLOCK_ZIP_PAGE);
+ ut_a(mutex_own(buf_page_get_mutex(bpage)));
+ ut_a(mutex_own(&buf_pool.flush_list_mutex));
+ ut_a(bpage->in_flush_list);
break;
case BUF_BLOCK_NOT_USED:
ut_ad(state == BUF_BLOCK_READY_FOR_USE);
+ ut_a(buf_page_is_private(bpage));
break;
case BUF_BLOCK_READY_FOR_USE:
ut_ad(state == BUF_BLOCK_MEMORY
|| state == BUF_BLOCK_FILE_PAGE
|| state == BUF_BLOCK_NOT_USED);
+ ut_a(buf_page_is_private(bpage));
break;
case BUF_BLOCK_MEMORY:
ut_ad(state == BUF_BLOCK_NOT_USED);
+ ut_a(buf_page_is_private(bpage));
break;
case BUF_BLOCK_FILE_PAGE:
ut_ad(state == BUF_BLOCK_NOT_USED
@@ -206,6 +227,8 @@ buf_page_set_state(
ut_ad(!bpage->in_LRU_list);
ut_ad(!bpage->in_free_list);
}
+ ut_a(buf_page_hash_lock_held_x(bpage));
+
break;
case BUF_BLOCK_REMOVE_HASH:
ut_ad(state == BUF_BLOCK_MEMORY);
@@ -349,6 +372,20 @@ buf_page_get_io_fix(
/*================*/
const buf_page_t* bpage) /*!< in: pointer to the control block */
{
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ return buf_page_get_io_fix_unlocked(bpage);
+}
+
+/** Gets the io_fix state of a buffer page. Does not assert that the
+buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
+not to hold it.
+@param[in] bpage pointer to the buffer page
+@return page io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_page_get_io_fix_unlocked(
+ const buf_page_t* bpage)
+{
ut_ad(bpage != NULL);
enum buf_io_fix io_fix = bpage->io_fix;
@@ -378,6 +415,19 @@ buf_block_get_io_fix(
return(buf_page_get_io_fix(&block->page));
}
+/** Gets the io_fix state of a buffer block. Does not assert that the
+buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
+not to hold it.
+@param[in] block pointer to the buffer block
+@return page io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_block_get_io_fix_unlocked(
+ const buf_block_t* block)
+{
+ return(buf_page_get_io_fix_unlocked(&block->page));
+}
+
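
The _unlocked getters above read io_fix without holding the block mutex. The
commit message's TODO mentions making more use of std::atomic; a sketch of
what that would look like for this field (an assumption, not what the patch
currently does):

	#include <atomic>

	enum io_fix_t { IO_NONE, IO_READ, IO_WRITE, IO_PIN };

	struct page {
		std::atomic<io_fix_t> io_fix{IO_NONE};
	};

	// Safe to call without any mutex: a relaxed load yields a whole,
	// untorn value, though it may be stale by the time it is used.
	inline io_fix_t get_io_fix_unlocked(const page& p)
	{
		return p.io_fix.load(std::memory_order_relaxed);
	}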
/*********************************************************************//**
Sets the io_fix state of a block. */
UNIV_INLINE
@@ -387,7 +437,6 @@ buf_page_set_io_fix(
buf_page_t* bpage, /*!< in/out: control block */
enum buf_io_fix io_fix) /*!< in: io_fix state */
{
- ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
bpage->io_fix = io_fix;
@@ -406,24 +455,26 @@ buf_block_set_io_fix(
buf_page_set_io_fix(&block->page, io_fix);
}
-/*********************************************************************//**
-Makes a block sticky. A sticky block implies that even after we release
+/** Makes a block sticky. A sticky block implies that even after we release
the buf_pool.mutex and the block->mutex:
* it cannot be removed from the flush_list
* the block descriptor cannot be relocated
* it cannot be removed from the LRU list
Note that:
* the block can still change its position in the LRU list
-* the next and previous pointers can change. */
+* the next and previous pointers can change.
+@param[in,out] bpage control block */
UNIV_INLINE
void
buf_page_set_sticky(
-/*================*/
- buf_page_t* bpage) /*!< in/out: control block */
+ buf_page_t* bpage)
 {
 	ut_ad(mutex_own(&buf_pool.mutex));
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+ ut_ad(bpage->in_LRU_list);
bpage->io_fix = BUF_IO_PIN;
}
@@ -436,7 +487,6 @@ buf_page_unset_sticky(
/*==================*/
buf_page_t* bpage) /*!< in/out: control block */
{
- ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_PIN);
@@ -452,7 +502,6 @@ buf_page_can_relocate(
/*==================*/
const buf_page_t* bpage) /*!< control block being relocated */
{
- ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
@@ -461,18 +510,18 @@ buf_page_can_relocate(
&& bpage->buf_fix_count == 0);
}
-/*********************************************************************//**
-Determine if a block has been flagged old.
+/** Determine if a block has been flagged old.
+@param[in] bpage control block
@return TRUE if old */
UNIV_INLINE
ibool
buf_page_is_old(
-/*============*/
- const buf_page_t* bpage) /*!< in: control block */
+ const buf_page_t* bpage)
{
 	/* Buffer page mutex is not strictly required here for heuristic
-	purposes even if LRU mutex is not being held. Keep the assertion
-	for not since all the callers hold it. */
+	purposes even if LRU mutex is not being held. Keep the assertion
+	for now since all the callers hold it. */
ut_ad(mutex_own(buf_page_get_mutex(bpage))
|| mutex_own(&buf_pool.mutex));
ut_ad(buf_page_in_file(bpage));
@@ -480,14 +529,14 @@ buf_page_is_old(
return(bpage->old);
}
-/*********************************************************************//**
-Flag a block old. */
+/** Flag a block old.
+@param[in] bpage control block
+@param[in]	old	true if the block is to be flagged old */
UNIV_INLINE
void
buf_page_set_old(
-/*=============*/
- buf_page_t* bpage, /*!< in/out: control block */
- bool old) /*!< in: old */
+ buf_page_t* bpage,
+ bool old)
{
ut_a(buf_page_in_file(bpage));
ut_ad(mutex_own(&buf_pool.mutex));
@@ -545,19 +594,23 @@ buf_page_set_accessed(
}
}
-/*********************************************************************//**
-Gets the buf_block_t handle of a buffered file block if an uncompressed
-page frame exists, or NULL.
+/** Gets the buf_block_t handle of a buffered file block if an uncompressed
+page frame exists, or NULL. The caller must hold either the appropriate hash
+lock in any mode or the LRU list mutex. Note: even though bpage is not declared
+const, we do not update its value. It is safe to make this function pure.
+@param[in] bpage control block, or NULL
@return control block, or NULL */
UNIV_INLINE
buf_block_t*
buf_page_get_block(
-/*===============*/
- buf_page_t* bpage) /*!< in: control block, or NULL */
+ buf_page_t* bpage)
{
if (bpage != NULL) {
ut_ad(buf_page_hash_lock_held_s_or_x(bpage)
|| mutex_own(&buf_pool.mutex));
ut_ad(buf_page_in_file(bpage));
if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
@@ -676,17 +729,10 @@ buf_block_free(
/*===========*/
buf_block_t* block) /*!< in, own: block to be freed */
{
- mutex_enter(&buf_pool.mutex);
-
- buf_page_mutex_enter(block);
ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
buf_LRU_block_free_non_file_page(block);
-
- buf_page_mutex_exit(block);
-
- mutex_exit(&buf_pool.mutex);
}
/********************************************************************//**
@@ -701,8 +747,8 @@ buf_block_modify_clock_inc(
{
/* No latch is acquired for the shared temporary tablespace. */
ut_ad(fsp_is_system_temporary(block->page.id.space())
- || (mutex_own(&buf_pool.mutex)
- && block->page.buf_fix_count == 0)
+ || (block->page.buf_fix_count == 0
+ && mutex_own(&buf_pool.mutex))
|| rw_lock_own_flagged(&block->lock,
RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
assert_block_ahi_valid(block);
@@ -758,6 +804,7 @@ buf_block_buf_fix_inc_func(
block->fix();
}
+
/*******************************************************************//**
Decrements the bufferfix count. */
UNIV_INLINE
@@ -851,12 +898,10 @@ buf_page_hash_get_locked(
if (mode == RW_LOCK_S) {
rw_lock_s_lock(hash_lock);
- /* If not own buf_pool_mutex, page_hash can be changed. */
hash_lock = hash_lock_s_confirm(
hash_lock, buf_pool.page_hash, page_id.fold());
} else {
rw_lock_x_lock(hash_lock);
- /* If not own buf_pool_mutex, page_hash can be changed. */
hash_lock = hash_lock_x_confirm(
hash_lock, buf_pool.page_hash, page_id.fold());
}
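
hash_lock_s_confirm()/hash_lock_x_confirm() implement a lock-then-recheck
loop: the cell-to-lock mapping can change (for example on a hash resize)
between computing the lock and acquiring it. A generic sketch of that pattern
(hypothetical names, using a generation counter instead of re-deriving the
lock pointer):

	#include <atomic>
	#include <mutex>

	static std::mutex locks[16];
	static std::atomic<unsigned> generation{0};	// bumped by a resizer
							// holding all locks

	static std::mutex* lock_cell(unsigned fold)
	{
		for (;;) {
			const unsigned gen = generation.load();
			std::mutex* m = &locks[fold % 16];
			m->lock();
			if (gen == generation.load())
				return m;	// mapping unchanged: lock is valid
			m->unlock();		// raced with a resize: retry
		}
	}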
@@ -1028,16 +1073,14 @@ buf_block_dbg_add_level(
}
#endif /* UNIV_DEBUG */
-/********************************************************************//**
-Get buf frame. */
-UNIV_INLINE
-void *
+
+static inline void *
buf_page_get_frame(
/*===============*/
 	const buf_page_t*	bpage)	/*!< in: buffer pool page */
 {
 /* In encryption/compression buffer pool page may contain extra
 buffer where result is stored. */
if (bpage->slot && bpage->slot->out_buf) {
return bpage->slot->out_buf;
} else if (bpage->zip.data) {
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index 17fa84a85cc..e1d16f93c13 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -84,14 +84,14 @@ ibool
buf_dblwr_page_inside(
/*==================*/
ulint page_no); /*!< in: page number */
-/********************************************************************//**
-Posts a buffer page for writing. If the doublewrite memory buffer is
-full, calls buf_dblwr_flush_buffered_writes and waits for for free
-space to appear. */
+
+/** Posts a buffer page for writing. If the doublewrite memory buffer
+is full, calls buf_dblwr_flush_buffered_writes and waits for free
+space to appear.
+@param[in] bpage buffer block to write */
void
buf_dblwr_add_to_batch(
-/*====================*/
- buf_page_t* bpage); /*!< in: buffer block to write */
+ buf_page_t* bpage);
/********************************************************************//**
Flush a batch of writes to the datafiles that have already been
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index becf2d052ed..2deecd84c8e 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -133,13 +133,13 @@ passed back to caller. Ignored if NULL.
@retval false if another batch of same type was already running */
bool buf_flush_lists(ulint min_n, lsn_t lsn_limit, ulint *n_processed);
-/******************************************************************//**
-This function picks up a single page from the tail of the LRU
+/** This function picks up a single page from the tail of the LRU
list, flushes it (if it is dirty), removes it from page_hash and LRU
list and puts it on the free list. It is called from user threads when
they are unable to find a replaceable page at the tail of the LRU
list i.e.: when the background LRU flushing in the page_cleaner thread
is not fast enough to keep pace with the workload.
@return true if success. */
bool buf_flush_single_page_from_LRU();
@@ -162,15 +162,15 @@ buf_flush_note_modification(
set of mtr's */
lsn_t end_lsn); /*!< in: end lsn of the last mtr in the
set of mtr's */
-/********************************************************************//**
-Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., transition FILE_PAGE => NOT_USED allowed.
+/** Returns TRUE if the file page block is immediately suitable for replacement,
+i.e., the transition FILE_PAGE => NOT_USED is allowed. The caller must hold the
+LRU list and block mutexes.
+@param[in] bpage buffer control block, must be buf_page_in_file() and
+ in the LRU list
@return TRUE if can replace immediately */
ibool
buf_flush_ready_for_replace(
-/*========================*/
- buf_page_t* bpage); /*!< in: buffer control block, must be
- buf_page_in_file(bpage) and in the LRU list */
+ buf_page_t* bpage);
/** Initialize page_cleaner. */
void buf_flush_page_cleaner_init();
@@ -212,13 +212,12 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync);
/** Check if the block is modified and ready for flushing.
@param[in] bpage buffer control block, must be buf_page_in_file()
@param[in] flush_type type of flush
@return true if can flush immediately */
bool
buf_flush_ready_for_flush(
-/*======================*/
- buf_page_t* bpage, /*!< in: buffer control block, must be
- buf_page_in_file(bpage) */
- buf_flush_t flush_type)/*!< in: type of flush */
+ buf_page_t* bpage,
+ buf_flush_t flush_type)
MY_ATTRIBUTE((warn_unused_result));
/** Synchronously flush dirty blocks.
diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic
index 6f57bcb6fc8..0c52595acc6 100644
--- a/storage/innobase/include/buf0flu.ic
+++ b/storage/innobase/include/buf0flu.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2019, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 9ed42f4eed0..2fdaf9f3f71 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -63,31 +63,29 @@ bool buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table)
void buf_LRU_flush_or_remove_pages(ulint id, bool flush, ulint first = 0);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/********************************************************************//**
-Insert a compressed block into buf_pool.zip_clean in the LRU order. */
+/** Insert a compressed block into buf_pool.zip_clean in the LRU order.
+@param[in] bpage pointer to the block in question */
void
buf_LRU_insert_zip_clean(
-/*=====================*/
- buf_page_t* bpage); /*!< in: pointer to the block in question */
+ buf_page_t* bpage);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-/******************************************************************//**
-Try to free a block. If bpage is a descriptor of a compressed-only
+/** Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
-
-NOTE: If this function returns true, it will temporarily
-release buf_pool.mutex. Furthermore, the page frame will no longer be
-accessible via bpage.
-
-The caller must hold buf_pool.mutex and must not hold any
-buf_page_get_mutex() when calling this function.
+NOTE: this function may temporarily release and relock the
+buf_page_get_mutex(). Furthermore, the page frame will no longer be
+accessible via bpage. If this function returns true, it will also release
+the LRU list mutex.
+The caller must hold the LRU list and buf_page_get_mutex() mutexes.
+@param[in] bpage block to be freed
+@param[in] zip true if should remove also the compressed page of
+ an uncompressed page
@return true if freed, false otherwise. */
bool
buf_LRU_free_page(
-/*==============*/
- buf_page_t* bpage, /*!< in: block to be freed */
- bool zip) /*!< in: true if should remove also the
- compressed page of an uncompressed page */
+ buf_page_t* bpage,
+ bool zip)
MY_ATTRIBUTE((nonnull));
/** Try to free a replaceable block.
@@ -101,6 +99,7 @@ bool buf_LRU_scan_and_free_block(bool scan_all);
buf_block_t* buf_LRU_get_free_only();
/** Get a free block from the buf_pool. The block is taken off the
free list. If free list is empty, blocks are moved from the end of the
LRU list to the free list.
This function is called from a user thread when it needs a clean
@@ -122,6 +121,7 @@ we put it to free list to be used.
* scan LRU list even if buf_pool.try_LRU_scan is not set
* iteration > 1:
* same as iteration 1 but sleep 10ms
@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
buf_block_t* buf_LRU_get_free_block()
MY_ATTRIBUTE((malloc,warn_unused_result));
@@ -146,16 +146,18 @@ buf_LRU_add_block(
blocks in the LRU list, else put to the
start; if the LRU list is very short, added to
the start regardless of this parameter */
-/******************************************************************//**
-Adds a block to the LRU list of decompressed zip pages. */
+
+/** Adds a block to the LRU list of decompressed zip pages.
+@param[in] block control block
+@param[in] old TRUE if should be put to the end of the list,
+ else put to the start */
void
buf_unzip_LRU_add_block(
-/*====================*/
- buf_block_t* block, /*!< in: control block */
- ibool old); /*!< in: TRUE if should be put to the end
- of the list, else put to the start */
-/******************************************************************//**
-Moves a block to the start of the LRU list. */
+ buf_block_t* block,
+ ibool old);
+
+/** Moves a block to the start of the LRU list.
+@param[in] bpage control block */
void
buf_LRU_make_block_young(buf_page_t* bpage);
@@ -181,6 +183,7 @@ buf_LRU_stat_update();
void buf_LRU_free_one_page(buf_page_t* bpage, page_id_t old_page_id)
MY_ATTRIBUTE((nonnull));
+
/** Adjust LRU hazard pointers if needed.
@param[in] bpage buffer page descriptor */
void buf_LRU_adjust_hp(const buf_page_t* bpage);
@@ -236,7 +239,7 @@ Cleared by buf_LRU_stat_update(). */
extern buf_LRU_stat_t buf_LRU_stat_cur;
/** Running sum of past values of buf_LRU_stat_cur.
-Updated by buf_LRU_stat_update(). Protected by buf_pool.mutex. */
+Updated by buf_LRU_stat_update(). Accesses are protected by memory barriers. */
extern buf_LRU_stat_t buf_LRU_stat_sum;
/********************************************************************//**
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index 124a7f1854f..a2d9600f1a7 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2019, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -128,6 +128,7 @@ this must be equal to srv_page_size */
#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
/* @} */
+
/** Page identifier. */
class page_id_t
{
@@ -181,6 +182,7 @@ private:
typedef ib_bpmutex_t BPageMutex;
typedef ib_mutex_t BufPoolMutex;
+
typedef ib_mutex_t FlushListMutex;
typedef BPageMutex BufPoolZipMutex;
typedef rw_lock_t BPageLock;
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index e05c2a586e3..7e8a58b59ad 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -358,12 +358,8 @@ extern ulong srv_n_page_hash_locks;
extern ulong srv_LRU_scan_depth;
/** Whether or not to flush neighbors of a block */
extern ulong srv_flush_neighbors;
-/** Previously requested size */
-extern ulint srv_buf_pool_old_size;
-/** Current size as scaling factor for the other components */
-extern ulint srv_buf_pool_base_size;
-/** Current size in bytes */
-extern ulint srv_buf_pool_curr_size;
+/** Buffer pool size is being changed */
+extern std::atomic<bool> srv_buf_pool_size_changing;
/** Dump this % of each buffer pool during BP dump */
extern ulong srv_buf_pool_dump_pct;
#ifdef UNIV_DEBUG
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index a3feefef9f5..3b2c07e8ac9 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2020, MariaDB Corporation.
Copyright (c) 2008, Google Inc.
Copyright (c) 2012, Facebook Inc.
@@ -51,7 +52,11 @@ instrumentation due to their large number of instances. */
/* Key defines to register InnoDB mutexes with performance schema */
extern mysql_pfs_key_t autoinc_mutex_key;
extern mysql_pfs_key_t buffer_block_mutex_key;
+extern mysql_pfs_key_t buf_pool_flush_state_mutex_key;
extern mysql_pfs_key_t buf_pool_mutex_key;
+extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
+extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
+extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
extern mysql_pfs_key_t buf_pool_zip_mutex_key;
extern mysql_pfs_key_t cache_last_read_mutex_key;
extern mysql_pfs_key_t dict_foreign_err_mutex_key;
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
index e2b5354d2de..6b08516886d 100644
--- a/storage/innobase/include/sync0types.h
+++ b/storage/innobase/include/sync0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2018, MariaDB Corporation.
+Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -162,7 +162,7 @@ V
Search system mutex
|
V
-Buffer pool mutex
+Buffer pool mutexes
|
V
Log mutex
@@ -193,11 +193,13 @@ enum latch_level_t {
SYNC_DOUBLEWRITE,
SYNC_BUF_FLUSH_LIST,
-
+ SYNC_BUF_FLUSH_STATE,
+ SYNC_BUF_ZIP_HASH,
+ SYNC_BUF_FREE_LIST,
+ SYNC_BUF_ZIP_FREE,
SYNC_BUF_BLOCK,
SYNC_BUF_PAGE_HASH,
-
- SYNC_BUF_POOL,
+ SYNC_BUF_LRU_LIST,
SYNC_POOL,
SYNC_POOL_MANAGER,
@@ -284,8 +286,12 @@ enum latch_id_t {
LATCH_ID_NONE = 0,
LATCH_ID_AUTOINC,
LATCH_ID_BUF_BLOCK_MUTEX,
- LATCH_ID_BUF_POOL,
LATCH_ID_BUF_POOL_ZIP,
+ LATCH_ID_BUF_POOL_LRU_LIST,
+ LATCH_ID_BUF_POOL_FREE_LIST,
+ LATCH_ID_BUF_POOL_ZIP_FREE,
+ LATCH_ID_BUF_POOL_ZIP_HASH,
+ LATCH_ID_BUF_POOL_FLUSH_STATE,
LATCH_ID_CACHE_LAST_READ,
LATCH_ID_DICT_FOREIGN_ERR,
LATCH_ID_DICT_SYS,
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index 1a0bfb39523..207d923c719 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -213,12 +213,8 @@ ulong srv_n_page_hash_locks = 16;
ulong srv_LRU_scan_depth;
/** innodb_flush_neighbors; whether or not to flush neighbors of a block */
ulong srv_flush_neighbors;
-/** Previously requested size */
-ulint srv_buf_pool_old_size;
-/** Current size as scaling factor for the other components */
-ulint srv_buf_pool_base_size;
-/** Current size in bytes */
-ulint srv_buf_pool_curr_size;
+/** Buffer pool size is being changed */
+std::atomic<bool> srv_buf_pool_size_changing;
/** Dump this % of each buffer pool during BP dump */
ulong srv_buf_pool_dump_pct;
/** Abort load after this amount of pages */
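
Replacing the three size-bookkeeping variables with a single std::atomic<bool>
lets readers poll for an ongoing resize without a mutex. A sketch of how such
a flag is typically used (assumed usage, not shown in this hunk):

	#include <atomic>

	static std::atomic<bool> resizing{false};

	static bool try_begin_resize()
	{
		bool expected = false;
		// Only one resizer may win; everyone else backs off.
		return resizing.compare_exchange_strong(expected, true);
	}

	static void end_resize()
	{
		resizing.store(false);
	}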
diff --git a/storage/innobase/sync/sync0debug.cc b/storage/innobase/sync/sync0debug.cc
index bc445bd4604..871cfc918ab 100644
--- a/storage/innobase/sync/sync0debug.cc
+++ b/storage/innobase/sync/sync0debug.cc
@@ -457,9 +457,13 @@ LatchDebug::LatchDebug()
LEVEL_MAP_INSERT(SYNC_ANY_LATCH);
LEVEL_MAP_INSERT(SYNC_DOUBLEWRITE);
LEVEL_MAP_INSERT(SYNC_BUF_FLUSH_LIST);
+ LEVEL_MAP_INSERT(SYNC_BUF_FLUSH_STATE);
+ LEVEL_MAP_INSERT(SYNC_BUF_ZIP_HASH);
+ LEVEL_MAP_INSERT(SYNC_BUF_FREE_LIST);
+ LEVEL_MAP_INSERT(SYNC_BUF_ZIP_FREE);
LEVEL_MAP_INSERT(SYNC_BUF_BLOCK);
LEVEL_MAP_INSERT(SYNC_BUF_PAGE_HASH);
- LEVEL_MAP_INSERT(SYNC_BUF_POOL);
+ LEVEL_MAP_INSERT(SYNC_BUF_LRU_LIST);
LEVEL_MAP_INSERT(SYNC_POOL);
LEVEL_MAP_INSERT(SYNC_POOL_MANAGER);
LEVEL_MAP_INSERT(SYNC_SEARCH_SYS);
@@ -817,7 +821,11 @@ LatchDebug::check_order(
break;
case SYNC_BUF_FLUSH_LIST:
- case SYNC_BUF_POOL:
+ case SYNC_BUF_LRU_LIST:
+ case SYNC_BUF_FREE_LIST:
+ case SYNC_BUF_ZIP_FREE:
+ case SYNC_BUF_ZIP_HASH:
+ case SYNC_BUF_FLUSH_STATE:
/* We can have multiple mutexes of this type therefore we
can only check whether the greater than condition holds. */
@@ -826,22 +834,9 @@ LatchDebug::check_order(
break;
case SYNC_BUF_PAGE_HASH:
-
- /* Multiple page_hash locks are only allowed during
- buf_pool.validate() and that is where buf_pool mutex is already
- held. */
-
- /* Fall through */
-
case SYNC_BUF_BLOCK:
-
- /* Either the thread must own the (buffer pool) buf_pool.mutex
- or it is allowed to latch only ONE of (buffer block)
- block->mutex or buf_pool.zip_mutex. */
-
- if (less(latches, level) != NULL) {
+ if (less(latches, level)) {
basic_check(latches, level, level - 1);
- ut_a(find(latches, SYNC_BUF_POOL) != 0);
}
break;
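
The relaxed check above relies on the global latch-level order: a thread may
acquire a latch only at a level no higher than anything it already holds, and
basic_check(latches, level, level - 1) permits equal levels for multiple
mutexes of the same kind. A standalone sketch of that invariant (hypothetical
names):

	#include <cassert>
	#include <vector>

	thread_local std::vector<int> held_levels;

	static void latch_acquire(int level)
	{
		for (int held : held_levels)
			assert(level <= held);	// never latch "upwards"; equal
						// levels allowed for same kind
		held_levels.push_back(level);
	}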
@@ -1286,7 +1281,25 @@ sync_latch_meta_init()
buffer_block_mutex_key);
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
- LATCH_ADD_MUTEX(BUF_POOL, SYNC_BUF_POOL, buf_pool_mutex_key);
+ LATCH_ADD_MUTEX(BUF_POOL_LRU_LIST,
+ SYNC_BUF_LRU_LIST,
+ buf_pool_mutex_key);
+
+ LATCH_ADD_MUTEX(BUF_POOL_FREE_LIST,
+ SYNC_BUF_FREE_LIST,
+ buf_pool_free_list_mutex_key);
+
+ LATCH_ADD_MUTEX(BUF_POOL_ZIP_FREE,
+ SYNC_BUF_ZIP_FREE,
+ buf_pool_zip_free_mutex_key);
+
+ LATCH_ADD_MUTEX(BUF_POOL_ZIP_HASH,
+ SYNC_BUF_ZIP_HASH,
+			buf_pool_zip_hash_mutex_key);
+
+ LATCH_ADD_MUTEX(BUF_POOL_FLUSH_STATE,
+ SYNC_BUF_FLUSH_STATE,
+ buf_pool_flush_state_mutex_key);
LATCH_ADD_MUTEX(BUF_POOL_ZIP, SYNC_BUF_BLOCK, buf_pool_zip_mutex_key);
diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc
index 94dbbced4c7..9580011b7a2 100644
--- a/storage/innobase/sync/sync0sync.cc
+++ b/storage/innobase/sync/sync0sync.cc
@@ -38,7 +38,11 @@ Created 9/5/1995 Heikki Tuuri
/* Key to register autoinc_mutex with performance schema */
mysql_pfs_key_t autoinc_mutex_key;
mysql_pfs_key_t buffer_block_mutex_key;
+mysql_pfs_key_t buf_pool_flush_state_mutex_key;
mysql_pfs_key_t buf_pool_mutex_key;
+mysql_pfs_key_t buf_pool_free_list_mutex_key;
+mysql_pfs_key_t buf_pool_zip_free_mutex_key;
+mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
mysql_pfs_key_t buf_pool_zip_mutex_key;
mysql_pfs_key_t cache_last_read_mutex_key;
mysql_pfs_key_t dict_foreign_err_mutex_key;