From ef0dc50c05594e4287a2a3e06aef8e2ab576b8cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 13 Oct 2021 11:56:33 +0300 Subject: MDEV-26815 : galera.galera_ftwrl_drain fails with wrong errno 1146 Add wait_conditions to stabilize --- mysql-test/suite/galera/r/galera_ftwrl_drain.result | 16 ++++++++-------- mysql-test/suite/galera/t/galera_ftwrl_drain.test | 11 +++++++---- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/mysql-test/suite/galera/r/galera_ftwrl_drain.result b/mysql-test/suite/galera/r/galera_ftwrl_drain.result index 751811b88fd..f90e6dfcdfd 100644 --- a/mysql-test/suite/galera/r/galera_ftwrl_drain.result +++ b/mysql-test/suite/galera/r/galera_ftwrl_drain.result @@ -8,9 +8,9 @@ connection node_2; SET SESSION wsrep_sync_wait = 0; SET SESSION wsrep_on = 0; SET SESSION wsrep_on = 1; -SELECT COUNT(*) = 0 FROM t1; -COUNT(*) = 0 -1 +SELECT COUNT(*) AS EXPECT_0 FROM t1; +EXPECT_0 +0 connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; connection node_2a; FLUSH TABLES WITH READ LOCK;; @@ -27,12 +27,12 @@ ERROR HY000: Lock wait timeout exceeded; try restarting transaction connection node_2a; UNLOCK TABLES; connection node_2; -SELECT COUNT(*) = 1 FROM t1; -COUNT(*) = 1 +SELECT COUNT(*) AS EXPECT_1 FROM t1; +EXPECT_1 1 INSERT INTO t1 VALUES (3); connection node_1; -SELECT COUNT(*) = 2 FROM t1; -COUNT(*) = 2 -1 +SELECT COUNT(*) AS EXPECT_2 FROM t1; +EXPECT_2 +2 DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_ftwrl_drain.test b/mysql-test/suite/galera/t/galera_ftwrl_drain.test index 690e890cdea..9ed93643b3f 100644 --- a/mysql-test/suite/galera/t/galera_ftwrl_drain.test +++ b/mysql-test/suite/galera/t/galera_ftwrl_drain.test @@ -18,7 +18,8 @@ CREATE TABLE t1 (id INT PRIMARY KEY) ENGINE=InnoDB; --connection node_2 - +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1' +--source include/wait_condition.inc --let $galera_sync_point = apply_monitor_slave_enter_sync 
--source include/galera_set_sync_point.inc @@ -31,7 +32,7 @@ SET SESSION wsrep_sync_wait = 0; # Wait until applier has blocked --source include/galera_wait_sync_point.inc -SELECT COUNT(*) = 0 FROM t1; +SELECT COUNT(*) AS EXPECT_0 FROM t1; --connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 --connection node_2a @@ -61,9 +62,11 @@ INSERT INTO t2 VALUES (2); UNLOCK TABLES; --connection node_2 -SELECT COUNT(*) = 1 FROM t1; +--let $wait_condition = SELECT COUNT(*) = 1 FROM t1 +--source include/wait_condition.inc +SELECT COUNT(*) AS EXPECT_1 FROM t1; INSERT INTO t1 VALUES (3); --connection node_1 -SELECT COUNT(*) = 2 FROM t1; +SELECT COUNT(*) AS EXPECT_2 FROM t1; DROP TABLE t1; -- cgit v1.2.1 From 78e023c2743c9b7fc17db31cc11524920b658075 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Wed, 13 Oct 2021 15:02:57 +0200 Subject: Work around an assertion on shutdown. Something initiates purge, while all purge THDs are destroyed. tpool::waitable_task.disable() would not allow the task to be executed anymore --- storage/innobase/srv/srv0srv.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index f8a4d838a98..a26a862e1ab 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1973,7 +1973,7 @@ void srv_init_purge_tasks() static void srv_shutdown_purge_tasks() { - purge_coordinator_task.wait(); + purge_coordinator_task.disable(); delete purge_coordinator_timer; purge_coordinator_timer= nullptr; purge_worker_task.wait(); -- cgit v1.2.1 From d2b4d3ada02253e39cbec7e6ecf656b54a8476d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 13 Oct 2021 17:57:31 +0300 Subject: MDEV-26707: SR transaction rolls back locally, but not in cluster Record correct result file. 
--- .../suite/galera_3nodes_sr/r/MDEV-26707.result | 25 +++------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/mysql-test/suite/galera_3nodes_sr/r/MDEV-26707.result b/mysql-test/suite/galera_3nodes_sr/r/MDEV-26707.result index a041274162f..8de724c1576 100644 --- a/mysql-test/suite/galera_3nodes_sr/r/MDEV-26707.result +++ b/mysql-test/suite/galera_3nodes_sr/r/MDEV-26707.result @@ -71,35 +71,16 @@ connection node_3a; SET SESSION wsrep_sync_wait = 0; SET SESSION wsrep_sync_wait = DEFAULT; connection node_1a; -Timeout in wait_condition.inc for SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log -Id User Host db Command Time State Info Progress -1 system user NULL Sleep 66 wsrep aborter idle NULL 0.000 -2 system user NULL Sleep 66 closing tables NULL 0.000 -10 root localhost test Sleep 58 NULL 0.000 -11 root localhost:52722 test Sleep 56 NULL 0.000 -12 root localhost:52724 test Query 0 starting show full processlist 0.000 SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; EXPECT_0 -1 +0 connection node_2a; -Timeout in wait_condition.inc for SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log -Id User Host db Command Time State Info Progress -1 system user NULL Sleep 96 wsrep aborter idle NULL 0.000 -2 system user NULL Sleep 87 closing tables NULL 0.000 -10 root localhost:37222 test Sleep 64 NULL 0.000 -11 root localhost:37228 test Query 0 starting show full processlist 0.000 SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; EXPECT_0 -1 +0 connection node_3a; -Timeout in wait_condition.inc for SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log -Id User Host db Command Time State Info Progress -1 system user NULL Sleep 122 wsrep aborter idle NULL 0.000 -2 system user NULL Sleep 117 closing tables NULL 0.000 -10 root localhost:60992 test Sleep 117 NULL 0.000 -11 root localhost:60994 test Query 0 starting show full processlist 0.000 SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; EXPECT_0 -1 +0 connection node_1; 
DROP TABLE t1; -- cgit v1.2.1 From bd1573b0f332d4c7c08aab2974aadd544dbc24e3 Mon Sep 17 00:00:00 2001 From: Sergei Krivonos Date: Wed, 6 Oct 2021 11:31:08 +0300 Subject: Xcode compatibility update: pcre, mysql-test-run.pl --- cmake/pcre.cmake | 19 ++++++++++--------- mysql-test/mysql-test-run.pl | 1 + 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cmake/pcre.cmake b/cmake/pcre.cmake index 3d4f163fab9..5ea81f53828 100644 --- a/cmake/pcre.cmake +++ b/cmake/pcre.cmake @@ -11,21 +11,22 @@ MACRO(BUNDLE_PCRE2) FOREACH(lib pcre2-posix pcre2-8) ADD_LIBRARY(${lib} STATIC IMPORTED GLOBAL) ADD_DEPENDENCIES(${lib} pcre2) + + GET_PROPERTY(MULTICONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) + IF(MULTICONFIG) + SET(intdir "${CMAKE_CFG_INTDIR}/") + ELSE() + SET(intdir) + ENDIF() + + SET(file ${dir}/src/pcre2-build/${intdir}${CMAKE_STATIC_LIBRARY_PREFIX}${lib}${CMAKE_STATIC_LIBRARY_SUFFIX}) + IF(WIN32) # Debug libary name. # Same condition as in pcre2 CMakeLists.txt that adds "d" - GET_PROPERTY(MULTICONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) - IF(MULTICONFIG) - SET(intdir "${CMAKE_CFG_INTDIR}/") - ELSE() - SET(intdir) - ENDIF() - - SET(file ${dir}/src/pcre2-build/${intdir}${CMAKE_STATIC_LIBRARY_PREFIX}${lib}${CMAKE_STATIC_LIBRARY_SUFFIX}) SET(file_d ${dir}/src/pcre2-build/${intdir}${CMAKE_STATIC_LIBRARY_PREFIX}${lib}d${CMAKE_STATIC_LIBRARY_SUFFIX}) SET_TARGET_PROPERTIES(${lib} PROPERTIES IMPORTED_LOCATION_DEBUG ${file_d}) ELSE() - SET(file ${dir}/src/pcre2-build/${CMAKE_STATIC_LIBRARY_PREFIX}${lib}${CMAKE_STATIC_LIBRARY_SUFFIX}) SET(file_d) ENDIF() SET(byproducts ${byproducts} BUILD_BYPRODUCTS ${file} ${file_d}) diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index dbe5fc4d650..9a66a839de9 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -1260,6 +1260,7 @@ sub command_line_setup { { $path_client_bindir= mtr_path_exists("$bindir/client_release", "$bindir/client_debug", + "$bindir/client/debug", 
"$bindir/client$opt_vs_config", "$bindir/client", "$bindir/bin"); -- cgit v1.2.1 From bbae2d398f866b00f7e8ad71984884ff3375df52 Mon Sep 17 00:00:00 2001 From: Andrei Elkin Date: Wed, 29 Sep 2021 19:19:38 +0300 Subject: MDEV-26712 row events never reset thd->mem_root but must do that at the end of the statement. A provide template patch is elaborated also to match to the upstream fixes of the very same bug. --- sql/log_event.cc | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/sql/log_event.cc b/sql/log_event.cc index 04577be4f6f..9108b57fec1 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -11208,12 +11208,17 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) } #endif /* WITH_WSREP && HAVE_QUERY_CACHE */ - if (get_flags(STMT_END_F) && (error= rows_event_stmt_cleanup(rgi, thd))) - slave_rows_error_report(ERROR_LEVEL, - thd->is_error() ? 0 : error, - rgi, thd, table, - get_type_str(), - RPL_LOG_NAME, (ulong) log_pos); + if (get_flags(STMT_END_F)) + { + if ((error= rows_event_stmt_cleanup(rgi, thd))) + slave_rows_error_report(ERROR_LEVEL, + thd->is_error() ? 
0 : error, + rgi, thd, table, + get_type_str(), + RPL_LOG_NAME, (ulong) log_pos); + if (thd->slave_thread) + free_root(thd->mem_root, MYF(MY_KEEP_PREALLOC)); + } DBUG_RETURN(error); err: -- cgit v1.2.1 From df383043427fb22b0735fe31968db860f4cdb7a0 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Thu, 14 Oct 2021 08:37:23 +0400 Subject: MDEV-26742 Assertion `field->type_handler() == this' failed in FixedBinTypeBundle::Type_handler_fbt::stored_field_cmp_to_item --- .../mysql-test/type_inet/type_inet6_innodb.result | 27 ++++++++++++++++++++++ .../mysql-test/type_inet/type_inet6_innodb.test | 18 +++++++++++++++ .../mysql-test/type_inet/type_inet6_myisam.result | 19 +++++++++++++++ .../mysql-test/type_inet/type_inet6_myisam.test | 12 ++++++++++ sql/field.cc | 19 +++++++++++++-- sql/field.h | 2 ++ 6 files changed, 95 insertions(+), 2 deletions(-) diff --git a/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.result b/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.result index 5f7063b8f4b..a6911751747 100644 --- a/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.result +++ b/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.result @@ -88,5 +88,32 @@ Warnings: Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = INET6'::ff' DROP TABLE t1; # +# MDEV-26742 Assertion `field->type_handler() == this' failed in FixedBinTypeBundle::Type_handler_fbt::stored_field_cmp_to_item +# +CREATE TABLE t1 (pk inet6, c text) engine=myisam; +INSERT INTO t1 VALUES ('::',1); +CREATE TABLE t2 (d text, KEY (d)) engine=innodb ; +Warnings: +Note 1071 Specified key was too long; max key length is 3072 bytes +INSERT INTO t2 VALUES (2); +SELECT * FROM t2 JOIN t1 ON ( t1.pk > t2.d); +d pk c +Warnings: +Warning 1292 Incorrect inet6 value: '2' +UPDATE t2 JOIN t1 ON ( t1.pk > t2.d) SET t1.c = 1; +ERROR 22007: Incorrect inet6 value: '2' +SET sql_mode=''; +UPDATE t2 JOIN t1 ON ( t1.pk > t2.d) SET t1.c = 1; +Warnings: +Warning 1292 Incorrect inet6 
value: '2' +SET sql_mode=DEFAULT; +SELECT * FROM t1; +pk c +:: 1 +SELECT * FROM t2; +d +2 +DROP TABLE t1, t2; +# # End of 10.5 tests # diff --git a/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.test b/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.test index dd6049abbf3..55826cc3e3f 100644 --- a/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.test +++ b/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.test @@ -12,6 +12,24 @@ SET default_storage_engine=InnoDB; --source type_inet6_engines.inc +--echo # +--echo # MDEV-26742 Assertion `field->type_handler() == this' failed in FixedBinTypeBundle::Type_handler_fbt::stored_field_cmp_to_item +--echo # + +CREATE TABLE t1 (pk inet6, c text) engine=myisam; +INSERT INTO t1 VALUES ('::',1); +CREATE TABLE t2 (d text, KEY (d)) engine=innodb ; +INSERT INTO t2 VALUES (2); +SELECT * FROM t2 JOIN t1 ON ( t1.pk > t2.d); +--error ER_TRUNCATED_WRONG_VALUE +UPDATE t2 JOIN t1 ON ( t1.pk > t2.d) SET t1.c = 1; +SET sql_mode=''; +UPDATE t2 JOIN t1 ON ( t1.pk > t2.d) SET t1.c = 1; +SET sql_mode=DEFAULT; +SELECT * FROM t1; +SELECT * FROM t2; +DROP TABLE t1, t2; + --echo # --echo # End of 10.5 tests diff --git a/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.result b/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.result index c8dba6ff959..ba65d61cb08 100644 --- a/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.result +++ b/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.result @@ -88,5 +88,24 @@ Warnings: Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = INET6'::ff' DROP TABLE t1; # +# MDEV-26742 Assertion `field->type_handler() == this' failed in FixedBinTypeBundle::Type_handler_fbt::stored_field_cmp_to_item +# +CREATE TABLE t1 (c varchar(64), key(c)) engine=myisam; +INSERT INTO t1 VALUES ('0::1'),('::1'),('::2'); +SELECT * FROM t1 WHERE c>CAST('::1' AS INET6); +c +::2 +EXPLAIN SELECT * FROM t1 WHERE c>CAST('::1' AS INET6); +id select_type table 
type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index c c 67 NULL 3 Using where; Using index +SELECT * FROM t1 WHERE c=CAST('::1' AS INET6); +c +0::1 +::1 +EXPLAIN SELECT * FROM t1 WHERE c=CAST('::1' AS INET6); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index c c 67 NULL 3 Using where; Using index +DROP TABLE t1; +# # End of 10.5 tests # diff --git a/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.test b/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.test index c5183f01cf0..0ba8369ac95 100644 --- a/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.test +++ b/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.test @@ -10,6 +10,18 @@ SET default_storage_engine=MyISAM; --source type_inet6_engines.inc +--echo # +--echo # MDEV-26742 Assertion `field->type_handler() == this' failed in FixedBinTypeBundle::Type_handler_fbt::stored_field_cmp_to_item +--echo # + +CREATE TABLE t1 (c varchar(64), key(c)) engine=myisam; +INSERT INTO t1 VALUES ('0::1'),('::1'),('::2'); +SELECT * FROM t1 WHERE c>CAST('::1' AS INET6); +EXPLAIN SELECT * FROM t1 WHERE c>CAST('::1' AS INET6); +SELECT * FROM t1 WHERE c=CAST('::1' AS INET6); +EXPLAIN SELECT * FROM t1 WHERE c=CAST('::1' AS INET6); +DROP TABLE t1; + --echo # --echo # End of 10.5 tests diff --git a/sql/field.cc b/sql/field.cc index 7ff07540538..2c768527ced 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1276,6 +1276,21 @@ bool Field::can_be_substituted_to_equal_item(const Context &ctx, } +bool Field::cmp_is_done_using_type_handler_of_this(const Item_bool_func *cond, + const Item *item) const +{ + /* + We could eventually take comparison_type_handler() from cond, + instead of calculating it again. But only some descendants of + Item_bool_func has this method. So this needs some hierarchy changes. + Another option is to pass "class Context" to this method. 
+ */ + Type_handler_hybrid_field_type cmp(type_handler_for_comparison()); + return !cmp.aggregate_for_comparison(item->type_handler_for_comparison()) && + cmp.type_handler() == type_handler_for_comparison(); +} + + /* This handles all numeric and BIT data types. */ @@ -7356,7 +7371,7 @@ bool Field_longstr::cmp_to_string_with_same_collation(const Item_bool_func *cond, const Item *item) const { - return item->cmp_type() == STRING_RESULT && + return cmp_is_done_using_type_handler_of_this(cond, item) && charset() == cond->compare_collation(); } @@ -7365,7 +7380,7 @@ bool Field_longstr::cmp_to_string_with_stricter_collation(const Item_bool_func *cond, const Item *item) const { - return item->cmp_type() == STRING_RESULT && + return cmp_is_done_using_type_handler_of_this(cond, item) && (charset() == cond->compare_collation() || cond->compare_collation()->state & MY_CS_BINSORT); } diff --git a/sql/field.h b/sql/field.h index 6747f8070dc..47a85efc43e 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1648,6 +1648,8 @@ protected: } int warn_if_overflow(int op_result); Copy_func *get_identical_copy_func() const; + bool cmp_is_done_using_type_handler_of_this(const Item_bool_func *cond, + const Item *item) const; bool can_optimize_scalar_range(const RANGE_OPT_PARAM *param, const KEY_PART *key_part, const Item_bool_func *cond, -- cgit v1.2.1 From a2a42f4eba7409264d4b4fb2dc7c04e40c50bd25 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Wed, 13 Oct 2021 12:57:57 +0400 Subject: MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset There were two independent problems which lead to the crash and to the non-relevant records returned in I_S queries: - The code in the I_S implementation was not secure about values with 0x00 bytes. It's fixed by using check_db_name() and check_table_name() inside make_table_name_list(), and by adding the test for 0x00 inside check_table_name(). 
- The code in Item_string::print() did not convert strings without introducers when restoring the CREATE VIEW statement from an Item tree. This made wrong literals inside the "query" line in the view FRM file in cases when the VIEW parse time character_set_client!=character_set_connection. That's fixed by adding a proper conversion. This change also fixed a similar problem in SHOW PROCEDURE CODE - the literals were displayed in wrong character set in SP instructions in cases when the SP parse time character_set_client!=character_set_connection. --- mysql-test/r/ctype_utf16le.result | 33 +++++++++++++++++++++++++ mysql-test/r/ctype_utf8.result | 17 +++++++++++++ mysql-test/r/information_schema.result | 20 ++++++++++++++++ mysql-test/r/sp-code.result | 24 +++++++++++++++++++ mysql-test/t/ctype_utf16le.test | 37 ++++++++++++++++++++++++++++ mysql-test/t/ctype_utf8.test | 18 ++++++++++++++ mysql-test/t/information_schema.test | 21 ++++++++++++++++ mysql-test/t/sp-code.test | 24 +++++++++++++++++++ sql/item.cc | 44 ++++++++++++++++++++++++++++++++-- sql/sql_show.cc | 7 +++++- sql/table.cc | 15 ++++++++++++ 11 files changed, 257 insertions(+), 3 deletions(-) diff --git a/mysql-test/r/ctype_utf16le.result b/mysql-test/r/ctype_utf16le.result index a43ed6ee538..bc28f89b7ea 100644 --- a/mysql-test/r/ctype_utf16le.result +++ b/mysql-test/r/ctype_utf16le.result @@ -3000,5 +3000,38 @@ DROP TABLE t1; # SET STORAGE_ENGINE=Default; # +# MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset +# +SET NAMES utf8; +SET SESSION character_set_connection= utf16le; +CREATE TABLE kv (v BLOB); +CREATE TABLE t (a INT); +CREATE VIEW v AS SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +LOAD DATA INFILE 'MYSQLD_DATADIR/test/v.frm' REPLACE INTO TABLE kv; +SELECT * FROM kv WHERE v LIKE _binary'query=%'; +v +query=select `information_schema`.`TABLES`.`TABLE_CATALOG` AS 
`TABLE_CATALOG`,`information_schema`.`TABLES`.`TABLE_SCHEMA` AS `TABLE_SCHEMA`,`information_schema`.`TABLES`.`TABLE_NAME` AS `TABLE_NAME`,`information_schema`.`TABLES`.`TABLE_TYPE` AS `TABLE_TYPE`,`information_schema`.`TABLES`.`ENGINE` AS `ENGINE`,`information_schema`.`TABLES`.`VERSION` AS `VERSION`,`information_schema`.`TABLES`.`ROW_FORMAT` AS `ROW_FORMAT`,`information_schema`.`TABLES`.`TABLE_ROWS` AS `TABLE_ROWS`,`information_schema`.`TABLES`.`AVG_ROW_LENGTH` AS `AVG_ROW_LENGTH`,`information_schema`.`TABLES`.`DATA_LENGTH` AS `DATA_LENGTH`,`information_schema`.`TABLES`.`MAX_DATA_LENGTH` AS `MAX_DATA_LENGTH`,`information_schema`.`TABLES`.`INDEX_LENGTH` AS `INDEX_LENGTH`,`information_schema`.`TABLES`.`DATA_FREE` AS `DATA_FREE`,`information_schema`.`TABLES`.`AUTO_INCREMENT` AS `AUTO_INCREMENT`,`information_schema`.`TABLES`.`CREATE_TIME` AS `CREATE_TIME`,`information_schema`.`TABLES`.`UPDATE_TIME` AS `UPDATE_TIME`,`information_schema`.`TABLES`.`CHECK_TIME` AS `CHECK_TIME`,`information_schema`.`TABLES`.`TABLE_COLLATION` AS `TABLE_COLLATION`,`information_schema`.`TABLES`.`CHECKSUM` AS `CHECKSUM`,`information_schema`.`TABLES`.`CREATE_OPTIONS` AS `CREATE_OPTIONS`,`information_schema`.`TABLES`.`TABLE_COMMENT` AS `TABLE_COMMENT` from `INFORMATION_SCHEMA`.`TABLES` where `information_schema`.`TABLES`.`TABLE_NAME` = 't1' +TRUNCATE TABLE kv; +SELECT * FROM v; +TABLE_CATALOG TABLE_SCHEMA TABLE_NAME TABLE_TYPE ENGINE VERSION ROW_FORMAT TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE AUTO_INCREMENT CREATE_TIME UPDATE_TIME CHECK_TIME TABLE_COLLATION CHECKSUM CREATE_OPTIONS TABLE_COMMENT +LOCK TABLE t WRITE; +UNLOCK TABLES; +DROP VIEW v; +DROP TABLE t; +DROP TABLE kv; +CREATE TABLE t (a INT); +SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME=CONCAT('t',0x00,'1'); +TABLE_CATALOG TABLE_SCHEMA TABLE_NAME TABLE_TYPE ENGINE VERSION ROW_FORMAT TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE AUTO_INCREMENT CREATE_TIME 
UPDATE_TIME CHECK_TIME TABLE_COLLATION CHECKSUM CREATE_OPTIONS TABLE_COMMENT +LOCK TABLE t WRITE; +UNLOCK TABLES; +DROP TABLE t; +CREATE TABLE t (a INT); +SELECT TABLE_NAME, HEX(TABLE_NAME) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME=CONCAT('t',0x00,'1'); +TABLE_NAME HEX(TABLE_NAME) +SELECT TABLE_NAME, TABLE_SCHEMA, HEX(TABLE_NAME) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA=CONCAT('test',0x00,'1'); +TABLE_NAME TABLE_SCHEMA HEX(TABLE_NAME) +DROP TABLE t; +SET NAMES utf8; +# # End of 10.2 tests # diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 7189629b570..18398f2556a 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -11239,5 +11239,22 @@ DROP TABLE t1; # SET STORAGE_ENGINE=Default; # +# MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset +# +SET NAMES utf8; +SET SESSION character_set_connection=latin1; +CREATE VIEW v1 AS SELECT 'ä' AS c1; +SELECT c1, HEX(c1) FROM v1; +c1 HEX(c1) +ä E4 +CREATE TABLE kv (v BLOB); +LOAD DATA INFILE 'MYSQLD_DATADIR/test/v1.frm' REPLACE INTO TABLE kv; +SELECT * FROM kv WHERE v LIKE _binary'query=%'; +v +query=select 'ä' AS `c1` +DROP TABLE kv; +DROP VIEW v1; +SET NAMES utf8; +# # End of 10.2 tests # diff --git a/mysql-test/r/information_schema.result b/mysql-test/r/information_schema.result index c7153bd6383..cc9cf842570 100644 --- a/mysql-test/r/information_schema.result +++ b/mysql-test/r/information_schema.result @@ -2210,5 +2210,25 @@ SELECT * FROM v LIMIT ROWS EXAMINED 9; ERROR HY000: Sort aborted: DROP VIEW v; # +# MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset +# +CREATE TABLE t (a INT); +SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME=CONCAT('t',0x00,'1'); +TABLE_CATALOG TABLE_SCHEMA TABLE_NAME TABLE_TYPE ENGINE 
VERSION ROW_FORMAT TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE AUTO_INCREMENT CREATE_TIME UPDATE_TIME CHECK_TIME TABLE_COLLATION CHECKSUM CREATE_OPTIONS TABLE_COMMENT +SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA=CONCAT('test',0x00,'1'); +TABLE_CATALOG TABLE_SCHEMA TABLE_NAME TABLE_TYPE ENGINE VERSION ROW_FORMAT TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE AUTO_INCREMENT CREATE_TIME UPDATE_TIME CHECK_TIME TABLE_COLLATION CHECKSUM CREATE_OPTIONS TABLE_COMMENT +DROP TABLE t; +CREATE TABLE `a/~.b` (a INT); +SELECT TABLE_SCHEMA, TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME='a/~.b'; +TABLE_SCHEMA TABLE_NAME +test a/~.b +DROP TABLE `a/~.b`; +CREATE DATABASE `a/~.b`; +CREATE TABLE `a/~.b`.t1 (a INT); +SELECT TABLE_SCHEMA, TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA='a/~.b'; +TABLE_SCHEMA TABLE_NAME +a/~.b t1 +DROP DATABASE `a/~.b`; +# # End of 10.2 Test # diff --git a/mysql-test/r/sp-code.result b/mysql-test/r/sp-code.result index 67932447c2a..4ad79f50d45 100644 --- a/mysql-test/r/sp-code.result +++ b/mysql-test/r/sp-code.result @@ -971,3 +971,27 @@ Pos Instruction DROP PROCEDURE testp_bug11763507; DROP FUNCTION testf_bug11763507; #END OF BUG#11763507 test. 
+# +# MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset +# +SET NAMES utf8; +SET SESSION character_set_connection=latin1; +CREATE PROCEDURE p1() +BEGIN +DECLARE a VARCHAR(10) CHARACTER SET utf8; +SET a='ä'; +SELECT a, 'ä' AS b; +END; +$$ +SHOW PROCEDURE CODE p1; +Pos Instruction +0 set a@0 NULL +1 set a@0 'ä' +2 stmt 0 "SELECT a, 'ä' AS b" +CALL p1; +a b +ä ä +DROP PROCEDURE p1; +# +# End of 10.2 tests +# diff --git a/mysql-test/t/ctype_utf16le.test b/mysql-test/t/ctype_utf16le.test index 204df136274..671100c2d9d 100644 --- a/mysql-test/t/ctype_utf16le.test +++ b/mysql-test/t/ctype_utf16le.test @@ -3,6 +3,7 @@ -- source include/have_utf32.inc -- source include/have_utf8mb4.inc +let $MYSQLD_DATADIR= `select @@datadir`; SET TIME_ZONE='+03:00'; @@ -810,6 +811,42 @@ let $coll='utf16le_nopad_bin'; let $coll_pad='utf16le_bin'; --source include/ctype_pad_all_engines.inc +--echo # +--echo # MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset +--echo # + + + +SET NAMES utf8; +SET SESSION character_set_connection= utf16le; + +CREATE TABLE kv (v BLOB); +CREATE TABLE t (a INT); +CREATE VIEW v AS SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--replace_result $MYSQLD_DATADIR MYSQLD_DATADIR +eval LOAD DATA INFILE '$MYSQLD_DATADIR/test/v.frm' REPLACE INTO TABLE kv; +SELECT * FROM kv WHERE v LIKE _binary'query=%'; +TRUNCATE TABLE kv; +SELECT * FROM v; +LOCK TABLE t WRITE; +UNLOCK TABLES; +DROP VIEW v; +DROP TABLE t; +DROP TABLE kv; + +CREATE TABLE t (a INT); +SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME=CONCAT('t',0x00,'1'); +LOCK TABLE t WRITE; +UNLOCK TABLES; +DROP TABLE t; + +CREATE TABLE t (a INT); +SELECT TABLE_NAME, HEX(TABLE_NAME) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME=CONCAT('t',0x00,'1'); +SELECT TABLE_NAME, 
TABLE_SCHEMA, HEX(TABLE_NAME) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA=CONCAT('test',0x00,'1'); +DROP TABLE t; + +SET NAMES utf8; + --echo # --echo # End of 10.2 tests --echo # diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index 9a64821db66..b34de4175e9 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -2,6 +2,8 @@ # Tests with the utf8 character set # +let $MYSQLD_DATADIR= `select @@datadir`; + let collation=utf8_unicode_ci; --source include/have_collation.inc SET TIME_ZONE='+03:00'; @@ -2165,6 +2167,22 @@ let $coll='utf8_nopad_bin'; let $coll_pad='utf8_bin'; --source include/ctype_pad_all_engines.inc +--echo # +--echo # MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset +--echo # + +SET NAMES utf8; +SET SESSION character_set_connection=latin1; +CREATE VIEW v1 AS SELECT 'ä' AS c1; +SELECT c1, HEX(c1) FROM v1; +CREATE TABLE kv (v BLOB); +--replace_result $MYSQLD_DATADIR MYSQLD_DATADIR +eval LOAD DATA INFILE '$MYSQLD_DATADIR/test/v1.frm' REPLACE INTO TABLE kv; +SELECT * FROM kv WHERE v LIKE _binary'query=%'; +DROP TABLE kv; +DROP VIEW v1; +SET NAMES utf8; + --echo # --echo # End of 10.2 tests --echo # diff --git a/mysql-test/t/information_schema.test b/mysql-test/t/information_schema.test index 9ff94d2deb7..b7f4a7e4407 100644 --- a/mysql-test/t/information_schema.test +++ b/mysql-test/t/information_schema.test @@ -1934,6 +1934,27 @@ SELECT * FROM v LIMIT ROWS EXAMINED 9; DROP VIEW v; +--echo # +--echo # MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset +--echo # + +# Expect empty sets if requested TABLE_NAME or TABLE_SCHEMA with zero bytes +CREATE TABLE t (a INT); +SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME=CONCAT('t',0x00,'1'); +SELECT * FROM 
INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA=CONCAT('test',0x00,'1'); +DROP TABLE t; + +# Make sure check_table_name() does not reject special characters +CREATE TABLE `a/~.b` (a INT); +SELECT TABLE_SCHEMA, TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME='a/~.b'; +DROP TABLE `a/~.b`; + +# Make sure check_db_name() does not reject special characters +CREATE DATABASE `a/~.b`; +CREATE TABLE `a/~.b`.t1 (a INT); +SELECT TABLE_SCHEMA, TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA='a/~.b'; +DROP DATABASE `a/~.b`; + --echo # --echo # End of 10.2 Test --echo # diff --git a/mysql-test/t/sp-code.test b/mysql-test/t/sp-code.test index 129a68204ba..0f19627c78c 100644 --- a/mysql-test/t/sp-code.test +++ b/mysql-test/t/sp-code.test @@ -735,3 +735,27 @@ DROP PROCEDURE testp_bug11763507; DROP FUNCTION testf_bug11763507; --echo #END OF BUG#11763507 test. + + +--echo # +--echo # MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset +--echo # + +SET NAMES utf8; +SET SESSION character_set_connection=latin1; +DELIMITER $$; +CREATE PROCEDURE p1() +BEGIN + DECLARE a VARCHAR(10) CHARACTER SET utf8; + SET a='ä'; + SELECT a, 'ä' AS b; +END; +$$ +DELIMITER ;$$ +SHOW PROCEDURE CODE p1; +CALL p1; +DROP PROCEDURE p1; + +--echo # +--echo # End of 10.2 tests +--echo # diff --git a/sql/item.cc b/sql/item.cc index 2a7c620b864..3ff0219c3b3 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -3338,8 +3338,48 @@ void Item_string::print(String *str, enum_query_type query_type) } else { - // Caller wants a result in the charset of str_value. - str_value.print(str); + /* + We're restoring a parse-able statement from an Item tree. + Make sure to revert character set conversions that previously + happened in the parser when Item_string was created. 
+ */ + if (print_introducer) + { + /* + Print the string as is, without conversion: + Strings with introducers are not converted in the parser. + */ + str_value.print(str); + } + else + { + /* + Print the string with conversion. + Strings without introducers are converted in the parser, + from character_set_client to character_set_connection. + + When restoring a CREATE VIEW statement, + - str_value.charsets() contains parse time character_set_connection + - str->charset() contains parse time character_set_client + So we convert the string back from parse-time character_set_connection + to parse time character_set_client. + + In some cases, e.g. SHOW PROCEDURE CODE, it's also possible + that str->charset() is "utf8mb3" instead of parse time + character_set_client. In these cases we convert + here from the parse-time character_set_connection to utf8mb3. + + QQ: perhaps the code behind SHOW PROCEDURE CODE should + also request the result in the parse-time character_set_client + (like the code restoring CREATE VIEW statements does), + rather than in utf8mb3: + - utf8mb3 does not work well with non-BMP characters (e.g. emoji). + - Simply changing utf8mb3 to utf8mb4 will not fully help: + some character sets have unassigned characters, + they get lost during cs->utf8mb4->cs round trip. 
+ */ + str_value.print_with_conversion(str, str->charset()); + } } str->append('\''); diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 721bb053343..710c68d2551 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -4227,7 +4227,9 @@ make_table_name_list(THD *thd, Dynamic_array *table_names, if (!lookup_field_vals->wild_table_value && lookup_field_vals->table_value.str) { - if (lookup_field_vals->table_value.length > NAME_LEN) + if (check_table_name(lookup_field_vals->table_value.str, + lookup_field_vals->table_value.length, + false)) { /* Impossible value for a table name, @@ -4264,6 +4266,9 @@ make_table_name_list(THD *thd, Dynamic_array *table_names, return (schema_tables_add(thd, table_names, lookup_field_vals->table_value.str)); + if (check_db_name((LEX_STRING*)db_name)) + return 0; // Impossible TABLE_SCHEMA name + find_files_result res= find_files(thd, table_names, db_name, path, &lookup_field_vals->table_value); if (res != FIND_FILES_OK) diff --git a/sql/table.cc b/sql/table.cc index 87b3c158a67..d4f8170e0af 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -4183,6 +4183,21 @@ bool check_table_name(const char *name, size_t length, bool check_for_path_chars if (check_for_path_chars && (*name == '/' || *name == '\\' || *name == '~' || *name == FN_EXTCHAR)) return 1; + /* + We don't allow zero byte in table/schema names: + - Some code still uses NULL-terminated strings. + Zero bytes will confuse this code. + - There is a little practical use of zero bytes in names anyway. + Note, if the string passed as "name" comes here + from the parser as an identifier, it does not contain zero bytes, + as the parser rejects zero bytes in identifiers. + But "name" can also come here from queries like this: + SELECT * FROM I_S.TABLES WHERE TABLE_NAME='str'; + In this case "name" is a general string expression + and it can have any arbitrary bytes, including zero bytes. 
+ */ + if (*name == 0x00) + return 1; name++; name_length++; } -- cgit v1.2.1 From 9e6c383867ed9145ae88af6eb933a1fdd4d5c757 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Wed, 13 Oct 2021 13:13:27 +0300 Subject: MDEV-17964: Assertion `status == 0' failed in add_role_user_mapping_action This happens upon CREATE USER and DROP ROLE. The underlying problem is that our HASH implementation shuffles elements around when performing an update or delete. This means that when doing a scan through the HASH table by index, in search of elements to delete or update one must restart the scan to make sure nothing is missed if at least one delete / update happened. More specifically, what happened in this case: The hash has 131 elements, DROP ROLE removes the element [119]. Its [119]->next was element [129], so [129] is moved to [119]. Now we need to compact the hash, removing the last element [130]. It gets one bit off its hash value and becomes element [2]. The existing element [2] is moved to [129], and old [130] is moved to [2]. We cannot simply move [130] to [129] and make [2]->next=130, it won't work if [2] is itself in the collision list and doesn't belong in [2]. The handle_grant_struct code assumed that it is safe to continue by only reexamining the currently modified / deleted element index, but that is not true. Failing to delete an element in the hash triggered the assertion in the test case. DROP ROLE would not clear all necessary role->role or role->user mappings. To fix the problem we ensure that the scan is restarted, only if an element was deleted / updated, similar to how bubble-sort keeps sorting until it finds no more elements to swap. 
--- mysql-test/suite/roles/rebuild_role_grants.result | 266 ++++++++++++++++ mysql-test/suite/roles/rebuild_role_grants.test | 31 ++ sql/sql_acl.cc | 371 +++++++++++----------- 3 files changed, 479 insertions(+), 189 deletions(-) diff --git a/mysql-test/suite/roles/rebuild_role_grants.result b/mysql-test/suite/roles/rebuild_role_grants.result index 72eabe38b93..101efd47569 100644 --- a/mysql-test/suite/roles/rebuild_role_grants.result +++ b/mysql-test/suite/roles/rebuild_role_grants.result @@ -65,3 +65,269 @@ drop role look, isp, xxx, ppp; connection default; disconnect con1; drop user nnnn@'%'; +CREATE USER u@localhost; +CREATE ROLE r1; +CREATE ROLE r2; +CREATE ROLE r3; +CREATE ROLE r4; +CREATE ROLE r5; +CREATE ROLE r6; +CREATE ROLE r7; +CREATE ROLE r8; +CREATE ROLE r9; +CREATE ROLE r10; +CREATE ROLE r11; +CREATE ROLE r12; +CREATE ROLE r13; +CREATE ROLE r14; +CREATE ROLE r15; +CREATE ROLE r16; +CREATE ROLE r17; +CREATE ROLE r18; +CREATE ROLE r19; +CREATE ROLE r20; +CREATE ROLE r21; +CREATE ROLE r22; +CREATE ROLE r23; +CREATE ROLE r24; +CREATE ROLE r25; +CREATE ROLE r26; +CREATE ROLE r27; +CREATE ROLE r28; +CREATE ROLE r29; +CREATE ROLE r30; +CREATE ROLE r31; +CREATE ROLE r32; +CREATE ROLE r33; +CREATE ROLE r34; +CREATE ROLE r35; +CREATE ROLE r36; +CREATE ROLE r37; +CREATE ROLE r38; +CREATE ROLE r39; +CREATE ROLE r40; +CREATE ROLE r41; +CREATE ROLE r42; +CREATE ROLE r43; +CREATE ROLE r44; +CREATE ROLE r45; +CREATE ROLE r46; +CREATE ROLE r47; +CREATE ROLE r48; +CREATE ROLE r49; +CREATE ROLE r50; +CREATE ROLE r51; +CREATE ROLE r52; +CREATE ROLE r53; +CREATE ROLE r54; +CREATE ROLE r55; +CREATE ROLE r56; +CREATE ROLE r57; +CREATE ROLE r58; +CREATE ROLE r59; +CREATE ROLE r60; +CREATE ROLE r61; +CREATE ROLE r62; +CREATE ROLE r63; +CREATE ROLE r64; +CREATE ROLE r65; +CREATE ROLE r66; +CREATE ROLE r67; +CREATE ROLE r68; +CREATE ROLE r69; +CREATE ROLE r70; +CREATE ROLE r71; +CREATE ROLE r72; +CREATE ROLE r73; +CREATE ROLE r74; +CREATE ROLE r75; +CREATE ROLE r76; +CREATE 
ROLE r77; +CREATE ROLE r78; +CREATE ROLE r79; +CREATE ROLE r80; +CREATE ROLE r81; +CREATE ROLE r82; +CREATE ROLE r83; +CREATE ROLE r84; +CREATE ROLE r85; +CREATE ROLE r86; +CREATE ROLE r87; +CREATE ROLE r88; +CREATE ROLE r89; +CREATE ROLE r90; +CREATE ROLE r91; +CREATE ROLE r92; +CREATE ROLE r93; +CREATE ROLE r94; +CREATE ROLE r95; +CREATE ROLE r96; +CREATE ROLE r97; +CREATE ROLE r98; +CREATE ROLE r99; +CREATE ROLE r100; +CREATE ROLE r101; +CREATE ROLE r102; +CREATE ROLE r103; +CREATE ROLE r104; +CREATE ROLE r105; +CREATE ROLE r106; +CREATE ROLE r107; +CREATE ROLE r108; +CREATE ROLE r109; +CREATE ROLE r110; +CREATE ROLE r111; +CREATE ROLE r112; +CREATE ROLE r113; +CREATE ROLE r114; +CREATE ROLE r115; +CREATE ROLE r116; +CREATE ROLE r117; +CREATE ROLE r118; +CREATE ROLE r119; +CREATE ROLE r120; +CREATE ROLE r121; +CREATE ROLE r122; +CREATE ROLE r123; +CREATE ROLE r124; +CREATE ROLE r125; +CREATE ROLE r126; +CREATE ROLE r127; +CREATE ROLE r128; +CREATE ROLE n; +CREATE ROLE d WITH ADMIN n; +CREATE ROLE '%' WITH ADMIN u@localhost; +DROP ROLE n; +CREATE USER 't'; +DROP ROLE r1; +DROP ROLE r2; +DROP ROLE r3; +DROP ROLE r4; +DROP ROLE r5; +DROP ROLE r6; +DROP ROLE r7; +DROP ROLE r8; +DROP ROLE r9; +DROP ROLE r10; +DROP ROLE r11; +DROP ROLE r12; +DROP ROLE r13; +DROP ROLE r14; +DROP ROLE r15; +DROP ROLE r16; +DROP ROLE r17; +DROP ROLE r18; +DROP ROLE r19; +DROP ROLE r20; +DROP ROLE r21; +DROP ROLE r22; +DROP ROLE r23; +DROP ROLE r24; +DROP ROLE r25; +DROP ROLE r26; +DROP ROLE r27; +DROP ROLE r28; +DROP ROLE r29; +DROP ROLE r30; +DROP ROLE r31; +DROP ROLE r32; +DROP ROLE r33; +DROP ROLE r34; +DROP ROLE r35; +DROP ROLE r36; +DROP ROLE r37; +DROP ROLE r38; +DROP ROLE r39; +DROP ROLE r40; +DROP ROLE r41; +DROP ROLE r42; +DROP ROLE r43; +DROP ROLE r44; +DROP ROLE r45; +DROP ROLE r46; +DROP ROLE r47; +DROP ROLE r48; +DROP ROLE r49; +DROP ROLE r50; +DROP ROLE r51; +DROP ROLE r52; +DROP ROLE r53; +DROP ROLE r54; +DROP ROLE r55; +DROP ROLE r56; +DROP ROLE r57; +DROP ROLE r58; +DROP 
ROLE r59; +DROP ROLE r60; +DROP ROLE r61; +DROP ROLE r62; +DROP ROLE r63; +DROP ROLE r64; +DROP ROLE r65; +DROP ROLE r66; +DROP ROLE r67; +DROP ROLE r68; +DROP ROLE r69; +DROP ROLE r70; +DROP ROLE r71; +DROP ROLE r72; +DROP ROLE r73; +DROP ROLE r74; +DROP ROLE r75; +DROP ROLE r76; +DROP ROLE r77; +DROP ROLE r78; +DROP ROLE r79; +DROP ROLE r80; +DROP ROLE r81; +DROP ROLE r82; +DROP ROLE r83; +DROP ROLE r84; +DROP ROLE r85; +DROP ROLE r86; +DROP ROLE r87; +DROP ROLE r88; +DROP ROLE r89; +DROP ROLE r90; +DROP ROLE r91; +DROP ROLE r92; +DROP ROLE r93; +DROP ROLE r94; +DROP ROLE r95; +DROP ROLE r96; +DROP ROLE r97; +DROP ROLE r98; +DROP ROLE r99; +DROP ROLE r100; +DROP ROLE r101; +DROP ROLE r102; +DROP ROLE r103; +DROP ROLE r104; +DROP ROLE r105; +DROP ROLE r106; +DROP ROLE r107; +DROP ROLE r108; +DROP ROLE r109; +DROP ROLE r110; +DROP ROLE r111; +DROP ROLE r112; +DROP ROLE r113; +DROP ROLE r114; +DROP ROLE r115; +DROP ROLE r116; +DROP ROLE r117; +DROP ROLE r118; +DROP ROLE r119; +DROP ROLE r120; +DROP ROLE r121; +DROP ROLE r122; +DROP ROLE r123; +DROP ROLE r124; +DROP ROLE r125; +DROP ROLE r126; +DROP ROLE r127; +DROP ROLE r128; +DROP ROLE d; +DROP ROLE '%'; +DROP USER 't'; +DROP USER u@localhost; diff --git a/mysql-test/suite/roles/rebuild_role_grants.test b/mysql-test/suite/roles/rebuild_role_grants.test index 84dbdf78fb8..7007df0ecdd 100644 --- a/mysql-test/suite/roles/rebuild_role_grants.test +++ b/mysql-test/suite/roles/rebuild_role_grants.test @@ -67,3 +67,34 @@ drop role look, isp, xxx, ppp; connection default; disconnect con1; drop user nnnn@'%'; + +# +# MDEV-17964 Assertion `status == 0' failed in add_role_user_mapping_action +# upon CREATE USER and DROP ROLE +# +CREATE USER u@localhost; + +--let $n= 1 +while ($n < 129) +{ + eval CREATE ROLE r$n; + inc $n; +} + +CREATE ROLE n; +CREATE ROLE d WITH ADMIN n; +CREATE ROLE '%' WITH ADMIN u@localhost; +DROP ROLE n; +CREATE USER 't'; + +--let $n= 1 +while ($n < 129) +{ + eval DROP ROLE r$n; + inc $n; +} + +DROP ROLE 
d; +DROP ROLE '%'; +DROP USER 't'; +DROP USER u@localhost; diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 89fecc92e9b..f62dd5471eb 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -9645,8 +9645,8 @@ static int handle_grant_struct(enum enum_acl_lists struct_no, bool drop, LEX_USER *user_from, LEX_USER *user_to) { int result= 0; - int idx; int elements; + bool restart; const char *UNINIT_VAR(user); const char *UNINIT_VAR(host); ACL_USER *acl_user= NULL; @@ -9747,82 +9747,98 @@ static int handle_grant_struct(enum enum_acl_lists struct_no, bool drop, DBUG_RETURN(-1); } + #ifdef EXTRA_DEBUG DBUG_PRINT("loop",("scan struct: %u search user: '%s' host: '%s'", struct_no, user_from->user.str, user_from->host.str)); #endif - /* Loop over all elements *backwards* (see the comment below). */ - for (idx= elements - 1; idx >= 0; idx--) - { - /* - Get a pointer to the element. - */ - switch (struct_no) { - case USER_ACL: - acl_user= dynamic_element(&acl_users, idx, ACL_USER*); - user= acl_user->user.str; - host= acl_user->host.hostname; - break; + /* Loop over elements backwards as it may reduce the number of mem-moves + for dynamic arrays. - case DB_ACL: - acl_db= &acl_dbs.at(idx); - user= acl_db->user; - host= acl_db->host.hostname; + We restart the loop, if we deleted or updated anything in a hash table + because calling my_hash_delete or my_hash_update shuffles elements indices + and we can miss some if we do only one scan. + */ + do { + restart= false; + for (int idx= elements - 1; idx >= 0; idx--) + { + /* + Get a pointer to the element. 
+ */ + switch (struct_no) { + case USER_ACL: + acl_user= dynamic_element(&acl_users, idx, ACL_USER*); + user= acl_user->user.str; + host= acl_user->host.hostname; break; - case COLUMN_PRIVILEGES_HASH: - case PROC_PRIVILEGES_HASH: - case FUNC_PRIVILEGES_HASH: - grant_name= (GRANT_NAME*) my_hash_element(grant_name_hash, idx); - user= grant_name->user; - host= grant_name->host.hostname; - break; + case DB_ACL: + acl_db= &acl_dbs.at(idx); + user= acl_db->user; + host= acl_db->host.hostname; + break; - case PROXY_USERS_ACL: - acl_proxy_user= dynamic_element(&acl_proxy_users, idx, ACL_PROXY_USER*); - user= acl_proxy_user->get_user(); - host= acl_proxy_user->get_host(); - break; + case COLUMN_PRIVILEGES_HASH: + case PROC_PRIVILEGES_HASH: + case FUNC_PRIVILEGES_HASH: + grant_name= (GRANT_NAME*) my_hash_element(grant_name_hash, idx); + user= grant_name->user; + host= grant_name->host.hostname; + break; - case ROLES_MAPPINGS_HASH: - role_grant_pair= (ROLE_GRANT_PAIR *) my_hash_element(roles_mappings_hash, idx); - user= role_grant_pair->u_uname; - host= role_grant_pair->u_hname; - break; + case PROXY_USERS_ACL: + acl_proxy_user= dynamic_element(&acl_proxy_users, idx, ACL_PROXY_USER*); + user= acl_proxy_user->get_user(); + host= acl_proxy_user->get_host(); + break; - default: - DBUG_ASSERT(0); - } - if (! user) - user= ""; - if (! host) - host= ""; + case ROLES_MAPPINGS_HASH: + role_grant_pair= (ROLE_GRANT_PAIR *) my_hash_element(roles_mappings_hash, idx); + user= role_grant_pair->u_uname; + host= role_grant_pair->u_hname; + break; + + default: + DBUG_ASSERT(0); + } + if (! user) + user= ""; + if (! host) + host= ""; #ifdef EXTRA_DEBUG - DBUG_PRINT("loop",("scan struct: %u index: %u user: '%s' host: '%s'", - struct_no, idx, user, host)); + DBUG_PRINT("loop",("scan struct: %u index: %u user: '%s' host: '%s'", + struct_no, idx, user, host)); #endif - if (struct_no == ROLES_MAPPINGS_HASH) - { - const char* role= role_grant_pair->r_uname? 
role_grant_pair->r_uname: ""; - if (user_from->is_role()) + if (struct_no == ROLES_MAPPINGS_HASH) { - /* When searching for roles within the ROLES_MAPPINGS_HASH, we have - to check both the user field as well as the role field for a match. + const char* role= role_grant_pair->r_uname? role_grant_pair->r_uname: ""; + if (user_from->is_role()) + { + /* When searching for roles within the ROLES_MAPPINGS_HASH, we have + to check both the user field as well as the role field for a match. - It is possible to have a role granted to a role. If we are going - to modify the mapping entry, it needs to be done on either on the - "user" end (here represented by a role) or the "role" end. At least - one part must match. + It is possible to have a role granted to a role. If we are going + to modify the mapping entry, it needs to be done on either on the + "user" end (here represented by a role) or the "role" end. At least + one part must match. - If the "user" end has a not-empty host string, it can never match - as we are searching for a role here. A role always has an empty host - string. - */ - if ((*host || strcmp(user_from->user.str, user)) && - strcmp(user_from->user.str, role)) - continue; + If the "user" end has a not-empty host string, it can never match + as we are searching for a role here. A role always has an empty host + string. + */ + if ((*host || strcmp(user_from->user.str, user)) && + strcmp(user_from->user.str, role)) + continue; + } + else + { + if (strcmp(user_from->user.str, user) || + my_strcasecmp(system_charset_info, user_from->host.str, host)) + continue; + } } else { @@ -9830,154 +9846,131 @@ static int handle_grant_struct(enum enum_acl_lists struct_no, bool drop, my_strcasecmp(system_charset_info, user_from->host.str, host)) continue; } - } - else - { - if (strcmp(user_from->user.str, user) || - my_strcasecmp(system_charset_info, user_from->host.str, host)) - continue; - } - result= 1; /* At least one element found. 
*/ - if ( drop ) - { - elements--; - switch ( struct_no ) { - case USER_ACL: - free_acl_user(dynamic_element(&acl_users, idx, ACL_USER*)); - delete_dynamic_element(&acl_users, idx); - break; + result= 1; /* At least one element found. */ + if ( drop ) + { + elements--; + switch ( struct_no ) { + case USER_ACL: + free_acl_user(dynamic_element(&acl_users, idx, ACL_USER*)); + delete_dynamic_element(&acl_users, idx); + break; - case DB_ACL: - acl_dbs.del(idx); - break; + case DB_ACL: + acl_dbs.del(idx); + break; - case COLUMN_PRIVILEGES_HASH: - case PROC_PRIVILEGES_HASH: - case FUNC_PRIVILEGES_HASH: - my_hash_delete(grant_name_hash, (uchar*) grant_name); - /* - In our HASH implementation on deletion one elements - is moved into a place where a deleted element was, - and the last element is moved into the empty space. - Thus we need to re-examine the current element, but - we don't have to restart the search from the beginning. - */ - if (idx != elements) - idx++; - break; + case COLUMN_PRIVILEGES_HASH: + case PROC_PRIVILEGES_HASH: + case FUNC_PRIVILEGES_HASH: + my_hash_delete(grant_name_hash, (uchar*) grant_name); + restart= true; + break; - case PROXY_USERS_ACL: - delete_dynamic_element(&acl_proxy_users, idx); - break; + case PROXY_USERS_ACL: + delete_dynamic_element(&acl_proxy_users, idx); + break; - case ROLES_MAPPINGS_HASH: - my_hash_delete(roles_mappings_hash, (uchar*) role_grant_pair); - if (idx != elements) - idx++; - break; + case ROLES_MAPPINGS_HASH: + my_hash_delete(roles_mappings_hash, (uchar*) role_grant_pair); + restart= true; + break; - default: - DBUG_ASSERT(0); - break; + default: + DBUG_ASSERT(0); + break; + } } - } - else if ( user_to ) - { - switch ( struct_no ) { - case USER_ACL: - acl_user->user.str= strdup_root(&acl_memroot, user_to->user.str); - acl_user->user.length= user_to->user.length; - update_hostname(&acl_user->host, strdup_root(&acl_memroot, user_to->host.str)); - acl_user->hostname_length= strlen(acl_user->host.hostname); - break; + else 
if ( user_to ) + { + switch ( struct_no ) { + case USER_ACL: + acl_user->user.str= strdup_root(&acl_memroot, user_to->user.str); + acl_user->user.length= user_to->user.length; + update_hostname(&acl_user->host, strdup_root(&acl_memroot, user_to->host.str)); + acl_user->hostname_length= strlen(acl_user->host.hostname); + break; - case DB_ACL: - acl_db->user= strdup_root(&acl_memroot, user_to->user.str); - update_hostname(&acl_db->host, strdup_root(&acl_memroot, user_to->host.str)); - break; + case DB_ACL: + acl_db->user= strdup_root(&acl_memroot, user_to->user.str); + update_hostname(&acl_db->host, strdup_root(&acl_memroot, user_to->host.str)); + break; - case COLUMN_PRIVILEGES_HASH: - case PROC_PRIVILEGES_HASH: - case FUNC_PRIVILEGES_HASH: - { - /* - Save old hash key and its length to be able to properly update - element position in hash. - */ - char *old_key= grant_name->hash_key; - size_t old_key_length= grant_name->key_length; + case COLUMN_PRIVILEGES_HASH: + case PROC_PRIVILEGES_HASH: + case FUNC_PRIVILEGES_HASH: + { + /* + Save old hash key and its length to be able to properly update + element position in hash. + */ + char *old_key= grant_name->hash_key; + size_t old_key_length= grant_name->key_length; + + /* + Update the grant structure with the new user name and host name. + */ + grant_name->set_user_details(user_to->host.str, grant_name->db, + user_to->user.str, grant_name->tname, + TRUE); + + /* + Since username is part of the hash key, when the user name + is renamed, the hash key is changed. Update the hash to + ensure that the position matches the new hash key value + */ + my_hash_update(grant_name_hash, (uchar*) grant_name, (uchar*) old_key, + old_key_length); + restart= true; + break; + } - /* - Update the grant structure with the new user name and host name. 
- */ - grant_name->set_user_details(user_to->host.str, grant_name->db, - user_to->user.str, grant_name->tname, - TRUE); - - /* - Since username is part of the hash key, when the user name - is renamed, the hash key is changed. Update the hash to - ensure that the position matches the new hash key value - */ - my_hash_update(grant_name_hash, (uchar*) grant_name, (uchar*) old_key, - old_key_length); - /* - hash_update() operation could have moved element from the tail or - the head of the hash to the current position. But it can never - move an element from the head to the tail or from the tail to the - head over the current element. - So we need to examine the current element once again, but - we don't need to restart the search from the beginning. - */ - idx++; + case PROXY_USERS_ACL: + acl_proxy_user->set_user (&acl_memroot, user_to->user.str); + acl_proxy_user->set_host (&acl_memroot, user_to->host.str); break; - } - case PROXY_USERS_ACL: - acl_proxy_user->set_user (&acl_memroot, user_to->user.str); - acl_proxy_user->set_host (&acl_memroot, user_to->host.str); - break; + case ROLES_MAPPINGS_HASH: + { + /* + Save old hash key and its length to be able to properly update + element position in hash. + */ + char *old_key= role_grant_pair->hashkey.str; + size_t old_key_length= role_grant_pair->hashkey.length; + bool oom; + + if (user_to->is_role()) + oom= role_grant_pair->init(&acl_memroot, role_grant_pair->u_uname, + role_grant_pair->u_hname, + user_to->user.str, false); + else + oom= role_grant_pair->init(&acl_memroot, user_to->user.str, + user_to->host.str, + role_grant_pair->r_uname, false); + if (oom) + DBUG_RETURN(-1); + + my_hash_update(roles_mappings_hash, (uchar*) role_grant_pair, + (uchar*) old_key, old_key_length); + restart= true; + break; + } - case ROLES_MAPPINGS_HASH: - { - /* - Save old hash key and its length to be able to properly update - element position in hash. 
- */ - char *old_key= role_grant_pair->hashkey.str; - size_t old_key_length= role_grant_pair->hashkey.length; - bool oom; - - if (user_to->is_role()) - oom= role_grant_pair->init(&acl_memroot, role_grant_pair->u_uname, - role_grant_pair->u_hname, - user_to->user.str, false); - else - oom= role_grant_pair->init(&acl_memroot, user_to->user.str, - user_to->host.str, - role_grant_pair->r_uname, false); - if (oom) - DBUG_RETURN(-1); - - my_hash_update(roles_mappings_hash, (uchar*) role_grant_pair, - (uchar*) old_key, old_key_length); - idx++; // see the comment above + default: + DBUG_ASSERT(0); break; } - default: - DBUG_ASSERT(0); + } + else + { + /* If search is requested, we do not need to search further. */ break; } - } - else - { - /* If search is requested, we do not need to search further. */ - break; - } - } + } while (restart); #ifdef EXTRA_DEBUG DBUG_PRINT("loop",("scan struct: %u result %d", struct_no, result)); #endif -- cgit v1.2.1 From 5f63f5dc60a48105d739f606cbf0a575925029d1 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Fri, 15 Oct 2021 21:56:17 +0400 Subject: A clean-up patch for MDEV-23408: fixing test failure on Windows Schema and table names in view FRM files are: - in upper case on Linux - in lower case on Windows Using the LOWER() function when displaying an FRM file fragment, to avoid the OS-specific difference. 
--- mysql-test/r/ctype_utf16le.result | 10 +++++----- mysql-test/t/ctype_utf16le.test | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mysql-test/r/ctype_utf16le.result b/mysql-test/r/ctype_utf16le.result index bc28f89b7ea..8dc51eb262b 100644 --- a/mysql-test/r/ctype_utf16le.result +++ b/mysql-test/r/ctype_utf16le.result @@ -3004,13 +3004,13 @@ SET STORAGE_ENGINE=Default; # SET NAMES utf8; SET SESSION character_set_connection= utf16le; -CREATE TABLE kv (v BLOB); +CREATE TABLE kv (v TEXT CHARACTER SET latin1); CREATE TABLE t (a INT); CREATE VIEW v AS SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; -LOAD DATA INFILE 'MYSQLD_DATADIR/test/v.frm' REPLACE INTO TABLE kv; -SELECT * FROM kv WHERE v LIKE _binary'query=%'; -v -query=select `information_schema`.`TABLES`.`TABLE_CATALOG` AS `TABLE_CATALOG`,`information_schema`.`TABLES`.`TABLE_SCHEMA` AS `TABLE_SCHEMA`,`information_schema`.`TABLES`.`TABLE_NAME` AS `TABLE_NAME`,`information_schema`.`TABLES`.`TABLE_TYPE` AS `TABLE_TYPE`,`information_schema`.`TABLES`.`ENGINE` AS `ENGINE`,`information_schema`.`TABLES`.`VERSION` AS `VERSION`,`information_schema`.`TABLES`.`ROW_FORMAT` AS `ROW_FORMAT`,`information_schema`.`TABLES`.`TABLE_ROWS` AS `TABLE_ROWS`,`information_schema`.`TABLES`.`AVG_ROW_LENGTH` AS `AVG_ROW_LENGTH`,`information_schema`.`TABLES`.`DATA_LENGTH` AS `DATA_LENGTH`,`information_schema`.`TABLES`.`MAX_DATA_LENGTH` AS `MAX_DATA_LENGTH`,`information_schema`.`TABLES`.`INDEX_LENGTH` AS `INDEX_LENGTH`,`information_schema`.`TABLES`.`DATA_FREE` AS `DATA_FREE`,`information_schema`.`TABLES`.`AUTO_INCREMENT` AS `AUTO_INCREMENT`,`information_schema`.`TABLES`.`CREATE_TIME` AS `CREATE_TIME`,`information_schema`.`TABLES`.`UPDATE_TIME` AS `UPDATE_TIME`,`information_schema`.`TABLES`.`CHECK_TIME` AS `CHECK_TIME`,`information_schema`.`TABLES`.`TABLE_COLLATION` AS `TABLE_COLLATION`,`information_schema`.`TABLES`.`CHECKSUM` AS `CHECKSUM`,`information_schema`.`TABLES`.`CREATE_OPTIONS` AS 
`CREATE_OPTIONS`,`information_schema`.`TABLES`.`TABLE_COMMENT` AS `TABLE_COMMENT` from `INFORMATION_SCHEMA`.`TABLES` where `information_schema`.`TABLES`.`TABLE_NAME` = 't1' +LOAD DATA INFILE 'MYSQLD_DATADIR/test/v.frm' REPLACE INTO TABLE kv CHARACTER SET latin1; +SELECT LOWER(v) FROM kv WHERE v LIKE _binary'query=%'; +LOWER(v) +query=select `information_schema`.`tables`.`table_catalog` as `table_catalog`,`information_schema`.`tables`.`table_schema` as `table_schema`,`information_schema`.`tables`.`table_name` as `table_name`,`information_schema`.`tables`.`table_type` as `table_type`,`information_schema`.`tables`.`engine` as `engine`,`information_schema`.`tables`.`version` as `version`,`information_schema`.`tables`.`row_format` as `row_format`,`information_schema`.`tables`.`table_rows` as `table_rows`,`information_schema`.`tables`.`avg_row_length` as `avg_row_length`,`information_schema`.`tables`.`data_length` as `data_length`,`information_schema`.`tables`.`max_data_length` as `max_data_length`,`information_schema`.`tables`.`index_length` as `index_length`,`information_schema`.`tables`.`data_free` as `data_free`,`information_schema`.`tables`.`auto_increment` as `auto_increment`,`information_schema`.`tables`.`create_time` as `create_time`,`information_schema`.`tables`.`update_time` as `update_time`,`information_schema`.`tables`.`check_time` as `check_time`,`information_schema`.`tables`.`table_collation` as `table_collation`,`information_schema`.`tables`.`checksum` as `checksum`,`information_schema`.`tables`.`create_options` as `create_options`,`information_schema`.`tables`.`table_comment` as `table_comment` from `information_schema`.`tables` where `information_schema`.`tables`.`table_name` = 't1' TRUNCATE TABLE kv; SELECT * FROM v; TABLE_CATALOG TABLE_SCHEMA TABLE_NAME TABLE_TYPE ENGINE VERSION ROW_FORMAT TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE AUTO_INCREMENT CREATE_TIME UPDATE_TIME CHECK_TIME TABLE_COLLATION CHECKSUM 
CREATE_OPTIONS TABLE_COMMENT diff --git a/mysql-test/t/ctype_utf16le.test b/mysql-test/t/ctype_utf16le.test index 671100c2d9d..c0cac7f5d67 100644 --- a/mysql-test/t/ctype_utf16le.test +++ b/mysql-test/t/ctype_utf16le.test @@ -820,12 +820,12 @@ let $coll_pad='utf16le_bin'; SET NAMES utf8; SET SESSION character_set_connection= utf16le; -CREATE TABLE kv (v BLOB); +CREATE TABLE kv (v TEXT CHARACTER SET latin1); CREATE TABLE t (a INT); CREATE VIEW v AS SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; --replace_result $MYSQLD_DATADIR MYSQLD_DATADIR -eval LOAD DATA INFILE '$MYSQLD_DATADIR/test/v.frm' REPLACE INTO TABLE kv; -SELECT * FROM kv WHERE v LIKE _binary'query=%'; +eval LOAD DATA INFILE '$MYSQLD_DATADIR/test/v.frm' REPLACE INTO TABLE kv CHARACTER SET latin1; +SELECT LOWER(v) FROM kv WHERE v LIKE _binary'query=%'; TRUNCATE TABLE kv; SELECT * FROM v; LOCK TABLE t WRITE; -- cgit v1.2.1 From cf8e78a40170118e2595e35c9c5f43aedeca91a0 Mon Sep 17 00:00:00 2001 From: Sergei Krivonos Date: Sat, 16 Oct 2021 02:35:16 +0300 Subject: Implemented Json_writer_array & Json_writer_object subitems name presence control --- .gitignore | 3 +++ sql/my_json_writer.cc | 2 ++ sql/my_json_writer.h | 36 ++++++++++++++++++++++++++++++------ 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 5005dbae363..9abb2b075bc 100644 --- a/.gitignore +++ b/.gitignore @@ -10,8 +10,11 @@ .ninja_* *.mri *.mri.tpl +/.cproject +/.project .gdb_history .vs/ +/.settings/ errmsg.sys typescript _CPack_Packages diff --git a/sql/my_json_writer.cc b/sql/my_json_writer.cc index 3234b8f9995..e4033ed0c02 100644 --- a/sql/my_json_writer.cc +++ b/sql/my_json_writer.cc @@ -260,6 +260,8 @@ void Json_writer::add_str(const String &str) add_str(str.ptr(), str.length()); } +thread_local std::vector Json_writer_struct::named_items_expectation; + Json_writer_temp_disable::Json_writer_temp_disable(THD *thd_arg) { thd= thd_arg; diff --git a/sql/my_json_writer.h 
b/sql/my_json_writer.h index bc8002de529..f91a7c9ba8c 100644 --- a/sql/my_json_writer.h +++ b/sql/my_json_writer.h @@ -15,8 +15,12 @@ #ifndef JSON_WRITER_INCLUDED #define JSON_WRITER_INCLUDED + #include "my_base.h" #include "sql_select.h" + +#include + class Opt_trace_stmt; class Opt_trace_context; class Json_writer; @@ -308,6 +312,7 @@ public: /* A common base for Json_writer_object and Json_writer_array */ class Json_writer_struct { + static thread_local std::vector named_items_expectation; protected: Json_writer* my_writer; Json_value_helper context; @@ -317,16 +322,29 @@ protected: bool closed; public: - explicit Json_writer_struct(THD *thd) + explicit Json_writer_struct(THD *thd, bool expect_named_children) { my_writer= thd->opt_trace.get_current_json(); context.init(my_writer); closed= false; + named_items_expectation.push_back(expect_named_children); + } + + virtual ~Json_writer_struct() + { + named_items_expectation.pop_back(); } - bool trace_started() + + bool trace_started() const { return my_writer != 0; } + + bool named_item_expected() const + { + return named_items_expectation.size() > 1 + && *(named_items_expectation.rbegin() + 1); + } }; @@ -347,15 +365,17 @@ private: } public: explicit Json_writer_object(THD *thd) - : Json_writer_struct(thd) + : Json_writer_struct(thd, true) { + DBUG_ASSERT(!named_item_expected()); if (unlikely(my_writer)) my_writer->start_object(); } explicit Json_writer_object(THD* thd, const char *str) - : Json_writer_struct(thd) + : Json_writer_struct(thd, true) { + DBUG_ASSERT(named_item_expected()); if (unlikely(my_writer)) my_writer->add_member(str).start_object(); } @@ -519,14 +539,18 @@ public: class Json_writer_array : public Json_writer_struct { public: - Json_writer_array(THD *thd): Json_writer_struct(thd) + Json_writer_array(THD *thd) + : Json_writer_struct(thd, false) { + DBUG_ASSERT(!named_item_expected()); if (unlikely(my_writer)) my_writer->start_array(); } - Json_writer_array(THD *thd, const char *str) : 
Json_writer_struct(thd) + Json_writer_array(THD *thd, const char *str) + : Json_writer_struct(thd, false) { + DBUG_ASSERT(named_item_expected()); if (unlikely(my_writer)) my_writer->add_member(str).start_array(); } -- cgit v1.2.1 From 052dda61bb8d5fef271ecd091a5b5db25d57040b Mon Sep 17 00:00:00 2001 From: Sergei Krivonos Date: Sun, 17 Oct 2021 12:36:12 +0300 Subject: Made optional Json_writer_object / Json_writer_array consistency check --- CMakeLists.txt | 4 ++++ sql/my_json_writer.cc | 2 ++ sql/my_json_writer.h | 16 ++++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 591920450ea..2bafbda9964 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -185,6 +185,10 @@ IF(DISABLE_SHARED) SET(WITHOUT_DYNAMIC_PLUGINS 1) ENDIF() OPTION(ENABLED_PROFILING "Enable profiling" ON) +OPTION(ENABLED_JSON_WRITER_CONSISTENCY_CHECKS "Enable Json_writer_object / Json_writer_array checking to produce consistent JSON output" OFF) +IF(ENABLED_JSON_WRITER_CONSISTENCY_CHECKS) + ADD_DEFINITIONS(-DENABLED_JSON_WRITER_CONSISTENCY_CHECKS) +ENDIF() OPTION(WITHOUT_SERVER "Build only the client library and clients" OFF) IF(UNIX) OPTION(WITH_VALGRIND "Valgrind instrumentation" OFF) diff --git a/sql/my_json_writer.cc b/sql/my_json_writer.cc index e4033ed0c02..8e6f0942857 100644 --- a/sql/my_json_writer.cc +++ b/sql/my_json_writer.cc @@ -260,7 +260,9 @@ void Json_writer::add_str(const String &str) add_str(str.ptr(), str.length()); } +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS thread_local std::vector Json_writer_struct::named_items_expectation; +#endif Json_writer_temp_disable::Json_writer_temp_disable(THD *thd_arg) { diff --git a/sql/my_json_writer.h b/sql/my_json_writer.h index f91a7c9ba8c..9686984ba9b 100644 --- a/sql/my_json_writer.h +++ b/sql/my_json_writer.h @@ -312,7 +312,9 @@ public: /* A common base for Json_writer_object and Json_writer_array */ class Json_writer_struct { +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS static thread_local 
std::vector named_items_expectation; +#endif protected: Json_writer* my_writer; Json_value_helper context; @@ -327,12 +329,16 @@ public: my_writer= thd->opt_trace.get_current_json(); context.init(my_writer); closed= false; +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS named_items_expectation.push_back(expect_named_children); +#endif } virtual ~Json_writer_struct() { +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS named_items_expectation.pop_back(); +#endif } bool trace_started() const @@ -340,11 +346,13 @@ public: return my_writer != 0; } +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS bool named_item_expected() const { return named_items_expectation.size() > 1 && *(named_items_expectation.rbegin() + 1); } +#endif }; @@ -367,7 +375,9 @@ public: explicit Json_writer_object(THD *thd) : Json_writer_struct(thd, true) { +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS DBUG_ASSERT(!named_item_expected()); +#endif if (unlikely(my_writer)) my_writer->start_object(); } @@ -375,7 +385,9 @@ public: explicit Json_writer_object(THD* thd, const char *str) : Json_writer_struct(thd, true) { +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS DBUG_ASSERT(named_item_expected()); +#endif if (unlikely(my_writer)) my_writer->add_member(str).start_object(); } @@ -542,7 +554,9 @@ public: Json_writer_array(THD *thd) : Json_writer_struct(thd, false) { +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS DBUG_ASSERT(!named_item_expected()); +#endif if (unlikely(my_writer)) my_writer->start_array(); } @@ -550,7 +564,9 @@ public: Json_writer_array(THD *thd, const char *str) : Json_writer_struct(thd, false) { +#ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS DBUG_ASSERT(named_item_expected()); +#endif if (unlikely(my_writer)) my_writer->add_member(str).start_array(); } -- cgit v1.2.1 From a46665090b96f5598b534d31524af830dfc225b0 Mon Sep 17 00:00:00 2001 From: Kentoku SHIBA Date: Sat, 6 Jul 2019 23:54:53 +0900 Subject: MDEV-19866 With a Spider table, a SELECT with WHERE involving primary key breaks following 
SELECTs (#1356) Change checking scanning partitions from part_spec to part_info->read_partitions --- .../spider/bugfix/include/mdev_19866_deinit.inc | 14 +++ .../spider/bugfix/include/mdev_19866_init.inc | 52 ++++++++++ .../mysql-test/spider/bugfix/r/mdev_19866.result | 111 +++++++++++++++++++++ .../mysql-test/spider/bugfix/t/mdev_19866.cnf | 4 + .../mysql-test/spider/bugfix/t/mdev_19866.test | 97 ++++++++++++++++++ storage/spider/spd_group_by_handler.cc | 62 ++++++------ 6 files changed, 310 insertions(+), 30 deletions(-) create mode 100644 storage/spider/mysql-test/spider/bugfix/include/mdev_19866_deinit.inc create mode 100644 storage/spider/mysql-test/spider/bugfix/include/mdev_19866_init.inc create mode 100644 storage/spider/mysql-test/spider/bugfix/r/mdev_19866.result create mode 100644 storage/spider/mysql-test/spider/bugfix/t/mdev_19866.cnf create mode 100644 storage/spider/mysql-test/spider/bugfix/t/mdev_19866.test diff --git a/storage/spider/mysql-test/spider/bugfix/include/mdev_19866_deinit.inc b/storage/spider/mysql-test/spider/bugfix/include/mdev_19866_deinit.inc new file mode 100644 index 00000000000..9d255152dd8 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/mdev_19866_deinit.inc @@ -0,0 +1,14 @@ +--let $MASTER_1_COMMENT_2_1= $MASTER_1_COMMENT_2_1_BACKUP +--let $CHILD2_1_DROP_TABLES= $CHILD2_1_DROP_TABLES_BACKUP +--let $CHILD2_1_CREATE_TABLES= $CHILD2_1_CREATE_TABLES_BACKUP +--let $CHILD2_1_SELECT_TABLES= $CHILD2_1_SELECT_TABLES_BACKUP +--let $CHILD2_2_DROP_TABLES= $CHILD2_2_DROP_TABLES_BACKUP +--let $CHILD2_2_CREATE_TABLES= $CHILD2_2_CREATE_TABLES_BACKUP +--let $CHILD2_2_SELECT_TABLES= $CHILD2_2_SELECT_TABLES_BACKUP +--disable_warnings +--disable_query_log +--disable_result_log +--source ../t/test_deinit.inc +--enable_result_log +--enable_query_log +--enable_warnings diff --git a/storage/spider/mysql-test/spider/bugfix/include/mdev_19866_init.inc b/storage/spider/mysql-test/spider/bugfix/include/mdev_19866_init.inc new file mode 
100644 index 00000000000..dceae8226b0 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/include/mdev_19866_init.inc @@ -0,0 +1,52 @@ +--disable_warnings +--disable_query_log +--disable_result_log +--source ../t/test_init.inc +if (!$HAVE_PARTITION) +{ + --source group_by_order_by_limit_deinit.inc + --enable_result_log + --enable_query_log + --enable_warnings + skip Test requires partitioning; +} +--enable_result_log +--enable_query_log +--enable_warnings +--let $MASTER_1_COMMENT_2_1_BACKUP= $MASTER_1_COMMENT_2_1 +let $MASTER_1_COMMENT_2_1= + COMMENT='table "tbl_a"' + PARTITION BY KEY(pkey) ( + PARTITION pt1 COMMENT='srv "s_2_1"', + PARTITION pt2 COMMENT='srv "s_2_2"' + ); +--let $CHILD2_1_DROP_TABLES_BACKUP= $CHILD2_1_DROP_TABLES +let $CHILD2_1_DROP_TABLES= + DROP TABLE IF EXISTS tbl_a; +--let $CHILD2_1_CREATE_TABLES_BACKUP= $CHILD2_1_CREATE_TABLES +let $CHILD2_1_CREATE_TABLES= + CREATE TABLE tbl_a ( + pkey int NOT NULL, + val char(1) NOT NULL, + PRIMARY KEY (pkey) + ) $CHILD2_1_ENGINE $CHILD2_1_CHARSET; +--let $CHILD2_1_SELECT_TABLES_BACKUP= $CHILD2_1_SELECT_TABLES +let $CHILD2_1_SELECT_TABLES= + SELECT pkey, val FROM tbl_a ORDER BY pkey; +let $CHILD2_1_SELECT_ARGUMENT1= + SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %'; +--let $CHILD2_2_DROP_TABLES_BACKUP= $CHILD2_2_DROP_TABLES +let $CHILD2_2_DROP_TABLES= + DROP TABLE IF EXISTS tbl_a; +--let $CHILD2_2_CREATE_TABLES_BACKUP= $CHILD2_2_CREATE_TABLES +let $CHILD2_2_CREATE_TABLES= + CREATE TABLE tbl_a ( + pkey int NOT NULL, + val char(1) NOT NULL, + PRIMARY KEY (pkey) + ) $CHILD2_2_ENGINE $CHILD2_2_CHARSET; +--let $CHILD2_2_SELECT_TABLES_BACKUP= $CHILD2_2_SELECT_TABLES +let $CHILD2_2_SELECT_TABLES= + SELECT pkey, val FROM tbl_a ORDER BY pkey; +let $CHILD2_2_SELECT_ARGUMENT1= + SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %'; diff --git a/storage/spider/mysql-test/spider/bugfix/r/mdev_19866.result b/storage/spider/mysql-test/spider/bugfix/r/mdev_19866.result new 
file mode 100644 index 00000000000..5d483481edd --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/mdev_19866.result @@ -0,0 +1,111 @@ +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 + +this test is for MDEV-19866 + +drop and create databases +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +connection child2_1; +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; +connection child2_2; +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote2; +USE auto_test_remote2; + +create table and insert +connection child2_1; +CHILD2_1_CREATE_TABLES +TRUNCATE TABLE mysql.general_log; +connection child2_2; +CHILD2_2_CREATE_TABLES +TRUNCATE TABLE mysql.general_log; +connection master_1; +CREATE TABLE tbl_a ( +pkey int NOT NULL, +val char(1) NOT NULL, +PRIMARY KEY (pkey) +) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1 +INSERT INTO tbl_a (pkey,val) VALUES (1,'1'),(2,'2'),(3,'3'),(4,'4'); + +select test 1 +connection child2_1; +TRUNCATE TABLE mysql.general_log; +connection child2_2; +TRUNCATE TABLE mysql.general_log; +connection master_1; +SELECT * FROM tbl_a; +pkey val +1 1 +3 3 +2 2 +4 4 +SELECT * FROM tbl_a WHERE pkey = 1; +pkey val +1 1 +SELECT * FROM tbl_a; +pkey val +1 1 +3 3 +2 2 +4 4 +SELECT * FROM tbl_a WHERE pkey = 2; +pkey val +2 2 +SELECT * FROM tbl_a; +pkey val +1 1 +3 3 +2 2 +4 4 +connection child2_1; +SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %'; +argument +select `pkey`,`val` from `auto_test_remote`.`tbl_a` +select `pkey`,`val` from `auto_test_remote`.`tbl_a` where `pkey` = 1 +select `pkey`,`val` from `auto_test_remote`.`tbl_a` +select `pkey`,`val` from `auto_test_remote`.`tbl_a` +SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %' +SELECT pkey, val FROM tbl_a ORDER BY pkey; +pkey val +1 1 +3 3 +connection 
child2_2; +SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %'; +argument +select `pkey`,`val` from `auto_test_remote2`.`tbl_a` +select `pkey`,`val` from `auto_test_remote2`.`tbl_a` +select `pkey`,`val` from `auto_test_remote2`.`tbl_a` where `pkey` = 2 +select `pkey`,`val` from `auto_test_remote2`.`tbl_a` +SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %' +SELECT pkey, val FROM tbl_a ORDER BY pkey; +pkey val +2 2 +4 4 + +deinit +connection master_1; +DROP DATABASE IF EXISTS auto_test_local; +connection child2_1; +DROP DATABASE IF EXISTS auto_test_remote; +SET GLOBAL log_output = @old_log_output; +connection child2_2; +DROP DATABASE IF EXISTS auto_test_remote2; +SET GLOBAL log_output = @old_log_output; +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 + +end of test diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.cnf b/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.cnf new file mode 100644 index 00000000000..e0ffb99c38e --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.cnf @@ -0,0 +1,4 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf +!include ../my_2_1.cnf +!include ../my_2_2.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.test new file mode 100644 index 00000000000..05b753ae8bb --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.test @@ -0,0 +1,97 @@ +--source ../include/mdev_19866_init.inc +--echo +--echo this test is for MDEV-19866 +--echo +--echo drop and create databases +--connection master_1 +--disable_warnings +CREATE DATABASE auto_test_local; +USE auto_test_local; + +--connection child2_1 +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; + +--connection child2_2 +SET @old_log_output = @@global.log_output; +SET GLOBAL log_output = 'TABLE,FILE'; 
+CREATE DATABASE auto_test_remote2; +USE auto_test_remote2; +--enable_warnings + +--echo +--echo create table and insert + +--connection child2_1 +--disable_query_log +echo CHILD2_1_CREATE_TABLES; +eval $CHILD2_1_CREATE_TABLES; +--enable_query_log +TRUNCATE TABLE mysql.general_log; + +--connection child2_2 +--disable_query_log +echo CHILD2_2_CREATE_TABLES; +eval $CHILD2_2_CREATE_TABLES; +--enable_query_log +TRUNCATE TABLE mysql.general_log; + +--connection master_1 +--disable_query_log +echo CREATE TABLE tbl_a ( + pkey int NOT NULL, + val char(1) NOT NULL, + PRIMARY KEY (pkey) +) MASTER_1_ENGINE MASTER_1_CHARSET MASTER_1_COMMENT_2_1; +eval CREATE TABLE tbl_a ( + pkey int NOT NULL, + val char(1) NOT NULL, + PRIMARY KEY (pkey) +) $MASTER_1_ENGINE $MASTER_1_CHARSET $MASTER_1_COMMENT_2_1; +--enable_query_log +INSERT INTO tbl_a (pkey,val) VALUES (1,'1'),(2,'2'),(3,'3'),(4,'4'); + +--echo +--echo select test 1 + +--connection child2_1 +TRUNCATE TABLE mysql.general_log; + +--connection child2_2 +TRUNCATE TABLE mysql.general_log; + +--connection master_1 +SELECT * FROM tbl_a; +SELECT * FROM tbl_a WHERE pkey = 1; +SELECT * FROM tbl_a; +SELECT * FROM tbl_a WHERE pkey = 2; +SELECT * FROM tbl_a; + +--connection child2_1 +eval $CHILD2_1_SELECT_ARGUMENT1; +eval $CHILD2_1_SELECT_TABLES; + +--connection child2_2 +eval $CHILD2_2_SELECT_ARGUMENT1; +eval $CHILD2_2_SELECT_TABLES; + +--echo +--echo deinit +--disable_warnings +--connection master_1 +DROP DATABASE IF EXISTS auto_test_local; + +--connection child2_1 +DROP DATABASE IF EXISTS auto_test_remote; +SET GLOBAL log_output = @old_log_output; + +--connection child2_2 +DROP DATABASE IF EXISTS auto_test_remote2; +SET GLOBAL log_output = @old_log_output; + +--enable_warnings +--source ../include/mdev_19866_deinit.inc +--echo +--echo end of test diff --git a/storage/spider/spd_group_by_handler.cc b/storage/spider/spd_group_by_handler.cc index 8bd0eca507f..de041897239 100644 --- a/storage/spider/spd_group_by_handler.cc +++ 
b/storage/spider/spd_group_by_handler.cc @@ -1654,21 +1654,18 @@ group_by_handler *spider_create_group_by_handler( if (from->table->part_info) { DBUG_PRINT("info",("spider partition handler")); -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) - ha_partition *partition = (ha_partition *) from->table->file; - part_id_range *part_spec = partition->get_part_spec(); - DBUG_PRINT("info",("spider part_spec->start_part=%u", part_spec->start_part)); - DBUG_PRINT("info",("spider part_spec->end_part=%u", part_spec->end_part)); - if ( - part_spec->start_part == partition->get_no_current_part_id() || - part_spec->start_part != part_spec->end_part - ) { +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) + partition_info *part_info = from->table->part_info; + uint bits = bitmap_bits_set(&part_info->read_partitions); + DBUG_PRINT("info",("spider bits=%u", bits)); + if (bits != 1) + { DBUG_PRINT("info",("spider using multiple partitions is not supported by this feature yet")); #else DBUG_PRINT("info",("spider partition is not supported by this feature yet")); #endif DBUG_RETURN(NULL); -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) } #endif } @@ -1686,17 +1683,18 @@ group_by_handler *spider_create_group_by_handler( /* all tables are const_table */ DBUG_RETURN(NULL); } -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) if (from->table->part_info) { + partition_info *part_info = from->table->part_info; + uint part = bitmap_get_first_set(&part_info->read_partitions); ha_partition *partition = (ha_partition *) from->table->file; - part_id_range *part_spec = partition->get_part_spec(); handler **handlers = partition->get_child_handlers(); - spider = (ha_spider *) handlers[part_spec->start_part]; + spider = (ha_spider *) handlers[part]; } else { #endif spider = (ha_spider *) 
from->table->file; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) } #endif share = spider->share; @@ -1717,17 +1715,18 @@ group_by_handler *spider_create_group_by_handler( { if (from->table->const_table) continue; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) if (from->table->part_info) { + partition_info *part_info = from->table->part_info; + uint part = bitmap_get_first_set(&part_info->read_partitions); ha_partition *partition = (ha_partition *) from->table->file; - part_id_range *part_spec = partition->get_part_spec(); handler **handlers = partition->get_child_handlers(); - spider = (ha_spider *) handlers[part_spec->start_part]; + spider = (ha_spider *) handlers[part]; } else { #endif spider = (ha_spider *) from->table->file; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) } #endif share = spider->share; @@ -1755,17 +1754,18 @@ group_by_handler *spider_create_group_by_handler( do { if (from->table->const_table) continue; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) if (from->table->part_info) { + partition_info *part_info = from->table->part_info; + uint part = bitmap_get_first_set(&part_info->read_partitions); ha_partition *partition = (ha_partition *) from->table->file; - part_id_range *part_spec = partition->get_part_spec(); handler **handlers = partition->get_child_handlers(); - spider = (ha_spider *) handlers[part_spec->start_part]; + spider = (ha_spider *) handlers[part]; } else { #endif spider = (ha_spider *) from->table->file; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) } #endif share = spider->share; @@ -1908,17 +1908,18 
@@ group_by_handler *spider_create_group_by_handler( { from = from->next_local; } -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) if (from->table->part_info) { + partition_info *part_info = from->table->part_info; + uint part = bitmap_get_first_set(&part_info->read_partitions); ha_partition *partition = (ha_partition *) from->table->file; - part_id_range *part_spec = partition->get_part_spec(); handler **handlers = partition->get_child_handlers(); - spider = (ha_spider *) handlers[part_spec->start_part]; + spider = (ha_spider *) handlers[part]; } else { #endif spider = (ha_spider *) from->table->file; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) } #endif share = spider->share; @@ -1996,17 +1997,18 @@ group_by_handler *spider_create_group_by_handler( continue; fields->clear_conn_holder_from_conn(); -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) if (from->table->part_info) { + partition_info *part_info = from->table->part_info; + uint part = bitmap_get_first_set(&part_info->read_partitions); ha_partition *partition = (ha_partition *) from->table->file; - part_id_range *part_spec = partition->get_part_spec(); handler **handlers = partition->get_child_handlers(); - spider = (ha_spider *) handlers[part_spec->start_part]; + spider = (ha_spider *) handlers[part]; } else { #endif spider = (ha_spider *) from->table->file; -#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) && defined(PARTITION_HAS_GET_PART_SPEC) +#if defined(PARTITION_HAS_GET_CHILD_HANDLERS) } #endif share = spider->share; -- cgit v1.2.1 From 39f6315612149860ffdb92d433a6bace2096b9c1 Mon Sep 17 00:00:00 2001 From: Nayuta Yanagisawa Date: Tue, 10 Aug 2021 11:32:31 +0000 Subject: MDEV-19866 follow-up Cherry-picking the fix for MDEV-19866 changes the behavior 
of the Spider slightly. So, I modified an existing test to match the new behavior. --- storage/spider/mysql-test/spider/bugfix/r/mdev_20100.result | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/storage/spider/mysql-test/spider/bugfix/r/mdev_20100.result b/storage/spider/mysql-test/spider/bugfix/r/mdev_20100.result index fc4fb02d72f..9d2297b4daf 100644 --- a/storage/spider/mysql-test/spider/bugfix/r/mdev_20100.result +++ b/storage/spider/mysql-test/spider/bugfix/r/mdev_20100.result @@ -78,16 +78,16 @@ a b c connection child2_1; SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %'; argument -select `a`,`b`,`c` from `auto_test_remote`.`ta_r3` where (`b` = 'c') +select t0.`a` `a`,t0.`b` `b`,t0.`c` `c` from `auto_test_remote`.`ta_r3` t0 where (t0.`b` = 'c') select `a`,`b`,`c` from `auto_test_remote`.`ta_r2` select `a`,`b`,`c` from `auto_test_remote`.`ta_r3` -select `a`,`b`,`c` from `auto_test_remote`.`ta_r4` where (`b` = 'c') +select t0.`a` `a`,t0.`b` `b`,t0.`c` `c` from `auto_test_remote`.`ta_r4` t0 where (t0.`b` = 'c') select `a`,`b`,`c` from `auto_test_remote`.`ta_r2` select `a`,`b`,`c` from `auto_test_remote`.`ta_r3` -select `a`,`b`,`c` from `auto_test_remote`.`ta_r2` where (`b` = 'c') +select t0.`a` `a`,t0.`b` `b`,t0.`c` `c` from `auto_test_remote`.`ta_r2` t0 where (t0.`b` = 'c') select `a`,`b`,`c` from `auto_test_remote`.`ta_r2` select `a`,`b`,`c` from `auto_test_remote`.`ta_r4` -select `a`,`b`,`c` from `auto_test_remote`.`ta_r2` where (`b` = 'c') +select t0.`a` `a`,t0.`b` `b`,t0.`c` `c` from `auto_test_remote`.`ta_r2` t0 where (t0.`b` = 'c') select `a`,`b`,`c` from `auto_test_remote`.`ta_r3` select `a`,`b`,`c` from `auto_test_remote`.`ta_r4` SELECT argument FROM mysql.general_log WHERE argument LIKE '%select %' -- cgit v1.2.1 From 9068020efe8f8b0161ea0e48a89fb90fcd8e59aa Mon Sep 17 00:00:00 2001 From: Nayuta Yanagisawa Date: Tue, 21 Sep 2021 18:32:37 +0900 Subject: MDEV-26539 SIGSEGV in
spider_check_and_set_trx_isolation and I_P_List_iterator from THD::drop_temporary_table (10.5.3 opt only) on ALTER The server crashes if ALTER TABLE, which accesses physical data placed at data nodes, is performed on a Spider table. The cause of the bug is that spider_check_trx_and_get_conn() does not allocate connections if sql_command == SQLCOM_ALTER_TABLE. Some ALTER TABLE statements, like ALTER TABLE ... CHECK PARTITION, access data nodes. So, we need to allocate a new connection before performing ALTER TABLEs. --- .../mysql-test/spider/bugfix/r/mdev_26539.result | 36 +++++++++++++++++++ .../mysql-test/spider/bugfix/t/mdev_26539.cnf | 3 ++ .../mysql-test/spider/bugfix/t/mdev_26539.test | 40 ++++++++++++++++++++++ storage/spider/spd_trx.cc | 6 ++-- 4 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 storage/spider/mysql-test/spider/bugfix/r/mdev_26539.result create mode 100644 storage/spider/mysql-test/spider/bugfix/t/mdev_26539.cnf create mode 100644 storage/spider/mysql-test/spider/bugfix/t/mdev_26539.test diff --git a/storage/spider/mysql-test/spider/bugfix/r/mdev_26539.result b/storage/spider/mysql-test/spider/bugfix/r/mdev_26539.result new file mode 100644 index 00000000000..4e195fddfad --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/mdev_26539.result @@ -0,0 +1,36 @@ +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 +# +# MDEV-26539 SIGSEGV in spider_check_and_set_trx_isolation and I_P_List_iterator from THD::drop_temporary_table (10.5.3 opt only) on ALTER +# +connection child2_1; +CREATE DATABASE auto_test_remote; +USE auto_test_remote; +CREATE TABLE tbl_a ( +c INT +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +CREATE TABLE tbl_a ( +c INT +) ENGINE=Spider DEFAULT CHARSET=utf8 COMMENT='table "tbl_a"' PARTITION BY LIST COLUMNS (c) ( +PARTITION pt1 DEFAULT COMMENT = 'srv "s_2_1"' +); +INSERT INTO tbl_a VALUES (1); +ALTER TABLE tbl_a CHECK 
PARTITION ALL; +Table Op Msg_type Msg_text +auto_test_local.tbl_a check status OK +DROP DATABASE auto_test_local; +connection child2_1; +DROP DATABASE auto_test_remote; +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_26539.cnf b/storage/spider/mysql-test/spider/bugfix/t/mdev_26539.cnf new file mode 100644 index 00000000000..05dfd8a0bce --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_26539.cnf @@ -0,0 +1,3 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf +!include ../my_2_1.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_26539.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_26539.test new file mode 100644 index 00000000000..f2561f8c9a5 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_26539.test @@ -0,0 +1,40 @@ +--disable_query_log +--disable_result_log +--source ../../t/test_init.inc +--enable_result_log +--enable_query_log + +--echo # +--echo # MDEV-26539 SIGSEGV in spider_check_and_set_trx_isolation and I_P_List_iterator from THD::drop_temporary_table (10.5.3 opt only) on ALTER +--echo # + +--connection child2_1 +CREATE DATABASE auto_test_remote; +USE auto_test_remote; +eval CREATE TABLE tbl_a ( + c INT +) $CHILD2_1_ENGINE $CHILD2_1_CHARSET; + +--connection master_1 +CREATE DATABASE auto_test_local; +USE auto_test_local; + +eval CREATE TABLE tbl_a ( + c INT +) $MASTER_1_ENGINE $MASTER_1_CHARSET COMMENT='table "tbl_a"' PARTITION BY LIST COLUMNS (c) ( + PARTITION pt1 DEFAULT COMMENT = 'srv "s_2_1"' +); + +INSERT INTO tbl_a VALUES (1); +ALTER TABLE tbl_a CHECK PARTITION ALL; + +DROP DATABASE auto_test_local; + +--connection child2_1 +DROP DATABASE auto_test_remote; + +--disable_query_log +--disable_result_log +--source ../../t/test_deinit.inc +--enable_result_log +--enable_query_log diff --git a/storage/spider/spd_trx.cc b/storage/spider/spd_trx.cc index 0eda9d31df6..80658012506 100644 --- 
a/storage/spider/spd_trx.cc +++ b/storage/spider/spd_trx.cc @@ -3744,10 +3744,8 @@ int spider_check_trx_and_get_conn( } spider->wide_handler->trx = trx; spider->set_error_mode(); - if ( - spider->wide_handler->sql_command != SQLCOM_DROP_TABLE && - spider->wide_handler->sql_command != SQLCOM_ALTER_TABLE - ) { + if (spider->wide_handler->sql_command != SQLCOM_DROP_TABLE) + { SPIDER_TRX_HA *trx_ha = spider_check_trx_ha(trx, spider); if (!trx_ha || trx_ha->wait_for_reusing) spider_trx_set_link_idx_for_all(spider); -- cgit v1.2.1 From 18eab4a83280049974265358b0d78389d05cd67b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 18 Oct 2021 12:49:10 +0300 Subject: MDEV-26682 Replication timeouts with XA PREPARE The purpose of non-exclusive locks in a transaction is to guarantee that the records covered by those locks must remain in that way until the transaction is committed. (The purpose of gap locks is to ensure that a record that was nonexistent will remain that way.) Once a transaction has reached the XA PREPARE state, the only allowed further actions are XA ROLLBACK or XA COMMIT. Therefore, it can be argued that only the exclusive locks that the XA PREPARE transaction is holding are essential. Furthermore, InnoDB never preserved explicit locks across server restart. For XA PREPARE transactions, we will only recover implicit exclusive locks for records that had been modified. Because of the fact that XA PREPARE followed by a server restart will cause some locks to be lost, we might as well always release all non-exclusive locks during the execution of an XA PREPARE statement. lock_release_on_prepare(): Release non-exclusive locks on XA PREPARE. trx_prepare(): Invoke lock_release_on_prepare() unless the isolation level is SERIALIZABLE or this is an internal distributed transaction with the binlog (not actual XA PREPARE statement). This has been discussed with Sergei Golubchik and Andrei Elkin.
Reviewed by: Sergei Golubchik --- mysql-test/suite/rpl/r/rpl_xa.result | 61 +++++++++++++++++ .../suite/rpl/r/rpl_xa_gtid_pos_auto_engine.result | 61 +++++++++++++++++ mysql-test/suite/rpl/t/rpl_xa.inc | 80 +++++++++++++++++++++- storage/innobase/include/lock0lock.h | 6 +- storage/innobase/lock/lock0lock.cc | 59 ++++++++++++++++ storage/innobase/trx/trx0trx.cc | 14 ++++ 6 files changed, 279 insertions(+), 2 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_xa.result b/mysql-test/suite/rpl/r/rpl_xa.result index a90e6e0b996..061c7b360d0 100644 --- a/mysql-test/suite/rpl/r/rpl_xa.result +++ b/mysql-test/suite/rpl/r/rpl_xa.result @@ -219,4 +219,65 @@ include/sync_with_master_gtid.inc connection master; drop database test_ign; drop table t1, t2, t3, tm; +# +# MDEV-26682 slave lock timeout with XA and gap locks +# +create table t1 (a int primary key, b int unique) engine=innodb; +insert t1 values (1,1),(3,3),(5,5); +connection slave; +set session tx_isolation='repeatable-read'; +start transaction; +select * from t1; +a b +1 1 +3 3 +5 5 +connect m2, localhost, root; +delete from t1 where a=3; +xa start 'x1'; +update t1 set b=3 where a=5; +xa end 'x1'; +xa prepare 'x1'; +connect m3, localhost, root; +insert t1 values (2, 2); +-->slave +connection slave; +commit; +select * from t1; +a b +1 1 +2 2 +5 5 +connection m2; +xa rollback 'x1'; +disconnect m2; +disconnect m3; +connection master; +drop table t1; +create table t1 (id int auto_increment primary key, c1 int not null unique) +engine=innodb; +create table t2 (id int auto_increment primary key, c1 int not null, +foreign key(c1) references t1(c1), unique key(c1)) engine=innodb; +insert t1 values (869,1), (871,3), (873,4), (872,5), (870,6), (877,7); +insert t2 values (795,6), (800,7); +xa start '1'; +update t2 set id = 9, c1 = 5 where c1 in (null, null, null, null, null, 7, 3); +connect con1, localhost,root; +xa start '2'; +delete from t1 where c1 like '3%'; +xa end '2'; +xa prepare '2'; +connection master; +xa end '1'; 
+xa prepare '1'; +->slave +connection slave; +connection slave; +include/sync_with_master_gtid.inc +connection con1; +xa commit '2'; +disconnect con1; +connection master; +xa commit '1'; +drop table t2, t1; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_xa_gtid_pos_auto_engine.result b/mysql-test/suite/rpl/r/rpl_xa_gtid_pos_auto_engine.result index ffd0426ab0d..35625cc7026 100644 --- a/mysql-test/suite/rpl/r/rpl_xa_gtid_pos_auto_engine.result +++ b/mysql-test/suite/rpl/r/rpl_xa_gtid_pos_auto_engine.result @@ -228,6 +228,67 @@ include/sync_with_master_gtid.inc connection master; drop database test_ign; drop table t1, t2, t3, tm; +# +# MDEV-26682 slave lock timeout with XA and gap locks +# +create table t1 (a int primary key, b int unique) engine=innodb; +insert t1 values (1,1),(3,3),(5,5); +connection slave; +set session tx_isolation='repeatable-read'; +start transaction; +select * from t1; +a b +1 1 +3 3 +5 5 +connect m2, localhost, root; +delete from t1 where a=3; +xa start 'x1'; +update t1 set b=3 where a=5; +xa end 'x1'; +xa prepare 'x1'; +connect m3, localhost, root; +insert t1 values (2, 2); +-->slave +connection slave; +commit; +select * from t1; +a b +1 1 +2 2 +5 5 +connection m2; +xa rollback 'x1'; +disconnect m2; +disconnect m3; +connection master; +drop table t1; +create table t1 (id int auto_increment primary key, c1 int not null unique) +engine=innodb; +create table t2 (id int auto_increment primary key, c1 int not null, +foreign key(c1) references t1(c1), unique key(c1)) engine=innodb; +insert t1 values (869,1), (871,3), (873,4), (872,5), (870,6), (877,7); +insert t2 values (795,6), (800,7); +xa start '1'; +update t2 set id = 9, c1 = 5 where c1 in (null, null, null, null, null, 7, 3); +connect con1, localhost,root; +xa start '2'; +delete from t1 where c1 like '3%'; +xa end '2'; +xa prepare '2'; +connection master; +xa end '1'; +xa prepare '1'; +->slave +connection slave; +connection slave; +include/sync_with_master_gtid.inc +connection 
con1; +xa commit '2'; +disconnect con1; +connection master; +xa commit '1'; +drop table t2, t1; connection slave; include/stop_slave.inc SET @@global.gtid_pos_auto_engines=""; diff --git a/mysql-test/suite/rpl/t/rpl_xa.inc b/mysql-test/suite/rpl/t/rpl_xa.inc index 38344da5e66..d22d2d2ef3d 100644 --- a/mysql-test/suite/rpl/t/rpl_xa.inc +++ b/mysql-test/suite/rpl/t/rpl_xa.inc @@ -1,6 +1,6 @@ # # This "body" file checks general properties of XA transaction replication -# as of MDEV-7974. +# as of MDEV-742. # Parameters: # --let rpl_xa_check= SELECT ... # @@ -353,3 +353,81 @@ source include/sync_with_master_gtid.inc; connection master; --eval drop database test_ign drop table t1, t2, t3, tm; + +--echo # +--echo # MDEV-26682 slave lock timeout with XA and gap locks +--echo # +create table t1 (a int primary key, b int unique) engine=innodb; +insert t1 values (1,1),(3,3),(5,5); +sync_slave_with_master; + +# set a strong isolation level to keep the read view below. +# alternatively a long-running select can do that too even in read-committed +set session tx_isolation='repeatable-read'; +start transaction; +# opens a read view to disable purge on the slave +select * from t1; + +connect m2, localhost, root; +# now, delete a value, purge it on the master, but not on the slave +delete from t1 where a=3; +xa start 'x1'; +# this sets a gap lock on <3>, when it exists (so, on the slave) +update t1 set b=3 where a=5; +xa end 'x1'; +xa prepare 'x1'; + +connect m3, localhost, root; +# and this tries to insert straight into the locked gap +insert t1 values (2, 2); + +echo -->slave; +sync_slave_with_master; +commit; +select * from t1; + +connection m2; +xa rollback 'x1'; +disconnect m2; +disconnect m3; + +connection master; + +drop table t1; + +create table t1 (id int auto_increment primary key, c1 int not null unique) +engine=innodb; + +create table t2 (id int auto_increment primary key, c1 int not null, +foreign key(c1) references t1(c1), unique key(c1)) engine=innodb; + +insert t1 
values (869,1), (871,3), (873,4), (872,5), (870,6), (877,7); +insert t2 values (795,6), (800,7); + +xa start '1'; +update t2 set id = 9, c1 = 5 where c1 in (null, null, null, null, null, 7, 3); + +connect con1, localhost,root; +xa start '2'; +delete from t1 where c1 like '3%'; +xa end '2'; +xa prepare '2'; + +connection master; +xa end '1'; +xa prepare '1'; + +echo ->slave; + +sync_slave_with_master; + +connection slave; +source include/sync_with_master_gtid.inc; + +connection con1; +xa commit '2'; +disconnect con1; + +connection master; +xa commit '1'; +drop table t2, t1; diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 3b63b06a9bb..225c246f4e7 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2020, MariaDB Corporation. +Copyright (c) 2017, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -478,6 +478,10 @@ lock_rec_unlock( and release possible other transactions waiting because of these locks. */ void lock_release(trx_t* trx); +/** Release non-exclusive locks on XA PREPARE, +and release possible other transactions waiting because of these locks. 
*/ +void lock_release_on_prepare(trx_t *trx); + /*************************************************************//** Get the lock hash table */ UNIV_INLINE diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index d7ec5736826..12764470bef 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -4219,6 +4219,65 @@ void lock_release(trx_t* trx) #endif } +/** Release non-exclusive locks on XA PREPARE, +and release possible other transactions waiting because of these locks. */ +void lock_release_on_prepare(trx_t *trx) +{ + ulint count= 0; + lock_mutex_enter(); + ut_ad(!trx_mutex_own(trx)); + + for (lock_t *lock= UT_LIST_GET_LAST(trx->lock.trx_locks); lock; ) + { + ut_ad(lock->trx == trx); + + if (lock_get_type_low(lock) == LOCK_REC) + { + ut_ad(!lock->index->table->is_temporary()); + if (lock_rec_get_gap(lock) || lock_get_mode(lock) != LOCK_X) + lock_rec_dequeue_from_page(lock); + else + { + ut_ad(trx->dict_operation || + lock->index->table->id >= DICT_HDR_FIRST_ID); +retain_lock: + lock= UT_LIST_GET_PREV(trx_locks, lock); + continue; + } + } + else + { + ut_ad(lock_get_type_low(lock) & LOCK_TABLE); + dict_table_t *table= lock->un_member.tab_lock.table; + ut_ad(!table->is_temporary()); + + switch (lock_get_mode(lock)) { + case LOCK_IS: + case LOCK_S: + lock_table_dequeue(lock); + break; + case LOCK_IX: + case LOCK_X: + ut_ad(table->id >= DICT_HDR_FIRST_ID || trx->dict_operation); + /* fall through */ + default: + goto retain_lock; + } + } + + if (++count == LOCK_RELEASE_INTERVAL) + { + lock_mutex_exit(); + count= 0; + lock_mutex_enter(); + } + + lock= UT_LIST_GET_LAST(trx->lock.trx_locks); + } + + lock_mutex_exit(); +} + /* True if a lock mode is S or X */ #define IS_LOCK_S_OR_X(lock) \ (lock_get_mode(lock) == LOCK_S \ diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index cf8fa17cf1a..16ea7c41d71 100644 --- a/storage/innobase/trx/trx0trx.cc +++ 
b/storage/innobase/trx/trx0trx.cc @@ -1971,6 +1971,20 @@ trx_prepare( We must not be holding any mutexes or latches here. */ trx_flush_log_if_needed(lsn, trx); + + if (!UT_LIST_GET_LEN(trx->lock.trx_locks) + || trx->isolation_level == TRX_ISO_SERIALIZABLE) { + /* Do not release any locks at the + SERIALIZABLE isolation level. */ + } else if (!trx->mysql_thd + || thd_sql_command(trx->mysql_thd) + != SQLCOM_XA_PREPARE) { + /* Do not release locks for XA COMMIT ONE PHASE + or for internal distributed transactions + (XID::get_my_xid() would be nonzero). */ + } else { + lock_release_on_prepare(trx); + } } } -- cgit v1.2.1 From c9a9ae65544e03f9585a65db9c0e6d729616a40c Mon Sep 17 00:00:00 2001 From: Oleksandr Byelkin Date: Thu, 14 Oct 2021 16:19:09 +0200 Subject: MDEV-26650: Failed ALTER USER/GRANT statement removes the password from the cache Starting from 10.4 AUTH is not part of ACL_USER so changes have to be done over a copy, and bring in the cache only in case of success. --- .../suite/plugins/r/simple_password_check.result | 20 ++++++++++ .../suite/plugins/t/simple_password_check.test | 14 +++++++ sql/sql_acl.cc | 46 ++++++++++++++++------ 3 files changed, 69 insertions(+), 11 deletions(-) diff --git a/mysql-test/suite/plugins/r/simple_password_check.result b/mysql-test/suite/plugins/r/simple_password_check.result index 2e706115bd1..b04c5535a3f 100644 --- a/mysql-test/suite/plugins/r/simple_password_check.result +++ b/mysql-test/suite/plugins/r/simple_password_check.result @@ -161,3 +161,23 @@ flush privileges; uninstall plugin simple_password_check; create user foo1 identified by 'pwd'; drop user foo1; +# +# MDEV-26650: Failed ALTER USER/GRANT statement removes the +# password from the cache +# +create user foo1@localhost identified by 'nauth >= nauth) - acl_user->nauth= nauth; - else - acl_user->alloc_auth(&acl_memroot, nauth); + if (!(work_copy= (ACL_USER_PARAM::AUTH*) + alloc_root(thd->mem_root, nauth * sizeof(ACL_USER_PARAM::AUTH)))) + return 1; USER_AUTH 
*auth= combo.auth; for (uint i= 0; i < nauth; i++, auth= auth->next) { - acl_user->auth[i].plugin= auth->plugin; - acl_user->auth[i].auth_string= safe_lexcstrdup_root(&acl_memroot, auth->auth_str); - if (fix_user_plugin_ptr(acl_user->auth + i)) - acl_user->auth[i].plugin= safe_lexcstrdup_root(&acl_memroot, auth->plugin); - if (set_user_auth(thd, acl_user->user, acl_user->auth + i, auth->pwtext)) + work_copy[i].plugin= auth->plugin; + work_copy[i].auth_string= safe_lexcstrdup_root(&acl_memroot, + auth->auth_str); + if (fix_user_plugin_ptr(work_copy + i)) + work_copy[i].plugin= safe_lexcstrdup_root(&acl_memroot, auth->plugin); + if (set_user_auth(thd, acl_user->user, work_copy + i, auth->pwtext)) return 1; } } @@ -3269,11 +3270,34 @@ static int acl_user_update(THD *thd, ACL_USER *acl_user, uint nauth, if (options.account_locked != ACCOUNTLOCK_UNSPECIFIED) acl_user->account_locked= options.account_locked == ACCOUNTLOCK_LOCKED; - /* Unexpire the user password */ + if (thd->is_error()) + { + // If something went wrong (including OOM) we will not spoil acl cache + return 1; + } + /* Unexpire the user password and copy AUTH (when no more errors possible)*/ if (nauth) { acl_user->password_expired= false; - acl_user->password_last_changed= thd->query_start();; + acl_user->password_last_changed= thd->query_start(); + + if (acl_user->nauth >= nauth) + { + acl_user->nauth= nauth; + } + else + { + if (acl_user->alloc_auth(&acl_memroot, nauth)) + { + /* + acl_user is a copy, so NULL assigned in case of an error do not + change the acl cache + */ + return 1; + } + } + DBUG_ASSERT(work_copy); // allocated under the same condinition + memcpy(acl_user->auth, work_copy, nauth * sizeof(ACL_USER_PARAM::AUTH)); } switch (options.password_expire) { -- cgit v1.2.1 From edde9084c2a8ee2e7b702c994945a4dfdb7e2bdf Mon Sep 17 00:00:00 2001 From: Nayuta Yanagisawa Date: Fri, 24 Sep 2021 23:58:06 +0900 Subject: MDEV-26582 SIGSEGV in spider_db_bulk_insert and spider_db_connect and 
spider_db_before_query, and hang in "End of update loop" / "Reset for next command" query states Spider accesses a freed connection in ha_spider::end_bulk_insert() and results in SIGSEGV. The cause of the bug is that ha_spider::is_bulk_insert_exec_period() wrongly returns TRUE when the bulk insertion has not yet started. Spider decides whether it is during the bulk insertion or not by the value of insert_pos, but the variable is not reset in a case, and this result in the bug. --- .../mysql-test/spider/bugfix/r/mdev_26582.result | 11 +++++++++ .../mysql-test/spider/bugfix/t/mdev_26582.cnf | 2 ++ .../mysql-test/spider/bugfix/t/mdev_26582.test | 27 ++++++++++++++++++++++ storage/spider/spd_db_conn.cc | 10 ++++++++ 4 files changed, 50 insertions(+) create mode 100644 storage/spider/mysql-test/spider/bugfix/r/mdev_26582.result create mode 100644 storage/spider/mysql-test/spider/bugfix/t/mdev_26582.cnf create mode 100644 storage/spider/mysql-test/spider/bugfix/t/mdev_26582.test diff --git a/storage/spider/mysql-test/spider/bugfix/r/mdev_26582.result b/storage/spider/mysql-test/spider/bugfix/r/mdev_26582.result new file mode 100644 index 00000000000..54a4fc44b48 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/mdev_26582.result @@ -0,0 +1,11 @@ +# +# MDEV-26582 SIGSEGV in spider_db_bulk_insert and spider_db_connect and spider_db_before_query, and hang in "End of update loop" / "Reset for next command" query states +# +CREATE DATABASE IF NOT EXISTS auto_test_local; +USE auto_test_local; +CREATE TABLE t (i CHAR) ENGINE=SPIDER; +INSERT INTO t VALUES (0); +ERROR HY000: Unable to connect to foreign data source: localhost +INSERT t SELECT 1 ON DUPLICATE KEY UPDATE c=1; +ERROR 42S22: Unknown column 'c' in 'field list' +DROP DATABASE IF EXISTS auto_test_local; diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_26582.cnf b/storage/spider/mysql-test/spider/bugfix/t/mdev_26582.cnf new file mode 100644 index 00000000000..b0853e32654 --- /dev/null +++ 
b/storage/spider/mysql-test/spider/bugfix/t/mdev_26582.cnf @@ -0,0 +1,2 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_26582.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_26582.test new file mode 100644 index 00000000000..714a5e1c853 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_26582.test @@ -0,0 +1,27 @@ +--echo # +--echo # MDEV-26582 SIGSEGV in spider_db_bulk_insert and spider_db_connect and spider_db_before_query, and hang in "End of update loop" / "Reset for next command" query states +--echo # + +# NOTE: The bug does not reproduce if we import ../../t/test_init.inc instead. +--disable_query_log +--disable_result_log +--source ../../include/init_spider.inc +--enable_result_log +--enable_query_log + +CREATE DATABASE IF NOT EXISTS auto_test_local; +USE auto_test_local; + +CREATE TABLE t (i CHAR) ENGINE=SPIDER; +--error ER_CONNECT_TO_FOREIGN_DATA_SOURCE +INSERT INTO t VALUES (0); +--error ER_BAD_FIELD_ERROR +INSERT t SELECT 1 ON DUPLICATE KEY UPDATE c=1; + +DROP DATABASE IF EXISTS auto_test_local; + +--disable_query_log +--disable_result_log +--source ../../include/deinit_spider.inc +--enable_result_log +--enable_query_log diff --git a/storage/spider/spd_db_conn.cc b/storage/spider/spd_db_conn.cc index 22567d82bc1..d2cce0ba6d0 100644 --- a/storage/spider/spd_db_conn.cc +++ b/storage/spider/spd_db_conn.cc @@ -6661,7 +6661,11 @@ int spider_db_bulk_insert( #endif if ((error_num = spider->append_insert_values_sql_part( SPIDER_SQL_TYPE_INSERT_SQL))) + { + if (spider->sql_kinds & SPIDER_SQL_KIND_SQL) + spider->set_insert_to_pos_sql(SPIDER_SQL_TYPE_INSERT_SQL); DBUG_RETURN(error_num); + } #if defined(HS_HAS_SQLCOM) && defined(HAVE_HANDLERSOCKET) } if (spider->sql_kinds & SPIDER_SQL_KIND_HS) @@ -6681,6 +6685,8 @@ int spider_db_bulk_insert( if ((error_num = spider->append_insert_terminator_sql_part( SPIDER_SQL_TYPE_INSERT_SQL))) { + if (spider->sql_kinds & 
SPIDER_SQL_KIND_SQL) + spider->set_insert_to_pos_sql(SPIDER_SQL_TYPE_INSERT_SQL); DBUG_RETURN(error_num); } #ifdef HA_CAN_BULK_ACCESS @@ -6715,6 +6721,8 @@ int spider_db_bulk_insert( if ((error_num = dbton_handler->set_sql_for_exec(sql_type, roop_count2))) { + if (spider->sql_kinds & SPIDER_SQL_KIND_SQL) + spider->set_insert_to_pos_sql(SPIDER_SQL_TYPE_INSERT_SQL); if (dbton_handler->need_lock_before_set_sql_for_exec(sql_type)) { SPIDER_CLEAR_FILE_POS(&conn->mta_conn_mutex_file_pos); @@ -6744,6 +6752,8 @@ int spider_db_bulk_insert( conn->mta_conn_mutex_unlock_later = TRUE; if ((error_num = spider_db_set_names(spider, conn, roop_count2))) { + if (spider->sql_kinds & SPIDER_SQL_KIND_SQL) + spider->set_insert_to_pos_sql(SPIDER_SQL_TYPE_INSERT_SQL); DBUG_ASSERT(conn->mta_conn_mutex_lock_already); DBUG_ASSERT(conn->mta_conn_mutex_unlock_later); conn->mta_conn_mutex_lock_already = FALSE; -- cgit v1.2.1 From c3c53926c467c95386ae98d61ada87294bd61478 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 18 Oct 2021 18:03:12 +0300 Subject: MDEV-26554: Races between INSERT on child and DDL on parent table The SQL layer never acquires metadata locks (MDL) on the parent tables that are referenced by the table that a DML statement is modifying. However, the storage engine must access the parent table in order to ensure that the child table will not refer to a non-existing record in the parent table. During certain DDL operations, the InnoDB table metadata (dict_table_t) may be freed and rebuilt. This would cause a race condition with a concurrent INSERT that is attempting to report a FOREIGN KEY violation. We work around the insufficient MDL during DML by acquiring exclusive InnoDB table locks on all child tables during DDL. To avoid deadlocks, we will use the following order of acquisition: 1. tables whose REFERENCES clauses point to the current table 2. the current table that is being subjected to DDL 3. mysql.innodb_table_stats 4. mysql.innodb_index_stats 5. 
the InnoDB dictionary tables (SYS_TABLES and so on) 6. exclusive dict_sys.latch --- mysql-test/suite/innodb/r/foreign_key.result | 36 ++++++++++++++++--- .../suite/innodb/r/row_format_redundant.result | 2 +- mysql-test/suite/innodb/r/truncate_foreign.result | 7 ++-- mysql-test/suite/innodb/t/foreign_key.test | 39 +++++++++++++++++--- mysql-test/suite/innodb/t/truncate_foreign.test | 4 +-- storage/innobase/handler/ha_innodb.cc | 41 ++++++++++++++++++++-- storage/innobase/handler/handler0alter.cc | 19 ++++++++-- 7 files changed, 130 insertions(+), 18 deletions(-) diff --git a/mysql-test/suite/innodb/r/foreign_key.result b/mysql-test/suite/innodb/r/foreign_key.result index ff0e025246e..822ad4b6edd 100644 --- a/mysql-test/suite/innodb/r/foreign_key.result +++ b/mysql-test/suite/innodb/r/foreign_key.result @@ -408,8 +408,8 @@ INSERT INTO t1 VALUES (1,2); CREATE TABLE x AS SELECT * FROM t1; ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the ACTIVE state connect con1,localhost,root,,test; -SET foreign_key_checks= OFF, innodb_lock_wait_timeout= 1; -SET lock_wait_timeout=5; +SET foreign_key_checks= OFF, innodb_lock_wait_timeout= 0; +SET lock_wait_timeout=2; ALTER TABLE t1 ADD FOREIGN KEY f (a) REFERENCES t1 (pk), LOCK=EXCLUSIVE; ERROR HY000: Lock wait timeout exceeded; try restarting transaction disconnect con1; @@ -491,7 +491,7 @@ BEGIN; UPDATE users SET name = 'qux' WHERE id = 1; connect con1,localhost,root; connection con1; -SET innodb_lock_wait_timeout= 1; +SET innodb_lock_wait_timeout= 0; DELETE FROM matchmaking_groups WHERE id = 10; connection default; COMMIT; @@ -531,9 +531,10 @@ connection con1; BEGIN; UPDATE t2 SET f = 11 WHERE id = 1; connection default; -SET innodb_lock_wait_timeout= 1; +SET innodb_lock_wait_timeout= 0; DELETE FROM t1 WHERE id = 1; ERROR HY000: Lock wait timeout exceeded; try restarting transaction +SET innodb_lock_wait_timeout= 1; connection con1; COMMIT; connection default; @@ -897,3 +898,30 @@ create or 
replace table t2 (pk int primary key, a varchar(4096) unique, foreign ERROR HY000: Can't create table `test`.`t2` (errno: 150 "Foreign key constraint is incorrectly formed") drop table t1; # End of 10.5 tests +# +# MDEV-26554 Table-rebuilding DDL on parent table causes crash +# for INSERT into child table +# +CREATE TABLE parent(a INT PRIMARY KEY) ENGINE=InnoDB; +CREATE TABLE child(a INT PRIMARY KEY REFERENCES parent(a)) ENGINE=InnoDB; +connect con1, localhost, root,,; +BEGIN; +INSERT INTO child SET a=1; +ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`child`, CONSTRAINT `child_ibfk_1` FOREIGN KEY (`a`) REFERENCES `parent` (`a`)) +connection default; +SET innodb_lock_wait_timeout=0, foreign_key_checks=0; +TRUNCATE TABLE parent; +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +ALTER TABLE parent FORCE, ALGORITHM=COPY; +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +ALTER TABLE parent FORCE, ALGORITHM=INPLACE; +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +ALTER TABLE parent ADD COLUMN b INT, ALGORITHM=INSTANT; +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +disconnect con1; +TRUNCATE TABLE parent; +ALTER TABLE parent FORCE, ALGORITHM=COPY; +ALTER TABLE parent FORCE, ALGORITHM=INPLACE; +ALTER TABLE parent ADD COLUMN b INT, ALGORITHM=INSTANT; +DROP TABLE child, parent; +# End of 10.6 tests diff --git a/mysql-test/suite/innodb/r/row_format_redundant.result b/mysql-test/suite/innodb/r/row_format_redundant.result index f354666f645..8a629d06dd8 100644 --- a/mysql-test/suite/innodb/r/row_format_redundant.result +++ b/mysql-test/suite/innodb/r/row_format_redundant.result @@ -75,7 +75,7 @@ DROP TABLE t1; Warnings: Warning 1932 Table 'test.t1' doesn't exist in engine DROP TABLE t2,t3; -FOUND 5 /\[ERROR\] InnoDB: Table test/t1 in InnoDB data dictionary contains invalid flags\. 
SYS_TABLES\.TYPE=1 SYS_TABLES\.MIX_LEN=511\b/ in mysqld.1.err +FOUND 6 /\[ERROR\] InnoDB: Table test/t1 in InnoDB data dictionary contains invalid flags\. SYS_TABLES\.TYPE=1 SYS_TABLES\.MIX_LEN=511\b/ in mysqld.1.err # restart ib_buffer_pool ib_logfile0 diff --git a/mysql-test/suite/innodb/r/truncate_foreign.result b/mysql-test/suite/innodb/r/truncate_foreign.result index f9a7bcf562c..3154674aabf 100644 --- a/mysql-test/suite/innodb/r/truncate_foreign.result +++ b/mysql-test/suite/innodb/r/truncate_foreign.result @@ -43,16 +43,19 @@ SET DEBUG_SYNC='foreign_constraint_check_for_ins SIGNAL fk WAIT_FOR go'; INSERT INTO child SET a=5; connection default; SET DEBUG_SYNC='now WAIT_FOR fk'; -SET foreign_key_checks=0; +SET foreign_key_checks=0, innodb_lock_wait_timeout=0; TRUNCATE TABLE parent; +ERROR HY000: Lock wait timeout exceeded; try restarting transaction SET DEBUG_SYNC='now SIGNAL go'; connection dml; -ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`child`, CONSTRAINT `child_ibfk_1` FOREIGN KEY (`a`) REFERENCES `parent` (`a`) ON UPDATE CASCADE) SELECT * FROM parent; a +3 +5 SELECT * FROM child; a 3 +5 disconnect dml; connection default; SET DEBUG_SYNC = RESET; diff --git a/mysql-test/suite/innodb/t/foreign_key.test b/mysql-test/suite/innodb/t/foreign_key.test index cb157fc90db..ab3e4748fcb 100644 --- a/mysql-test/suite/innodb/t/foreign_key.test +++ b/mysql-test/suite/innodb/t/foreign_key.test @@ -411,8 +411,8 @@ INSERT INTO t1 VALUES (1,2); --error ER_XAER_RMFAIL CREATE TABLE x AS SELECT * FROM t1; --connect (con1,localhost,root,,test) -SET foreign_key_checks= OFF, innodb_lock_wait_timeout= 1; -SET lock_wait_timeout=5; +SET foreign_key_checks= OFF, innodb_lock_wait_timeout= 0; +SET lock_wait_timeout=2; --error ER_LOCK_WAIT_TIMEOUT ALTER TABLE t1 ADD FOREIGN KEY f (a) REFERENCES t1 (pk), LOCK=EXCLUSIVE;# Cleanup --disconnect con1 @@ -506,7 +506,7 @@ UPDATE users SET name = 'qux' WHERE id = 1; connect (con1,localhost,root); 
--connection con1 -SET innodb_lock_wait_timeout= 1; +SET innodb_lock_wait_timeout= 0; DELETE FROM matchmaking_groups WHERE id = 10; --connection default @@ -544,9 +544,10 @@ BEGIN; UPDATE t2 SET f = 11 WHERE id = 1; --connection default -SET innodb_lock_wait_timeout= 1; +SET innodb_lock_wait_timeout= 0; --error ER_LOCK_WAIT_TIMEOUT DELETE FROM t1 WHERE id = 1; +SET innodb_lock_wait_timeout= 1; --connection con1 COMMIT; @@ -902,4 +903,34 @@ drop table t1; --echo # End of 10.5 tests +--echo # +--echo # MDEV-26554 Table-rebuilding DDL on parent table causes crash +--echo # for INSERT into child table +--echo # + +CREATE TABLE parent(a INT PRIMARY KEY) ENGINE=InnoDB; +CREATE TABLE child(a INT PRIMARY KEY REFERENCES parent(a)) ENGINE=InnoDB; +connect (con1, localhost, root,,); +BEGIN; +--error ER_NO_REFERENCED_ROW_2 +INSERT INTO child SET a=1; +connection default; +SET innodb_lock_wait_timeout=0, foreign_key_checks=0; +--error ER_LOCK_WAIT_TIMEOUT +TRUNCATE TABLE parent; +--error ER_LOCK_WAIT_TIMEOUT +ALTER TABLE parent FORCE, ALGORITHM=COPY; +--error ER_LOCK_WAIT_TIMEOUT +ALTER TABLE parent FORCE, ALGORITHM=INPLACE; +--error ER_LOCK_WAIT_TIMEOUT +ALTER TABLE parent ADD COLUMN b INT, ALGORITHM=INSTANT; +disconnect con1; +TRUNCATE TABLE parent; +ALTER TABLE parent FORCE, ALGORITHM=COPY; +ALTER TABLE parent FORCE, ALGORITHM=INPLACE; +ALTER TABLE parent ADD COLUMN b INT, ALGORITHM=INSTANT; +DROP TABLE child, parent; + +--echo # End of 10.6 tests + --source include/wait_until_count_sessions.inc diff --git a/mysql-test/suite/innodb/t/truncate_foreign.test b/mysql-test/suite/innodb/t/truncate_foreign.test index 7a0bc57d5cd..e40029e18be 100644 --- a/mysql-test/suite/innodb/t/truncate_foreign.test +++ b/mysql-test/suite/innodb/t/truncate_foreign.test @@ -52,12 +52,12 @@ send INSERT INTO child SET a=5; connection default; SET DEBUG_SYNC='now WAIT_FOR fk'; -SET foreign_key_checks=0; +SET foreign_key_checks=0, innodb_lock_wait_timeout=0; +--error ER_LOCK_WAIT_TIMEOUT TRUNCATE 
TABLE parent; SET DEBUG_SYNC='now SIGNAL go'; connection dml; ---error ER_NO_REFERENCED_ROW_2 reap; SELECT * FROM parent; SELECT * FROM child; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index e3e9ed08eb4..b4690a6e8c5 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -13645,7 +13645,6 @@ static dberr_t innobase_rename_table(trx_t *trx, const char *from, DEBUG_SYNC_C("innodb_rename_table_ready"); - trx_start_if_not_started(trx, true); ut_ad(trx->will_lock); error = row_rename_table_for_mysql(norm_from, norm_to, trx, use_fk); @@ -13782,7 +13781,23 @@ int ha_innobase::truncate() dict_table_t *table_stats = nullptr, *index_stats = nullptr; MDL_ticket *mdl_table = nullptr, *mdl_index = nullptr; - dberr_t error = lock_table_for_trx(ib_table, trx, LOCK_X); + dberr_t error = DB_SUCCESS; + + dict_sys.freeze(SRW_LOCK_CALL); + for (const dict_foreign_t* f : ib_table->referenced_set) { + if (dict_table_t* child = f->foreign_table) { + error = lock_table_for_trx(child, trx, LOCK_X); + if (error != DB_SUCCESS) { + break; + } + } + } + dict_sys.unfreeze(); + + if (error == DB_SUCCESS) { + error = lock_table_for_trx(ib_table, trx, LOCK_X); + } + const bool fts = error == DB_SUCCESS && ib_table->flags2 & (DICT_TF2_FTS_HAS_DOC_ID | DICT_TF2_FTS); @@ -13945,6 +13960,26 @@ ha_innobase::rename_table( dberr_t error = DB_SUCCESS; + if (dict_table_t::is_temporary_name(norm_from)) { + /* There is no need to lock any FOREIGN KEY child tables. 
*/ + } else if (dict_table_t *table = dict_table_open_on_name( + norm_from, false, DICT_ERR_IGNORE_FK_NOKEY)) { + dict_sys.freeze(SRW_LOCK_CALL); + for (const dict_foreign_t* f : table->referenced_set) { + if (dict_table_t* child = f->foreign_table) { + error = lock_table_for_trx(child, trx, LOCK_X); + if (error != DB_SUCCESS) { + break; + } + } + } + dict_sys.unfreeze(); + if (error == DB_SUCCESS) { + error = lock_table_for_trx(table, trx, LOCK_X); + } + table->release(); + } + if (strcmp(norm_from, TABLE_STATS_NAME) && strcmp(norm_from, INDEX_STATS_NAME) && strcmp(norm_to, TABLE_STATS_NAME) @@ -13966,7 +14001,7 @@ ha_innobase::rename_table( dict_sys.unfreeze(); } - if (table_stats && index_stats + if (error == DB_SUCCESS && table_stats && index_stats && !strcmp(table_stats->name.m_name, TABLE_STATS_NAME) && !strcmp(index_stats->name.m_name, INDEX_STATS_NAME) && !(error = lock_table_for_trx(table_stats, trx, LOCK_X))) { diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 3bf56cd2bc6..22839298d3c 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -10847,12 +10847,24 @@ ha_innobase::commit_inplace_alter_table( for (inplace_alter_handler_ctx** pctx = ctx_array; *pctx; pctx++) { auto ctx = static_cast(*pctx); + dberr_t error = DB_SUCCESS; if (new_clustered && ctx->old_table->fts) { ut_ad(!ctx->old_table->fts->add_wq); fts_optimize_remove_table(ctx->old_table); } + dict_sys.freeze(SRW_LOCK_CALL); + for (auto f : ctx->old_table->referenced_set) { + if (dict_table_t* child = f->foreign_table) { + error = lock_table_for_trx(child, trx, LOCK_X); + if (error != DB_SUCCESS) { + break; + } + } + } + dict_sys.unfreeze(); + if (ctx->new_table->fts) { ut_ad(!ctx->new_table->fts->add_wq); fts_optimize_remove_table(ctx->new_table); @@ -10863,9 +10875,12 @@ ha_innobase::commit_inplace_alter_table( transaction is holding locks on the table while we change the table definition. 
Any recovered incomplete transactions would be holding InnoDB locks only, not MDL. */ + if (error == DB_SUCCESS) { + error = lock_table_for_trx(ctx->new_table, trx, + LOCK_X); + } - if (dberr_t error = lock_table_for_trx(ctx->new_table, trx, - LOCK_X)) { + if (error != DB_SUCCESS) { lock_fail: my_error_innodb( error, table_share->table_name.str, 0); -- cgit v1.2.1 From 2291f8ef73489fb8ed79768484df1ee4db3583a7 Mon Sep 17 00:00:00 2001 From: Brandon Nesterenko Date: Wed, 13 Oct 2021 07:31:32 -0600 Subject: MDEV-25284: Assertion `info->type == READ_CACHE || info->type == WRITE_CACHE' failed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: ======== This patch addresses two issues. First, if a CHANGE MASTER command is issued and an error happens while locating the replica’s relay logs, the logs can be put into an invalid state where future updates fail and future CHANGE MASTER calls crash the server. More specifically, right before a replica purges the relay logs (part of the `CHANGE MASTER TO` logic), the relay log is temporarily closed with state LOG_TO_BE_OPENED. If the server errors in-between the temporary log closure and purge, i.e. during the function find_log_pos, the log should be closed. MDEV-25284 reveals the log is not properly closed. Second, upon issuing a RESET SLAVE ALL command, a slave’s GTID filters are not cleared (DO_DOMAIN_IDS, IGNORE_DOMAIN_IDS, IGNORE_SERVER_IDS). MySQL had a similar bug report, Bug #18816897, which fixed this issue to clear IGNORE_SERVER_IDS after issuing RESET SLAVE ALL in version 5.7. Solution: ========= To fix the first problem, the CHANGE MASTER error handling logic was extended to transition the relay log state to LOG_CLOSED from LOG_TO_BE_OPENED. To fix the second problem, the RESET SLAVE ALL logic is extended to clear the domain_id filter and ignore_server_ids. 
Reviewed By: ============ Andrei Elkin --- .../rpl/include/rpl_reset_slave_all_check.inc | 48 +++++++++++ .../r/rpl_change_master_find_log_pos_err.result | 43 ++++++++++ .../r/rpl_reset_slave_all_clears_filters.result | 54 +++++++++++++ .../rpl/t/rpl_change_master_find_log_pos_err.test | 93 ++++++++++++++++++++++ .../rpl/t/rpl_reset_slave_all_clears_filters.test | 72 +++++++++++++++++ sql/log.h | 14 ++++ sql/rpl_mi.cc | 8 ++ sql/rpl_mi.h | 5 ++ sql/sql_repl.cc | 10 +++ 9 files changed, 347 insertions(+) create mode 100644 mysql-test/suite/rpl/include/rpl_reset_slave_all_check.inc create mode 100644 mysql-test/suite/rpl/r/rpl_change_master_find_log_pos_err.result create mode 100644 mysql-test/suite/rpl/r/rpl_reset_slave_all_clears_filters.result create mode 100644 mysql-test/suite/rpl/t/rpl_change_master_find_log_pos_err.test create mode 100644 mysql-test/suite/rpl/t/rpl_reset_slave_all_clears_filters.test diff --git a/mysql-test/suite/rpl/include/rpl_reset_slave_all_check.inc b/mysql-test/suite/rpl/include/rpl_reset_slave_all_check.inc new file mode 100644 index 00000000000..adbaf32ebd7 --- /dev/null +++ b/mysql-test/suite/rpl/include/rpl_reset_slave_all_check.inc @@ -0,0 +1,48 @@ +# This file ensures that a slave's id filtering variables (i.e. DO_DOMAIN_IDS, +# IGNORE_DOMAIN_IDS, and IGNORE_SERVER_IDS) are cleared after issuing +# `RESET SLAVE ALL`. 
+# +# param $_do_domain_ids Integer list of values to use for DO_DOMAIN_IDS +# param $_ignore_domain_ids Integer list of values to use for IGNORE_DOMAIN_IDS +# param $_ignore_server_ids Integer list of values to use for IGNORE_SERVER_IDS +# + +--echo # Id filtering variable values should be empty initially +let $do_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); +let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); +let $ignore_server_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Server_Ids, 1); + +if (`SELECT "$do_domain_ids_before" != "" OR + "$ignore_domain_ids_before" != "" OR + "$ignore_server_ids_before" != ""`) +{ + die("CHANGE MASTER TO id filter variables are not empty initially"); +} + + +--echo # Set id filtering variables +eval CHANGE MASTER TO DO_DOMAIN_IDS=$_do_domain_ids, IGNORE_DOMAIN_IDS=$_ignore_domain_ids, IGNORE_SERVER_IDS=$_ignore_server_ids, MASTER_USE_GTID=SLAVE_POS; +let $do_domain_ids_set= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); +let $ignore_domain_ids_set= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); +let $ignore_server_ids_set= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Server_Ids, 1); +--echo # do domain id list: $do_domain_ids_set +--echo # ignore domain id list: $ignore_domain_ids_set +--echo # ignore server id list: $ignore_server_ids_set + + +--echo # RESET SLAVE ALL should clear values for all id filtering variables +RESET SLAVE ALL; +--replace_result $MASTER_MYPORT MASTER_MYPORT +eval change master to master_port=$MASTER_MYPORT, master_host='127.0.0.1', master_user='root'; +--source include/start_slave.inc +--source include/stop_slave.inc + +let $do_domain_ids_cleared= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); +let $ignore_domain_ids_cleared= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); +let $ignore_server_ids_cleared= 
query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Server_Ids, 1); +if (`SELECT "$do_domain_ids_cleared" != "" OR + "$ignore_domain_ids_cleared" != "" OR + "$ignore_server_ids_cleared" != ""`) +{ + die("RESET SLAVE ALL did not clear id filtering variables"); +} diff --git a/mysql-test/suite/rpl/r/rpl_change_master_find_log_pos_err.result b/mysql-test/suite/rpl/r/rpl_change_master_find_log_pos_err.result new file mode 100644 index 00000000000..0ff76b5b60f --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_change_master_find_log_pos_err.result @@ -0,0 +1,43 @@ +include/master-slave.inc +[connection master] +# +# Failed CHANGE MASTER TO should not change relay log status +# +connection slave; +include/stop_slave.inc +SET @@debug_dbug="d,simulate_find_log_pos_error"; +CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=SLAVE_POS; +ERROR HY000: Target log not found in binlog index +SET @@debug_dbug=""; +include/start_slave.inc +# +# Ensure relay log can be updated after a failed CHANGE MASTER +# +FLUSH RELAY LOGS; +include/wait_for_slave_param.inc [Relay_Log_File] +# +# Slave should continue to receive data from old master after failed +# CHANGE MASTER TO +# +connection master; +CREATE TABLE t1 (a int); +insert into t1 values (1); +connection slave; +connection slave; +# +# Future CHANGE MASTER calls should succeed +# +include/stop_slave.inc +CHANGE MASTER TO MASTER_USE_GTID=SLAVE_POS; +include/start_slave.inc +######################## +# Cleanup +######################## +connection master; +DROP TABLE t1; +connection slave; +include/stop_slave.inc +RESET SLAVE ALL; +change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root'; +include/start_slave.inc +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_reset_slave_all_clears_filters.result b/mysql-test/suite/rpl/r/rpl_reset_slave_all_clears_filters.result new file mode 100644 index 00000000000..a273aeaa678 --- /dev/null +++ 
b/mysql-test/suite/rpl/r/rpl_reset_slave_all_clears_filters.result @@ -0,0 +1,54 @@ +include/master-slave.inc +[connection master] +connection slave; +include/stop_slave.inc +# +# Category 1) DO_DOMAIN_IDS and IGNORE_SERVER_IDS specified together +# +# Id filtering variable values should be empty initially +# Set id filtering variables +CHANGE MASTER TO DO_DOMAIN_IDS=(1), IGNORE_DOMAIN_IDS=(), IGNORE_SERVER_IDS=(3), MASTER_USE_GTID=SLAVE_POS; +# do domain id list: 1 +# ignore domain id list: +# ignore server id list: 3 +# RESET SLAVE ALL should clear values for all id filtering variables +RESET SLAVE ALL; +change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root'; +include/start_slave.inc +include/stop_slave.inc +# +# Category 2) IGNORE_DOMAIN_IDS and IGNORE_SERVER_IDS specified together +# +# Id filtering variable values should be empty initially +# Set id filtering variables +CHANGE MASTER TO DO_DOMAIN_IDS=(), IGNORE_DOMAIN_IDS=(2), IGNORE_SERVER_IDS=(3), MASTER_USE_GTID=SLAVE_POS; +# do domain id list: +# ignore domain id list: 2 +# ignore server id list: 3 +# RESET SLAVE ALL should clear values for all id filtering variables +RESET SLAVE ALL; +change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root'; +include/start_slave.inc +include/stop_slave.inc +# +# Category 3) Null check - edge case with all empty lists to ensure a +# lack of specification doesn't break anything +# +# Id filtering variable values should be empty initially +# Set id filtering variables +CHANGE MASTER TO DO_DOMAIN_IDS=(), IGNORE_DOMAIN_IDS=(), IGNORE_SERVER_IDS=(), MASTER_USE_GTID=SLAVE_POS; +# do domain id list: +# ignore domain id list: +# ignore server id list: +# RESET SLAVE ALL should clear values for all id filtering variables +RESET SLAVE ALL; +change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root'; +include/start_slave.inc +include/stop_slave.inc +############################ +# Cleanup 
+############################ +connection slave; +change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root'; +include/start_slave.inc +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_change_master_find_log_pos_err.test b/mysql-test/suite/rpl/t/rpl_change_master_find_log_pos_err.test new file mode 100644 index 00000000000..d1c2c03f010 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_change_master_find_log_pos_err.test @@ -0,0 +1,93 @@ +# +# Purpose: +# This test ensures that issuing a CHANGE MASTER will not put a replica into +# an inconsistent state if the slave cannot find the log files (i.e. the call to +# find_log_pos in reset_logs fails). More specifically, right before a replica +# purges the relay logs (part of the `CHANGE MASTER TO` logic), the relay log is +# temporarily closed with state LOG_TO_BE_OPENED. If the server is issued a +# CHANGE MASTER and it errors in-between the temporary log closure and purge, +# i.e. during the function find_log_pos, the log should be closed. The bug +# reported by MDEV-25284 revealed the log is not properly closed, such that +# future relay log updates fail, and future CHANGE MASTER calls crash the +# server. +# +# Methodology: +# This test ensures that the relay log is properly closed by ensuring future +# updates and CHANGE MASTER calls succeed. 
+# +# References: +# MDEV-25284: Assertion `info->type == READ_CACHE || +# info->type == WRITE_CACHE' failed +# +--source include/master-slave.inc +--source include/have_debug.inc + +--echo # +--echo # Failed CHANGE MASTER TO should not change relay log status +--echo # + +--connection slave +--source include/stop_slave.inc +SET @@debug_dbug="d,simulate_find_log_pos_error"; +error 1373; +CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=SLAVE_POS; +SET @@debug_dbug=""; +--source include/start_slave.inc + + +--echo # +--echo # Ensure relay log can be updated after a failed CHANGE MASTER +--echo # + +FLUSH RELAY LOGS; +--let $slave_param= Relay_Log_File +--let $slave_param_value= slave-relay-bin.000003 +--source include/wait_for_slave_param.inc + + +--echo # +--echo # Slave should continue to receive data from old master after failed +--echo # CHANGE MASTER TO +--echo # + +--connection master +CREATE TABLE t1 (a int); +insert into t1 values (1); +--let $master_checksum= `CHECKSUM TABLE t1` +--sync_slave_with_master + +--connection slave +if ($master_checksum != `CHECKSUM TABLE t1`) +{ + die("Replica failed to pull data from primary after failed CHANGE MASTER TO"); +} + + +--echo # +--echo # Future CHANGE MASTER calls should succeed +--echo # + +--source include/stop_slave.inc +CHANGE MASTER TO MASTER_USE_GTID=SLAVE_POS; +--source include/start_slave.inc + + +--echo ######################## +--echo # Cleanup +--echo ######################## + +--connection master +DROP TABLE t1; + +--connection slave +--source include/stop_slave.inc +RESET SLAVE ALL; +--replace_result $MASTER_MYPORT MASTER_MYPORT +eval change master to master_port=$MASTER_MYPORT, master_host='127.0.0.1', master_user='root'; +--source include/start_slave.inc + +--disable_query_log +call mtr.add_suppression("Failed to locate old binlog or relay log files"); +--enable_query_log + +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_reset_slave_all_clears_filters.test 
b/mysql-test/suite/rpl/t/rpl_reset_slave_all_clears_filters.test new file mode 100644 index 00000000000..7c01ce16586 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_reset_slave_all_clears_filters.test @@ -0,0 +1,72 @@ +# +# Purpose: +# This test validates that after issuing the `RESET SLAVE ALL` command, +# any corresponding IGNORE_DOMAIN_IDS/DO_DOMAIN_IDS and IGNORE_SERVER_IDS +# values are cleared. +# +# +# Methodology: +# To ensure the filtering variables are properly cleared after issuing +# RESET SLAVE ALL, we categorize different combinations of allowable input +# into three different options, and ensure that the variables are cleared for +# each category. The categories are as follows: +# Category 1) DO_DOMAIN_IDS and IGNORE_SERVER_IDS specified together +# Category 2) IGNORE_DOMAIN_IDS and IGNORE_SERVER_IDS specified together +# Category 3) Null check - edge case with all empty lists to ensure a lack +# of specification doesn't break anything +# +# To specify the values, the variables are set in `CHANGE MASTER TO`. To +# ensure the slave state is correct, we test the domain/server id filtering +# variable values at the following times while testing each category. +# +# Before CHANGE MASTER TO the filtering variables are tested to all be +# empty. +# +# After CHANGE MASTER TO the variables are tested to ensure they reflect +# those set in the CHANGE MASTER command. +# +# After RESET SLAVE ALL the filtering variables are tested to all be +# empty. 
+# + +--source include/master-slave.inc +--source include/have_debug.inc + +--connection slave +--source include/stop_slave.inc + +--echo # +--echo # Category 1) DO_DOMAIN_IDS and IGNORE_SERVER_IDS specified together +--echo # +--let $_do_domain_ids= (1) +--let $_ignore_domain_ids= () +--let $_ignore_server_ids= (3) +--source include/rpl_reset_slave_all_check.inc + +--echo # +--echo # Category 2) IGNORE_DOMAIN_IDS and IGNORE_SERVER_IDS specified together +--echo # +--let $_do_domain_ids= () +--let $_ignore_domain_ids= (2) +--let $_ignore_server_ids= (3) +--source include/rpl_reset_slave_all_check.inc + +--echo # +--echo # Category 3) Null check - edge case with all empty lists to ensure a +--echo # lack of specification doesn't break anything +--echo # +--let $_do_domain_ids= () +--let $_ignore_domain_ids= () +--let $_ignore_server_ids= () +--source include/rpl_reset_slave_all_check.inc + + +--echo ############################ +--echo # Cleanup +--echo ############################ +--connection slave +--replace_result $MASTER_MYPORT MASTER_MYPORT +eval change master to master_port=$MASTER_MYPORT, master_host='127.0.0.1', master_user='root'; +--source include/start_slave.inc + +--source include/rpl_end.inc diff --git a/sql/log.h b/sql/log.h index 0770861fe01..6896a4ff550 100644 --- a/sql/log.h +++ b/sql/log.h @@ -896,6 +896,20 @@ public: void unlock_binlog_end_pos() { mysql_mutex_unlock(&LOCK_binlog_end_pos); } mysql_mutex_t* get_binlog_end_pos_lock() { return &LOCK_binlog_end_pos; } + /* + Ensures the log's state is either LOG_OPEN or LOG_CLOSED. If something + failed along the desired path and left the log in invalid state, i.e. + LOG_TO_BE_OPENED, forces the state to be LOG_CLOSED. 
+ */ + void try_fix_log_state() + { + mysql_mutex_lock(get_log_lock()); + /* Only change the log state if it is LOG_TO_BE_OPENED */ + if (log_state == LOG_TO_BE_OPENED) + log_state= LOG_CLOSED; + mysql_mutex_unlock(get_log_lock()); + } + int wait_for_update_binlog_end_pos(THD* thd, struct timespec * timeout); /* diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc index 82a462d742b..8ed14962dd9 100644 --- a/sql/rpl_mi.cc +++ b/sql/rpl_mi.cc @@ -170,6 +170,8 @@ void Master_info::clear_in_memory_info(bool all) { port= MYSQL_PORT; host[0] = 0; user[0] = 0; password[0] = 0; + domain_id_filter.clear_ids(); + reset_dynamic(&ignore_server_ids); } } @@ -1788,6 +1790,12 @@ void Domain_id_filter::reset_filter() m_filter= false; } +void Domain_id_filter::clear_ids() +{ + reset_dynamic(&m_domain_ids[DO_DOMAIN_IDS]); + reset_dynamic(&m_domain_ids[IGNORE_DOMAIN_IDS]); +} + /** Update the do/ignore domain id filter lists. diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h index 12574285de0..e80c14fc340 100644 --- a/sql/rpl_mi.h +++ b/sql/rpl_mi.h @@ -78,6 +78,11 @@ public: */ void reset_filter(); + /* + Clear do_ids and ignore_ids to disable domain id filtering + */ + void clear_ids(); + /* Update the do/ignore domain id filter lists. diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 7ff0e27b008..d6d2dbc0d39 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -3840,6 +3840,16 @@ err: mi->unlock_slave_threads(); if (ret == FALSE) my_ok(thd); + else + { + /* + Depending on where CHANGE MASTER failed, the logs may be waiting to be + reopened. This would break future log updates and CHANGE MASTER calls. + `try_fix_log_state()` allows the relay log to fix its state to no longer + expect to be reopened. 
+ */ + mi->rli.relay_log.try_fix_log_state(); + } DBUG_RETURN(ret); } -- cgit v1.2.1 From 27bf57fd6dcfbaf6a116570e861b272eeae0b43c Mon Sep 17 00:00:00 2001 From: Oleksandr Byelkin Date: Fri, 1 Oct 2021 14:46:22 +0200 Subject: MDEV-26299: Some views force server (and mysqldump) to generate invalid SQL for their definitions Do not print illegal table field names for non-top-level SELECT list, they will not be referred to in any case but create problems for parsing of the printed result. --- mysql-test/r/view.result | 11 +++++++++++ mysql-test/t/view.test | 19 +++++++++++++++++++ sql/sql_select.cc | 23 +++++++++++++++++++++-- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/mysql-test/r/view.result b/mysql-test/r/view.result index bae415c17ea..001d26fc466 100644 --- a/mysql-test/r/view.result +++ b/mysql-test/r/view.result @@ -6876,5 +6876,16 @@ SELECT 1 FROM (SELECT count(((SELECT i1 FROM v1))) FROM v1) dt ; drop view v1; drop table t1; # +# MDEV-26299: Some views force server (and mysqldump) to generate +# invalid SQL for their definitions +# +create view v1 as +select * from +(select +"12345678901234567890123456789012345678901234567890123456789012345") as t1; +drop view v1; +CREATE VIEW v1 AS select `t1`.`12345678901234567890123456789012345678901234567890123456789012345` AS `Name_exp_1` from (select '12345678901234567890123456789012345678901234567890123456789012345') `t1`; +drop view v1; +# # End of 10.2 tests # diff --git a/mysql-test/t/view.test b/mysql-test/t/view.test index 128fa853e10..e6e6ccce8bd 100644 --- a/mysql-test/t/view.test +++ b/mysql-test/t/view.test @@ -6608,6 +6608,25 @@ SELECT 1 FROM (SELECT count(((SELECT i1 FROM v1))) FROM v1) dt ; drop view v1; drop table t1; + +--echo # +--echo # MDEV-26299: Some views force server (and mysqldump) to generate +--echo # invalid SQL for their definitions +--echo # + +create view v1 as + select * from + (select + "12345678901234567890123456789012345678901234567890123456789012345") as t1; + +let 
$definition=`select VIEW_DEFINITION from information_schema.views where TABLE_NAME="v1"`; + +drop view v1; + +eval CREATE VIEW v1 AS $definition; + +drop view v1; + --echo # --echo # End of 10.2 tests --echo # diff --git a/sql/sql_select.cc b/sql/sql_select.cc index bf33623a684..54a2facfe9f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -25804,6 +25804,11 @@ void st_select_lex::print(THD *thd, String *str, enum_query_type query_type) //Item List bool first= 1; + /* + outer_select() can not be used here because it is for name resolution + and will return NULL at any end of name resolution chain (view/derived) + */ + bool top_level= (get_master()->get_master() == 0); List_iterator_fast it(item_list); Item *item; while ((item= it++)) @@ -25813,7 +25818,8 @@ void st_select_lex::print(THD *thd, String *str, enum_query_type query_type) else str->append(','); - if (is_subquery_function() && item->is_autogenerated_name) + if ((is_subquery_function() && item->is_autogenerated_name) || + !item->name) { /* Do not print auto-generated aliases in subqueries. It has no purpose @@ -25822,7 +25828,20 @@ void st_select_lex::print(THD *thd, String *str, enum_query_type query_type) item->print(str, query_type); } else - item->print_item_w_name(str, query_type); + { + /* + Do not print illegal names (if it is not top level SELECT). + Top level view checked (and correct name are assigned), + other cases of top level SELECT are not important, because + it is not "table field". 
+ */ + if (top_level || + !item->is_autogenerated_name || + !check_column_name(item->name)) + item->print_item_w_name(str, query_type); + else + item->print(str, query_type); + } } /* -- cgit v1.2.1 From 5316703141c00103d8f4155cf4672b1f74ca0c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 19 Oct 2021 08:46:16 +0300 Subject: MDEV-14804 innodb.update_time failed in buildbot with wrong result Let us use a minimal-size buffer pool to ensure that page flushing will be slow enough so that LRU eviction cannot be avoided. --- mysql-test/suite/innodb/r/update_time.result | 3 +-- mysql-test/suite/innodb/t/update_time-master.opt | 2 +- mysql-test/suite/innodb/t/update_time.test | 14 ++------------ 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/mysql-test/suite/innodb/r/update_time.result b/mysql-test/suite/innodb/r/update_time.result index d8b9069b1ae..96d1af4c09c 100644 --- a/mysql-test/suite/innodb/r/update_time.result +++ b/mysql-test/suite/innodb/r/update_time.result @@ -24,8 +24,7 @@ SELECT COUNT(*) FROM information_schema.innodb_buffer_page WHERE table_name = '`test`.`t`'; COUNT(*) 1 -# INSERT lots of data in table 'big': begin -# INSERT lots of data in table 'big': end +INSERT INTO big SELECT REPEAT('a', 1024) FROM seq_1_to_10240; SELECT COUNT(*) FROM information_schema.innodb_buffer_page WHERE table_name = '`test`.`t`'; COUNT(*) diff --git a/mysql-test/suite/innodb/t/update_time-master.opt b/mysql-test/suite/innodb/t/update_time-master.opt index 9f283a9503f..f0fd647546d 100644 --- a/mysql-test/suite/innodb/t/update_time-master.opt +++ b/mysql-test/suite/innodb/t/update_time-master.opt @@ -1 +1 @@ ---innodb-buffer-pool-size=10M +--innodb-buffer-pool-size=5M diff --git a/mysql-test/suite/innodb/t/update_time.test b/mysql-test/suite/innodb/t/update_time.test index a95c5171e9b..fd1e082f5f2 100644 --- a/mysql-test/suite/innodb/t/update_time.test +++ b/mysql-test/suite/innodb/t/update_time.test @@ -10,6 +10,7 @@ -- source 
include/not_embedded.inc # This test is slow on buildbot. --source include/big_test.inc +--source include/have_sequence.inc CREATE TABLE t (a INT) ENGINE=INNODB; @@ -33,18 +34,7 @@ SELECT COUNT(*) FROM information_schema.innodb_buffer_page WHERE table_name = '`test`.`t`'; # evict table 't' by inserting as much data as the BP size itself --- echo # INSERT lots of data in table 'big': begin --- disable_query_log -BEGIN; --- let $i = 10240 -while ($i) -{ - INSERT INTO big VALUES (REPEAT('a', 1024)); - dec $i; -} -COMMIT; --- enable_query_log --- echo # INSERT lots of data in table 'big': end +INSERT INTO big SELECT REPEAT('a', 1024) FROM seq_1_to_10240; # confirm that all pages for table 't' have been evicted SELECT COUNT(*) FROM information_schema.innodb_buffer_page -- cgit v1.2.1 From f7684f0ca5f6f53d63e93afe89194b43bb5431d2 Mon Sep 17 00:00:00 2001 From: Krunal Bauskar Date: Tue, 19 Oct 2021 14:03:58 +0800 Subject: MDEV-26855: Enable spinning for log_sys_mutex and log_flush_order_mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As part of MDEV-26779 we first discovered the effect of enabling spinning for some critical mutex. MDEV-26779 tried enabling it for lock_sys.wait_mutex and observed a good gain in performance. In yet another discussion, Mark Callaghan pointed to a reference to pthread based mutex spin using PTHREAD_MUTEX_ADAPTIVE_NP (MDEV-26769 Intel RTM). Given the strong references, Marko Makela as part of his comment in #1923 pointed out an idea to enable spinning for other mutexes. Based on perf profiling we decided to explore spinning for log_sys_mutex and log_flush_order_mutex as they are occupying the top slots in the contended mutex list. The evaluation showed promising results for ARM64 but not for x86. So a patch is hereby proposed to enable the spinning of the mutex for ARM64-based platforms. 
--- storage/innobase/log/log0log.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index b2fa2e735f9..efacd520d52 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -175,8 +175,14 @@ void log_t::create() ut_ad(!is_initialised()); m_initialised= true; +#if defined(__aarch64__) + mysql_mutex_init(log_sys_mutex_key, &mutex, MY_MUTEX_INIT_FAST); + mysql_mutex_init( + log_flush_order_mutex_key, &flush_order_mutex, MY_MUTEX_INIT_FAST); +#else mysql_mutex_init(log_sys_mutex_key, &mutex, nullptr); mysql_mutex_init(log_flush_order_mutex_key, &flush_order_mutex, nullptr); +#endif /* Start the lsn from one log block from zero: this way every log record has a non-zero start lsn, a fact which we will use */ -- cgit v1.2.1 From 1a54cf62f85044a7686dc3becfe39e6b52209484 Mon Sep 17 00:00:00 2001 From: Alexey Botchkov Date: Mon, 11 Oct 2021 15:05:44 +0400 Subject: MDEV-24585 Assertion `je->s.cs == nice_js->charset()' failed in json_nice. We should set the charset in Item_func_json_format::fix_length_and_dec(). --- mysql-test/r/func_json.result | 6 ++++++ mysql-test/t/func_json.test | 6 ++++++ sql/item_jsonfunc.cc | 1 + 3 files changed, 13 insertions(+) diff --git a/mysql-test/r/func_json.result b/mysql-test/r/func_json.result index 8f014c89733..f8e78c79f5d 100644 --- a/mysql-test/r/func_json.result +++ b/mysql-test/r/func_json.result @@ -934,5 +934,11 @@ CAST(JSON_EXTRACT('{"x":false}', '$.x') AS DECIMAL) AS cd; cf cd 0 0 # +# MDEV-24585 Assertion `je->s.cs == nice_js->charset()' failed in json_nice. 
+# +SELECT JSON_REPLACE( JSON_DETAILED('["x"]'), '$.a', 'xx' ); +JSON_REPLACE( JSON_DETAILED('["x"]'), '$.a', 'xx' ) +["x"] +# # End of 10.2 tests # diff --git a/mysql-test/t/func_json.test b/mysql-test/t/func_json.test index 805e9954b81..e4e093225f8 100644 --- a/mysql-test/t/func_json.test +++ b/mysql-test/t/func_json.test @@ -554,6 +554,12 @@ SELECT CAST(JSON_EXTRACT('{"x":false}', '$.x') AS DECIMAL) AS cd; +--echo # +--echo # MDEV-24585 Assertion `je->s.cs == nice_js->charset()' failed in json_nice. +--echo # + +SELECT JSON_REPLACE( JSON_DETAILED('["x"]'), '$.a', 'xx' ); + --echo # --echo # End of 10.2 tests --echo # diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index 7db1ae1ffaf..ea70fbeebdb 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -3534,6 +3534,7 @@ const char *Item_func_json_format::func_name() const bool Item_func_json_format::fix_length_and_dec() { decimals= 0; + collation.set(args[0]->collation); max_length= args[0]->max_length; maybe_null= 1; return FALSE; -- cgit v1.2.1 From e7208bd93445ee233d72c8fd9413a4c83043b123 Mon Sep 17 00:00:00 2001 From: Nayuta Yanagisawa Date: Tue, 21 Sep 2021 20:22:56 +0900 Subject: MDEV-26158 SIGSEGV in spider_free_mem from ha_spider::open on INSERT The server crashes due to passing NULL to spider_free(). In some cases, this == pt_handler_share_handlers[0] at the label error_get_share in ha_spider::open(). In such cases, to nullify pt_handler_share_handlers[0]->wide_handler is nothing but to nullify this->wide_handler. We should not do this before freeing this->wide_handler. 
--- storage/spider/ha_spider.cc | 2 +- .../mysql-test/spider/bugfix/r/mdev_26158.result | 27 +++++++++++++++++++ .../mysql-test/spider/bugfix/t/mdev_26158.cnf | 3 +++ .../mysql-test/spider/bugfix/t/mdev_26158.test | 31 ++++++++++++++++++++++ 4 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 storage/spider/mysql-test/spider/bugfix/r/mdev_26158.result create mode 100644 storage/spider/mysql-test/spider/bugfix/t/mdev_26158.cnf create mode 100644 storage/spider/mysql-test/spider/bugfix/t/mdev_26158.test diff --git a/storage/spider/ha_spider.cc b/storage/spider/ha_spider.cc index 6cee49cd6da..bbbe76cec74 100644 --- a/storage/spider/ha_spider.cc +++ b/storage/spider/ha_spider.cc @@ -659,13 +659,13 @@ error_partition_handler_share_alloc: error_get_share: if (wide_handler_alloc) { + spider_free(spider_current_trx, wide_handler, MYF(0)); #ifdef WITH_PARTITION_STORAGE_ENGINE if (pt_handler_share_handlers) { pt_handler_share_handlers[0]->wide_handler = NULL; } #endif - spider_free(spider_current_trx, wide_handler, MYF(0)); spider->wide_handler = NULL; owner->wide_handler = NULL; owner->wide_handler_owner = FALSE; diff --git a/storage/spider/mysql-test/spider/bugfix/r/mdev_26158.result b/storage/spider/mysql-test/spider/bugfix/r/mdev_26158.result new file mode 100644 index 00000000000..2870dab2702 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/r/mdev_26158.result @@ -0,0 +1,27 @@ +# +# MDEV-26158 SIGSEGV in spider_free_mem from ha_spider::open on INSERT +# +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for child3 +connection master_1; +CREATE DATABASE auto_test_local; +USE auto_test_local; +CREATE TABLE t ( +c INT +) ENGINE=Spider DEFAULT CHARSET=utf8 COMMENT='table "tbl_a"' +PARTITION BY LIST COLUMNS(`c`) ( +PARTITION `pt1` DEFAULT COMMENT = 'srv "s_2_1"' +); +INSERT INTO t SELECT * FROM t; +ERROR 42000: Unknown database 'auto_test_remote' +DROP DATABASE auto_test_local; +for master_1 +for child2 +child2_1 +child2_2 +child2_3 +for 
child3 diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_26158.cnf b/storage/spider/mysql-test/spider/bugfix/t/mdev_26158.cnf new file mode 100644 index 00000000000..05dfd8a0bce --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_26158.cnf @@ -0,0 +1,3 @@ +!include include/default_mysqld.cnf +!include ../my_1_1.cnf +!include ../my_2_1.cnf diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_26158.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_26158.test new file mode 100644 index 00000000000..0484d2b6652 --- /dev/null +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_26158.test @@ -0,0 +1,31 @@ +--echo # +--echo # MDEV-26158 SIGSEGV in spider_free_mem from ha_spider::open on INSERT +--echo # + +--disable_query_log +--disable_result_log +--source ../../t/test_init.inc +--enable_result_log +--enable_query_log + +--connection master_1 +CREATE DATABASE auto_test_local; +USE auto_test_local; + +eval CREATE TABLE t ( + c INT +) $MASTER_1_ENGINE $MASTER_1_CHARSET COMMENT='table "tbl_a"' +PARTITION BY LIST COLUMNS(`c`) ( + PARTITION `pt1` DEFAULT COMMENT = 'srv "s_2_1"' +); + +--error ER_BAD_DB_ERROR +INSERT INTO t SELECT * FROM t; + +DROP DATABASE auto_test_local; + +--disable_query_log +--disable_result_log +--source ../../t/test_deinit.inc +--enable_result_log +--enable_query_log -- cgit v1.2.1 From f502ccbcb5dfce29067434885a23db8d1bd5f134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Fri, 15 Oct 2021 16:51:05 +0300 Subject: Link with libatomic to enable C11 atomics support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some architectures (mips) require libatomic to support proper atomic operations. Check first if support is available without linking, otherwise use the library. 
Contributors: James Cowgill Jessica Clarke Vicențiu Ciorbaru --- configure.cmake | 20 +++++++++++++++++++- mysys/CMakeLists.txt | 4 ++++ sql/CMakeLists.txt | 1 - 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/configure.cmake b/configure.cmake index 7a1369d7770..db8742bb93b 100644 --- a/configure.cmake +++ b/configure.cmake @@ -895,7 +895,25 @@ int main() long long int *ptr= &var; return (int)__atomic_load_n(ptr, __ATOMIC_SEQ_CST); }" -HAVE_GCC_C11_ATOMICS) +HAVE_GCC_C11_ATOMICS_WITHOUT_LIBATOMIC) +IF (HAVE_GCC_C11_ATOMICS_WITHOUT_LIBATOMIC) + SET(HAVE_GCC_C11_ATOMICS True) +ELSE() + SET(OLD_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) + LIST(APPEND CMAKE_REQUIRED_LIBRARIES "atomic") + CHECK_CXX_SOURCE_COMPILES(" + int main() + { + long long int var= 1; + long long int *ptr= &var; + return (int)__atomic_load_n(ptr, __ATOMIC_SEQ_CST); + }" + HAVE_GCC_C11_ATOMICS_WITH_LIBATOMIC) + IF(HAVE_GCC_C11_ATOMICS_WITH_LIBATOMIC) + SET(HAVE_GCC_C11_ATOMICS True) + ENDIF() + SET(CMAKE_REQUIRED_LIBRARIES ${OLD_CMAKE_REQUIRED_LIBRARIES}) +ENDIF() IF(WITH_VALGRIND) SET(HAVE_valgrind 1) diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index f97e3b4d390..09d3f726ffc 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -78,6 +78,10 @@ TARGET_LINK_LIBRARIES(mysys dbug strings ${ZLIB_LIBRARY} ${LIBNSL} ${LIBM} ${LIBRT} ${CMAKE_DL_LIBS} ${LIBSOCKET} ${LIBEXECINFO} ${CRC32_LIBRARY}) DTRACE_INSTRUMENT(mysys) +IF (HAVE_GCC_C11_ATOMICS_WITH_LIBATOMIC) + TARGET_LINK_LIBRARIES(mysys atomic) +ENDIF() + IF(HAVE_BFD_H) TARGET_LINK_LIBRARIES(mysys bfd) ENDIF(HAVE_BFD_H) diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 5f5d7daf1a5..f574f1f2029 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -307,7 +307,6 @@ IF(WITH_MYSQLD_LDFLAGS) "${MYSQLD_LINK_FLAGS} ${WITH_MYSQLD_LDFLAGS}") ENDIF() - FIND_PACKAGE(BISON 2.0) -- cgit v1.2.1 From a33c1082dab7ab4b08acf957d6364be95e4c6a9f Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Fri, 15 Oct 2021 17:06:17 +0300 Subject: Fix MIPS build failure: Handle unaligned buffers in connect's TYPBLK class On MIPS platforms (and probably others) unaligned memory access results in a bus error. In the connect storage engine, block data for some data formats is stored packed in memory and the TYPBLK class is used to read values from it. Since TYPBLK does not have special handling for this packed memory, it can quite easily result in unaligned memory accesses. The simple way to fix this is to perform all accesses to the main buffer through memcpy. With GCC and optimizations turned on, this call to memcpy is completely optimized away on architectures where unaligned accesses are ok (like x86). Contributors: James Cowgill --- storage/connect/valblk.cpp | 41 +++++++++++++++++++------------------ storage/connect/valblk.h | 50 ++++++++++++++++++++++++++++------------------ 2 files changed, 51 insertions(+), 40 deletions(-) diff --git a/storage/connect/valblk.cpp b/storage/connect/valblk.cpp index a993c1371c2..8702f606a8f 100644 --- a/storage/connect/valblk.cpp +++ b/storage/connect/valblk.cpp @@ -268,14 +268,14 @@ bool TYPBLK::Init(PGLOBAL g, bool check) template char *TYPBLK::GetCharString(char *p, int n) { - sprintf(p, Fmt, Typp[n]); + sprintf(p, Fmt, UnalignedRead(n)); return p; } // end of GetCharString template <> char *TYPBLK::GetCharString(char *p, int n) { - sprintf(p, Fmt, Prec, Typp[n]); + sprintf(p, Fmt, Prec, UnalignedRead(n)); return p; } // end of GetCharString @@ -291,7 +291,7 @@ void TYPBLK::SetValue(PVAL valp, int n) ChkTyp(valp); if (!(b = valp->IsNull())) - Typp[n] = GetTypedValue(valp); + UnalignedWrite(n, GetTypedValue(valp)); else Reset(n); @@ -353,9 +353,9 @@ void TYPBLK::SetValue(PCSZ p, int n) ulonglong val = CharToNumber(p, strlen(p), maxval, Unsigned, &minus); if (minus && val < maxval) - Typp[n] = (TYPE)(-(signed)val); + UnalignedWrite(n, (TYPE)(-(signed)val)); else - Typp[n] = (TYPE)val; + 
UnalignedWrite(n, (TYPE)val); SetNull(n, false); } // end of SetValue @@ -398,7 +398,7 @@ void TYPBLK::SetValue(PCSZ p, int n) throw Type; } // endif Check - Typp[n] = atof(p); + UnalignedWrite(n, atof(p)); SetNull(n, false); } // end of SetValue @@ -430,7 +430,7 @@ void TYPBLK::SetValue(PVBLK pv, int n1, int n2) ChkTyp(pv); if (!(b = pv->IsNull(n2) && Nullable)) - Typp[n1] = GetTypedValue(pv, n2); + UnalignedWrite(n1, GetTypedValue(pv, n2)); else Reset(n1); @@ -481,10 +481,10 @@ void TYPBLK::SetMin(PVAL valp, int n) { CheckParms(valp, n) TYPE tval = GetTypedValue(valp); - TYPE& tmin = Typp[n]; + TYPE tmin = UnalignedRead(n); if (tval < tmin) - tmin = tval; + UnalignedWrite(n, tval); } // end of SetMin @@ -496,10 +496,10 @@ void TYPBLK::SetMax(PVAL valp, int n) { CheckParms(valp, n) TYPE tval = GetTypedValue(valp); - TYPE& tmin = Typp[n]; + TYPE tmin = UnalignedRead(n); if (tval > tmin) - tmin = tval; + UnalignedWrite(n, tval); } // end of SetMax @@ -513,8 +513,7 @@ void TYPBLK::SetValues(PVBLK pv, int k, int n) CheckType(pv) TYPE *lp = ((TYPBLK*)pv)->Typp; - for (int i = k; i < n; i++) // TODO - Typp[i] = lp[i]; + memcpy(Typp + k, lp + k, sizeof(TYPE) * n); } // end of SetValues #endif // 0 @@ -525,7 +524,7 @@ void TYPBLK::SetValues(PVBLK pv, int k, int n) template void TYPBLK::Move(int i, int j) { - Typp[j] = Typp[i]; + UnalignedWrite(j, UnalignedRead(i)); MoveNull(i, j); } // end of Move @@ -539,7 +538,7 @@ int TYPBLK::CompVal(PVAL vp, int n) ChkIndx(n); ChkTyp(vp); #endif // _DEBUG - TYPE mlv = Typp[n]; + TYPE mlv = UnalignedRead(n); TYPE vlv = GetTypedValue(vp); return (vlv > mlv) ? 1 : (vlv < mlv) ? (-1) : 0; @@ -551,8 +550,8 @@ int TYPBLK::CompVal(PVAL vp, int n) template int TYPBLK::CompVal(int i1, int i2) { - TYPE lv1 = Typp[i1]; - TYPE lv2 = Typp[i2]; + TYPE lv1 = UnalignedRead(i1); + TYPE lv2 = UnalignedRead(i2); return (lv1 > lv2) ? 1 : (lv1 < lv2) ? 
(-1) : 0; } // end of CompVal @@ -589,7 +588,7 @@ int TYPBLK::Find(PVAL vp) TYPE n = GetTypedValue(vp); for (i = 0; i < Nval; i++) - if (n == Typp[i]) + if (n == UnalignedRead(i)) break; return (i < Nval) ? i : (-1); @@ -605,7 +604,7 @@ int TYPBLK::GetMaxLength(void) int i, n, m; for (i = n = 0; i < Nval; i++) { - m = sprintf(buf, Fmt, Typp[i]); + m = sprintf(buf, Fmt, UnalignedRead(i)); n = MY_MAX(n, m); } // endfor i @@ -1335,7 +1334,7 @@ char *DATBLK::GetCharString(char *p, int n) char *vp; if (Dvalp) { - Dvalp->SetValue(Typp[n]); + Dvalp->SetValue(UnalignedRead(n)); vp = Dvalp->GetCharString(p); } else vp = TYPBLK::GetCharString(p, n); @@ -1351,7 +1350,7 @@ void DATBLK::SetValue(PCSZ p, int n) if (Dvalp) { // Decode the string according to format Dvalp->SetValue_psz(p); - Typp[n] = Dvalp->GetIntValue(); + UnalignedWrite(n, Dvalp->GetIntValue()); } else TYPBLK::SetValue(p, n); diff --git a/storage/connect/valblk.h b/storage/connect/valblk.h index 568fc172c6a..537e838c99f 100644 --- a/storage/connect/valblk.h +++ b/storage/connect/valblk.h @@ -151,40 +151,41 @@ class TYPBLK : public VALBLK { // Implementation virtual bool Init(PGLOBAL g, bool check); virtual int GetVlen(void) {return sizeof(TYPE);} - virtual char GetTinyValue(int n) {return (char)Typp[n];} - virtual uchar GetUTinyValue(int n) {return (uchar)Typp[n];} - virtual short GetShortValue(int n) {return (short)Typp[n];} - virtual ushort GetUShortValue(int n) {return (ushort)Typp[n];} - virtual int GetIntValue(int n) {return (int)Typp[n];} - virtual uint GetUIntValue(int n) {return (uint)Typp[n];} - virtual longlong GetBigintValue(int n) {return (longlong)Typp[n];} - virtual ulonglong GetUBigintValue(int n) {return (ulonglong)Typp[n];} - virtual double GetFloatValue(int n) {return (double)Typp[n];} + + virtual char GetTinyValue(int n) {return (char)UnalignedRead(n);} + virtual uchar GetUTinyValue(int n) {return (uchar)UnalignedRead(n);} + virtual short GetShortValue(int n) {return (short)UnalignedRead(n);} 
+ virtual ushort GetUShortValue(int n) {return (ushort)UnalignedRead(n);} + virtual int GetIntValue(int n) {return (int)UnalignedRead(n);} + virtual uint GetUIntValue(int n) {return (uint)UnalignedRead(n);} + virtual longlong GetBigintValue(int n) {return (longlong)UnalignedRead(n);} + virtual ulonglong GetUBigintValue(int n) {return (ulonglong)UnalignedRead(n);} + virtual double GetFloatValue(int n) {return (double)UnalignedRead(n);} virtual char *GetCharString(char *p, int n); - virtual void Reset(int n) {Typp[n] = 0;} + virtual void Reset(int n) {UnalignedWrite(n, 0);} // Methods using VALBLK::SetValue; virtual void SetValue(PCSZ sp, int n); virtual void SetValue(const char *sp, uint len, int n); virtual void SetValue(short sval, int n) - {Typp[n] = (TYPE)sval; SetNull(n, false);} + {UnalignedWrite(n, (TYPE)sval); SetNull(n, false);} virtual void SetValue(ushort sval, int n) - {Typp[n] = (TYPE)sval; SetNull(n, false);} + {UnalignedWrite(n, (TYPE)sval); SetNull(n, false);} virtual void SetValue(int lval, int n) - {Typp[n] = (TYPE)lval; SetNull(n, false);} + {UnalignedWrite(n, (TYPE)lval); SetNull(n, false);} virtual void SetValue(uint lval, int n) - {Typp[n] = (TYPE)lval; SetNull(n, false);} + {UnalignedWrite(n, (TYPE)lval); SetNull(n, false);} virtual void SetValue(longlong lval, int n) - {Typp[n] = (TYPE)lval; SetNull(n, false);} + {UnalignedWrite(n, (TYPE)lval); SetNull(n, false);} virtual void SetValue(ulonglong lval, int n) - {Typp[n] = (TYPE)lval; SetNull(n, false);} + {UnalignedWrite(n, (TYPE)lval); SetNull(n, false);} virtual void SetValue(double fval, int n) - {Typp[n] = (TYPE)fval; SetNull(n, false);} + {UnalignedWrite(n, (TYPE)fval); SetNull(n, false);} virtual void SetValue(char cval, int n) - {Typp[n] = (TYPE)cval; SetNull(n, false);} + {UnalignedWrite(n, (TYPE)cval); SetNull(n, false);} virtual void SetValue(uchar cval, int n) - {Typp[n] = (TYPE)cval; SetNull(n, false);} + {UnalignedWrite(n, (TYPE)cval); SetNull(n, false);} virtual void 
SetValue(PVAL valp, int n); virtual void SetValue(PVBLK pv, int n1, int n2); virtual void SetMin(PVAL valp, int n); @@ -206,6 +207,17 @@ class TYPBLK : public VALBLK { // Members TYPE* const &Typp; const char *Fmt; + + // Unaligned access + TYPE UnalignedRead(int n) const { + TYPE result; + memcpy(&result, Typp + n, sizeof(TYPE)); + return result; + } + + void UnalignedWrite(int n, TYPE value) { + memcpy(Typp + n, &value, sizeof(TYPE)); + } }; // end of class TYPBLK /***********************************************************************/ -- cgit v1.2.1 From 3c2ab896b9642c43805a5dfc174c81fc5f902db0 Mon Sep 17 00:00:00 2001 From: Sergei Krivonos Date: Mon, 18 Oct 2021 18:51:04 +0300 Subject: MDEV-19129: Xcode compatibility update: update libmariadb submodule --- libmariadb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmariadb b/libmariadb index 80188c38592..735a7299dba 160000 --- a/libmariadb +++ b/libmariadb @@ -1 +1 @@ -Subproject commit 80188c38592f8276a5d36c45657ebdd1f1f9c625 +Subproject commit 735a7299dbae19cc2b82b9697becaf90e9b43047 -- cgit v1.2.1 From 1388845e048011932a6d715936a781479c5e6af3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Tue, 19 Oct 2021 19:20:23 +0300 Subject: Fix Groonga crash on MIPS: Correctly link to libatomic MIPS (and possibly other) platforms require linking against libatomic to support 64-bit atomic integers. Groonga was failing to do so and all related tests were failing with an atomics relocation error on MIPS. 
Contributors: James Cowgill --- storage/mroonga/vendor/groonga/lib/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/storage/mroonga/vendor/groonga/lib/CMakeLists.txt b/storage/mroonga/vendor/groonga/lib/CMakeLists.txt index 8c71563f722..4f076458a36 100644 --- a/storage/mroonga/vendor/groonga/lib/CMakeLists.txt +++ b/storage/mroonga/vendor/groonga/lib/CMakeLists.txt @@ -89,7 +89,12 @@ else() endif() set_target_properties(libgroonga PROPERTIES OUTPUT_NAME "groonga") +if (HAVE_GCC_C11_ATOMICS_WITH_LIBATOMIC) + set(ATOMIC_LIBS atomic) +endif() + set(GRN_ALL_LIBRARIES + ${ATOMIC_LIBS} ${EXECINFO_LIBS} ${RT_LIBS} ${PTHREAD_LIBS} -- cgit v1.2.1 From 6e390a62baa9dfd92d2776d28c97fd9525422295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 19 Oct 2021 19:54:29 +0300 Subject: MDEV-26772 InnoDB DDL fails with DUPLICATE KEY error ha_innobase::delete_table(): When the table that is being dropped has a name starting with #sql, temporarily set innodb_lock_wait_timeout=0 while attempting to lock the persistent statistics tables. If the statistics tables cannot be locked, pretend that statistics did not exist and carry on with dropping the table. The SQL layer is not really prepared for failures of this operation. This is what fixes the test case. ha_innobase::rename_table(): When renaming a table from a name that starts with #sql, try to lock the statistics tables with an immediate timeout, and ignore the statistics if the locks were not available. In fact, during any rename from a #sql name, dict_stats_rename_table() should have no effect, because already when an earlier rename to a #sql name took place we should have deleted the statistics for the table using the non-#sql name. This change is just analogous to the ha_innobase::delete_table(). 
--- .../suite/innodb/r/innodb-alter-debug.result | 26 +++++++++++ mysql-test/suite/innodb/t/innodb-alter-debug.test | 37 ++++++++++++++++ storage/innobase/handler/ha_innodb.cc | 51 ++++++++++++++++++++-- 3 files changed, 110 insertions(+), 4 deletions(-) diff --git a/mysql-test/suite/innodb/r/innodb-alter-debug.result b/mysql-test/suite/innodb/r/innodb-alter-debug.result index 4644c124a45..519283536d5 100644 --- a/mysql-test/suite/innodb/r/innodb-alter-debug.result +++ b/mysql-test/suite/innodb/r/innodb-alter-debug.result @@ -107,3 +107,29 @@ ALTER TABLE t RENAME INDEX i2 to x, ALGORITHM=INPLACE; ERROR 40001: Deadlock found when trying to get lock; try restarting transaction SET DEBUG_DBUG = @saved_debug_dbug; DROP TABLE t; +# +# MDEV-26772 InnoDB DDL fails with DUPLICATE KEY error +# +create table t1(f1 int not null primary key, +f2 int not null, index idx(f2))engine=innodb; +insert into t1 values(1, 1); +connect con1,localhost,root,,,; +SET DEBUG_SYNC='before_delete_table_stats SIGNAL blocked WAIT_FOR go'; +SET innodb_lock_wait_timeout=0; +ALTER TABLE t1 FORCE, ALGORITHM=COPY; +connection default; +SET DEBUG_SYNC='now WAIT_FOR blocked'; +BEGIN; +SELECT * FROM mysql.innodb_table_stats FOR UPDATE; +database_name table_name last_update n_rows clustered_index_size sum_of_other_index_sizes +SET DEBUG_SYNC='now SIGNAL go'; +connection con1; +connection default; +COMMIT; +SET DEBUG_SYNC=RESET; +connection con1; +ALTER TABLE t1 RENAME KEY idx TO idx1, ALGORITHM=COPY; +disconnect con1; +connection default; +DROP TABLE t1; +# End of 10.6 tests diff --git a/mysql-test/suite/innodb/t/innodb-alter-debug.test b/mysql-test/suite/innodb/t/innodb-alter-debug.test index 7fbbb3159ee..7af8e882724 100644 --- a/mysql-test/suite/innodb/t/innodb-alter-debug.test +++ b/mysql-test/suite/innodb/t/innodb-alter-debug.test @@ -142,5 +142,42 @@ SET DEBUG_DBUG = @saved_debug_dbug; DROP TABLE t; +--echo # +--echo # MDEV-26772 InnoDB DDL fails with DUPLICATE KEY error +--echo # + +create table 
t1(f1 int not null primary key, + + f2 int not null, index idx(f2))engine=innodb; + +insert into t1 values(1, 1); + +connect(con1,localhost,root,,,); +SET DEBUG_SYNC='before_delete_table_stats SIGNAL blocked WAIT_FOR go'; +SET innodb_lock_wait_timeout=0; +send ALTER TABLE t1 FORCE, ALGORITHM=COPY; + +connection default; +SET DEBUG_SYNC='now WAIT_FOR blocked'; +BEGIN; +SELECT * FROM mysql.innodb_table_stats FOR UPDATE; +SET DEBUG_SYNC='now SIGNAL go'; + +connection con1; +reap; + +connection default; +COMMIT; +SET DEBUG_SYNC=RESET; + +connection con1; +ALTER TABLE t1 RENAME KEY idx TO idx1, ALGORITHM=COPY; +disconnect con1; + +connection default; +DROP TABLE t1; + +--echo # End of 10.6 tests + # Wait till all disconnects are completed --source include/wait_until_count_sessions.inc diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index b4690a6e8c5..bcc160d5f0e 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -13473,6 +13473,8 @@ int ha_innobase::delete_table(const char *name) } #endif + DEBUG_SYNC(thd, "before_delete_table_stats"); + if (err == DB_SUCCESS && dict_stats_is_persistent_enabled(table) && !table->is_stats_table()) { @@ -13496,11 +13498,29 @@ int ha_innobase::delete_table(const char *name) dict_sys.unfreeze(); } + auto &timeout= THDVAR(thd, lock_wait_timeout); + const auto save_timeout= timeout; + if (table->name.is_temporary()) + timeout= 0; + if (table_stats && index_stats && !strcmp(table_stats->name.m_name, TABLE_STATS_NAME) && !strcmp(index_stats->name.m_name, INDEX_STATS_NAME) && !(err= lock_table_for_trx(table_stats, trx, LOCK_X))) err= lock_table_for_trx(index_stats, trx, LOCK_X); + + if (err != DB_SUCCESS && !timeout) + { + /* We may skip deleting statistics if we cannot lock the tables, + when the table carries a temporary name. 
*/ + err= DB_SUCCESS; + dict_table_close(table_stats, false, thd, mdl_table); + dict_table_close(index_stats, false, thd, mdl_index); + table_stats= nullptr; + index_stats= nullptr; + } + + timeout= save_timeout; } if (err == DB_SUCCESS) @@ -13959,8 +13979,9 @@ ha_innobase::rename_table( normalize_table_name(norm_to, to); dberr_t error = DB_SUCCESS; + const bool from_temp = dict_table_t::is_temporary_name(norm_from); - if (dict_table_t::is_temporary_name(norm_from)) { + if (from_temp) { /* There is no need to lock any FOREIGN KEY child tables. */ } else if (dict_table_t *table = dict_table_open_on_name( norm_from, false, DICT_ERR_IGNORE_FK_NOKEY)) { @@ -14003,9 +14024,31 @@ ha_innobase::rename_table( if (error == DB_SUCCESS && table_stats && index_stats && !strcmp(table_stats->name.m_name, TABLE_STATS_NAME) - && !strcmp(index_stats->name.m_name, INDEX_STATS_NAME) && - !(error = lock_table_for_trx(table_stats, trx, LOCK_X))) { - error = lock_table_for_trx(index_stats, trx, LOCK_X); + && !strcmp(index_stats->name.m_name, INDEX_STATS_NAME)) { + auto &timeout = THDVAR(thd, lock_wait_timeout); + const auto save_timeout = timeout; + if (from_temp) { + timeout = 0; + } + error = lock_table_for_trx(table_stats, trx, LOCK_X); + if (error == DB_SUCCESS) { + error = lock_table_for_trx(index_stats, trx, + LOCK_X); + } + if (error != DB_SUCCESS && from_temp) { + error = DB_SUCCESS; + /* We may skip renaming statistics if + we cannot lock the tables, when the + table is being renamed from a + temporary name.
*/ + dict_table_close(table_stats, false, thd, + mdl_table); + dict_table_close(index_stats, false, thd, + mdl_index); + table_stats = nullptr; + index_stats = nullptr; + } + timeout = save_timeout; } } -- cgit v1.2.1 From d6a3f425ee215b22584281935e61506465a7601d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 19 Oct 2021 20:38:07 +0300 Subject: After-merge fix: Remove unused variable --- storage/innobase/lock/lock0lock.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index dcef069f049..86c44d2e52f 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -3981,7 +3981,7 @@ void lock_release_on_prepare(trx_t *trx) } else { - dict_table_t *table= lock->un_member.tab_lock.table; + ut_d(dict_table_t *table= lock->un_member.tab_lock.table); ut_ad(!table->is_temporary()); switch (lock->mode()) { case LOCK_IS: -- cgit v1.2.1 From 4590f8b41cfec0c98a96e5980b6ad7b2e250818c Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Mon, 16 Aug 2021 08:40:56 +1000 Subject: MDEV-26363 Passwords incorrectly expiring after MySQL5.7 -> MariaDB10.3 -> 10.4+ upgrades MySQL-5.7 mysql.user tables have a password_last_changed field. Because versions before MariaDB-10.4 remained oblivious to this, the act of creating users or otherwise changing a user's row left the password_last_changed field with 0. Running a MariaDB-10.4 instance on this would work correctly, until mysql_upgrade is run, when this 0 value immediately translates to password expired state. MySQL-5.7 relied on the password_expired enum to indicate password expiry so we aren't going to activate passwords that were expired in MySQL-5.7. Thanks Hans Borresen for the bug report and review of the fix.
--- mysql-test/main/mysql_upgrade.result | 72 ++++++++++++++++++++++++++++++++++++ mysql-test/main/mysql_upgrade.test | 38 +++++++++++++++++++ scripts/mysql_system_tables_fix.sql | 2 +- 3 files changed, 111 insertions(+), 1 deletion(-) diff --git a/mysql-test/main/mysql_upgrade.result b/mysql-test/main/mysql_upgrade.result index af3688dca6b..c4c0af1601c 100644 --- a/mysql-test/main/mysql_upgrade.result +++ b/mysql-test/main/mysql_upgrade.result @@ -842,4 +842,76 @@ count(*) 5 drop table mysql.global_priv; rename table mysql.global_priv_bak to mysql.global_priv; +# +# Ensure that mysql_upgrade accounted for 0 password_last_changed +# and doesn't PASSWORD EXPIRE a user account because < 10.4 zeroed it. +# +# switching from mysql.global_priv to mysql.user +drop view mysql.user_bak; +drop table mysql.user; +truncate table mysql.tables_priv; +FLUSH TABLES mysql.user; +FLUSH PRIVILEGES; +CREATE USER mariadb_102; +UPDATE mysql.user SET password_last_changed=0 WHERE user='mariadb_102'; +FLUSH PRIVILEGES; +Phase 1/7: Checking and upgrading mysql database +Processing databases +mysql +mysql.column_stats OK +mysql.columns_priv OK +mysql.db OK +mysql.event OK +mysql.func OK +mysql.global_priv_bak OK +mysql.gtid_slave_pos OK +mysql.help_category OK +mysql.help_keyword OK +mysql.help_relation OK +mysql.help_topic OK +mysql.index_stats OK +mysql.innodb_index_stats OK +mysql.innodb_table_stats OK +mysql.plugin OK +mysql.proc OK +mysql.procs_priv OK +mysql.proxies_priv OK +mysql.roles_mapping OK +mysql.servers OK +mysql.table_stats OK +mysql.tables_priv OK +mysql.time_zone OK +mysql.time_zone_leap_second OK +mysql.time_zone_name OK +mysql.time_zone_transition OK +mysql.time_zone_transition_type OK +mysql.transaction_registry OK +mysql.user OK +Upgrading from a version before MariaDB-10.1 +Phase 2/7: Installing used storage engines +Checking for tables with unknown storage engine +Phase 3/7: Fixing views +Phase 4/7: Running 'mysql_fix_privilege_tables' +Phase 5/7: Fixing table and 
database names +Phase 6/7: Checking and upgrading tables +Processing databases +information_schema +mtr +mtr.global_suppressions OK +mtr.test_suppressions OK +performance_schema +test +Phase 7/7: Running 'FLUSH PRIVILEGES' +OK +SHOW CREATE USER mariadb_102; +CREATE USER for mariadb_102@% +CREATE USER `mariadb_102`@`%` +connect con1,localhost,mariadb_102; +select current_user(); +current_user() +mariadb_102@% +disconnect con1; +connection default; +drop table mysql.global_priv; +rename table mysql.global_priv_bak to mysql.global_priv; # End of 10.4 tests diff --git a/mysql-test/main/mysql_upgrade.test b/mysql-test/main/mysql_upgrade.test index 7d673d2bb4d..afc06dfc5ee 100644 --- a/mysql-test/main/mysql_upgrade.test +++ b/mysql-test/main/mysql_upgrade.test @@ -356,4 +356,42 @@ select count(*) from mysql.global_priv; drop table mysql.global_priv; rename table mysql.global_priv_bak to mysql.global_priv; +# +# MDEV-26363 Former mysql-5.7 tables have password_last_changed to 0 +# on MariaDB updates, resulting in mysql_upgrade leaving them +# with password expired. +# + +--echo # +--echo # Ensure that mysql_upgrade accounted for 0 password_last_changed +--echo # and doesn't PASSWORD EXPIRE a user account because < 10.4 zeroed it. +--echo # + +--source include/switch_to_mysql_user.inc +drop view mysql.user_bak; +drop table mysql.user; +truncate table mysql.tables_priv; +--copy_file std_data/mysql57user.frm $MYSQLD_DATADIR/mysql/user.frm +--copy_file std_data/mysql57user.MYI $MYSQLD_DATADIR/mysql/user.MYI +--copy_file std_data/mysql57user.MYD $MYSQLD_DATADIR/mysql/user.MYD +FLUSH TABLES mysql.user; +FLUSH PRIVILEGES; + +CREATE USER mariadb_102; +# manually set the value like <10.4 previously did for testing mysql_upgrade. 
+UPDATE mysql.user SET password_last_changed=0 WHERE user='mariadb_102'; +FLUSH PRIVILEGES; + +--exec $MYSQL_UPGRADE --force 2>&1 +# Should not have "PASSWORD EXPIRED" +SHOW CREATE USER mariadb_102; +connect con1,localhost,mariadb_102; +select current_user(); +disconnect con1; +connection default; + +drop table mysql.global_priv; +rename table mysql.global_priv_bak to mysql.global_priv; +--remove_file $MYSQLD_DATADIR/mysql_upgrade_info + --echo # End of 10.4 tests diff --git a/scripts/mysql_system_tables_fix.sql b/scripts/mysql_system_tables_fix.sql index 9dd775aaf30..ec3d532ec11 100644 --- a/scripts/mysql_system_tables_fix.sql +++ b/scripts/mysql_system_tables_fix.sql @@ -815,7 +815,7 @@ IF 'BASE TABLE' = (select table_type from information_schema.tables where table_ 'max_statement_time', max_statement_time, 'plugin', if(plugin>'',plugin,if(length(password)=16,'mysql_old_password','mysql_native_password')), 'authentication_string', if(plugin>'' and authentication_string>'',authentication_string,password), - 'password_last_changed', if(password_expired='Y', 0, UNIX_TIMESTAMP(password_last_changed)), + 'password_last_changed', if(password_expired='Y', 0, if(password_last_changed, UNIX_TIMESTAMP(password_last_changed), UNIX_TIMESTAMP())), 'password_lifetime', ifnull(password_lifetime, -1), 'account_locked', 'Y'=account_locked, 'default_role', default_role, -- cgit v1.2.1 From 78dec1f199458cff30062296b2cf0b42a71d2466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 20 Oct 2021 10:04:31 +0300 Subject: MDEV-26554: Stabilize the test --- mysql-test/suite/innodb/r/foreign_key.result | 1 + mysql-test/suite/innodb/t/foreign_key.test | 2 ++ 2 files changed, 3 insertions(+) diff --git a/mysql-test/suite/innodb/r/foreign_key.result b/mysql-test/suite/innodb/r/foreign_key.result index 822ad4b6edd..509f13715b8 100644 --- a/mysql-test/suite/innodb/r/foreign_key.result +++ b/mysql-test/suite/innodb/r/foreign_key.result @@ -919,6 +919,7 @@ ERROR HY000: 
Lock wait timeout exceeded; try restarting transaction ALTER TABLE parent ADD COLUMN b INT, ALGORITHM=INSTANT; ERROR HY000: Lock wait timeout exceeded; try restarting transaction disconnect con1; +SET innodb_lock_wait_timeout=DEFAULT; TRUNCATE TABLE parent; ALTER TABLE parent FORCE, ALGORITHM=COPY; ALTER TABLE parent FORCE, ALGORITHM=INPLACE; diff --git a/mysql-test/suite/innodb/t/foreign_key.test b/mysql-test/suite/innodb/t/foreign_key.test index ab3e4748fcb..363aa66237b 100644 --- a/mysql-test/suite/innodb/t/foreign_key.test +++ b/mysql-test/suite/innodb/t/foreign_key.test @@ -925,6 +925,8 @@ ALTER TABLE parent FORCE, ALGORITHM=INPLACE; --error ER_LOCK_WAIT_TIMEOUT ALTER TABLE parent ADD COLUMN b INT, ALGORITHM=INSTANT; disconnect con1; +# Restore the timeout to avoid occasional races with purge. +SET innodb_lock_wait_timeout=DEFAULT; TRUNCATE TABLE parent; ALTER TABLE parent FORCE, ALGORITHM=COPY; ALTER TABLE parent FORCE, ALGORITHM=INPLACE; -- cgit v1.2.1 From a8401ad5afd766d2febc63a8bcfedda2978ff44c Mon Sep 17 00:00:00 2001 From: Nikita Malyavin Date: Wed, 21 Jul 2021 15:42:21 +0300 Subject: restore default.test, default.result after MDEV-23597 c47e4aab62c65 commit --- mysql-test/r/default.result | 4 ++-- mysql-test/t/default.test | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mysql-test/r/default.result b/mysql-test/r/default.result index 1a35a29aa7f..c7179e29723 100644 --- a/mysql-test/r/default.result +++ b/mysql-test/r/default.result @@ -3089,8 +3089,8 @@ DROP TABLE t1; # # Collations # -CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET latin1 DEFAULT CONCAT('�')) CHARACTER SET koi8r COLLATE koi8r_bin; -ERROR 22007: Encountered illegal value '�' when converting to koi8r +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET latin1 DEFAULT CONCAT('ö')) CHARACTER SET koi8r COLLATE koi8r_bin; +ERROR 22007: Encountered illegal value 'ö' when converting to koi8r CREATE OR REPLACE TABLE t1 (a char(2) default concat('A') COLLATE utf8mb4_unicode_ci); SHOW 
CREATE TABLE t1; Table Create Table diff --git a/mysql-test/t/default.test b/mysql-test/t/default.test index 472b1fa4796..a4fe74aefb5 100644 --- a/mysql-test/t/default.test +++ b/mysql-test/t/default.test @@ -1855,7 +1855,7 @@ DROP TABLE t1; --echo # --error ER_BAD_DATA -CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET latin1 DEFAULT CONCAT('�')) CHARACTER SET koi8r COLLATE koi8r_bin; +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET latin1 DEFAULT CONCAT('ö')) CHARACTER SET koi8r COLLATE koi8r_bin; CREATE OR REPLACE TABLE t1 (a char(2) default concat('A') COLLATE utf8mb4_unicode_ci); SHOW CREATE TABLE t1; DROP TABLE t1; -- cgit v1.2.1 From 1811fd51fbae9e6c1f06ce93faef2bf1279cd3b6 Mon Sep 17 00:00:00 2001 From: Nikita Malyavin Date: Mon, 2 Aug 2021 14:24:54 +0300 Subject: MDEV-26262 frm is corrupted after ER_EXPRESSION_REFERS_TO_UNINIT_FIELD This is a duplicate of MDEV-18278 89936f11e965, but I will add an additional assertion Description: The frm corruption should not be reported during CREATE TABLE. Normally it doesn't, and the data to fill TABLE is taken by open_table_from_share call. However, the vcol data is stored as SQL string in table->s->vcol_defs.str and is anyway parsed on each table open. It is impossible [or hard] to avoid, because it's hard to clone the expression tree in general (it's easier to parse). Normally parse_vcol_defs should only fail on semantic errors. If so, error_reported is set to true. Any other failure is not expected during table creation. There is either unhandled/unacknowledged error, or something went really wrong, like memory reject. This all should be asserted anyway. Solution: * Set *error_reported=true for the forward references check; * Assert for every unacknowledged error during table creation. 
--- mysql-test/suite/gcol/inc/gcol_column_def_options.inc | 11 +++++++++++ mysql-test/suite/gcol/r/gcol_column_def_options_innodb.result | 6 ++++++ mysql-test/suite/gcol/r/gcol_column_def_options_myisam.result | 6 ++++++ sql/table.cc | 3 +++ 4 files changed, 26 insertions(+) diff --git a/mysql-test/suite/gcol/inc/gcol_column_def_options.inc b/mysql-test/suite/gcol/inc/gcol_column_def_options.inc index f4350d25ae9..17e926758ee 100644 --- a/mysql-test/suite/gcol/inc/gcol_column_def_options.inc +++ b/mysql-test/suite/gcol/inc/gcol_column_def_options.inc @@ -585,3 +585,14 @@ ALTER TABLE t1 ALTER COLUMN a SET DEFAULT 7, --disable_info DROP TABLE t1; --enable_warnings + +--echo # +--echo # MDEV-26262 frm is corrupted after ER_EXPRESSION_REFERS_TO_UNINIT_FIELD +--echo # + +--error ER_EXPRESSION_REFERS_TO_UNINIT_FIELD +CREATE TABLE MDEV_26262 (a INT,b INT AS (b) VIRTUAL); + +--let SEARCH_FILE=$MYSQLTEST_VARDIR/log/mysqld.1.err +--let SEARCH_PATTERN=Incorrect information in file: './test/MDEV_26262.frm' +--source include/search_pattern_in_file.inc diff --git a/mysql-test/suite/gcol/r/gcol_column_def_options_innodb.result b/mysql-test/suite/gcol/r/gcol_column_def_options_innodb.result index 48c4613f6ca..2c0adc3d493 100644 --- a/mysql-test/suite/gcol/r/gcol_column_def_options_innodb.result +++ b/mysql-test/suite/gcol/r/gcol_column_def_options_innodb.result @@ -699,6 +699,12 @@ ADD COLUMN c INT AS (1 + DEFAULT(a)) VIRTUAL; affected rows: 1 info: Records: 1 Duplicates: 0 Warnings: 0 DROP TABLE t1; +# +# MDEV-26262 frm is corrupted after ER_EXPRESSION_REFERS_TO_UNINIT_FIELD +# +CREATE TABLE MDEV_26262 (a INT,b INT AS (b) VIRTUAL); +ERROR 01000: Expression for field `b` is referring to uninitialized field `b` +NOT FOUND /Incorrect information in file: './test/MDEV_26262.frm'/ in mysqld.1.err DROP VIEW IF EXISTS v1,v2; DROP TABLE IF EXISTS t1,t2,t3; DROP PROCEDURE IF EXISTS p1; diff --git a/mysql-test/suite/gcol/r/gcol_column_def_options_myisam.result 
b/mysql-test/suite/gcol/r/gcol_column_def_options_myisam.result index b7ae6488c95..126a6c5e271 100644 --- a/mysql-test/suite/gcol/r/gcol_column_def_options_myisam.result +++ b/mysql-test/suite/gcol/r/gcol_column_def_options_myisam.result @@ -699,6 +699,12 @@ ADD COLUMN c INT AS (1 + DEFAULT(a)) VIRTUAL; affected rows: 1 info: Records: 1 Duplicates: 0 Warnings: 0 DROP TABLE t1; +# +# MDEV-26262 frm is corrupted after ER_EXPRESSION_REFERS_TO_UNINIT_FIELD +# +CREATE TABLE MDEV_26262 (a INT,b INT AS (b) VIRTUAL); +ERROR 01000: Expression for field `b` is referring to uninitialized field `b` +NOT FOUND /Incorrect information in file: './test/MDEV_26262.frm'/ in mysqld.1.err DROP VIEW IF EXISTS v1,v2; DROP TABLE IF EXISTS t1,t2,t3; DROP PROCEDURE IF EXISTS p1; diff --git a/sql/table.cc b/sql/table.cc index d4f8170e0af..349d04a60ed 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -3282,6 +3282,9 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, &error_reported, mode)) { error= OPEN_FRM_CORRUPTED; + // parse_vcol_defs may fail by semantic reasons, which is ok, but the + // real corruption should never be reported during table creation + DBUG_ASSERT(!is_create_table || !error_reported); goto err; } -- cgit v1.2.1 From caebe151c18a3415466cca88dbdacb8ec9597a29 Mon Sep 17 00:00:00 2001 From: Nikita Malyavin Date: Thu, 7 Oct 2021 17:02:26 +0300 Subject: MDEV-22445 Crash on HANDLER READ NEXT after XA PREPARE The assertion is absolutely correct since no data access is possible after XA PREPARE. The check is added in mysql_ha_read. 
--- mysql-test/r/xa.result | 15 +++++++++++++++ mysql-test/t/xa.test | 17 +++++++++++++++++ sql/sql_handler.cc | 3 +++ 3 files changed, 35 insertions(+) diff --git a/mysql-test/r/xa.result b/mysql-test/r/xa.result index 4e4d7bc6048..f6bae2adac2 100644 --- a/mysql-test/r/xa.result +++ b/mysql-test/r/xa.result @@ -341,5 +341,20 @@ Message XAER_RMFAIL: The command cannot be executed when global transaction is i xa commit 'foo'; drop table t1; # +# MDEV-22445 Crash on HANDLER READ NEXT after XA PREPARE +# +CREATE TABLE t (a INT KEY) ENGINE=InnoDB; +HANDLER t OPEN AS t; +XA START '0'; +SELECT * FROM t; +a +XA END '0'; +XA PREPARE '0'; +HANDLER t READ NEXT; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the PREPARED state +# Cleanup +XA COMMIT '0'; +DROP TABLE t; +# # End of 10.2 tests # diff --git a/mysql-test/t/xa.test b/mysql-test/t/xa.test index b13d12d2019..176b8c3189b 100644 --- a/mysql-test/t/xa.test +++ b/mysql-test/t/xa.test @@ -485,6 +485,23 @@ show status like 'foo'; xa commit 'foo'; drop table t1; +--echo # +--echo # MDEV-22445 Crash on HANDLER READ NEXT after XA PREPARE +--echo # + +CREATE TABLE t (a INT KEY) ENGINE=InnoDB; +HANDLER t OPEN AS t; +XA START '0'; +SELECT * FROM t; +XA END '0'; +XA PREPARE '0'; +--error ER_XAER_RMFAIL +HANDLER t READ NEXT; + +--echo # Cleanup +XA COMMIT '0'; +DROP TABLE t; + --echo # --echo # End of 10.2 tests --echo # diff --git a/sql/sql_handler.cc b/sql/sql_handler.cc index 7c2122b6a0a..ec3756eceba 100644 --- a/sql/sql_handler.cc +++ b/sql/sql_handler.cc @@ -697,6 +697,9 @@ retry: if (!(handler= mysql_ha_find_handler(thd, tables->alias))) goto err0; + if (thd->transaction.xid_state.check_has_uncommitted_xa()) + goto err0; + table= handler->table; tables->table= table; // This is used by fix_fields table->pos_in_table_list= tables; -- cgit v1.2.1 From b06e8167a7b8f9ae9cf2f7c19054ecc9c0c78398 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 20 Oct 2021 
15:54:25 +0300 Subject: MDEV-22627 Failing assertion: dict_tf2_is_valid(flags, flags2) create_table_info_t::innobase_table_flags(): Refuse to create a PAGE_COMPRESSED table with PAGE_COMPRESSION_LEVEL=0 if also innodb_compression_level=0. The parameter value innodb_compression_level=0 was only somewhat meaningful for testing or debugging ROW_FORMAT=COMPRESSED tables. For the page_compressed format, it never made any sense, and the check in dict_tf_is_valid_not_redundant() that was added in 72378a25830184f91005be7e80cfb28381c79f23 (MDEV-12873) would cause the server to crash. --- .../r/default_row_format_create,redundant.rdiff | 9 ++++++++ .../innodb/r/default_row_format_create.result | 15 ++++++++++++++ .../suite/innodb/t/default_row_format_create.test | 16 +++++++++++++++ storage/innobase/handler/ha_innodb.cc | 24 ++++++++++++++-------- 4 files changed, 55 insertions(+), 9 deletions(-) diff --git a/mysql-test/suite/innodb/r/default_row_format_create,redundant.rdiff b/mysql-test/suite/innodb/r/default_row_format_create,redundant.rdiff index 29bc560907e..0e94bfd1c0f 100644 --- a/mysql-test/suite/innodb/r/default_row_format_create,redundant.rdiff +++ b/mysql-test/suite/innodb/r/default_row_format_create,redundant.rdiff @@ -20,3 +20,12 @@ SET @save_format = @@GLOBAL.innodb_default_row_format; SET GLOBAL innodb_default_row_format = redundant; CREATE TABLE t1 (c1 INT) ENGINE=InnoDB; +@@ -49,7 +49,7 @@ + ERROR HY000: Can't create table `test`.`t` (errno: 140 "Wrong create options") + SHOW WARNINGS; + Level Code Message +-Warning 1478 InnoDB: PAGE_COMPRESSED requires PAGE_COMPRESSION_LEVEL or innodb_compression_level > 0 ++Warning 140 InnoDB: PAGE_COMPRESSED table can't have ROW_TYPE=REDUNDANT + Error 1005 Can't create table `test`.`t` (errno: 140 "Wrong create options") + Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB + CREATE TABLE t(c INT) ENGINE=InnoDB page_compressed=1 page_compression_level=1; diff --git 
a/mysql-test/suite/innodb/r/default_row_format_create.result b/mysql-test/suite/innodb/r/default_row_format_create.result index d51a63dbd01..d85fed35d92 100644 --- a/mysql-test/suite/innodb/r/default_row_format_create.result +++ b/mysql-test/suite/innodb/r/default_row_format_create.result @@ -42,3 +42,18 @@ SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment t1 InnoDB # Redundant # # # # # # NULL # NULL NULL latin1_swedish_ci NULL DROP TABLE t1; +SET @save_level=@@GLOBAL.innodb_compression_level; +SET GLOBAL innodb_compression_level=0; +CREATE TABLE t(c INT) ENGINE=InnoDB page_compressed=1; +ERROR HY000: Can't create table `test`.`t` (errno: 140 "Wrong create options") +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: PAGE_COMPRESSED requires PAGE_COMPRESSION_LEVEL or innodb_compression_level > 0 +Error 1005 Can't create table `test`.`t` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +CREATE TABLE t(c INT) ENGINE=InnoDB page_compressed=1 page_compression_level=1; +DROP TABLE IF EXISTS t; +SET GLOBAL innodb_compression_level=1; +CREATE TABLE t(c INT) ENGINE=InnoDB page_compressed=1; +DROP TABLE IF EXISTS t; +SET GLOBAL innodb_compression_level=@save_level; diff --git a/mysql-test/suite/innodb/t/default_row_format_create.test b/mysql-test/suite/innodb/t/default_row_format_create.test index 03a7ebd3752..a1c6ff0d588 100644 --- a/mysql-test/suite/innodb/t/default_row_format_create.test +++ b/mysql-test/suite/innodb/t/default_row_format_create.test @@ -44,3 +44,19 @@ TRUNCATE TABLE t1; --replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # SHOW TABLE STATUS LIKE 't1'; DROP TABLE t1; + +SET @save_level=@@GLOBAL.innodb_compression_level; +SET GLOBAL innodb_compression_level=0; +--error ER_CANT_CREATE_TABLE +CREATE TABLE t(c INT) 
ENGINE=InnoDB page_compressed=1; +SHOW WARNINGS; +--disable_warnings +--error 0,ER_CANT_CREATE_TABLE +CREATE TABLE t(c INT) ENGINE=InnoDB page_compressed=1 page_compression_level=1; +DROP TABLE IF EXISTS t; +SET GLOBAL innodb_compression_level=1; +--error 0,ER_CANT_CREATE_TABLE +CREATE TABLE t(c INT) ENGINE=InnoDB page_compressed=1; +DROP TABLE IF EXISTS t; +--enable_warnings +SET GLOBAL innodb_compression_level=@save_level; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index fefd0bdde00..592492cac60 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -11991,10 +11991,6 @@ bool create_table_info_t::innobase_table_flags() modified by another thread while the table is being created. */ const ulint file_format_allowed = srv_file_format; - /* Cache the value of innobase_compression_level, in case it is - modified by another thread while the table is being created. */ - const ulint default_compression_level = page_zip_level; - ha_table_option_struct *options= m_form->s->option_struct; m_flags = 0; @@ -12199,13 +12195,23 @@ index_bad: m_flags2 |= DICT_TF2_USE_FILE_PER_TABLE; } + ulint level = options->page_compression_level; + if (!level) { + level = page_zip_level; + if (!level && options->page_compressed) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + " PAGE_COMPRESSION_LEVEL or" + " innodb_compression_level > 0"); + DBUG_RETURN(false); + } + } + /* Set the table flags */ dict_tf_set(&m_flags, innodb_row_format, zip_ssize, - m_use_data_dir, - options->page_compressed, - options->page_compression_level == 0 ? 
- default_compression_level : static_cast<ulint>(options->page_compression_level), - 0); + m_use_data_dir, options->page_compressed, level, 0); /* Set the flags2 when create table or alter tables */ m_flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; -- cgit v1.2.1 From 69b3de830d531e5cbc57c1a43c7bd55b31f7197e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 20 Oct 2021 15:55:27 +0300 Subject: Update libmariadb --- libmariadb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmariadb b/libmariadb index b99172386a7..735a7299dba 160000 --- a/libmariadb +++ b/libmariadb @@ -1 +1 @@ -Subproject commit b99172386a740ef0c8136e9a6cd7d9ad9a77b31f +Subproject commit 735a7299dbae19cc2b82b9697becaf90e9b43047 -- cgit v1.2.1 From d10c42b42541deed899dd1d1e04b69475339196c Mon Sep 17 00:00:00 2001 From: Nikita Malyavin Date: Thu, 7 Oct 2021 15:04:56 +0300 Subject: MDEV-20131 Assertion `!pk->has_virtual()' failed Assertion `!pk->has_virtual()' failed in dict_index_build_internal_clust while creating PRIMARY key longer than possible to store in the page. This happened because the key was wrongly deduced as Long UNIQUE supported, however PRIMARY KEY cannot be of that type. The main reason is that only 8 bytes are used to store the hash, see HA_HASH_FIELD_LENGTH. This is also why HA_NOSAME flag is removed (and caused the assertion in turn) in open_table_from_share: if (key_info->algorithm == HA_KEY_ALG_LONG_HASH) { key_part_end++; key_info->flags&= ~HA_NOSAME; } To make it unique, the additional check is done by check_duplicate_long_entries call from ha_write_row, and similar one from ha_update_row. PRIMARY key is already forbidden, which is checked by the first test in main.long_unique, however is_hash_field_needed was wrongly deduced to true in mysql_prepare_create_table in this particular case.
FIX: * Improve the check for Key::PRIMARY type * Simplify is_hash_field_needed deduction for a more neat reading --- mysql-test/main/long_unique_innodb.opt | 1 + mysql-test/main/long_unique_innodb.result | 3 +++ mysql-test/main/long_unique_innodb.test | 5 +++++ sql/sql_table.cc | 18 ++++++++++-------- 4 files changed, 19 insertions(+), 8 deletions(-) create mode 100644 mysql-test/main/long_unique_innodb.opt diff --git a/mysql-test/main/long_unique_innodb.opt b/mysql-test/main/long_unique_innodb.opt new file mode 100644 index 00000000000..058a129cdc2 --- /dev/null +++ b/mysql-test/main/long_unique_innodb.opt @@ -0,0 +1 @@ +--innodb-page-size=8K diff --git a/mysql-test/main/long_unique_innodb.result b/mysql-test/main/long_unique_innodb.result index 135bb0808cc..96e5fac7310 100644 --- a/mysql-test/main/long_unique_innodb.result +++ b/mysql-test/main/long_unique_innodb.result @@ -131,3 +131,6 @@ connection default; drop table t1; disconnect con1; disconnect con2; +# MDEV-20131 Assertion `!pk->has_virtual()' failed +create table t1 (a text, primary key(a(1871))) engine=innodb; +ERROR 42000: Specified key was too long; max key length is 1536 bytes diff --git a/mysql-test/main/long_unique_innodb.test b/mysql-test/main/long_unique_innodb.test index aac68cd2271..dd2d9f94de3 100644 --- a/mysql-test/main/long_unique_innodb.test +++ b/mysql-test/main/long_unique_innodb.test @@ -138,3 +138,8 @@ connection default; drop table t1; disconnect con1; disconnect con2; + +--echo # MDEV-20131 Assertion `!pk->has_virtual()' failed + +--error ER_TOO_LONG_KEY +create table t1 (a text, primary key(a(1871))) engine=innodb; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 12e09e7bcb2..f64abe72bac 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -3968,7 +3968,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info, if (!column->length) { if (key->type == Key::UNIQUE) - is_hash_field_needed= true; + is_hash_field_needed= true; // for case "a BLOB UNIQUE" else if 
(key->type == Key::MULTIPLE) column->length= file->max_key_length() + 1; else @@ -4064,8 +4064,6 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info, /* Align key length to multibyte char boundary */ key_part_length-= key_part_length % sql_field->charset->mbmaxlen; } - else - is_hash_field_needed= true; } } // Catch invalid use of partial keys @@ -4111,11 +4109,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info, } else { - if (key->type == Key::UNIQUE) - { - is_hash_field_needed= true; - } - else + if (key->type != Key::UNIQUE) { key_part_length= MY_MIN(max_key_length, file->max_key_part_length()); my_error(ER_TOO_LONG_KEY, MYF(0), key_part_length); @@ -4123,6 +4117,14 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info, } } } + + if (key->type == Key::UNIQUE + && key_part_length > MY_MIN(max_key_length, + file->max_key_part_length())) + { + is_hash_field_needed= true; + } + /* We can not store key_part_length more then 2^16 - 1 in frm */ if (is_hash_field_needed && column->length > UINT_MAX16) { -- cgit v1.2.1 From 05c3dced8611542df7eae686ab9d28147f0dbe05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 20 Oct 2021 22:16:23 +0300 Subject: MDEV-22627 fixup: Cover also ALTER TABLE...ALGORITHM=INPLACE --- mysql-test/suite/innodb/r/default_row_format_create.result | 8 ++++++++ mysql-test/suite/innodb/t/default_row_format_create.test | 9 +++++++++ storage/innobase/handler/handler0alter.cc | 2 ++ 3 files changed, 19 insertions(+) diff --git a/mysql-test/suite/innodb/r/default_row_format_create.result b/mysql-test/suite/innodb/r/default_row_format_create.result index d85fed35d92..0dfcbd80e88 100644 --- a/mysql-test/suite/innodb/r/default_row_format_create.result +++ b/mysql-test/suite/innodb/r/default_row_format_create.result @@ -56,4 +56,12 @@ DROP TABLE IF EXISTS t; SET GLOBAL innodb_compression_level=1; CREATE TABLE t(c INT) ENGINE=InnoDB page_compressed=1; DROP TABLE IF EXISTS t; +SET 
GLOBAL innodb_compression_level=1; +CREATE TABLE t(a INT)ENGINE=InnoDB ROW_FORMAT=DYNAMIC page_compressed=1; +SET GLOBAL innodb_compression_level=0; +ALTER TABLE t FORCE, ROW_FORMAT=DEFAULT, ALGORITHM=INPLACE; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'PAGE_COMPRESSED' +ALTER TABLE t FORCE, ROW_FORMAT=DEFAULT, ALGORITHM=COPY; +ERROR HY000: Can't create table `test`.`t` (errno: 140 "Wrong create options") +DROP TABLE t; SET GLOBAL innodb_compression_level=@save_level; diff --git a/mysql-test/suite/innodb/t/default_row_format_create.test b/mysql-test/suite/innodb/t/default_row_format_create.test index a1c6ff0d588..534a7312620 100644 --- a/mysql-test/suite/innodb/t/default_row_format_create.test +++ b/mysql-test/suite/innodb/t/default_row_format_create.test @@ -59,4 +59,13 @@ SET GLOBAL innodb_compression_level=1; CREATE TABLE t(c INT) ENGINE=InnoDB page_compressed=1; DROP TABLE IF EXISTS t; --enable_warnings + +SET GLOBAL innodb_compression_level=1; +CREATE TABLE t(a INT)ENGINE=InnoDB ROW_FORMAT=DYNAMIC page_compressed=1; +SET GLOBAL innodb_compression_level=0; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t FORCE, ROW_FORMAT=DEFAULT, ALGORITHM=INPLACE; +--error ER_CANT_CREATE_TABLE +ALTER TABLE t FORCE, ROW_FORMAT=DEFAULT, ALGORITHM=COPY; +DROP TABLE t; SET GLOBAL innodb_compression_level=@save_level; diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 2dd88e50c07..1bbe99ee34d 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -5584,6 +5584,8 @@ check_if_ok_to_rename: } if (!info.innobase_table_flags()) { + my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), + table_type(), "PAGE_COMPRESSED"); goto err_exit_no_heap; } -- cgit v1.2.1 From 2e844a08f75ea311a4c6f72b0449b27318d3ec02 Mon Sep 17 00:00:00 2001 From: Sergei Krivonos Date: Thu, 21 Oct 2021 11:54:01 +0300 Subject: MDEV-19129: Xcode compatibility update: 
mysql-test-run.pl --- mysql-test/mysql-test-run.pl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index 29aaebd44b0..4bc212aedbe 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -420,6 +420,7 @@ sub check_wsrep_support() { my $my_print_defaults_exe= mtr_exe_maybe_exists( "$bindir/extra/my_print_defaults", + "$bindir/extra/Debug/my_print_defaults", "$path_client_bindir/my_print_defaults"); my $epath= ""; if ($my_print_defaults_exe ne "") { @@ -1415,6 +1416,7 @@ sub command_line_setup { { $path_client_bindir= mtr_path_exists("$bindir/client_release", "$bindir/client_debug", + "$bindir/client/debug", "$bindir/client$opt_vs_config", "$bindir/client", "$bindir/bin"); -- cgit v1.2.1 From d3426c4c0c112f40f81fb4e3958a68b1e06233fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 21 Oct 2021 12:26:54 +0300 Subject: MDEV-26262 fixup: Remove a bogus assertion In commit 1811fd51fbae9e6c1f06ce93faef2bf1279cd3b6 the assertion should have said error_reported instead of !error_reported. But, that revised assertion would still fail in main.defaults where ER_BAD_DATA is reported during CREATE TABLE. 
--- sql/table.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/sql/table.cc b/sql/table.cc index 349d04a60ed..d4f8170e0af 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -3282,9 +3282,6 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, &error_reported, mode)) { error= OPEN_FRM_CORRUPTED; - // parse_vcol_defs may fail by semantic reasons, which is ok, but the - // real corruption should never be reported during table creation - DBUG_ASSERT(!is_create_table || !error_reported); goto err; } -- cgit v1.2.1 From 6b4fad94029bb73fbb3f9d05f2dfed09e83ec31c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 21 Oct 2021 12:27:38 +0300 Subject: MDEV-22627 fixup: Add a type cast for 32-bit platforms --- storage/innobase/handler/ha_innodb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 592492cac60..10e5cbe216f 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -12195,7 +12195,7 @@ index_bad: m_flags2 |= DICT_TF2_USE_FILE_PER_TABLE; } - ulint level = options->page_compression_level; + ulint level = ulint(options->page_compression_level); if (!level) { level = page_zip_level; if (!level && options->page_compressed) { -- cgit v1.2.1 From 8ce8c269f461b7a578cf0bb5cce18f1d83ebeb9b Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Wed, 6 Oct 2021 18:50:56 +0530 Subject: MDEV-19522 InnoDB commit fails when FTS_DOC_ID value is greater than 4294967295 InnoDB commit fails when consecutive FTS_DOC_ID value is greater than 4294967295. Fix is that InnoDB should remove the delta FTS_DOC_ID value limitations and fts should encode 8 byte value, remove FTS_DOC_ID_MAX_STEP variable. 
Replaced the fts0vlc.ic file with fts0vlc.h fts_encode_int(): Should be able to encode 10 bytes value fts_get_encoded_len(): Should get the length of the value which has 10 bytes fts_decode_vlc(): Add debug assertion to verify the maximum length allowed is 10. mach_read_uint64_little_endian(): Reads 64 bit stored in little endian format Added a unit test case which check for minimum and maximum value to do the fts encoding --- mysql-test/suite/innodb_fts/r/basic.result | 2 - .../suite/innodb_fts/r/innodb_fts_misc_1.result | 21 +++ mysql-test/suite/innodb_fts/t/basic.test | 2 - .../suite/innodb_fts/t/innodb_fts_misc_1.test | 18 +++ storage/innobase/CMakeLists.txt | 4 + storage/innobase/fts/fts0fts.cc | 10 +- storage/innobase/fts/fts0opt.cc | 10 +- storage/innobase/fts/fts0que.cc | 7 +- storage/innobase/handler/ha_innodb.cc | 13 +- storage/innobase/handler/i_s.cc | 18 +-- storage/innobase/include/fts0fts.h | 4 - storage/innobase/include/fts0types.h | 33 ----- storage/innobase/include/fts0vlc.h | 124 ++++++++++++++++++ storage/innobase/include/fts0vlc.ic | 142 --------------------- storage/innobase/include/mach0data.h | 22 ++++ storage/innobase/row/row0mysql.cc | 17 --- storage/innobase/unittest/CMakeLists.txt | 22 ++++ storage/innobase/unittest/innodb_fts-t.cc | 52 ++++++++ 18 files changed, 286 insertions(+), 235 deletions(-) create mode 100644 storage/innobase/include/fts0vlc.h delete mode 100644 storage/innobase/include/fts0vlc.ic create mode 100644 storage/innobase/unittest/CMakeLists.txt create mode 100644 storage/innobase/unittest/innodb_fts-t.cc diff --git a/mysql-test/suite/innodb_fts/r/basic.result b/mysql-test/suite/innodb_fts/r/basic.result index b3fd94509c3..a98de60674a 100644 --- a/mysql-test/suite/innodb_fts/r/basic.result +++ b/mysql-test/suite/innodb_fts/r/basic.result @@ -313,9 +313,7 @@ FTS_DOC_ID 65536 131071 drop table t1; -call mtr.add_suppression("\\[ERROR\\] InnoDB: Doc ID 20030101000000 is too big. 
Its difference with largest used Doc ID 0 cannot exceed or equal to 65535"); CREATE TABLE t1 (FTS_DOC_ID BIGINT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, title VARCHAR(200), FULLTEXT(title)) ENGINE=InnoDB; INSERT INTO t1 VALUES (NULL, NULL), (20030101000000, 20030102000000); -ERROR HY000: Invalid InnoDB FTS Doc ID DROP TABLE t1; diff --git a/mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result b/mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result index 8ec0157728c..42730d7916a 100644 --- a/mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result +++ b/mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result @@ -972,3 +972,24 @@ SELECT * FROM information_schema.innodb_ft_deleted; DOC_ID DROP TABLE t1; SET GLOBAL innodb_ft_aux_table=DEFAULT; +# +# MDEV-19522 InnoDB commit fails when FTS_DOC_ID value +# is greater than 4294967295 +# +CREATE TABLE t1( +FTS_DOC_ID BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, +f1 TEXT, f2 TEXT, PRIMARY KEY (FTS_DOC_ID), +FULLTEXT KEY (f1)) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1,'txt','bbb'); +UPDATE t1 SET FTS_DOC_ID = 4294967298; +SELECT * FROM t1 WHERE match(f1) against("txt"); +FTS_DOC_ID f1 f2 +4294967298 txt bbb +SET @@session.insert_id = 100000000000; +INSERT INTO t1(f1, f2) VALUES ('aaa', 'bbb'); +CREATE FULLTEXT INDEX i ON t1 (f2); +SELECT * FROM t1 WHERE match(f2) against("bbb"); +FTS_DOC_ID f1 f2 +4294967298 txt bbb +100000000000 aaa bbb +DROP TABLE t1; diff --git a/mysql-test/suite/innodb_fts/t/basic.test b/mysql-test/suite/innodb_fts/t/basic.test index 7a5c83ffb06..53ad978a5b1 100644 --- a/mysql-test/suite/innodb_fts/t/basic.test +++ b/mysql-test/suite/innodb_fts/t/basic.test @@ -277,9 +277,7 @@ insert into t1(f1, f2) values(3, "This is the third record"); select FTS_DOC_ID from t1; drop table t1; -call mtr.add_suppression("\\[ERROR\\] InnoDB: Doc ID 20030101000000 is too big. 
Its difference with largest used Doc ID 0 cannot exceed or equal to 65535"); CREATE TABLE t1 (FTS_DOC_ID BIGINT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, title VARCHAR(200), FULLTEXT(title)) ENGINE=InnoDB; ---error 182 INSERT INTO t1 VALUES (NULL, NULL), (20030101000000, 20030102000000); DROP TABLE t1; diff --git a/mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test b/mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test index adc10886d66..b0bf2c669ad 100644 --- a/mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test +++ b/mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test @@ -942,3 +942,21 @@ SET GLOBAL innodb_ft_aux_table='test/t1'; SELECT * FROM information_schema.innodb_ft_deleted; DROP TABLE t1; SET GLOBAL innodb_ft_aux_table=DEFAULT; + +--echo # +--echo # MDEV-19522 InnoDB commit fails when FTS_DOC_ID value +--echo # is greater than 4294967295 +--echo # +CREATE TABLE t1( + FTS_DOC_ID BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + f1 TEXT, f2 TEXT, PRIMARY KEY (FTS_DOC_ID), + FULLTEXT KEY (f1)) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1,'txt','bbb'); +UPDATE t1 SET FTS_DOC_ID = 4294967298; +SELECT * FROM t1 WHERE match(f1) against("txt"); +SET @@session.insert_id = 100000000000; +INSERT INTO t1(f1, f2) VALUES ('aaa', 'bbb'); +CREATE FULLTEXT INDEX i ON t1 (f2); +SELECT * FROM t1 WHERE match(f2) against("bbb"); +# Cleanup +DROP TABLE t1; diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index b965c4fbb45..ff646e00f39 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -188,3 +188,7 @@ IF(MSVC) ENDIF() ADD_SUBDIRECTORY(${CMAKE_SOURCE_DIR}/extra/mariabackup ${CMAKE_BINARY_DIR}/extra/mariabackup) + +IF(WITH_UNIT_TESTS) + ADD_SUBDIRECTORY(unittest) +ENDIF() diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 07077006096..bbe53f4d163 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -32,7 +32,7 @@ Full Text Search interface #include 
"fts0priv.h" #include "fts0types.h" #include "fts0types.ic" -#include "fts0vlc.ic" +#include "fts0vlc.h" #include "fts0plugin.h" #include "dict0priv.h" #include "dict0stats.h" @@ -1247,7 +1247,7 @@ fts_cache_node_add_positions( ulint enc_len; ulint last_pos; byte* ptr_start; - ulint doc_id_delta; + doc_id_t doc_id_delta; #ifdef UNIV_DEBUG if (cache) { @@ -1258,7 +1258,7 @@ fts_cache_node_add_positions( ut_ad(doc_id >= node->last_doc_id); /* Calculate the space required to store the ilist. */ - doc_id_delta = (ulint)(doc_id - node->last_doc_id); + doc_id_delta = doc_id - node->last_doc_id; enc_len = fts_get_encoded_len(doc_id_delta); last_pos = 0; @@ -1307,14 +1307,14 @@ fts_cache_node_add_positions( ptr_start = ptr; /* Encode the new fragment. */ - ptr += fts_encode_int(doc_id_delta, ptr); + ptr = fts_encode_int(doc_id_delta, ptr); last_pos = 0; for (i = 0; i < ib_vector_size(positions); i++) { ulint pos = *(static_cast( ib_vector_get(positions, i))); - ptr += fts_encode_int(pos - last_pos, ptr); + ptr = fts_encode_int(pos - last_pos, ptr); last_pos = pos; } diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index c4cbbfafff4..2d506a757a0 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -36,6 +36,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang #include "ut0list.h" #include "zlib.h" #include "fts0opt.h" +#include "fts0vlc.h" /** The FTS optimize thread's work queue. */ ib_wqueue_t* fts_optimize_wq; @@ -1116,7 +1117,7 @@ fts_optimize_encode_node( ulint pos_enc_len; doc_id_t doc_id_delta; dberr_t error = DB_SUCCESS; - byte* src = enc->src_ilist_ptr; + const byte* src = enc->src_ilist_ptr; if (node->first_doc_id == 0) { ut_a(node->last_doc_id == 0); @@ -1173,7 +1174,7 @@ fts_optimize_encode_node( /* Encode the doc id. Cast to ulint, the delta should be small and therefore no loss of precision. 
*/ - dst += fts_encode_int((ulint) doc_id_delta, dst); + dst = fts_encode_int(doc_id_delta, dst); /* Copy the encoded pos array. */ memcpy(dst, src, pos_enc_len); @@ -1220,7 +1221,8 @@ fts_optimize_node( doc_id_t delta; doc_id_t del_doc_id = FTS_NULL_DOC_ID; - delta = fts_decode_vlc(&enc->src_ilist_ptr); + delta = fts_decode_vlc( + (const byte**)&enc->src_ilist_ptr); test_again: /* Check whether the doc id is in the delete list, if @@ -1248,7 +1250,7 @@ test_again: /* Skip the entries for this document. */ while (*enc->src_ilist_ptr) { - fts_decode_vlc(&enc->src_ilist_ptr); + fts_decode_vlc((const byte**)&enc->src_ilist_ptr); } /* Skip the end of word position marker. */ diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc index b4c72e53afe..965c79bfaa5 100644 --- a/storage/innobase/fts/fts0que.cc +++ b/storage/innobase/fts/fts0que.cc @@ -34,6 +34,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang #include "fts0pars.h" #include "fts0types.h" #include "fts0plugin.h" +#include "fts0vlc.h" #include #include @@ -3224,7 +3225,7 @@ fts_query_filter_doc_ids( ulint len, /*!< in: doc id ilist size */ ibool calc_doc_count) /*!< in: whether to remember doc count */ { - byte* ptr = static_cast(data); + const byte* ptr = static_cast(data); doc_id_t doc_id = 0; ulint decoded = 0; ib_rbt_t* doc_freqs = word_freq->doc_freqs; @@ -3234,8 +3235,8 @@ fts_query_filter_doc_ids( ulint freq = 0; fts_doc_freq_t* doc_freq; fts_match_t* match = NULL; - ulint last_pos = 0; - ulint pos = fts_decode_vlc(&ptr); + doc_id_t last_pos = 0; + doc_id_t pos = fts_decode_vlc(&ptr); /* Some sanity checks. 
*/ if (doc_id == 0) { diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 10e5cbe216f..91d245ad0e2 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -8543,8 +8543,7 @@ calc_row_difference( && prebuilt->table->fts && innobase_strcasecmp( field->field_name, FTS_DOC_ID_COL_NAME) == 0) { - doc_id = (doc_id_t) mach_read_from_n_little_endian( - n_ptr, 8); + doc_id = mach_read_uint64_little_endian(n_ptr); if (doc_id == 0) { return(DB_FTS_INVALID_DOCID); } @@ -8787,16 +8786,6 @@ calc_row_difference( << innodb_table->name; return(DB_FTS_INVALID_DOCID); - } else if ((doc_id - - prebuilt->table->fts->cache->next_doc_id) - >= FTS_DOC_ID_MAX_STEP) { - - ib::warn() << "Doc ID " << doc_id << " is too" - " big. Its difference with largest" - " Doc ID used " << prebuilt->table->fts - ->cache->next_doc_id - 1 - << " cannot exceed or equal to " - << FTS_DOC_ID_MAX_STEP; } diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 03c0efff027..1111d974ad0 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -58,6 +58,7 @@ Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits) #include "fil0fil.h" #include "fil0crypt.h" #include "dict0crea.h" +#include "fts0vlc.h" /** The latest successfully looked up innodb_fts_aux_table */ UNIV_INTERN table_id_t innodb_ft_aux_table_id; @@ -2775,7 +2776,7 @@ i_s_fts_index_cache_fill_one_index( /* Decrypt the ilist, and display Dod ID and word position */ for (ulint i = 0; i < ib_vector_size(word->nodes); i++) { fts_node_t* node; - byte* ptr; + const byte* ptr; ulint decoded = 0; doc_id_t doc_id = 0; @@ -2785,13 +2786,11 @@ i_s_fts_index_cache_fill_one_index( ptr = node->ilist; while (decoded < node->ilist_size) { - ulint pos = fts_decode_vlc(&ptr); - doc_id += pos; + doc_id += fts_decode_vlc(&ptr); /* Get position info */ while (*ptr) { - pos = fts_decode_vlc(&ptr); OK(field_store_string( 
fields[I_S_FTS_WORD], @@ -2812,7 +2811,7 @@ i_s_fts_index_cache_fill_one_index( doc_id, true)); OK(fields[I_S_FTS_ILIST_DOC_POS]->store( - pos, true)); + fts_decode_vlc(&ptr), true)); OK(schema_table_store_record( thd, table)); @@ -3146,7 +3145,7 @@ i_s_fts_index_table_fill_one_fetch( /* Decrypt the ilist, and display Dod ID and word position */ for (ulint i = 0; i < ib_vector_size(word->nodes); i++) { fts_node_t* node; - byte* ptr; + const byte* ptr; ulint decoded = 0; doc_id_t doc_id = 0; @@ -3156,13 +3155,10 @@ i_s_fts_index_table_fill_one_fetch( ptr = node->ilist; while (decoded < node->ilist_size) { - ulint pos = fts_decode_vlc(&ptr); - - doc_id += pos; + doc_id += fts_decode_vlc(&ptr); /* Get position info */ while (*ptr) { - pos = fts_decode_vlc(&ptr); OK(field_store_string( fields[I_S_FTS_WORD], @@ -3181,7 +3177,7 @@ i_s_fts_index_table_fill_one_fetch( longlong(doc_id), true)); OK(fields[I_S_FTS_ILIST_DOC_POS]->store( - pos, true)); + fts_decode_vlc(&ptr), true)); OK(schema_table_store_record( thd, table)); diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h index 84d8ccd26ef..dfac5117c17 100644 --- a/storage/innobase/include/fts0fts.h +++ b/storage/innobase/include/fts0fts.h @@ -96,10 +96,6 @@ those defined in mysql file ft_global.h */ /** Threshold where our optimize thread automatically kicks in */ #define FTS_OPTIMIZE_THRESHOLD 10000000 -/** Threshold to avoid exhausting of doc ids. 
Consecutive doc id difference -should not exceed FTS_DOC_ID_MAX_STEP */ -#define FTS_DOC_ID_MAX_STEP 65535 - /** Maximum possible Fulltext word length in bytes (assuming mbmaxlen=4) */ #define FTS_MAX_WORD_LEN (HA_FT_MAXCHARLEN * 4) diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h index f5760a16c0e..21d32c7d313 100644 --- a/storage/innobase/include/fts0types.h +++ b/storage/innobase/include/fts0types.h @@ -314,16 +314,6 @@ int fts_doc_id_cmp( const void* p1, /*!< in: id1 */ const void* p2); /*!< in: id2 */ -/******************************************************************//** -Decode and return the integer that was encoded using our VLC scheme.*/ -UNIV_INLINE -ulint -fts_decode_vlc( -/*===========*/ - /*!< out: value decoded */ - byte** ptr); /*!< in: ptr to decode from, this ptr is - incremented by the number of bytes decoded */ - /******************************************************************//** Duplicate a string. */ UNIV_INLINE @@ -338,28 +328,6 @@ fts_string_dup( const fts_string_t* src, /*!< in: src string */ mem_heap_t* heap); /*!< in: heap to use */ -/******************************************************************//** -Return length of val if it were encoded using our VLC scheme. */ -UNIV_INLINE -ulint -fts_get_encoded_len( -/*================*/ - /*!< out: length of value - encoded, in bytes */ - ulint val); /*!< in: value to encode */ - -/******************************************************************//** -Encode an integer using our VLC scheme and return the length in bytes. */ -UNIV_INLINE -ulint -fts_encode_int( -/*===========*/ - /*!< out: length of value - encoded, in bytes */ - ulint val, /*!< in: value to encode */ - byte* buf); /*!< in: buffer, must have - enough space */ - /******************************************************************//** Get the selected FTS aux INDEX suffix. 
*/ UNIV_INLINE @@ -381,6 +349,5 @@ fts_select_index( ulint len); #include "fts0types.ic" -#include "fts0vlc.ic" #endif /* INNOBASE_FTS0TYPES_H */ diff --git a/storage/innobase/include/fts0vlc.h b/storage/innobase/include/fts0vlc.h new file mode 100644 index 00000000000..d6e6037777e --- /dev/null +++ b/storage/innobase/include/fts0vlc.h @@ -0,0 +1,124 @@ +/** + +Copyright (c) 2021, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +**/ +/** +@file include/fts0vlc.h +Full text variable length integer encoding/decoding. + +Created 2021-10-19 Thirunarayanan Balathandayuthapani +**/ + +/** Return length of val if it were encoded using our VLC scheme. +@param val value to encode +@return length of value encoded, in bytes */ +inline size_t fts_get_encoded_len(doc_id_t val) +{ + if (val < static_cast(1) << 7) + return 1; + if (val < static_cast(1) << 14) + return 2; + if (val < static_cast(1) << 21) + return 3; + if (val < static_cast(1) << 28) + return 4; + if (val < static_cast(1) << 35) + return 5; + if (val < static_cast(1) << 42) + return 6; + if (val < static_cast(1) << 49) + return 7; + if (val < static_cast(1) << 56) + return 8; + if (val < static_cast(1) << 63) + return 9; + return 10; +} + +/** Encode an integer using our VLC scheme and return the +length in bytes. 
+@param val value to encode +@param buf buffer, must have enough space +@return length of value encoded, in bytes */ +inline byte *fts_encode_int(doc_id_t val, byte *buf) +{ + if (val < static_cast(1) << 7) + goto add_1; + if (val < static_cast(1) << 14) + goto add_2; + if (val < static_cast(1) << 21) + goto add_3; + if (val < static_cast(1) << 28) + goto add_4; + if (val < static_cast(1) << 35) + goto add_5; + if (val < static_cast(1) << 42) + goto add_6; + if (val < static_cast(1) << 49) + goto add_7; + if (val < static_cast(1) << 56) + goto add_8; + if (val < static_cast(1) << 63) + goto add_9; + + *buf++= static_cast(val >> 63); +add_9: + *buf++= static_cast(val >> 56) & 0x7F; +add_8: + *buf++= static_cast(val >> 49) & 0x7F; +add_7: + *buf++= static_cast(val >> 42) & 0x7F; +add_6: + *buf++= static_cast(val >> 35) & 0x7F; +add_5: + *buf++= static_cast(val >> 28) & 0x7F; +add_4: + *buf++= static_cast(val >> 21) & 0x7F; +add_3: + *buf++= static_cast(val >> 14) & 0x7F; +add_2: + *buf++= static_cast(val >> 7) & 0x7F; +add_1: + *buf++= static_cast(val) | 0x80; + return buf; +} + +/** Decode and return the integer that was encoded using +our VLC scheme. +@param ptr pointer to decode from, this ptr is + incremented by the number of bytes decoded +@return value decoded */ +inline doc_id_t fts_decode_vlc(const byte **ptr) +{ + ut_d(const byte *const start= *ptr); + ut_ad(*start); + + doc_id_t val= 0; + for (;;) + { + byte b= *(*ptr)++; + val|= (b & 0x7F); + + /* High-bit on means "last byte in the encoded integer". 
*/ + if (b & 0x80) + break; + ut_ad(val < static_cast(1) << (64 - 7)); + val <<= 7; + } + + ut_ad(*ptr - start <= 10); + + return(val); +} diff --git a/storage/innobase/include/fts0vlc.ic b/storage/innobase/include/fts0vlc.ic deleted file mode 100644 index 75d8535057e..00000000000 --- a/storage/innobase/include/fts0vlc.ic +++ /dev/null @@ -1,142 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fts0vlc.ic -Full text variable length integer encoding/decoding. - -Created 2007-03-27 Sunny Bains -*******************************************************/ - -#ifndef INNOBASE_FTS0VLC_IC -#define INNOBASE_FTS0VLC_IC - -#include "fts0types.h" - -/******************************************************************//** -Return length of val if it were encoded using our VLC scheme. 
-FIXME: We will need to be able encode 8 bytes value -@return length of value encoded, in bytes */ -UNIV_INLINE -ulint -fts_get_encoded_len( -/*================*/ - ulint val) /* in: value to encode */ -{ - if (val <= 127) { - return(1); - } else if (val <= 16383) { - return(2); - } else if (val <= 2097151) { - return(3); - } else if (val <= 268435455) { - return(4); - } else { - /* Possibly we should care that on 64-bit machines ulint can - contain values that we can't encode in 5 bytes, but - fts_encode_int doesn't handle them either so it doesn't much - matter. */ - - return(5); - } -} - -/******************************************************************//** -Encode an integer using our VLC scheme and return the length in bytes. -@return length of value encoded, in bytes */ -UNIV_INLINE -ulint -fts_encode_int( -/*===========*/ - ulint val, /* in: value to encode */ - byte* buf) /* in: buffer, must have enough space */ -{ - ulint len; - - if (val <= 127) { - *buf = (byte) val; - - len = 1; - } else if (val <= 16383) { - *buf++ = (byte)(val >> 7); - *buf = (byte)(val & 0x7F); - - len = 2; - } else if (val <= 2097151) { - *buf++ = (byte)(val >> 14); - *buf++ = (byte)((val >> 7) & 0x7F); - *buf = (byte)(val & 0x7F); - - len = 3; - } else if (val <= 268435455) { - *buf++ = (byte)(val >> 21); - *buf++ = (byte)((val >> 14) & 0x7F); - *buf++ = (byte)((val >> 7) & 0x7F); - *buf = (byte)(val & 0x7F); - - len = 4; - } else { - /* Best to keep the limitations of the 32/64 bit versions - identical, at least for the time being. */ - ut_ad(val <= 4294967295u); - - *buf++ = (byte)(val >> 28); - *buf++ = (byte)((val >> 21) & 0x7F); - *buf++ = (byte)((val >> 14) & 0x7F); - *buf++ = (byte)((val >> 7) & 0x7F); - *buf = (byte)(val & 0x7F); - - len = 5; - } - - /* High-bit on means "last byte in the encoded integer". 
*/ - *buf |= 0x80; - - return(len); -} - -/******************************************************************//** -Decode and return the integer that was encoded using our VLC scheme. -@return value decoded */ -UNIV_INLINE -ulint -fts_decode_vlc( -/*===========*/ - byte** ptr) /* in: ptr to decode from, this ptr is - incremented by the number of bytes decoded */ -{ - ulint val = 0; - - for (;;) { - byte b = **ptr; - - ++*ptr; - val |= (b & 0x7F); - - /* High-bit on means "last byte in the encoded integer". */ - if (b & 0x80) { - break; - } else { - val <<= 7; - } - } - - return(val); -} - -#endif diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h index 8141c8a91e0..860ef20b8bd 100644 --- a/storage/innobase/include/mach0data.h +++ b/storage/innobase/include/mach0data.h @@ -316,6 +316,28 @@ mach_read_from_n_little_endian( const byte* buf, /*!< in: from where to read */ ulint buf_size) /*!< in: from how many bytes to read */ MY_ATTRIBUTE((warn_unused_result)); + + +/** Reads a 64-bit value stored in little-endian format +@param buf From where to read +@return uint64_t */ +UNIV_INLINE +uint64_t +mach_read_uint64_little_endian(const byte* buf) +{ +#ifdef WORDS_BIGENDIAN + return + uint64_t(buf[0]) | uint64_t(buf[1]) << 8 | + uint64_t(buf[2]) << 16 | uint64_t(buf[3]) << 24 | + uint64_t(buf[4]) << 32 | uint64_t(buf[5]) << 40 | + uint64_t(buf[6]) << 48 | uint64_t(buf[7]) << 56; +#else + uint64_t n; + memcpy(&n, buf, sizeof(uint64_t)); + return n; +#endif +} + /*********************************************************//** Writes a ulint in the little-endian format. */ UNIV_INLINE diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index c2f9186d408..6445f67f3c2 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -1465,23 +1465,6 @@ error_exit: trx->error_state = DB_FTS_INVALID_DOCID; goto error_exit; } - - /* Difference between Doc IDs are restricted within - 4 bytes integer.
See fts_get_encoded_len(). Consecutive - doc_ids difference should not exceed - FTS_DOC_ID_MAX_STEP value. */ - - if (doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) { - ib::error() << "Doc ID " << doc_id - << " is too big. Its difference with" - " largest used Doc ID " - << next_doc_id - 1 << " cannot" - " exceed or equal to " - << FTS_DOC_ID_MAX_STEP; - err = DB_FTS_INVALID_DOCID; - trx->error_state = DB_FTS_INVALID_DOCID; - goto error_exit; - } } if (table->skip_alter_undo) { diff --git a/storage/innobase/unittest/CMakeLists.txt b/storage/innobase/unittest/CMakeLists.txt new file mode 100644 index 00000000000..df98cddf73e --- /dev/null +++ b/storage/innobase/unittest/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright (c) 2021, MariaDB Corporation. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA + +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/unittest/mytap + ${CMAKE_SOURCE_DIR}/storage/innobase/include) +ADD_EXECUTABLE(innodb_fts-t innodb_fts-t.cc) +TARGET_LINK_LIBRARIES(innodb_fts-t mysys mytap) +ADD_DEPENDENCIES(innodb_fts-t GenError) +MY_ADD_TEST(innodb_fts) diff --git a/storage/innobase/unittest/innodb_fts-t.cc b/storage/innobase/unittest/innodb_fts-t.cc new file mode 100644 index 00000000000..57585e337c2 --- /dev/null +++ b/storage/innobase/unittest/innodb_fts-t.cc @@ -0,0 +1,52 @@ +#include "tap.h" +#include "fts0fts.h" +#include "fts0vlc.h" + +struct fts_encode_info +{ + const byte buf[10]; + int32_t len; + doc_id_t val; +}; + +/* Contains fts encoding min & max value for each length bytes */ +static const fts_encode_info fts_info[]= +{ + {{0x80}, 1, 0}, + {{0xFF}, 1, (1 << 7) - 1}, + {{0x01, 0x80}, 2, 1 << 7}, + {{0x7F, 0XFF}, 2, (1 << 14) - 1}, + {{0x01, 0x00, 0x80}, 3, 1 << 14}, + {{0x7F, 0X7F, 0XFF}, 3, (1 << 21) - 1}, + {{0x01, 0x00, 0x00, 0x80}, 4, 1 << 21}, + {{0x7F, 0X7F, 0X7F, 0xFF}, 4, (1 << 28) - 1}, + {{0x01, 0x00, 0x00, 0x00, 0x80}, 5, 1 << 28}, + {{0x7F, 0X7F, 0X7F, 0x7F, 0xFF}, 5, (1ULL << 35) - 1}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x80}, 6, 1ULL << 35}, + {{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0xFF}, 6, (1ULL << 42) - 1}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 7, 1ULL << 42}, + {{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0XFF}, 7, (1ULL << 49) - 1}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 8, 1ULL << 49}, + {{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0X7F, 0XFF}, 8, (1ULL << 56) -1}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 9, 1ULL << 56}, + {{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0X7F, 0x7F, 0XFF}, 9, (1ULL << 63) -1}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 
10, 1ULL << 63}, + {{0x01, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0X7F, 0x7F, 0x7F, 0xFF}, 10, ~0ULL} +}; + +int main(int, char**) +{ + for (int i= array_elements(fts_info); i--;) + { + byte buf[10]; + const byte* fts_buf= buf; + int32_t len= fts_encode_int(fts_info[i].val, buf) - &buf[0]; + if (fts_info[i].len == len && + !memcmp(&fts_info[i].buf, buf, len) && + fts_decode_vlc(&fts_buf) == fts_info[i].val && + fts_buf == &buf[len]) + ok(true, "FTS Encoded for %d bytes", fts_info[i].len); + else + ok(false, "FTS Encoded for %d bytes", fts_info[i].len); + } +} -- cgit v1.2.1 From c484a358c897413be390d03bdcb8dc4d70c7d1c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 21 Oct 2021 12:29:33 +0300 Subject: MDEV-26864 Race condition between transaction commit and undo log truncation trx_commit_in_memory(): Do not release the rseg reference before trx_undo_commit_cleanup() has been invoked and the current transaction is truly done with the rollback segment. The purpose of the reference count is to prevent data races with trx_purge_truncate_history(). This is based on mysql/mysql-server@ac79aa1522f33e6eb912133a81fa2614db764c9c. 
--- storage/innobase/trx/trx0trx.cc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 3e558a7181d..6669d15a31b 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1775,12 +1775,7 @@ trx_commit_in_memory( ut_ad(!trx->rsegs.m_redo.update_undo); - if (trx_rseg_t* rseg = trx->rsegs.m_redo.rseg) { - mutex_enter(&rseg->mutex); - ut_ad(rseg->trx_ref_count > 0); - --rseg->trx_ref_count; - mutex_exit(&rseg->mutex); - + if (ut_d(trx_rseg_t* rseg =) trx->rsegs.m_redo.rseg) { if (trx_undo_t*& insert = trx->rsegs.m_redo.insert_undo) { ut_ad(insert->rseg == rseg); trx_undo_commit_cleanup(insert, false); @@ -1849,6 +1844,15 @@ trx_commit_in_memory( ut_ad(!trx->rsegs.m_noredo.undo); + /* Only after trx_undo_commit_cleanup() it is safe to release + our rseg reference. */ + if (trx_rseg_t* rseg = trx->rsegs.m_redo.rseg) { + mutex_enter(&rseg->mutex); + ut_ad(rseg->trx_ref_count > 0); + --rseg->trx_ref_count; + mutex_exit(&rseg->mutex); + } + /* Free all savepoints, starting from the first. */ trx_named_savept_t* savep = UT_LIST_GET_FIRST(trx->trx_savepoints); -- cgit v1.2.1 From 2d98b967e31623d9027c0db55330dde2c9d1d99a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 21 Oct 2021 12:44:27 +0300 Subject: MDEV-26865 fts_optimize_thread cannot keep up with workload fts_cache_t::total_size_at_sync: New field, to sample total_size. fts_add_doc_by_id(): Invoke sync if total_size has grown too much since the previous sync request. (Maintain cache->total_size_at_sync.) ib_wqueue_t::length: Caches ib_list_len(*items). ib_wqueue_len(): Removed. We will refer to fts_optimize_wq->length directly. 
Based on mysql/mysql-server@bc9c46bf2894673d0df17cd0ee872d0d99663121 --- storage/innobase/buf/buf0mtflu.cc | 8 -------- storage/innobase/fts/fts0fts.cc | 14 +++++++++----- storage/innobase/include/fts0types.h | 5 ++++- storage/innobase/include/ut0wqueue.h | 11 +++-------- storage/innobase/ut/ut0wqueue.cc | 29 ++++++++--------------------- 5 files changed, 24 insertions(+), 43 deletions(-) diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index aae90e48168..ee52a11c394 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -367,14 +367,6 @@ DECLARE_THREAD(mtflush_io_thread)(void* arg) mutex_exit(&(mtflush_io->thread_global_mtx)); while (TRUE) { - -#ifdef UNIV_MTFLUSH_DEBUG - fprintf(stderr, "InnoDB: Note. Thread %lu work queue len %lu return queue len %lu\n", - os_thread_get_curr_id(), - ib_wqueue_len(mtflush_io->wq), - ib_wqueue_len(mtflush_io->wr_cq)); -#endif /* UNIV_MTFLUSH_DEBUG */ - mtflush_service_io(mtflush_io, this_thread_data); diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index bbe53f4d163..f9c7bcd75c4 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -580,6 +580,7 @@ fts_cache_init( cache->sync_heap->arg = mem_heap_create(1024); cache->total_size = 0; + cache->total_size_at_sync = 0; mutex_enter((ib_mutex_t*) &cache->deleted_lock); cache->deleted_doc_ids = ib_vector_create( @@ -3571,11 +3572,14 @@ fts_add_doc_by_id( get_doc->index_cache, doc_id, doc.tokens); - bool need_sync = false; - if ((cache->total_size > fts_max_cache_size / 10 - || fts_need_sync) - && !cache->sync->in_progress) { - need_sync = true; + bool need_sync = !cache->sync->in_progress + && (fts_need_sync + || (cache->total_size + - cache->total_size_at_sync) + > fts_max_cache_size / 10); + if (need_sync) { + cache->total_size_at_sync = + cache->total_size; } rw_lock_x_unlock(&table->fts->cache->lock); diff --git a/storage/innobase/include/fts0types.h 
b/storage/innobase/include/fts0types.h index 21d32c7d313..d49bc7c0254 100644 --- a/storage/innobase/include/fts0types.h +++ b/storage/innobase/include/fts0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2020, MariaDB Corporation. +Copyright (c) 2017, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -150,6 +150,9 @@ struct fts_cache_t { size_t total_size; /*!< total size consumed by the ilist field of all nodes. SYNC is run whenever this gets too big */ + /** total_size at the time of the previous SYNC request */ + size_t total_size_at_sync; + fts_sync_t* sync; /*!< sync structure to sync data to disk */ ib_alloc_t* sync_heap; /*!< The heap allocator, for indexes diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h index 5a895f4ea3c..d9cc7aec9c9 100644 --- a/storage/innobase/include/ut0wqueue.h +++ b/storage/innobase/include/ut0wqueue.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2006, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2019, MariaDB Corporation. +Copyright (c) 2017, 2021, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -46,6 +46,8 @@ struct ib_wqueue_t ib_mutex_t mutex; /** Work item list */ ib_list_t* items; + /** ib_list_len(*items) */ + size_t length; /** event we use to signal additions to list; os_event_set() and os_event_reset() are protected by the mutex */ os_event_t event; @@ -103,12 +105,5 @@ void* ib_wqueue_nowait( /*=============*/ ib_wqueue_t* wq); /*items = ib_list_create(); wq->event = os_event_create(0); + wq->length = 0; return(wq); } @@ -76,6 +77,8 @@ ib_wqueue_add(ib_wqueue_t* wq, void* item, mem_heap_t* heap, bool wq_locked) } ib_list_add_last(wq->items, item, heap); + wq->length++; + ut_ad(wq->length == ib_list_len(wq->items)); os_event_set(wq->event); if (!wq_locked) { @@ -102,12 +105,12 @@ ib_wqueue_wait( if (node) { ib_list_remove(wq->items, node); - - if (!ib_list_get_first(wq->items)) { + if (!--wq->length) { /* We must reset the event when the list gets emptied. */ os_event_reset(wq->event); } + ut_ad(wq->length == ib_list_len(wq->items)); break; } @@ -142,7 +145,8 @@ ib_wqueue_timedwait( if (node) { ib_list_remove(wq->items, node); - + wq->length--; + ut_ad(wq->length == ib_list_len(wq->items)); mutex_exit(&wq->mutex); break; } @@ -204,20 +208,3 @@ bool ib_wqueue_is_empty(ib_wqueue_t* wq) mutex_exit(&wq->mutex); return is_empty; } - -/******************************************************************** -Get number of items on queue. 
-@return number of items on queue */ -ulint -ib_wqueue_len( -/*==========*/ - ib_wqueue_t* wq) /*mutex); - len = ib_list_len(wq->items); - mutex_exit(&wq->mutex); - - return(len); -} -- cgit v1.2.1 From 1a2308d3f443d8fcacf5506cb96f802dee3a3519 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 21 Oct 2021 12:57:09 +0300 Subject: MDEV-26865: Add test case and instrumentation Based on mysql/mysql-server@bc9c46bf2894673d0df17cd0ee872d0d99663121 but without sleeps. The test was verified to hit the debug assertion if the change to fts_add_doc_by_id() in commit 2d98b967e31623d9027c0db55330dde2c9d1d99a was reverted. --- mysql-test/suite/innodb_fts/r/bug_32831765.result | 131 +++++++++++++++++ mysql-test/suite/innodb_fts/t/bug_32831765.test | 164 ++++++++++++++++++++++ storage/innobase/fts/fts0opt.cc | 3 + 3 files changed, 298 insertions(+) create mode 100644 mysql-test/suite/innodb_fts/r/bug_32831765.result create mode 100644 mysql-test/suite/innodb_fts/t/bug_32831765.test diff --git a/mysql-test/suite/innodb_fts/r/bug_32831765.result b/mysql-test/suite/innodb_fts/r/bug_32831765.result new file mode 100644 index 00000000000..1b828f4266b --- /dev/null +++ b/mysql-test/suite/innodb_fts/r/bug_32831765.result @@ -0,0 +1,131 @@ +# +# Bug#32831765 SERVER HITS OOM CONDITION WHEN LOADING TWO +# INNODB TABLES WITH FTS INDEXES +# +create table t1 ( `id` int unsigned NOT NULL AUTO_INCREMENT, `col01` text, +`col02` text, `col03` text, `col04` text, `col05` text, `col06` text, `col07` +text, `col08` text, `col09` text, `col10` text, `col11` text, `col12` text, +`col13` text, `col14` text, `col15` text, `col16` text, `col17` text, `col18` +text, `col19` text, `col20` text, `col21` text, `col22` text, `col23` text, +`col24` text, `col25` text, `col26` text, `col27` text, `col28` text, `col29` +text, `col30` text, PRIMARY KEY (`id`), FULLTEXT KEY (`col01`), FULLTEXT KEY +(`col02`), FULLTEXT KEY (`col03`), FULLTEXT KEY (`col04`), FULLTEXT KEY +(`col05`), 
FULLTEXT KEY (`col06`), FULLTEXT KEY (`col07`), FULLTEXT KEY +(`col08`), FULLTEXT KEY (`col09`), FULLTEXT KEY (`col10`), FULLTEXT KEY +(`col11`), FULLTEXT KEY (`col12`), FULLTEXT KEY (`col13`), FULLTEXT KEY +(`col14`), FULLTEXT KEY (`col15`), FULLTEXT KEY (`col16`), FULLTEXT KEY +(`col17`), FULLTEXT KEY (`col18`), FULLTEXT KEY (`col19`), FULLTEXT KEY +(`col20`), FULLTEXT KEY (`col21`), FULLTEXT KEY (`col22`), FULLTEXT KEY +(`col23`), FULLTEXT KEY (`col24`), FULLTEXT KEY (`col25`), FULLTEXT KEY +(`col26`), FULLTEXT KEY (`col27`), FULLTEXT KEY (`col28`), FULLTEXT KEY +(`col29`), FULLTEXT KEY (`col30`)) engine=innodb; +create table t2 ( `id` int unsigned NOT NULL AUTO_INCREMENT, `col01` text, +`col02` text, `col03` text, `col04` text, `col05` text, `col06` text, `col07` +text, `col08` text, `col09` text, `col10` text, `col11` text, `col12` text, +`col13` text, `col14` text, `col15` text, `col16` text, `col17` text, `col18` +text, `col19` text, `col20` text, `col21` text, `col22` text, `col23` text, +`col24` text, `col25` text, `col26` text, `col27` text, `col28` text, `col29` +text, `col30` text, PRIMARY KEY (`id`), FULLTEXT KEY (`col01`), FULLTEXT KEY +(`col02`), FULLTEXT KEY (`col03`), FULLTEXT KEY (`col04`), FULLTEXT KEY +(`col05`), FULLTEXT KEY (`col06`), FULLTEXT KEY (`col07`), FULLTEXT KEY +(`col08`), FULLTEXT KEY (`col09`), FULLTEXT KEY (`col10`), FULLTEXT KEY +(`col11`), FULLTEXT KEY (`col12`), FULLTEXT KEY (`col13`), FULLTEXT KEY +(`col14`), FULLTEXT KEY (`col15`), FULLTEXT KEY (`col16`), FULLTEXT KEY +(`col17`), FULLTEXT KEY (`col18`), FULLTEXT KEY (`col19`), FULLTEXT KEY +(`col20`), FULLTEXT KEY (`col21`), FULLTEXT KEY (`col22`), FULLTEXT KEY +(`col23`), FULLTEXT KEY (`col24`), FULLTEXT KEY (`col25`), FULLTEXT KEY +(`col26`), FULLTEXT KEY (`col27`), FULLTEXT KEY (`col28`), FULLTEXT KEY +(`col29`), FULLTEXT KEY (`col30`)) engine=innodb; +create table t3 ( `id` int unsigned NOT NULL AUTO_INCREMENT, `col01` text, +`col02` text, `col03` text, `col04` text, 
`col05` text, `col06` text, `col07` +text, `col08` text, `col09` text, `col10` text, `col11` text, `col12` text, +`col13` text, `col14` text, `col15` text, `col16` text, `col17` text, `col18` +text, `col19` text, `col20` text, `col21` text, `col22` text, `col23` text, +`col24` text, `col25` text, `col26` text, `col27` text, `col28` text, `col29` +text, `col30` text, PRIMARY KEY (`id`), FULLTEXT KEY (`col01`), FULLTEXT KEY +(`col02`), FULLTEXT KEY (`col03`), FULLTEXT KEY (`col04`), FULLTEXT KEY +(`col05`), FULLTEXT KEY (`col06`), FULLTEXT KEY (`col07`), FULLTEXT KEY +(`col08`), FULLTEXT KEY (`col09`), FULLTEXT KEY (`col10`), FULLTEXT KEY +(`col11`), FULLTEXT KEY (`col12`), FULLTEXT KEY (`col13`), FULLTEXT KEY +(`col14`), FULLTEXT KEY (`col15`), FULLTEXT KEY (`col16`), FULLTEXT KEY +(`col17`), FULLTEXT KEY (`col18`), FULLTEXT KEY (`col19`), FULLTEXT KEY +(`col20`), FULLTEXT KEY (`col21`), FULLTEXT KEY (`col22`), FULLTEXT KEY +(`col23`), FULLTEXT KEY (`col24`), FULLTEXT KEY (`col25`), FULLTEXT KEY +(`col26`), FULLTEXT KEY (`col27`), FULLTEXT KEY (`col28`), FULLTEXT KEY +(`col29`), FULLTEXT KEY (`col30`)) engine=innodb; +create table t4 ( `id` int unsigned NOT NULL AUTO_INCREMENT, `col01` text, +`col02` text, `col03` text, `col04` text, `col05` text, `col06` text, `col07` +text, `col08` text, `col09` text, `col10` text, `col11` text, `col12` text, +`col13` text, `col14` text, `col15` text, `col16` text, `col17` text, `col18` +text, `col19` text, `col20` text, `col21` text, `col22` text, `col23` text, +`col24` text, `col25` text, `col26` text, `col27` text, `col28` text, `col29` +text, `col30` text, PRIMARY KEY (`id`), FULLTEXT KEY (`col01`), FULLTEXT KEY +(`col02`), FULLTEXT KEY (`col03`), FULLTEXT KEY (`col04`), FULLTEXT KEY +(`col05`), FULLTEXT KEY (`col06`), FULLTEXT KEY (`col07`), FULLTEXT KEY +(`col08`), FULLTEXT KEY (`col09`), FULLTEXT KEY (`col10`), FULLTEXT KEY +(`col11`), FULLTEXT KEY (`col12`), FULLTEXT KEY (`col13`), FULLTEXT KEY +(`col14`), FULLTEXT KEY 
(`col15`), FULLTEXT KEY (`col16`), FULLTEXT KEY +(`col17`), FULLTEXT KEY (`col18`), FULLTEXT KEY (`col19`), FULLTEXT KEY +(`col20`), FULLTEXT KEY (`col21`), FULLTEXT KEY (`col22`), FULLTEXT KEY +(`col23`), FULLTEXT KEY (`col24`), FULLTEXT KEY (`col25`), FULLTEXT KEY +(`col26`), FULLTEXT KEY (`col27`), FULLTEXT KEY (`col28`), FULLTEXT KEY +(`col29`), FULLTEXT KEY (`col30`)) engine=innodb; +#create procedure to inset into the table. +CREATE PROCEDURE `proc_insert`(IN tab_name VARCHAR(40)) +BEGIN +DECLARE i INT DEFAULT 1; +SET @insert_tbl =CONCAT('INSERT INTO ', tab_name, '( `col01`, `col02`, + `col03`, `col04`, `col05`, `col06`, `col07`, `col08`, `col09`, `col10`, + `col11`, `col12`, `col13`, `col14`, `col15`, `col16`, `col17`, `col18`, + `col19`, `col20`, `col21`, `col22`, `col23`, `col24`, `col25`, `col26`, + `col27`, `col28`, `col29`, `col30`) + VALUES ( MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), + MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), + MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), + MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), + MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), + MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), + MD5(RAND()))'); +PREPARE ins_stmt FROM @insert_tbl; +while (i <= 2000) DO +EXECUTE ins_stmt; +SET i = i + 1; +END WHILE; +DEALLOCATE PREPARE ins_stmt; +END | +SET @save_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,fts_optimize_wq_count_check"; +connect con1,localhost,root,,; +call proc_insert('t1'); +connect con2,localhost,root,,; +call proc_insert('t1'); +connect con3,localhost,root,,; +call proc_insert('t2'); +connect con4,localhost,root,,; +call proc_insert('t2'); +connect con5,localhost,root,,; +call proc_insert('t3'); +connect con6,localhost,root,,; +call proc_insert('t3'); +connect con7,localhost,root,,; +call proc_insert('t4'); +connection default; +call proc_insert('t4'); +SET GLOBAL debug_dbug= @save_dbug; 
+connection con1; +disconnect con1; +connection con2; +disconnect con2; +connection con3; +disconnect con3; +connection con4; +disconnect con4; +connection con5; +disconnect con5; +connection con6; +disconnect con6; +connection con7; +disconnect con7; +connection default; +DROP TABLE t1,t2,t3,t4; +DROP PROCEDURE proc_insert; diff --git a/mysql-test/suite/innodb_fts/t/bug_32831765.test b/mysql-test/suite/innodb_fts/t/bug_32831765.test new file mode 100644 index 00000000000..a4551cf91ef --- /dev/null +++ b/mysql-test/suite/innodb_fts/t/bug_32831765.test @@ -0,0 +1,164 @@ +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/big_test.inc + +--echo # +--echo # Bug#32831765 SERVER HITS OOM CONDITION WHEN LOADING TWO +--echo # INNODB TABLES WITH FTS INDEXES +--echo # + +create table t1 ( `id` int unsigned NOT NULL AUTO_INCREMENT, `col01` text, +`col02` text, `col03` text, `col04` text, `col05` text, `col06` text, `col07` +text, `col08` text, `col09` text, `col10` text, `col11` text, `col12` text, +`col13` text, `col14` text, `col15` text, `col16` text, `col17` text, `col18` +text, `col19` text, `col20` text, `col21` text, `col22` text, `col23` text, +`col24` text, `col25` text, `col26` text, `col27` text, `col28` text, `col29` +text, `col30` text, PRIMARY KEY (`id`), FULLTEXT KEY (`col01`), FULLTEXT KEY +(`col02`), FULLTEXT KEY (`col03`), FULLTEXT KEY (`col04`), FULLTEXT KEY +(`col05`), FULLTEXT KEY (`col06`), FULLTEXT KEY (`col07`), FULLTEXT KEY +(`col08`), FULLTEXT KEY (`col09`), FULLTEXT KEY (`col10`), FULLTEXT KEY +(`col11`), FULLTEXT KEY (`col12`), FULLTEXT KEY (`col13`), FULLTEXT KEY +(`col14`), FULLTEXT KEY (`col15`), FULLTEXT KEY (`col16`), FULLTEXT KEY +(`col17`), FULLTEXT KEY (`col18`), FULLTEXT KEY (`col19`), FULLTEXT KEY +(`col20`), FULLTEXT KEY (`col21`), FULLTEXT KEY (`col22`), FULLTEXT KEY +(`col23`), FULLTEXT KEY (`col24`), FULLTEXT KEY (`col25`), FULLTEXT KEY +(`col26`), FULLTEXT KEY (`col27`), FULLTEXT KEY (`col28`), 
FULLTEXT KEY +(`col29`), FULLTEXT KEY (`col30`)) engine=innodb; + +create table t2 ( `id` int unsigned NOT NULL AUTO_INCREMENT, `col01` text, +`col02` text, `col03` text, `col04` text, `col05` text, `col06` text, `col07` +text, `col08` text, `col09` text, `col10` text, `col11` text, `col12` text, +`col13` text, `col14` text, `col15` text, `col16` text, `col17` text, `col18` +text, `col19` text, `col20` text, `col21` text, `col22` text, `col23` text, +`col24` text, `col25` text, `col26` text, `col27` text, `col28` text, `col29` +text, `col30` text, PRIMARY KEY (`id`), FULLTEXT KEY (`col01`), FULLTEXT KEY +(`col02`), FULLTEXT KEY (`col03`), FULLTEXT KEY (`col04`), FULLTEXT KEY +(`col05`), FULLTEXT KEY (`col06`), FULLTEXT KEY (`col07`), FULLTEXT KEY +(`col08`), FULLTEXT KEY (`col09`), FULLTEXT KEY (`col10`), FULLTEXT KEY +(`col11`), FULLTEXT KEY (`col12`), FULLTEXT KEY (`col13`), FULLTEXT KEY +(`col14`), FULLTEXT KEY (`col15`), FULLTEXT KEY (`col16`), FULLTEXT KEY +(`col17`), FULLTEXT KEY (`col18`), FULLTEXT KEY (`col19`), FULLTEXT KEY +(`col20`), FULLTEXT KEY (`col21`), FULLTEXT KEY (`col22`), FULLTEXT KEY +(`col23`), FULLTEXT KEY (`col24`), FULLTEXT KEY (`col25`), FULLTEXT KEY +(`col26`), FULLTEXT KEY (`col27`), FULLTEXT KEY (`col28`), FULLTEXT KEY +(`col29`), FULLTEXT KEY (`col30`)) engine=innodb; + + +create table t3 ( `id` int unsigned NOT NULL AUTO_INCREMENT, `col01` text, +`col02` text, `col03` text, `col04` text, `col05` text, `col06` text, `col07` +text, `col08` text, `col09` text, `col10` text, `col11` text, `col12` text, +`col13` text, `col14` text, `col15` text, `col16` text, `col17` text, `col18` +text, `col19` text, `col20` text, `col21` text, `col22` text, `col23` text, +`col24` text, `col25` text, `col26` text, `col27` text, `col28` text, `col29` +text, `col30` text, PRIMARY KEY (`id`), FULLTEXT KEY (`col01`), FULLTEXT KEY +(`col02`), FULLTEXT KEY (`col03`), FULLTEXT KEY (`col04`), FULLTEXT KEY +(`col05`), FULLTEXT KEY (`col06`), FULLTEXT KEY 
(`col07`), FULLTEXT KEY +(`col08`), FULLTEXT KEY (`col09`), FULLTEXT KEY (`col10`), FULLTEXT KEY +(`col11`), FULLTEXT KEY (`col12`), FULLTEXT KEY (`col13`), FULLTEXT KEY +(`col14`), FULLTEXT KEY (`col15`), FULLTEXT KEY (`col16`), FULLTEXT KEY +(`col17`), FULLTEXT KEY (`col18`), FULLTEXT KEY (`col19`), FULLTEXT KEY +(`col20`), FULLTEXT KEY (`col21`), FULLTEXT KEY (`col22`), FULLTEXT KEY +(`col23`), FULLTEXT KEY (`col24`), FULLTEXT KEY (`col25`), FULLTEXT KEY +(`col26`), FULLTEXT KEY (`col27`), FULLTEXT KEY (`col28`), FULLTEXT KEY +(`col29`), FULLTEXT KEY (`col30`)) engine=innodb; + +create table t4 ( `id` int unsigned NOT NULL AUTO_INCREMENT, `col01` text, +`col02` text, `col03` text, `col04` text, `col05` text, `col06` text, `col07` +text, `col08` text, `col09` text, `col10` text, `col11` text, `col12` text, +`col13` text, `col14` text, `col15` text, `col16` text, `col17` text, `col18` +text, `col19` text, `col20` text, `col21` text, `col22` text, `col23` text, +`col24` text, `col25` text, `col26` text, `col27` text, `col28` text, `col29` +text, `col30` text, PRIMARY KEY (`id`), FULLTEXT KEY (`col01`), FULLTEXT KEY +(`col02`), FULLTEXT KEY (`col03`), FULLTEXT KEY (`col04`), FULLTEXT KEY +(`col05`), FULLTEXT KEY (`col06`), FULLTEXT KEY (`col07`), FULLTEXT KEY +(`col08`), FULLTEXT KEY (`col09`), FULLTEXT KEY (`col10`), FULLTEXT KEY +(`col11`), FULLTEXT KEY (`col12`), FULLTEXT KEY (`col13`), FULLTEXT KEY +(`col14`), FULLTEXT KEY (`col15`), FULLTEXT KEY (`col16`), FULLTEXT KEY +(`col17`), FULLTEXT KEY (`col18`), FULLTEXT KEY (`col19`), FULLTEXT KEY +(`col20`), FULLTEXT KEY (`col21`), FULLTEXT KEY (`col22`), FULLTEXT KEY +(`col23`), FULLTEXT KEY (`col24`), FULLTEXT KEY (`col25`), FULLTEXT KEY +(`col26`), FULLTEXT KEY (`col27`), FULLTEXT KEY (`col28`), FULLTEXT KEY +(`col29`), FULLTEXT KEY (`col30`)) engine=innodb; + +delimiter |; + +--echo #create procedure to inset into the table. 
+CREATE PROCEDURE `proc_insert`(IN tab_name VARCHAR(40)) +BEGIN + DECLARE i INT DEFAULT 1; + SET @insert_tbl =CONCAT('INSERT INTO ', tab_name, '( `col01`, `col02`, + `col03`, `col04`, `col05`, `col06`, `col07`, `col08`, `col09`, `col10`, + `col11`, `col12`, `col13`, `col14`, `col15`, `col16`, `col17`, `col18`, + `col19`, `col20`, `col21`, `col22`, `col23`, `col24`, `col25`, `col26`, + `col27`, `col28`, `col29`, `col30`) + VALUES ( MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), + MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), + MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), + MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), + MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), + MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), MD5(RAND()), + MD5(RAND()))'); + PREPARE ins_stmt FROM @insert_tbl; + while (i <= 2000) DO + EXECUTE ins_stmt; + SET i = i + 1; + END WHILE; + DEALLOCATE PREPARE ins_stmt; +END | + +delimiter ;| + +# Ensure that the number of SYNC requests will not exceed 1000. 
+SET @save_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,fts_optimize_wq_count_check"; + +connect (con1,localhost,root,,); +send call proc_insert('t1'); +connect (con2,localhost,root,,); +send call proc_insert('t1'); +connect (con3,localhost,root,,); +send call proc_insert('t2'); +connect (con4,localhost,root,,); +send call proc_insert('t2'); +connect (con5,localhost,root,,); +send call proc_insert('t3'); +connect (con6,localhost,root,,); +send call proc_insert('t3'); +connect (con7,localhost,root,,); +send call proc_insert('t4'); + +connection default; +call proc_insert('t4'); +SET GLOBAL debug_dbug= @save_dbug; + +connection con1; +reap; +disconnect con1; + +connection con2; +reap; +disconnect con2; + +connection con3; +reap; +disconnect con3; + +connection con4; +reap; +disconnect con4; + +connection con5; +reap; +disconnect con5; + +connection con6; +reap; +disconnect con6; + +connection con7; +reap; +disconnect con7; + +connection default; +DROP TABLE t1,t2,t3,t4; +DROP PROCEDURE proc_insert; diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index 2d506a757a0..13e01befe55 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -2641,6 +2641,9 @@ fts_optimize_request_sync_table( ib_wqueue_add(fts_optimize_wq, msg, msg->heap, true); + DBUG_EXECUTE_IF("fts_optimize_wq_count_check", + DBUG_ASSERT(fts_optimize_wq->length <= 1000);); + mutex_exit(&fts_optimize_wq->mutex); } -- cgit v1.2.1 From fbb1e92e2553cbc98fbbbe23399f2a4342de41fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 21 Oct 2021 14:35:23 +0300 Subject: MDEV-19522 fixup: Integer type mismatch in unit test --- storage/innobase/unittest/innodb_fts-t.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/innobase/unittest/innodb_fts-t.cc b/storage/innobase/unittest/innodb_fts-t.cc index 57585e337c2..4e10c0e439e 100644 --- a/storage/innobase/unittest/innodb_fts-t.cc +++ 
b/storage/innobase/unittest/innodb_fts-t.cc @@ -5,7 +5,7 @@ struct fts_encode_info { const byte buf[10]; - int32_t len; + size_t len; doc_id_t val; }; @@ -40,7 +40,7 @@ int main(int, char**) { byte buf[10]; const byte* fts_buf= buf; - int32_t len= fts_encode_int(fts_info[i].val, buf) - &buf[0]; + size_t len= fts_encode_int(fts_info[i].val, buf) - &buf[0]; if (fts_info[i].len == len && !memcmp(&fts_info[i].buf, buf, len) && fts_decode_vlc(&fts_buf) == fts_info[i].val && -- cgit v1.2.1 From a0fda162ebd991a60634103f2c9d0735154b9dd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 21 Oct 2021 15:31:21 +0300 Subject: Fix GCC 11.2.0 -m32 (IA-32) warnings page_create_low(): Fix -Warray-bounds log_buffer_extend(): Fix -Wstringop-overflow --- storage/innobase/log/log0log.cc | 2 -- storage/innobase/page/page0page.cc | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 257645cb6a4..f677aedc87c 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -79,10 +79,8 @@ void log_buffer_extend(ulong len) const size_t new_buf_size = ut_calc_align(len, srv_page_size); byte* new_buf = static_cast<byte*> (ut_malloc_dontdump(new_buf_size, PSI_INSTRUMENT_ME)); - TRASH_ALLOC(new_buf, new_buf_size); byte* new_flush_buf = static_cast<byte*> (ut_malloc_dontdump(new_buf_size, PSI_INSTRUMENT_ME)); - TRASH_ALLOC(new_flush_buf, new_buf_size); mysql_mutex_lock(&log_sys.mutex); diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index 9b83470e65c..1b8b3cb339f 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -283,7 +283,7 @@ void page_create_low(const buf_block_t* block, bool comp) compile_time_assert(PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE <= PAGE_DATA); - page = buf_block_get_frame(block); + page = block->frame; fil_page_set_type(page, FIL_PAGE_INDEX); -- cgit v1.2.1 From
059a5f11711fd502982abed8781faf9f255fa975 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Thu, 21 Oct 2021 17:19:43 +0200 Subject: Remove trailing space --- scripts/sys_schema/procedures/ps_setup_reset_to_default_57.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sys_schema/procedures/ps_setup_reset_to_default_57.sql b/scripts/sys_schema/procedures/ps_setup_reset_to_default_57.sql index 8d3b2eecf71..9f2ea4682d0 100644 --- a/scripts/sys_schema/procedures/ps_setup_reset_to_default_57.sql +++ b/scripts/sys_schema/procedures/ps_setup_reset_to_default_57.sql @@ -105,7 +105,7 @@ BEGIN SET @query = 'DELETE FROM performance_schema.setup_objects - WHERE NOT (OBJECT_TYPE IN (''EVENT'', ''FUNCTION'', ''PROCEDURE'', ''TABLE'', ''TRIGGER'') AND OBJECT_NAME = ''%'' + WHERE NOT (OBJECT_TYPE IN (''EVENT'', ''FUNCTION'', ''PROCEDURE'', ''TABLE'', ''TRIGGER'') AND OBJECT_NAME = ''%'' AND (OBJECT_SCHEMA = ''mysql'' AND ENABLED = ''NO'' AND TIMED = ''NO'' ) OR (OBJECT_SCHEMA = ''performance_schema'' AND ENABLED = ''NO'' AND TIMED = ''NO'' ) OR (OBJECT_SCHEMA = ''information_schema'' AND ENABLED = ''NO'' AND TIMED = ''NO'' ) -- cgit v1.2.1 From 5caff20216f47fc10540f7de14548cc80cd1c369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 22 Oct 2021 11:59:44 +0300 Subject: MDEV-26883 InnoDB hang due to table lock conflict In a stress test campaign of a 10.6-based branch by Matthias Leich, a deadlock between two InnoDB threads occurred, involving lock_sys.wait_mutex and a dict_table_t::lock_mutex. The cause of the hang is a latching order violation in lock_sys_t::cancel(). That function and the latching order violation were originally introduced in commit 8d16da14873d880b9b5121de1619b7cb5e0f7135 (MDEV-24789). lock_sys_t::cancel(): Invoke table->lock_mutex_trylock() in order to avoid a deadlock. If that fails, release lock_sys.wait_mutex, and acquire both latches. 
In that way, we will be obeying the latching order and no hangs will occur. This hang should mostly affect DDL operations. DML operations will acquire only IX or IS table locks, which are compatible with each other. --- storage/innobase/lock/lock0lock.cc | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 86c44d2e52f..33c827235be 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -5619,10 +5619,25 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock, bool check_victim) { resolve_table_lock: dict_table_t *table= lock->un_member.tab_lock.table; - table->lock_mutex_lock(); + if (!table->lock_mutex_trylock()) + { + /* The correct latching order is: + lock_sys.latch, table->lock_mutex_lock(), lock_sys.wait_mutex. + Thus, we must release lock_sys.wait_mutex for a blocking wait. */ + mysql_mutex_unlock(&lock_sys.wait_mutex); + table->lock_mutex_lock(); + mysql_mutex_lock(&lock_sys.wait_mutex); + lock= trx->lock.wait_lock; + if (!lock) + goto retreat; + else if (check_victim && trx->lock.was_chosen_as_deadlock_victim) + { + err= DB_DEADLOCK; + goto retreat; + } + } if (lock->is_waiting()) lock_cancel_waiting_and_release(lock); - table->lock_mutex_unlock(); /* Even if lock->is_waiting() did not hold above, we must return DB_LOCK_WAIT, or otherwise optimistic parallel replication could occasionally hang. 
Potentially affected tests: @@ -5630,6 +5645,8 @@ resolve_table_lock: rpl.rpl_parallel_optimistic_nobinlog rpl.rpl_parallel_optimistic_xa_lsu_off */ err= DB_LOCK_WAIT; +retreat: + table->lock_mutex_unlock(); } lock_sys.rd_unlock(); } -- cgit v1.2.1 From fdae71f8e3d555abd582086798512d212e86a70f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 22 Oct 2021 12:32:26 +0300 Subject: MDEV-26828 Spinning on buf_pool.page_hash is wasting CPU cycles page_hash_latch: Only use the spinlock implementation on SUX_LOCK_GENERIC platforms (those for which we do not implement a futex-like interface). Use srw_spin_mutex on 32-bit systems (except Microsoft Windows) to satisfy the size constraints. rw_lock::is_read_locked(): Remove. We will use the slightly broader assertion is_locked(). srw_lock_: Implement is_locked(), is_write_locked() in a hacky way for the Microsoft Windows SRWLOCK. This should be acceptable, because we are only using these predicates in debug assertions (or later, in lock elision), and false positives should not matter. --- storage/innobase/buf/buf0buf.cc | 4 +++- storage/innobase/include/buf0buf.h | 16 +++++++++++--- storage/innobase/include/buf0types.h | 43 ++++++++++++++++++++++-------------- storage/innobase/include/rw_lock.h | 6 ----- storage/innobase/include/srw_lock.h | 27 +++++++++++++++++----- 5 files changed, 63 insertions(+), 33 deletions(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 4ec6a61ccb9..f4689611cbe 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -277,6 +277,7 @@ the read requests for the whole area. */ #ifndef UNIV_INNOCHECKSUM +# ifdef SUX_LOCK_GENERIC void page_hash_latch::read_lock_wait() { /* First, try busy spinning for a while. 
*/ @@ -309,6 +310,7 @@ void page_hash_latch::write_lock_wait() std::this_thread::yield(); while (!write_lock_poll()); } +# endif constexpr std::chrono::microseconds WAIT_FOR_READ(100); constexpr int WAIT_FOR_WRITE= 100; @@ -2297,7 +2299,7 @@ lookup: #endif /* UNIV_DEBUG */ } - ut_ad(hash_lock->is_read_locked()); + ut_ad(hash_lock->is_locked()); if (!bpage->zip.data) { diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 480c41669e0..a972f18e61f 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1551,7 +1551,10 @@ private: buf_page_t *bpage= page_hash_get_low(page_id, fold); if (!bpage || watch_is_sentinel(*bpage)) { - latch->release<exclusive>(); + if (exclusive) + latch->write_unlock(); + else + latch->read_unlock(); if (hash_lock) *hash_lock= nullptr; return watch ? bpage : nullptr; @@ -1562,8 +1565,10 @@ private: if (hash_lock) *hash_lock= latch; /* to be released by the caller */ + else if (exclusive) + latch->write_unlock(); else - latch->release<exclusive>(); + latch->read_unlock(); return bpage; } public: @@ -1785,7 +1790,10 @@ public: template<bool exclusive> page_hash_latch *lock(ulint fold) { page_hash_latch *latch= lock_get(fold, n_cells); - latch->acquire<exclusive>(); + if (exclusive) + latch->write_lock(); + else + latch->read_lock(); return latch; } @@ -2024,6 +2032,7 @@ private: /** The InnoDB buffer pool */ extern buf_pool_t buf_pool; +#ifdef SUX_LOCK_GENERIC inline void page_hash_latch::read_lock() { mysql_mutex_assert_not_owner(&buf_pool.mutex); @@ -2036,6 +2045,7 @@ inline void page_hash_latch::write_lock() if (!write_trylock()) write_lock_wait(); } +#endif /* SUX_LOCK_GENERIC */ inline void buf_page_t::add_buf_fix_count(uint32_t count) { diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h index 04b47aaddab..b14e2af36c3 100644 --- a/storage/innobase/include/buf0types.h +++ b/storage/innobase/include/buf0types.h @@ -178,6 +178,7 @@ enum rw_lock_type_t #include "sux_lock.h"
+#ifdef SUX_LOCK_GENERIC class page_hash_latch : public rw_lock { public: @@ -190,23 +191,31 @@ public: inline void read_lock(); /** Acquire an exclusive lock */ inline void write_lock(); - - /** Acquire a lock */ - template void acquire() - { - if (exclusive) - write_lock(); - else - read_lock(); - } - /** Release a lock */ - template void release() - { - if (exclusive) - write_unlock(); - else - read_unlock(); - } }; +#elif defined _WIN32 || SIZEOF_SIZE_T >= 8 +class page_hash_latch +{ + srw_spin_lock_low lock; +public: + void read_lock() { lock.rd_lock(); } + void read_unlock() { lock.rd_unlock(); } + void write_lock() { lock.wr_lock(); } + void write_unlock() { lock.wr_unlock(); } + bool is_locked() const { return lock.is_locked(); } + bool is_write_locked() const { return lock.is_write_locked(); } +}; +#else +class page_hash_latch +{ + srw_spin_mutex lock; +public: + void read_lock() { write_lock(); } + void read_unlock() { write_unlock(); } + void write_lock() { lock.wr_lock(); } + void write_unlock() { lock.wr_unlock(); } + bool is_locked() const { return lock.is_locked(); } + bool is_write_locked() const { return is_locked(); } +}; +#endif #endif /* !UNIV_INNOCHECKSUM */ diff --git a/storage/innobase/include/rw_lock.h b/storage/innobase/include/rw_lock.h index ba380b77261..6447eea18ae 100644 --- a/storage/innobase/include/rw_lock.h +++ b/storage/innobase/include/rw_lock.h @@ -229,12 +229,6 @@ public: bool is_update_locked() const { return !!(lock.load(std::memory_order_relaxed) & UPDATER); } #endif /* SUX_LOCK_GENERIC */ - /** @return whether a shared lock is being held by any thread */ - bool is_read_locked() const - { - auto l= lock.load(std::memory_order_relaxed); - return (l & ~WRITER_PENDING) && !(l & WRITER); - } /** @return whether any lock is being held or waited for by any thread */ bool is_locked_or_waiting() const { return lock.load(std::memory_order_relaxed) != 0; } diff --git a/storage/innobase/include/srw_lock.h 
b/storage/innobase/include/srw_lock.h index 9e2eac15df0..d69c9de268f 100644 --- a/storage/innobase/include/srw_lock.h +++ b/storage/innobase/include/srw_lock.h @@ -175,10 +175,7 @@ public: # ifndef DBUG_OFF /** @return whether the lock is being held or waited for */ bool is_vacant() const - { - return !readers.load(std::memory_order_relaxed) && - !writer.is_locked_or_waiting(); - } + { return !is_locked() && !writer.is_locked_or_waiting(); } # endif /* !DBUG_OFF */ bool rd_lock_try() @@ -250,7 +247,7 @@ public: void wr_u_downgrade() { DBUG_ASSERT(writer.is_locked()); - DBUG_ASSERT(readers.load(std::memory_order_relaxed) == WRITER); + DBUG_ASSERT(is_write_locked()); readers.store(1, std::memory_order_release); /* Note: Any pending rd_lock() will not be woken up until u_unlock() */ } @@ -272,10 +269,16 @@ public: } void wr_unlock() { - DBUG_ASSERT(readers.load(std::memory_order_relaxed) == WRITER); + DBUG_ASSERT(is_write_locked()); readers.store(0, std::memory_order_release); writer.wr_unlock(); } + /** @return whether an exclusive lock may be held by any thread */ + bool is_write_locked() const noexcept + { return readers.load(std::memory_order_relaxed) == WRITER; } + /** @return whether any lock may be held by any thread */ + bool is_locked() const noexcept + { return readers.load(std::memory_order_relaxed) != 0; } #endif }; @@ -308,6 +311,18 @@ public: { return IF_WIN(TryAcquireSRWLockExclusive(&lock), !rw_trywrlock(&lock)); } void wr_unlock() { IF_WIN(ReleaseSRWLockExclusive(&lock), rw_unlock(&lock)); } +#ifdef _WIN32 + /** @return whether any lock may be held by any thread */ + bool is_locked_or_waiting() const noexcept { return (size_t&)(lock) != 0; } + /** @return whether any lock may be held by any thread */ + bool is_locked() const noexcept { return is_locked_or_waiting(); } + /** @return whether an exclusive lock may be held by any thread */ + bool is_write_locked() const noexcept + { + // FIXME: this returns false positives for shared locks + return 
is_locked(); + } +#endif }; template<> void srw_lock_::rd_wait(); -- cgit v1.2.1 From c091a0bc8da87045f10bfc96618ed7194768fa2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 22 Oct 2021 12:33:37 +0300 Subject: MDEV-26826 Duplicated computations of buf_pool.page_hash addresses Since commit bd5a6403cace36c6ed428cde62e35adcd3f7e7d0 (MDEV-26033) we can actually calculate the buf_pool.page_hash cell and latch addresses while not holding buf_pool.mutex. buf_page_alloc_descriptor(): Remove the MEM_UNDEFINED. We now expect buf_page_t::hash to be zero-initialized. buf_pool_t::hash_chain: Dedicated data type for buf_pool.page_hash.array. buf_LRU_free_one_page(): Merged to the only caller buf_pool_t::corrupted_evict(). --- storage/innobase/btr/btr0bulk.cc | 3 +- storage/innobase/btr/btr0cur.cc | 17 +- storage/innobase/btr/btr0sea.cc | 16 +- storage/innobase/buf/buf0block_hint.cc | 9 +- storage/innobase/buf/buf0buddy.cc | 15 +- storage/innobase/buf/buf0buf.cc | 286 +++++++++++++------------------- storage/innobase/buf/buf0flu.cc | 7 +- storage/innobase/buf/buf0lru.cc | 71 +++++--- storage/innobase/buf/buf0rea.cc | 65 ++++---- storage/innobase/fsp/fsp0fsp.cc | 4 +- storage/innobase/ibuf/ibuf0ibuf.cc | 6 +- storage/innobase/include/buf0buf.h | 291 +++++++++++++++------------------ storage/innobase/include/buf0buf.ic | 67 +------- storage/innobase/include/buf0lru.h | 17 +- storage/innobase/include/buf0types.h | 3 +- storage/innobase/include/hash0hash.h | 12 -- storage/innobase/include/ut0new.h | 2 + storage/innobase/log/log0recv.cc | 4 +- 18 files changed, 378 insertions(+), 517 deletions(-) diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc index 13e0e3a0eff..046291158a8 100644 --- a/storage/innobase/btr/btr0bulk.cc +++ b/storage/innobase/btr/btr0bulk.cc @@ -856,9 +856,10 @@ PageBulk::latch() ut_ad(m_block->page.buf_fix_count()); - /* In case the block is S-latched by page_cleaner. 
*/ + /* In case the block is U-latched by page_cleaner. */ if (!buf_page_optimistic_get(RW_X_LATCH, m_block, m_modify_clock, &m_mtr)) { + /* FIXME: avoid another lookup */ m_block = buf_page_get_gen(page_id_t(m_index->table->space_id, m_page_no), 0, RW_X_LATCH, diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 32bdf9a8a51..c61799b8b12 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -1630,6 +1630,9 @@ retry_page_get: ut_ad(cursor->thr); switch (btr_op) { + default: + ut_error; + break; case BTR_INSERT_OP: case BTR_INSERT_IGNORE_UNIQUE_OP: ut_ad(buf_mode == BUF_GET_IF_IN_POOL); @@ -1662,6 +1665,8 @@ retry_page_get: case BTR_DELETE_OP: ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH); ut_ad(!dict_index_is_spatial(index)); + auto& chain = buf_pool.page_hash.cell_get( + page_id.fold()); if (!row_purge_poss_sec(cursor->purge_node, index, tuple)) { @@ -1676,15 +1681,12 @@ retry_page_get: cursor->flag = BTR_CUR_DELETE_IBUF; } else { /* The purge could not be buffered. */ - buf_pool.watch_unset(page_id); + buf_pool.watch_unset(page_id, chain); break; } - buf_pool.watch_unset(page_id); + buf_pool.watch_unset(page_id, chain); goto func_exit; - - default: - ut_error; } /* Insert to the insert/delete buffer did not succeed, we @@ -6743,11 +6745,10 @@ static void btr_blob_free(buf_block_t *block, bool all, mtr_t *mtr) ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); mtr->commit(); - const ulint fold= page_id.fold(); - + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); mysql_mutex_lock(&buf_pool.mutex); - if (buf_page_t *bpage= buf_pool.page_hash_get_low(page_id, fold)) + if (buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain)) if (!buf_LRU_free_page(bpage, all) && all && bpage->zip.data) /* Attempt to deallocate the redundant copy of the uncompressed page if the whole ROW_FORMAT=COMPRESSED block cannot be deallocted. 
*/ diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index b95fbbe694a..c7a13181590 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -1090,15 +1090,16 @@ fail: buf_block_t* block = buf_pool.block_from_ahi(rec); if (!ahi_latch) { - page_hash_latch* hash_lock = buf_pool.hash_lock_get( - block->page.id()); - hash_lock->read_lock(); + buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get( + block->page.id().fold()); + page_hash_latch&hash_lock = buf_pool.page_hash.lock_get(chain); + hash_lock.read_lock(); if (block->page.state() == BUF_BLOCK_REMOVE_HASH) { /* Another thread is just freeing the block from the LRU list of the buffer pool: do not try to access this page. */ - hash_lock->read_unlock(); + hash_lock.read_unlock(); goto fail; } @@ -1109,7 +1110,7 @@ fail: DBUG_ASSERT(fail || block->page.status != buf_page_t::FREED); buf_block_buf_fix_inc(block); - hash_lock->read_unlock(); + hash_lock.read_unlock(); block->page.set_accessed(); buf_page_make_young_if_needed(&block->page); @@ -2209,8 +2210,9 @@ btr_search_hash_table_validate(ulint hash_table_id) assertion and the comment below) */ const page_id_t id(block->page.id()); if (const buf_page_t* hash_page - = buf_pool.page_hash_get_low( - id, id.fold())) { + = buf_pool.page_hash.get( + id, buf_pool.page_hash.cell_get( + id.fold()))) { ut_ad(hash_page == &block->page); goto state_ok; } diff --git a/storage/innobase/buf/buf0block_hint.cc b/storage/innobase/buf/buf0block_hint.cc index f9581bc7b5d..9fac76b77a5 100644 --- a/storage/innobase/buf/buf0block_hint.cc +++ b/storage/innobase/buf/buf0block_hint.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2020, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2020, MariaDB Corporation. +Copyright (c) 2020, 2021, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, as published by the @@ -46,14 +46,15 @@ void Block_hint::buffer_fix_block_if_still_valid() validate m_block->state() to ensure that the block is not being freed. */ if (m_block) { - const ulint fold= m_page_id.fold(); - page_hash_latch *hash_lock= buf_pool.page_hash.lock(fold); + auto &cell= buf_pool.page_hash.cell_get(m_page_id.fold()); + page_hash_latch &latch= buf_pool.page_hash.lock_get(cell); + latch.read_lock(); if (buf_pool.is_uncompressed(m_block) && m_page_id == m_block->page.id() && m_block->page.state() == BUF_BLOCK_FILE_PAGE) buf_block_buf_fix_inc(m_block); else clear(); - hash_lock->read_unlock(); + latch.read_unlock(); } } } // namespace buf diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc index f822adc3389..1e5cff4959f 100644 --- a/storage/innobase/buf/buf0buddy.cc +++ b/storage/innobase/buf/buf0buddy.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2020, MariaDB Corporation. +Copyright (c) 2018, 2021, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -499,9 +499,10 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force) ut_ad(space != BUF_BUDDY_STAMP_FREE); const page_id_t page_id(space, offset); - const ulint fold= page_id.fold(); + /* FIXME: we are computing this while holding buf_pool.mutex */ + auto &cell= buf_pool.page_hash.cell_get(page_id.fold()); - bpage = buf_pool.page_hash_get_low(page_id, fold); + bpage = buf_pool.page_hash.get(page_id, cell); if (!bpage || bpage->zip.data != src) { /* The block has probably been freshly @@ -546,8 +547,8 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force) return false; } - page_hash_latch *hash_lock = buf_pool.page_hash.lock_get(fold); - hash_lock->write_lock(); + page_hash_latch &hash_lock = buf_pool.page_hash.lock_get(cell); + hash_lock.write_lock(); if (bpage->can_relocate()) { /* Relocate the compressed page. */ @@ -558,7 +559,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force) memcpy(dst, src, size); bpage->zip.data = reinterpret_cast(dst); - hash_lock->write_unlock(); + hash_lock.write_unlock(); buf_buddy_mem_invalid( reinterpret_cast(src), i); @@ -569,7 +570,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force) return(true); } - hash_lock->write_unlock(); + hash_lock.write_unlock(); return(false); } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index f4689611cbe..64aa6f873cc 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1147,7 +1147,7 @@ void buf_pool_t::page_hash_table::create(ulint n) const size_t size= pad(n_cells) * sizeof *array; void* v= aligned_malloc(size, CPU_LEVEL1_DCACHE_LINESIZE); memset(v, 0, size); - array= static_cast(v); + array= static_cast(v); } /** Create the buffer pool. 
@@ -1336,9 +1336,10 @@ inline bool buf_pool_t::realloc(buf_block_t *block) return(false); /* free list was not enough */ } - const page_id_t id(block->page.id()); - page_hash_latch* hash_lock = hash_lock_get(id); - hash_lock->write_lock(); + const page_id_t id{block->page.id()}; + hash_chain& chain = page_hash.cell_get(id.fold()); + page_hash_latch& hash_lock = page_hash.lock_get(chain); + hash_lock.write_lock(); if (block->page.can_relocate()) { memcpy_aligned( @@ -1382,14 +1383,10 @@ inline bool buf_pool_t::realloc(buf_block_t *block) } /* relocate page_hash */ - ut_ad(block->page.in_page_hash); - ut_ad(new_block->page.in_page_hash); - const ulint fold = id.fold(); - ut_ad(&block->page == page_hash_get_low(id, fold)); - ut_d(block->page.in_page_hash = false); - HASH_REPLACE(buf_page_t, hash, &page_hash, fold, - &block->page, &new_block->page); - + hash_chain& chain = page_hash.cell_get(id.fold()); + ut_ad(&block->page == page_hash.get(id, chain)); + buf_pool.page_hash.replace(chain, &block->page, + &new_block->page); buf_block_modify_clock_inc(block); static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); memset_aligned<4>(block->frame + FIL_PAGE_OFFSET, 0xff, 4); @@ -1424,7 +1421,7 @@ inline bool buf_pool_t::realloc(buf_block_t *block) new_block = block; } - hash_lock->write_unlock(); + hash_lock.write_unlock(); buf_LRU_block_free_non_file_page(new_block); return(true); /* free_list was enough */ } @@ -2049,13 +2046,14 @@ The caller must relocate bpage->list. 
@param dpage destination control block */ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage) { - const ulint fold= bpage->id().fold(); + const page_id_t id= bpage->id(); + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold()); ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE); mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(buf_pool.hash_lock_get(bpage->id())->is_write_locked()); + ut_ad(buf_pool.page_hash.lock_get(chain).is_write_locked()); ut_a(bpage->io_fix() == BUF_IO_NONE); ut_a(!bpage->buf_fix_count()); - ut_ad(bpage == buf_pool.page_hash_get_low(bpage->id(), fold)); + ut_ad(bpage == buf_pool.page_hash.get(id, chain)); ut_ad(!buf_pool.watch_is_sentinel(*bpage)); ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE); @@ -2090,29 +2088,24 @@ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage) ut_d(CheckInLRUList::validate()); - /* relocate buf_pool.page_hash */ - ut_ad(bpage->in_page_hash); - ut_ad(dpage->in_page_hash); - ut_d(bpage->in_page_hash= false); - HASH_REPLACE(buf_page_t, hash, &buf_pool.page_hash, fold, bpage, dpage); + buf_pool.page_hash.replace(chain, bpage, dpage); } /** Register a watch for a page identifier. The caller must hold an exclusive page hash latch. The *hash_lock may be released, relocated, and reacquired. 
@param id page identifier -@param hash_lock exclusively held page_hash latch +@param chain hash table chain with exclusively held page_hash @return a buffer pool block corresponding to id @retval nullptr if the block was not present, and a watch was installed */ inline buf_page_t *buf_pool_t::watch_set(const page_id_t id, - page_hash_latch **hash_lock) + buf_pool_t::hash_chain &chain) { - const ulint fold= id.fold(); - ut_ad(*hash_lock == page_hash.lock_get(fold)); - ut_ad((*hash_lock)->is_write_locked()); + ut_ad(&chain == &page_hash.cell_get(id.fold())); + ut_ad(page_hash.lock_get(chain).is_write_locked()); retry: - if (buf_page_t *bpage= page_hash_get_low(id, fold)) + if (buf_page_t *bpage= page_hash.get(id, chain)) { if (!watch_is_sentinel(*bpage)) /* The page was loaded meanwhile. */ @@ -2122,7 +2115,7 @@ retry: return nullptr; } - (*hash_lock)->write_unlock(); + page_hash.lock_get(chain).write_unlock(); /* Allocate a watch[] and then try to insert it into the page_hash. */ mysql_mutex_lock(&mutex); @@ -2142,28 +2135,23 @@ retry: ut_ad(!w->buf_fix_count()); /* w is pointing to watch[], which is protected by mutex. Normally, buf_page_t::id for objects that are reachable by - page_hash_get_low(id, fold) are protected by hash_lock. */ + page_hash.get(id, chain) are protected by hash_lock. 
*/ w->set_state(BUF_BLOCK_ZIP_PAGE); w->id_= id; - *hash_lock= page_hash.lock_get(fold); - - buf_page_t *bpage= page_hash_get_low(id, fold); + buf_page_t *bpage= page_hash.get(id, chain); if (UNIV_LIKELY_NULL(bpage)) { w->set_state(BUF_BLOCK_NOT_USED); - *hash_lock= page_hash.lock_get(fold); - (*hash_lock)->write_lock(); + page_hash.lock_get(chain).write_lock(); mysql_mutex_unlock(&mutex); goto retry; } - (*hash_lock)->write_lock(); + page_hash.lock_get(chain).write_lock(); ut_ad(!w->buf_fix_count_); w->buf_fix_count_= 1; - ut_ad(!w->in_page_hash); - ut_d(w->in_page_hash= true); - HASH_INSERT(buf_page_t, hash, &page_hash, fold, w); + buf_pool.page_hash.append(chain, w); mysql_mutex_unlock(&mutex); return nullptr; } @@ -2175,43 +2163,40 @@ retry: /** Stop watching whether a page has been read in. watch_set(id) must have returned nullptr before. -@param id page identifier */ -void buf_pool_t::watch_unset(const page_id_t id) +@param id page identifier +@param chain unlocked hash table chain */ +void buf_pool_t::watch_unset(const page_id_t id, buf_pool_t::hash_chain &chain) { mysql_mutex_assert_not_owner(&mutex); - const ulint fold= id.fold(); - page_hash_latch *hash_lock= page_hash.lock(fold); + page_hash_latch &hash_lock= page_hash.lock_get(chain); + hash_lock.write_lock(); /* The page must exist because watch_set() increments buf_fix_count. */ - buf_page_t *w= page_hash_get_low(id, fold); + buf_page_t *w= page_hash.get(id, chain); const auto buf_fix_count= w->buf_fix_count(); ut_ad(buf_fix_count); const bool must_remove= buf_fix_count == 1 && watch_is_sentinel(*w); - ut_ad(w->in_page_hash); if (!must_remove) w->unfix(); - hash_lock->write_unlock(); + hash_lock.write_unlock(); if (must_remove) { const auto old= w; /* The following is based on buf_pool_t::watch_remove(). 
*/ mysql_mutex_lock(&mutex); - w= page_hash_get_low(id, fold); - page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold); - hash_lock->write_lock(); + w= page_hash.get(id, chain); + hash_lock.write_lock(); if (w->unfix() == 0 && w == old) { - ut_ad(w->in_page_hash); - ut_d(w->in_page_hash= false); - HASH_DELETE(buf_page_t, hash, &page_hash, fold, w); + page_hash.remove(chain, w); // Now that the watch is detached from page_hash, release it to watch[]. ut_ad(w->id_ == id); ut_ad(!w->buf_fix_count()); ut_ad(w->state() == BUF_BLOCK_ZIP_PAGE); w->set_state(BUF_BLOCK_NOT_USED); } - hash_lock->write_unlock(); mysql_mutex_unlock(&mutex); + hash_lock.write_unlock(); } } @@ -2233,10 +2218,11 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr) ++buf_pool.stat.n_page_gets; const page_id_t page_id(space->id, page); - const ulint fold= page_id.fold(); - page_hash_latch *hash_lock= buf_pool.page_hash.lock(fold); + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); + page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); + hash_lock.read_lock(); if (buf_block_t *block= reinterpret_cast - (buf_pool.page_hash_get_low(page_id, fold))) + (buf_pool.page_hash.get(page_id, chain))) { if (block->page.state() != BUF_BLOCK_FILE_PAGE) /* FIXME: convert, but avoid buf_zip_decompress() */; @@ -2244,7 +2230,7 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr) { buf_block_buf_fix_inc(block); ut_ad(block->page.buf_fix_count()); - hash_lock->read_unlock(); + hash_lock.read_unlock(); mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); block->lock.x_lock(); @@ -2254,7 +2240,7 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr) } } - hash_lock->read_unlock(); + hash_lock.read_unlock(); } /** Get read access to a compressed page (usually of type @@ -2274,16 +2260,18 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size) ++buf_pool.stat.n_page_gets; bool discard_attempted= false; - const ulint fold= 
page_id.fold(); + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); + page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); buf_page_t *bpage; - page_hash_latch *hash_lock; for (;;) { lookup: - bpage= buf_pool.page_hash_get_locked(page_id, fold, &hash_lock); + hash_lock.read_lock(); + bpage= buf_pool.page_hash.get(page_id, chain); if (bpage) break; + hash_lock.read_unlock(); dberr_t err= buf_read_page(page_id, zip_size); @@ -2299,13 +2287,11 @@ lookup: #endif /* UNIV_DEBUG */ } - ut_ad(hash_lock->is_locked()); - if (!bpage->zip.data) { /* There is no compressed page. */ err_exit: - hash_lock->read_unlock(); + hash_lock.read_unlock(); return nullptr; } @@ -2317,9 +2303,9 @@ err_exit: if (!discard_attempted) { discard_attempted= true; - hash_lock->read_unlock(); + hash_lock.read_unlock(); mysql_mutex_lock(&buf_pool.mutex); - if (buf_page_t *bpage= buf_pool.page_hash_get_low(page_id, fold)) + if (buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain)) buf_LRU_free_page(bpage, false); mysql_mutex_unlock(&buf_pool.mutex); goto lookup; @@ -2337,7 +2323,7 @@ err_exit: got_block: bool must_read= bpage->io_fix() == BUF_IO_READ; - hash_lock->read_unlock(); + hash_lock.read_unlock(); DBUG_ASSERT(bpage->status != buf_page_t::FREED); @@ -2521,7 +2507,6 @@ buf_page_get_low( buf_block_t* block; unsigned access_time; ulint retries = 0; - const ulint fold = page_id.fold(); ut_ad((mtr == NULL) == (mode == BUF_EVICT_IF_IN_POOL)); ut_ad(!mtr || mtr->is_active()); @@ -2572,57 +2557,53 @@ buf_page_get_low( || ibuf_page_low(page_id, zip_size, FALSE, NULL)); ++buf_pool.stat.n_page_gets; + + auto& chain= buf_pool.page_hash.cell_get(page_id.fold()); + page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); loop: buf_block_t* fix_block; - block = guess; - - page_hash_latch* hash_lock = buf_pool.page_hash.lock(fold); - - if (block) { + hash_lock.read_lock(); + + /* If the guess is a compressed page descriptor that + has been allocated by 
buf_page_alloc_descriptor(), + it may have been freed by buf_relocate(). */ + + if (guess && buf_pool.is_uncompressed(guess) + && page_id == guess->page.id() + && guess->page.state() == BUF_BLOCK_FILE_PAGE) { + ut_ad(!guess->page.in_zip_hash); + block = guess; + goto have_block; + } - /* If the guess is a compressed page descriptor that - has been allocated by buf_page_alloc_descriptor(), - it may have been freed by buf_relocate(). */ + guess = nullptr; + block = reinterpret_cast( + buf_pool.page_hash.get(page_id, chain)); - if (!buf_pool.is_uncompressed(block) - || page_id != block->page.id() - || block->page.state() != BUF_BLOCK_FILE_PAGE) { - /* Our guess was bogus or things have changed - since. */ - guess = nullptr; - goto lookup; - } else { - ut_ad(!block->page.in_zip_hash); - } + if (block && !buf_pool.watch_is_sentinel(block->page)) { +have_block: + fix_block = block; } else { -lookup: - block = reinterpret_cast( - buf_pool.page_hash_get_low(page_id, fold)); - } + hash_lock.read_unlock(); + fix_block = block = nullptr; - if (!block || buf_pool.watch_is_sentinel(block->page)) { - hash_lock->read_unlock(); - block = nullptr; - } - - if (UNIV_UNLIKELY(!block)) { /* Page not in buf_pool: needs to be read from file */ if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { - hash_lock = buf_pool.page_hash.lock(fold); + hash_lock.write_lock(); - if (buf_page_t *bpage= buf_pool.watch_set( - page_id, &hash_lock)) { + if (buf_page_t *bpage= buf_pool.watch_set(page_id, + chain)) { /* We can release hash_lock after we increment the fix count to make sure that no state change takes place. 
*/ bpage->fix(); - hash_lock->write_unlock(); + hash_lock.write_unlock(); block = reinterpret_cast(bpage); fix_block = block; goto got_block; } - hash_lock->write_unlock(); + hash_lock.write_unlock(); } switch (mode) { @@ -2714,12 +2695,10 @@ lookup: if (!(++buf_dbg_counter % 5771)) buf_pool.validate(); #endif /* UNIV_DEBUG */ goto loop; - } else { - fix_block = block; } fix_block->fix(); - hash_lock->read_unlock(); + hash_lock.read_unlock(); got_block: switch (mode) { @@ -2811,12 +2790,10 @@ evict_from_pool: buf_block_init_low(block); mysql_mutex_lock(&buf_pool.mutex); - hash_lock = buf_pool.page_hash.lock_get(fold); - - hash_lock->write_lock(); + hash_lock.write_lock(); /* Buffer-fixing prevents the page_hash from changing. */ - ut_ad(bpage == buf_pool.page_hash_get_low(page_id, fold)); + ut_ad(bpage == buf_pool.page_hash.get(page_id, chain)); fix_block->unfix(); /* hash_lock protects us after this */ @@ -2827,7 +2804,7 @@ evict_from_pool: This should be extremely unlikely, for example, if buf_page_get_zip() was invoked. */ - hash_lock->write_unlock(); + hash_lock.write_unlock(); buf_LRU_block_free_non_file_page(block); mysql_mutex_unlock(&buf_pool.mutex); @@ -2866,7 +2843,7 @@ evict_from_pool: MEM_UNDEFINED(bpage, sizeof *bpage); mysql_mutex_unlock(&buf_pool.mutex); - hash_lock->write_unlock(); + hash_lock.write_unlock(); buf_pool.n_pend_unzip++; access_time = block->page.is_accessed(); @@ -2923,17 +2900,16 @@ re_evict: space->release(); if (evicted) { - hash_lock = buf_pool.page_hash.lock_get(fold); - hash_lock->write_lock(); + hash_lock.write_lock(); mysql_mutex_unlock(&buf_pool.mutex); /* We may set the watch, as it would have been set if the page were not in the buffer pool in the first place. */ block= reinterpret_cast( mode == BUF_GET_IF_IN_POOL_OR_WATCH - ? buf_pool.watch_set(page_id, &hash_lock) - : buf_pool.page_hash_get_low(page_id, fold)); - hash_lock->write_unlock(); + ? 
buf_pool.watch_set(page_id, chain) + : buf_pool.page_hash.get(page_id, chain)); + hash_lock.write_unlock(); if (block != NULL) { /* Either the page has been read in or @@ -3114,20 +3090,20 @@ buf_page_optimistic_get( return FALSE; } - const page_id_t id(block->page.id()); - - page_hash_latch *hash_lock = buf_pool.hash_lock_get(id); - hash_lock->read_lock(); + const page_id_t id{block->page.id()}; + buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get(id.fold()); + page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); + hash_lock.read_lock(); if (UNIV_UNLIKELY(id != block->page.id() || block->page.state() != BUF_BLOCK_FILE_PAGE || block->page.io_fix() != BUF_IO_NONE)) { - hash_lock->read_unlock(); + hash_lock.read_unlock(); return(FALSE); } buf_block_buf_fix_inc(block); - hash_lock->read_unlock(); + hash_lock.read_unlock(); block->page.set_accessed(); @@ -3194,21 +3170,19 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr) ut_ad(mtr); ut_ad(mtr->is_active()); - page_hash_latch *hash_lock; - buf_page_t *bpage= buf_pool.page_hash_get_locked(page_id, - page_id.fold(), - &hash_lock); - if (!bpage) - return nullptr; - if (bpage->state() != BUF_BLOCK_FILE_PAGE) + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); + page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); + hash_lock.read_lock(); + buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain); + if (!bpage || bpage->state() != BUF_BLOCK_FILE_PAGE) { - hash_lock->read_unlock(); + hash_lock.read_unlock(); return nullptr; } buf_block_t *block= reinterpret_cast(bpage); buf_block_buf_fix_inc(block); - hash_lock->read_unlock(); + hash_lock.read_unlock(); if (!block->lock.s_lock_try()) { @@ -3250,12 +3224,12 @@ static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size, free_block->initialise(page_id, zip_size, 1); - const ulint fold= page_id.fold(); + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); 
mysql_mutex_lock(&buf_pool.mutex); loop: buf_block_t *block= reinterpret_cast - (buf_pool.page_hash_get_low(page_id, fold)); + (buf_pool.page_hash.get(page_id, chain)); if (block && block->page.in_file() && !buf_pool.watch_is_sentinel(block->page)) @@ -3294,11 +3268,11 @@ loop: #endif break; case BUF_BLOCK_ZIP_PAGE: - page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold); - hash_lock->write_lock(); + page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); + hash_lock.write_lock(); if (block->page.io_fix() != BUF_IO_NONE) { - hash_lock->write_unlock(); + hash_lock.write_unlock(); /* Wait for buf_page_write_complete() to release the I/O fix. */ timespec abstime; set_timespec_nsec(abstime, 1000000); @@ -3315,7 +3289,7 @@ loop: free_block->page.set_state(BUF_BLOCK_FILE_PAGE); buf_unzip_LRU_add_block(free_block, FALSE); - hash_lock->write_unlock(); + hash_lock.write_unlock(); buf_page_free_descriptor(&block->page); block= free_block; buf_block_buf_fix_inc(block); @@ -3351,11 +3325,10 @@ loop: /* The block must be put to the LRU list */ buf_LRU_add_block(&block->page, false); - page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold); - hash_lock->write_lock(); + page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); + hash_lock.write_lock(); block->page.set_state(BUF_BLOCK_FILE_PAGE); - ut_d(block->page.in_page_hash= true); - HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, &block->page); + buf_pool.page_hash.append(chain, &block->page); block->lock.x_lock(); if (UNIV_UNLIKELY(zip_size)) @@ -3364,7 +3337,7 @@ loop: release and reacquire buf_pool.mutex, by IO-fixing and X-latching the block. */ block->page.set_io_fix(BUF_IO_READ); - hash_lock->write_unlock(); + hash_lock.write_unlock(); /* buf_pool.mutex may be released and reacquired by buf_buddy_alloc(). 
We must defer this operation until @@ -3381,7 +3354,7 @@ loop: block->page.set_io_fix(BUF_IO_NONE); } else - hash_lock->write_unlock(); + hash_lock.write_unlock(); mysql_mutex_unlock(&buf_pool.mutex); @@ -3564,32 +3537,6 @@ static void buf_mark_space_corrupt(buf_page_t* bpage, const fil_space_t& space) } } -/** Release and evict a corrupted page. -@param bpage page that was being read */ -ATTRIBUTE_COLD void buf_pool_t::corrupted_evict(buf_page_t *bpage) -{ - const page_id_t id(bpage->id()); - page_hash_latch *hash_lock= hash_lock_get(id); - - mysql_mutex_lock(&mutex); - hash_lock->write_lock(); - - ut_ad(bpage->io_fix() == BUF_IO_READ); - ut_ad(!bpage->oldest_modification()); - bpage->set_corrupt_id(); - - if (bpage->state() == BUF_BLOCK_FILE_PAGE) - reinterpret_cast(bpage)->lock.x_unlock(true); - bpage->io_unfix(); - - /* remove from LRU and page_hash */ - buf_LRU_free_one_page(bpage, id, hash_lock); - mysql_mutex_unlock(&mutex); - - ut_d(auto n=) n_pend_reads--; - ut_ad(n > 0); -} - /** Mark a table corrupted. 
@param[in] bpage Corrupted page @param[in] node data file @@ -3955,7 +3902,8 @@ void buf_pool_t::validate() case BUF_BLOCK_FILE_PAGE: const page_id_t id = block->page.id(); - ut_ad(page_hash_get_low(id, id.fold()) + ut_ad(page_hash.get(id, page_hash.cell_get( + id.fold())) == &block->page); n_lru++; break; @@ -3988,7 +3936,7 @@ void buf_pool_t::validate() break; } const page_id_t id = b->id(); - ut_ad(page_hash_get_low(id, id.fold()) == b); + ut_ad(page_hash.get(id, page_hash.cell_get(id.fold())) == b); } ut_ad(UT_LIST_GET_LEN(flush_list) == n_flushing); diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index db546e287b4..1f720989f6f 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -947,7 +947,9 @@ static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, bool lru) mysql_mutex_assert_owner(&buf_pool.mutex); ut_ad(fold == id.fold()); - buf_page_t *bpage= buf_pool.page_hash_get_low(id, fold); + /* FIXME: cell_get() is being invoked while holding buf_pool.mutex */ + const buf_page_t *bpage= + buf_pool.page_hash.get(id, buf_pool.page_hash.cell_get(fold)); if (!bpage || buf_pool.watch_is_sentinel(*bpage)) return false; @@ -1107,9 +1109,10 @@ static ulint buf_flush_try_neighbors(fil_space_t *space, id_fold= id.fold(); } + const buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id_fold); mysql_mutex_lock(&buf_pool.mutex); - if (buf_page_t *bpage= buf_pool.page_hash_get_low(id, id_fold)) + if (buf_page_t *bpage= buf_pool.page_hash.get(id, chain)) { ut_ad(bpage->in_file()); /* We avoid flushing 'non-old' blocks in an LRU flush, diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index a5b3cc72fc7..2b02633cbb9 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -113,7 +113,7 @@ the object will be freed. 
@param bpage buffer block @param id page identifier -@param hash_lock buf_pool.page_hash latch (will be released here) +@param chain locked buf_pool.page_hash chain (will be released here) @param zip whether bpage->zip of BUF_BLOCK_FILE_PAGE should be freed If a compressed page is freed other compressed pages may be relocated. @@ -122,7 +122,8 @@ caller needs to free the page to the free list @retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In this case the block is already returned to the buddy allocator. */ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, - page_hash_latch *hash_lock, bool zip); + buf_pool_t::hash_chain &chain, + bool zip); /** Free a block to buf_pool */ static void buf_LRU_block_free_hashed_page(buf_block_t *block) @@ -807,9 +808,9 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) /* We must hold an exclusive hash_lock to prevent bpage->can_relocate() from changing due to a concurrent execution of buf_page_get_low(). 
*/ - const ulint fold = id.fold(); - page_hash_latch* hash_lock = buf_pool.page_hash.lock_get(fold); - hash_lock->write_lock(); + buf_pool_t::hash_chain& chain= buf_pool.page_hash.cell_get(id.fold()); + page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); + hash_lock.write_lock(); lsn_t oldest_modification = bpage->oldest_modification_acquire(); if (UNIV_UNLIKELY(!bpage->can_relocate())) { @@ -839,7 +840,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) } else if (oldest_modification && bpage->state() != BUF_BLOCK_FILE_PAGE) { func_exit: - hash_lock->write_unlock(); + hash_lock.write_unlock(); return(false); } else if (bpage->state() == BUF_BLOCK_FILE_PAGE) { @@ -859,7 +860,7 @@ func_exit: ut_ad(bpage->can_relocate()); - if (!buf_LRU_block_remove_hashed(bpage, id, hash_lock, zip)) { + if (!buf_LRU_block_remove_hashed(bpage, id, chain, zip)) { ut_ad(!b); mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); return(true); @@ -875,7 +876,7 @@ func_exit: if (UNIV_LIKELY_NULL(b)) { buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b); - ut_ad(!buf_pool.page_hash_get_low(id, fold)); + ut_ad(!buf_pool.page_hash.get(id, chain)); ut_ad(b->zip_size()); /* The field in_LRU_list of @@ -894,8 +895,10 @@ func_exit: ut_ad(!b->in_zip_hash); ut_ad(b->in_LRU_list); ut_ad(b->in_page_hash); + ut_d(b->in_page_hash = false); + b->hash = nullptr; - HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, b); + buf_pool.page_hash.append(chain, b); /* Insert b where bpage was in the LRU list. */ if (prev_b) { @@ -951,9 +954,9 @@ func_exit: decompressing the block while we release hash_lock. */ b->set_io_fix(BUF_IO_PIN); - hash_lock->write_unlock(); + hash_lock.write_unlock(); } else if (!zip) { - hash_lock->write_unlock(); + hash_lock.write_unlock(); } buf_block_t* block = reinterpret_cast(bpage); @@ -1063,7 +1066,7 @@ the object will be freed. 
@param bpage buffer block @param id page identifier -@param hash_lock buf_pool.page_hash latch (will be released here) +@param chain locked buf_pool.page_hash chain (will be released here) @param zip whether bpage->zip of BUF_BLOCK_FILE_PAGE should be freed If a compressed page is freed other compressed pages may be relocated. @@ -1072,10 +1075,11 @@ caller needs to free the page to the free list @retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In this case the block is already returned to the buddy allocator. */ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, - page_hash_latch *hash_lock, bool zip) + buf_pool_t::hash_chain &chain, + bool zip) { mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(hash_lock->is_write_locked()); + ut_ad(buf_pool.page_hash.lock_get(chain).is_write_locked()); ut_a(bpage->io_fix() == BUF_IO_NONE); ut_a(!bpage->buf_fix_count()); @@ -1155,7 +1159,8 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, } ut_ad(!bpage->in_zip_hash); - HASH_DELETE(buf_page_t, hash, &buf_pool.page_hash, id.fold(), bpage); + buf_pool.page_hash.remove(chain, bpage); + page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); switch (bpage->state()) { case BUF_BLOCK_ZIP_PAGE: @@ -1165,7 +1170,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, ut_a(bpage->zip.ssize); ut_ad(!bpage->oldest_modification()); - hash_lock->write_unlock(); + hash_lock.write_unlock(); buf_pool_mutex_exit_forbid(); buf_buddy_free(bpage->zip.data, bpage->zip_size()); @@ -1209,7 +1214,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, and by the time we'll release it in the caller we'd have inserted the compressed only descriptor in the page_hash. */ - hash_lock->write_unlock(); + hash_lock.write_unlock(); if (bpage->zip.data) { /* Free the compressed page. 
*/ @@ -1240,20 +1245,38 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, return(false); } -/** Remove one page from LRU list and put it to free list. -@param bpage file page to be freed -@param id page identifier -@param hash_lock buf_pool.page_hash latch (will be released here) */ -void buf_LRU_free_one_page(buf_page_t *bpage, const page_id_t id, - page_hash_latch *hash_lock) +/** Release and evict a corrupted page. +@param bpage page that was being read */ +ATTRIBUTE_COLD void buf_pool_t::corrupted_evict(buf_page_t *bpage) { + const page_id_t id(bpage->id()); + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold()); + page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); + + mysql_mutex_lock(&mutex); + hash_lock.write_lock(); + + ut_ad(bpage->io_fix() == BUF_IO_READ); + ut_ad(!bpage->oldest_modification()); + bpage->set_corrupt_id(); + bpage->io_unfix(); + + if (bpage->state() == BUF_BLOCK_FILE_PAGE) + reinterpret_cast(bpage)->lock.x_unlock(true); + while (bpage->buf_fix_count()) /* Wait for other threads to release the fix count before releasing the bpage from LRU list. */ (void) LF_BACKOFF(); - if (buf_LRU_block_remove_hashed(bpage, id, hash_lock, true)) + /* remove from LRU and page_hash */ + if (buf_LRU_block_remove_hashed(bpage, id, chain, true)) buf_LRU_block_free_hashed_page(reinterpret_cast(bpage)); + + mysql_mutex_unlock(&mutex); + + ut_d(auto n=) n_pend_reads--; + ut_ad(n > 0); } /** Update buf_pool.LRU_old_ratio. diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 13e1a35f08a..55e5e4afba1 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -50,17 +50,17 @@ i/o-fixed buffer blocks */ /** Remove the sentinel block for the watch before replacing it with a real block. watch_unset() or watch_occurred() will notice that the block has been replaced with the real block. 
-@param watch sentinel */ -inline void buf_pool_t::watch_remove(buf_page_t *watch) +@param watch sentinel +@param chain locked hash table chain */ +inline void buf_pool_t::watch_remove(buf_page_t *watch, + buf_pool_t::hash_chain &chain) { mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(hash_lock_get(watch->id())->is_write_locked()); + ut_ad(page_hash.lock_get(chain).is_write_locked()); ut_a(watch_is_sentinel(*watch)); if (watch->buf_fix_count()) { - ut_ad(watch->in_page_hash); - ut_d(watch->in_page_hash= false); - HASH_DELETE(buf_page_t, hash, &page_hash, watch->id().fold(), watch); + page_hash.remove(chain, watch); watch->set_buf_fix_count(0); } ut_ad(!watch->in_page_hash); @@ -114,11 +114,12 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, block->lock.x_lock(true); } - const ulint fold= page_id.fold(); + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); + page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); mysql_mutex_lock(&buf_pool.mutex); - buf_page_t *hash_page= buf_pool.page_hash_get_low(page_id, fold); + buf_page_t *hash_page= buf_pool.page_hash.get(page_id, chain); if (hash_page && !buf_pool.watch_is_sentinel(*hash_page)) { /* The page is already in the buffer pool. 
*/ @@ -135,8 +136,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, bpage= &block->page; /* Insert into the hash table of file pages */ - page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold); - hash_lock->write_lock(); + hash_lock.write_lock(); if (hash_page) { @@ -144,18 +144,16 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, auto buf_fix_count= hash_page->buf_fix_count(); ut_a(buf_fix_count > 0); block->page.add_buf_fix_count(buf_fix_count); - buf_pool.watch_remove(hash_page); + buf_pool.watch_remove(hash_page, chain); } block->page.set_io_fix(BUF_IO_READ); block->page.set_state(BUF_BLOCK_FILE_PAGE); - ut_ad(!block->page.in_page_hash); - ut_d(block->page.in_page_hash= true); - HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, bpage); - hash_lock->write_unlock(); + buf_pool.page_hash.append(chain, &block->page); + hash_lock.write_unlock(); /* The block must be put to the LRU list, to the old blocks */ - buf_LRU_add_block(bpage, true/* to old blocks */); + buf_LRU_add_block(&block->page, true/* to old blocks */); if (UNIV_UNLIKELY(zip_size)) { @@ -188,7 +186,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, check the page_hash again, as it may have been modified. */ if (UNIV_UNLIKELY(lru)) { - hash_page= buf_pool.page_hash_get_low(page_id, fold); + hash_page= buf_pool.page_hash.get(page_id, chain); if (UNIV_UNLIKELY(hash_page && !buf_pool.watch_is_sentinel(*hash_page))) { @@ -206,8 +204,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, bpage->init(BUF_BLOCK_ZIP_PAGE, page_id); - page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold); - hash_lock->write_lock(); + hash_lock.write_lock(); if (hash_page) { @@ -215,14 +212,12 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, buf_pool_t::watch_unset() is executing concurrently, waiting for buf_pool.mutex, which we are holding. 
*/ bpage->add_buf_fix_count(hash_page->buf_fix_count()); - buf_pool.watch_remove(hash_page); + buf_pool.watch_remove(hash_page, chain); } - ut_ad(!bpage->in_page_hash); - ut_d(bpage->in_page_hash= true); - HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, bpage); + buf_pool.page_hash.append(chain, bpage); bpage->set_io_fix(BUF_IO_READ); - hash_lock->write_unlock(); + hash_lock.write_unlock(); /* The block must be put to the LRU list, to the old blocks. The zip size is already set into the page zip */ @@ -408,11 +403,12 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf) for (page_id_t i= low; i < high; ++i) { - const ulint fold= i.fold(); - page_hash_latch *hash_lock= buf_pool.page_hash.lock(fold); - const buf_page_t *bpage= buf_pool.page_hash_get_low(i, fold); + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold()); + page_hash_latch &latch= buf_pool.page_hash.lock_get(chain); + latch.read_lock(); + const buf_page_t *bpage= buf_pool.page_hash.get(i, chain); bool found= bpage && bpage->is_accessed() && buf_page_peek_if_young(bpage); - hash_lock->read_unlock(); + latch.read_unlock(); if (found && !--count) goto read_ahead; } @@ -608,9 +604,10 @@ fail: unsigned prev_accessed= 0; for (page_id_t i= low; i != high_1; ++i) { - const ulint fold= i.fold(); - page_hash_latch *hash_lock= buf_pool.page_hash.lock(fold); - const buf_page_t* bpage= buf_pool.page_hash_get_low(i, fold); + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold()); + page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); + hash_lock.read_lock(); + const buf_page_t* bpage= buf_pool.page_hash.get(i, chain); if (i == page_id) { /* Read the natural predecessor and successor page addresses from @@ -621,7 +618,7 @@ fail: if (!bpage) { hard_fail: - hash_lock->read_unlock(); + hash_lock.read_unlock(); goto fail; } const byte *f; @@ -661,7 +658,7 @@ hard_fail: else if (!bpage) { failed: - hash_lock->read_unlock(); + hash_lock.read_unlock(); 
if (--count) continue; goto fail; @@ -681,7 +678,7 @@ failed: prev_accessed= accessed; if (fail) goto failed; - hash_lock->read_unlock(); + hash_lock.read_unlock(); } /* If we got this far, read-ahead can be sensible: do it */ diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 2581cecddc3..ac70923b446 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -1052,10 +1052,10 @@ fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr) if (UNIV_UNLIKELY(space->is_being_truncated)) { const page_id_t page_id{space->id, offset}; - const ulint fold= page_id.fold(); + buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); mysql_mutex_lock(&buf_pool.mutex); block= reinterpret_cast - (buf_pool.page_hash_get_low(page_id, fold)); + (buf_pool.page_hash.get(page_id, chain)); if (block && block->page.oldest_modification() <= 1) block= nullptr; mysql_mutex_unlock(&buf_pool.mutex); diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index bd471dff765..4177215cca7 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -3310,7 +3310,8 @@ fail_exit: /* We check if the index page is suitable for buffered entries */ - if (buf_pool.page_hash_contains(page_id)) { + if (buf_pool.page_hash_contains( + page_id, buf_pool.page_hash.cell_get(page_id.fold()))) { commit_exit: ibuf_mtr_commit(&bitmap_mtr); goto fail_exit; @@ -3556,7 +3557,8 @@ check_watch: that the issuer of IBUF_OP_DELETE has called buf_pool_t::watch_set(). */ - if (buf_pool.page_hash_contains(page_id)) { + if (buf_pool.page_hash_contains( + page_id, buf_pool.page_hash.cell_get(page_id.fold()))) { /* A buffer pool watch has been set or the page has been read into the buffer pool. Do not buffer the request. 
If a purge operation diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index a972f18e61f..30729063069 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -36,7 +36,6 @@ Created 11/5/1995 Heikki Tuuri #include "assume_aligned.h" #include "buf0types.h" #ifndef UNIV_INNOCHECKSUM -#include "hash0hash.h" #include "ut0byte.h" #include "page0types.h" #include "log0log.h" @@ -169,30 +168,10 @@ operator<<( const page_id_t page_id); #ifndef UNIV_INNOCHECKSUM -/*********************************************************************//** -Gets the current size of buffer buf_pool in bytes. -@return size in bytes */ -UNIV_INLINE -ulint -buf_pool_get_curr_size(void); -/*========================*/ - -/********************************************************************//** -Allocates a buf_page_t descriptor. This function must succeed. In case -of failure we assert in this function. */ -UNIV_INLINE -buf_page_t* -buf_page_alloc_descriptor(void) -/*===========================*/ - MY_ATTRIBUTE((malloc)); -/********************************************************************//** -Free a buf_page_t descriptor. */ -UNIV_INLINE -void -buf_page_free_descriptor( -/*=====================*/ - buf_page_t* bpage) /*!< in: bpage descriptor to free. */ - MY_ATTRIBUTE((nonnull)); +# define buf_pool_get_curr_size() srv_buf_pool_curr_size +# define buf_page_alloc_descriptor() \ + static_cast(ut_zalloc_nokey(sizeof(buf_page_t))) +# define buf_page_free_descriptor(bpage) ut_free(bpage) /** Allocate a buffer block. @return own: the allocated block, in state BUF_BLOCK_MEMORY */ @@ -349,25 +328,6 @@ void buf_page_make_young(buf_page_t *bpage); @param[in,out] mtr mini-transaction */ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr); -/********************************************************************//** -Reads the freed_page_clock of a buffer block. 
-@return freed_page_clock */ -UNIV_INLINE -unsigned -buf_page_get_freed_page_clock( -/*==========================*/ - const buf_page_t* bpage) /*!< in: block */ - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Reads the freed_page_clock of a buffer block. -@return freed_page_clock */ -UNIV_INLINE -unsigned -buf_block_get_freed_page_clock( -/*===========================*/ - const buf_block_t* block) /*!< in: block */ - MY_ATTRIBUTE((warn_unused_result)); - /** Determine if a block is still close enough to the MRU end of the LRU list meaning that it is not in danger of getting evicted and also implying that it has been accessed recently. @@ -665,7 +625,7 @@ class buf_page_t /* @{ */ public: // FIXME: fix fil_iterate() - /** Page id. Protected by buf_pool.hash_lock_get(id) when + /** Page id. Protected by buf_pool.page_hash.lock_get() when the page is in buf_pool.page_hash. */ page_id_t id_; private: @@ -687,13 +647,13 @@ private: Atomic_relaxed io_fix_; /** Block state. @see in_file(). State transitions between in_file() states and to - BUF_BLOCK_REMOVE_HASH are protected by buf_pool.hash_lock_get(id) + BUF_BLOCK_REMOVE_HASH are protected by buf_pool.page_hash.lock_get() when the block is in buf_pool.page_hash. Other transitions when in_LRU_list are protected by buf_pool.mutex. 
*/ buf_page_state state_; public: - /** buf_pool.page_hash link; protected by buf_pool.hash_lock_get(id) */ + /** buf_pool.page_hash link; protected by buf_pool.page_hash.lock_get() */ buf_page_t *hash; /* @} */ page_zip_des_t zip; /*!< compressed page; zip.data @@ -801,7 +761,6 @@ public: ut_d(in_free_list= false); ut_d(in_LRU_list= false); ut_d(in_page_hash= false); - HASH_INVALIDATE(this, hash); } /** Initialize some more fields */ @@ -819,6 +778,7 @@ public: init(); id_= id; buf_fix_count_= buf_fix_count; + hash= nullptr; } public: @@ -1347,7 +1307,14 @@ class buf_pool_t inline const buf_block_t *not_freed() const; #endif /* UNIV_DEBUG */ }; - +public: + /** Hash cell chain in page_hash_table */ + struct hash_chain + { + /** pointer to the first block */ + buf_page_t *first; + }; +private: /** Withdraw blocks from the buffer pool until meeting withdraw_target. @return whether retry is needed */ inline bool withdraw_blocks(); @@ -1509,89 +1476,29 @@ public: return is_block_field(reinterpret_cast(block)); } - /** Get the page_hash latch for a page */ - page_hash_latch *hash_lock_get(const page_id_t id) const - { - return page_hash.lock_get(id.fold()); - } - - /** Look up a block descriptor. - @param id page identifier - @param fold id.fold() - @return block descriptor, possibly in watch[] - @retval nullptr if not found*/ - buf_page_t *page_hash_get_low(const page_id_t id, const ulint fold) - { - ut_ad(id.fold() == fold); -#ifdef SAFE_MUTEX - DBUG_ASSERT(mysql_mutex_is_owner(&mutex) || - page_hash.lock_get(fold)->is_locked()); -#endif /* SAFE_MUTEX */ - buf_page_t *bpage; - /* Look for the page in the hash table */ - HASH_SEARCH(hash, &page_hash, fold, buf_page_t*, bpage, - ut_ad(bpage->in_page_hash), id == bpage->id()); - return bpage; - } -private: - /** Look up a block descriptor. 
- @tparam exclusive whether the latch is to be acquired exclusively +public: + /** @return whether the buffer pool contains a page @tparam watch whether to allow watch_is_sentinel() @param page_id page identifier - @param fold page_id.fold() - @param hash_lock pointer to the acquired latch (to be released by caller) - @return pointer to the block - @retval nullptr if no block was found; !lock || !*lock will also hold */ - template - buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold, - page_hash_latch **hash_lock) + @param chain hash table chain for page_id.fold() */ + template + bool page_hash_contains(const page_id_t page_id, hash_chain &chain) { - ut_ad(hash_lock || !exclusive); - page_hash_latch *latch= page_hash.lock(fold); - buf_page_t *bpage= page_hash_get_low(page_id, fold); + page_hash_latch &latch= page_hash.lock_get(chain); + latch.read_lock(); + buf_page_t *bpage= page_hash.get(page_id, chain); if (!bpage || watch_is_sentinel(*bpage)) { - if (exclusive) - latch->write_unlock(); - else - latch->read_unlock(); - if (hash_lock) - *hash_lock= nullptr; + latch.read_unlock(); return watch ? bpage : nullptr; } ut_ad(bpage->in_file()); ut_ad(page_id == bpage->id()); - if (hash_lock) - *hash_lock= latch; /* to be released by the caller */ - else if (exclusive) - latch->write_unlock(); - else - latch->read_unlock(); + latch.read_unlock(); return bpage; } -public: - /** Look up a block descriptor. 
- @tparam exclusive whether the latch is to be acquired exclusively - @param page_id page identifier - @param fold page_id.fold() - @param hash_lock pointer to the acquired latch (to be released by caller) - @return pointer to the block - @retval nullptr if no block was found; !lock || !*lock will also hold */ - template - buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold, - page_hash_latch **hash_lock) - { return page_hash_get_locked(page_id, fold, hash_lock); } - - /** @return whether the buffer pool contains a page - @tparam watch whether to allow watch_is_sentinel() - @param page_id page identifier */ - template - bool page_hash_contains(const page_id_t page_id) - { - return page_hash_get_locked(page_id, page_id.fold(), nullptr); - } /** Determine if a block is a sentinel for a buffer pool watch. @param bpage page descriptor @@ -1600,7 +1507,8 @@ public: { #ifdef SAFE_MUTEX DBUG_ASSERT(mysql_mutex_is_owner(&mutex) || - hash_lock_get(bpage.id())->is_locked()); + page_hash.lock_get(page_hash.cell_get(bpage.id().fold())). + is_locked()); #endif /* SAFE_MUTEX */ ut_ad(bpage.in_file()); @@ -1622,12 +1530,13 @@ public: @return whether the page was read to the buffer pool */ bool watch_occurred(const page_id_t id) { - const ulint fold= id.fold(); - page_hash_latch *hash_lock= page_hash.lock(fold); + hash_chain &chain= page_hash.cell_get(id.fold()); + page_hash_latch &latch= page_hash.lock_get(chain); + latch.read_lock(); /* The page must exist because watch_set() increments buf_fix_count. */ - buf_page_t *bpage= page_hash_get_low(id, fold); + buf_page_t *bpage= page_hash.get(id, chain); const bool is_sentinel= watch_is_sentinel(*bpage); - hash_lock->read_unlock(); + latch.read_unlock(); return !is_sentinel; } @@ -1635,22 +1544,23 @@ public: exclusive page hash latch. The *hash_lock may be released, relocated, and reacquired. 
@param id page identifier - @param hash_lock exclusively held page_hash latch + @param chain hash table chain with exclusively held page_hash @return a buffer pool block corresponding to id @retval nullptr if the block was not present, and a watch was installed */ - inline buf_page_t *watch_set(const page_id_t id, - page_hash_latch **hash_lock); + inline buf_page_t *watch_set(const page_id_t id, hash_chain &chain); /** Stop watching whether a page has been read in. watch_set(id) must have returned nullptr before. - @param id page identifier */ - void watch_unset(const page_id_t id); + @param id page identifier + @param chain unlocked hash table chain */ + void watch_unset(const page_id_t id, hash_chain &chain); /** Remove the sentinel block for the watch before replacing it with a real block. watch_unset() or watch_occurred() will notice that the block has been replaced with the real block. - @param watch sentinel */ - inline void watch_remove(buf_page_t *watch); + @param watch sentinel + @param chain locked hash table chain */ + inline void watch_remove(buf_page_t *watch, hash_chain &chain); /** @return whether less than 1/4 of the buffer pool is available */ bool running_out() const @@ -1728,7 +1638,7 @@ public: /** read-ahead request size in pages */ Atomic_counter read_ahead_area; - /** Hash table with singly-linked overflow lists. @see hash_table_t */ + /** Hash table with singly-linked overflow lists */ struct page_hash_table { static_assert(CPU_LEVEL1_DCACHE_LINESIZE >= 64, "less than 64 bytes"); @@ -1744,7 +1654,7 @@ public: /** number of payload elements in array[] */ Atomic_relaxed n_cells; /** the hash table, with pad(n_cells) elements, aligned to L1 cache size */ - hash_cell_t *array; + hash_chain *array; /** Create the hash table. @param n the lower bound of n_cells */ @@ -1771,32 +1681,72 @@ public: { return pad(hash(fold, n_cells)); } - /** Get a page_hash latch. 
*/ - page_hash_latch *lock_get(ulint fold, ulint n) const + public: + /** @return the latch covering a hash table chain */ + static page_hash_latch &lock_get(hash_chain &chain) { static_assert(!((ELEMENTS_PER_LATCH + 1) & ELEMENTS_PER_LATCH), "must be one less than a power of 2"); - return reinterpret_cast - (&array[calc_hash(fold, n) & ~ELEMENTS_PER_LATCH]); + const size_t addr= reinterpret_cast(&chain); + ut_ad(addr & (ELEMENTS_PER_LATCH * sizeof chain)); + return *reinterpret_cast + (addr & ~(ELEMENTS_PER_LATCH * sizeof chain)); } - public: - /** Get a page_hash latch. */ - page_hash_latch *lock_get(ulint fold) const - { return lock_get(fold, n_cells); } - - /** Acquire an array latch. - @tparam exclusive whether the latch is to be acquired exclusively - @param fold hash bucket key */ - template page_hash_latch *lock(ulint fold) + + /** Get a hash table slot. */ + hash_chain &cell_get(ulint fold) const + { return array[calc_hash(fold, n_cells)]; } + + /** Append a block descriptor to a hash bucket chain. */ + void append(hash_chain &chain, buf_page_t *bpage) + { + ut_ad(!bpage->in_page_hash); + ut_ad(!bpage->hash); + ut_d(bpage->in_page_hash= true); + buf_page_t **prev= &chain.first; + while (*prev) + { + ut_ad((*prev)->in_page_hash); + prev= &(*prev)->hash; + } + *prev= bpage; + } + + /** Remove a block descriptor from a hash bucket chain. */ + void remove(hash_chain &chain, buf_page_t *bpage) + { + ut_ad(bpage->in_page_hash); + buf_page_t **prev= &chain.first; + while (*prev != bpage) + { + ut_ad((*prev)->in_page_hash); + prev= &(*prev)->hash; + } + *prev= bpage->hash; + ut_d(bpage->in_page_hash= false); + bpage->hash= nullptr; + } + + /** Replace a block descriptor with another. 
*/ + void replace(hash_chain &chain, buf_page_t *old, buf_page_t *bpage) { - page_hash_latch *latch= lock_get(fold, n_cells); - if (exclusive) - latch->write_lock(); - else - latch->read_lock(); - return latch; + ut_ad(old->in_page_hash); + ut_ad(bpage->in_page_hash); + ut_d(old->in_page_hash= false); + ut_ad(bpage->hash == old->hash); + old->hash= nullptr; + buf_page_t **prev= &chain.first; + while (*prev != old) + { + ut_ad((*prev)->in_page_hash); + prev= &(*prev)->hash; + } + *prev= bpage; } + /** Look up a page in a hash bucket chain. */ + inline buf_page_t *get(const page_id_t id, const hash_chain &chain) const; + /** Exclusively aqcuire all latches */ inline void write_lock_all(); @@ -2032,6 +1982,23 @@ private: /** The InnoDB buffer pool */ extern buf_pool_t buf_pool; +inline buf_page_t *buf_pool_t::page_hash_table::get(const page_id_t id, + const hash_chain &chain) + const +{ +#ifdef SAFE_MUTEX + DBUG_ASSERT(mysql_mutex_is_owner(&buf_pool.mutex) || + lock_get(const_cast(chain)).is_locked()); +#endif /* SAFE_MUTEX */ + for (buf_page_t *bpage= chain.first; bpage; bpage= bpage->hash) + { + ut_ad(bpage->in_page_hash); + if (bpage->id() == id) + return bpage; + } + return nullptr; +} + #ifdef SUX_LOCK_GENERIC inline void page_hash_latch::read_lock() { @@ -2070,18 +2037,17 @@ inline void buf_page_t::set_state(buf_page_state state) we are holding the hash_lock. 
*/ break; case BUF_BLOCK_MEMORY: - if (!in_file()) break; - /* fall through */ - case BUF_BLOCK_FILE_PAGE: - ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked()); break; case BUF_BLOCK_NOT_USED: - if (!in_file()) break; - /* fall through */ + break; case BUF_BLOCK_ZIP_PAGE: - ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked() || - (this >= &buf_pool.watch[0] && - this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)])); + if (this >= &buf_pool.watch[0] && + this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]) + break; + /* fall through */ + case BUF_BLOCK_FILE_PAGE: + ut_ad(buf_pool.page_hash.lock_get(buf_pool.page_hash.cell_get(id_.fold())). + is_write_locked()); break; } #endif @@ -2113,7 +2079,8 @@ inline void buf_page_t::set_corrupt_id() break; case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_FILE_PAGE: - ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked()); + ut_ad(buf_pool.page_hash.lock_get(buf_pool.page_hash.cell_get(id_.fold())). + is_write_locked()); break; case BUF_BLOCK_NOT_USED: case BUF_BLOCK_MEMORY: diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic index 13eda113a21..30fd0b2b1f9 100644 --- a/storage/innobase/include/buf0buf.ic +++ b/storage/innobase/include/buf0buf.ic @@ -2,7 +2,7 @@ Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2014, 2020, MariaDB Corporation. +Copyright (c) 2014, 2021, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -37,42 +37,6 @@ Created 11/5/1995 Heikki Tuuri #include "buf0rea.h" #include "fsp0types.h" -/*********************************************************************//** -Gets the current size of buffer buf_pool in bytes. 
-@return size in bytes */ -UNIV_INLINE -ulint -buf_pool_get_curr_size(void) -/*========================*/ -{ - return(srv_buf_pool_curr_size); -} - -/********************************************************************//** -Reads the freed_page_clock of a buffer block. -@return freed_page_clock */ -UNIV_INLINE -unsigned -buf_page_get_freed_page_clock( -/*==========================*/ - const buf_page_t* bpage) /*!< in: block */ -{ - /* This is sometimes read without holding buf_pool.mutex. */ - return(bpage->freed_page_clock); -} - -/********************************************************************//** -Reads the freed_page_clock of a buffer block. -@return freed_page_clock */ -UNIV_INLINE -unsigned -buf_block_get_freed_page_clock( -/*===========================*/ - const buf_block_t* block) /*!< in: block */ -{ - return(buf_page_get_freed_page_clock(&block->page)); -} - /** Determine if a block is still close enough to the MRU end of the LRU list meaning that it is not in danger of getting evicted and also implying that it has been accessed recently. @@ -154,35 +118,6 @@ ok: } #endif /* UNIV_DEBUG */ -/********************************************************************//** -Allocates a buf_page_t descriptor. This function must succeed. In case -of failure we assert in this function. -@return: the allocated descriptor. */ -UNIV_INLINE -buf_page_t* -buf_page_alloc_descriptor(void) -/*===========================*/ -{ - buf_page_t* bpage; - - bpage = (buf_page_t*) ut_zalloc_nokey(sizeof *bpage); - ut_ad(bpage); - MEM_UNDEFINED(bpage, sizeof *bpage); - - return(bpage); -} - -/********************************************************************//** -Free a buf_page_t descriptor. */ -UNIV_INLINE -void -buf_page_free_descriptor( -/*=====================*/ - buf_page_t* bpage) /*!< in: bpage descriptor to free. */ -{ - ut_free(bpage); -} - /** Allocate a buffer block. 
@return own: the allocated block, in state BUF_BLOCK_MEMORY */ inline buf_block_t *buf_block_alloc() diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h index 540c14a49c9..aec08e77f54 100644 --- a/storage/innobase/include/buf0lru.h +++ b/storage/innobase/include/buf0lru.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2020, MariaDB Corporation. +Copyright (c) 2017, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,11 +24,10 @@ The database buffer pool LRU replacement algorithm Created 11/5/1995 Heikki Tuuri *******************************************************/ -#ifndef buf0lru_h -#define buf0lru_h +#pragma once -#include "ut0byte.h" #include "buf0types.h" +#include "hash0hash.h" // Forward declaration struct trx_t; @@ -132,14 +131,6 @@ policy at the end of each interval. */ void buf_LRU_stat_update(); -/** Remove one page from LRU list and put it to free list. -@param bpage file page to be freed -@param id page identifier -@param hash_lock buf_pool.page_hash latch (will be released here) */ -void buf_LRU_free_one_page(buf_page_t *bpage, const page_id_t id, - page_hash_latch *hash_lock) - MY_ATTRIBUTE((nonnull)); - #ifdef UNIV_DEBUG /** Validate the LRU list. */ void buf_LRU_validate(); @@ -200,5 +191,3 @@ Increments the I/O counter in buf_LRU_stat_cur. */ /********************************************************************//** Increments the page_zip_decompress() counter in buf_LRU_stat_cur. 
*/ #define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++ - -#endif diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h index b14e2af36c3..2f08220fe98 100644 --- a/storage/innobase/include/buf0types.h +++ b/storage/innobase/include/buf0types.h @@ -181,12 +181,11 @@ enum rw_lock_type_t #ifdef SUX_LOCK_GENERIC class page_hash_latch : public rw_lock { -public: /** Wait for a shared lock */ void read_lock_wait(); /** Wait for an exclusive lock */ void write_lock_wait(); - +public: /** Acquire a shared lock */ inline void read_lock(); /** Acquire an exclusive lock */ diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h index 46a43b13a0a..8e7b8dfd1e6 100644 --- a/storage/innobase/include/hash0hash.h +++ b/storage/innobase/include/hash0hash.h @@ -117,18 +117,6 @@ do {\ HASH_INVALIDATE(DATA, NAME);\ } while (0) -#define HASH_REPLACE(TYPE, NAME, TABLE, FOLD, DATA_OLD, DATA_NEW) \ - do { \ - (DATA_NEW)->NAME = (DATA_OLD)->NAME; \ - \ - hash_cell_t& cell3333 \ - = (TABLE)->array[(TABLE)->calc_hash(FOLD)]; \ - TYPE** struct3333 = (TYPE**)&cell3333.node; \ - while (*struct3333 != DATA_OLD) { \ - struct3333 = &((*struct3333)->NAME); \ - } \ - *struct3333 = DATA_NEW; \ - } while (0) /*******************************************************************//** Gets the first struct in a hash chain, NULL if none. 
*/ diff --git a/storage/innobase/include/ut0new.h b/storage/innobase/include/ut0new.h index 82c80994e72..cd116cc3a20 100644 --- a/storage/innobase/include/ut0new.h +++ b/storage/innobase/include/ut0new.h @@ -841,6 +841,8 @@ constexpr const char* const auto_event_names[] = "buf0buf", "buf0dblwr", "buf0dump", + "buf0lru", + "buf0rea", "dict0dict", "dict0mem", "dict0stats", diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 05f7b1d053b..d2e29cddadf 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2862,7 +2862,9 @@ static void recv_read_in_area(page_id_t page_id) && i->first.space() == page_id.space() && i->first.page_no() < up_limit; i++) { if (i->second.state == page_recv_t::RECV_NOT_PROCESSED - && !buf_pool.page_hash_contains(i->first)) { + && !buf_pool.page_hash_contains( + i->first, + buf_pool.page_hash.cell_get(i->first.fold()))) { i->second.state = page_recv_t::RECV_BEING_READ; *p++ = i->first.page_no(); } -- cgit v1.2.1 From 1f02280904fcfbb2bd86404d1c85c025634f8c9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 22 Oct 2021 12:38:45 +0300 Subject: MDEV-26769 InnoDB does not support hardware lock elision This implements memory transaction support for: * Intel Restricted Transactional Memory (RTM), also known as TSX-NI (Transactional Synchronization Extensions New Instructions) * POWER v2.09 Hardware Transactional Memory (HTM) on GNU/Linux transactional_lock_guard, transactional_shared_lock_guard: RAII lock guards that try to elide the lock acquisition when transactional memory is available. buf_pool.page_hash: Try to elide latches whenever feasible. Related to the InnoDB change buffer and ROW_FORMAT=COMPRESSED tables, this is not always possible. In buf_page_get_low(), memory transactions only work reasonably well for validating a guessed block address.
TMLockGuard, TMLockTrxGuard, TMLockMutexGuard: RAII lock guards that try to elide lock_sys.latch and related latches. --- storage/innobase/btr/btr0btr.cc | 1 + storage/innobase/btr/btr0cur.cc | 16 +- storage/innobase/btr/btr0sea.cc | 42 +- storage/innobase/buf/buf0block_hint.cc | 8 +- storage/innobase/buf/buf0buddy.cc | 9 +- storage/innobase/buf/buf0buf.cc | 583 ++++++++++----------- storage/innobase/buf/buf0lru.cc | 17 +- storage/innobase/buf/buf0rea.cc | 109 ++-- storage/innobase/dict/dict0crea.cc | 2 + storage/innobase/dict/dict0dict.cc | 2 + storage/innobase/gis/gis0sea.cc | 3 +- storage/innobase/handler/ha_innodb.cc | 16 +- storage/innobase/ibuf/ibuf0ibuf.cc | 3 +- storage/innobase/include/btr0sea.h | 12 +- storage/innobase/include/buf0buf.h | 53 +- storage/innobase/include/buf0types.h | 49 +- storage/innobase/include/dict0mem.h | 8 +- storage/innobase/include/lock0lock.h | 188 ++++++- storage/innobase/include/lock0priv.h | 7 +- storage/innobase/include/lock0priv.ic | 7 +- storage/innobase/include/rw_lock.h | 12 +- storage/innobase/include/srw_lock.h | 55 +- .../innobase/include/transactional_lock_guard.h | 167 ++++++ storage/innobase/include/trx0trx.h | 3 + storage/innobase/lock/lock0lock.cc | 280 +++++++--- storage/innobase/log/log0recv.cc | 1 + storage/innobase/row/row0ins.cc | 3 +- storage/innobase/srv/srv0srv.cc | 17 +- storage/innobase/sync/srw_lock.cc | 71 ++- storage/innobase/trx/trx0purge.cc | 54 +- storage/innobase/trx/trx0rec.cc | 22 +- storage/innobase/trx/trx0trx.cc | 23 +- 32 files changed, 1225 insertions(+), 618 deletions(-) create mode 100644 storage/innobase/include/transactional_lock_guard.h diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 02aa89361c5..e02df95b641 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -1069,6 +1069,7 @@ top_loop: /** Clear the index tree and reinitialize the root page, in the rollback of TRX_UNDO_EMPTY. 
The BTR_SEG_LEAF is freed and reinitialized. @param thr query thread */ +TRANSACTIONAL_TARGET void dict_index_t::clear(que_thr_t *thr) { mtr_t mtr; diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index c61799b8b12..435b62a7493 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -1214,6 +1214,7 @@ If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the search tuple should be performed in the B-tree. InnoDB does an insert immediately after the cursor. Thus, the cursor may end up on a user record, or on a page infimum record. */ +TRANSACTIONAL_TARGET dberr_t btr_cur_search_to_nth_level_func( dict_index_t* index, /*!< in: index */ @@ -1994,16 +1995,15 @@ retry_page_get: && mode != PAGE_CUR_RTREE_INSERT && mode != PAGE_CUR_RTREE_LOCATE && mode >= PAGE_CUR_CONTAIN) { - trx_t* trx = thr_get_trx(cursor->thr); lock_prdt_t prdt; - lock_sys.rd_lock(SRW_LOCK_CALL); - trx->mutex_lock(); - lock_init_prdt_from_mbr( - &prdt, &cursor->rtr_info->mbr, mode, - trx->lock.lock_heap); - lock_sys.rd_unlock(); - trx->mutex_unlock(); + { + trx_t* trx = thr_get_trx(cursor->thr); + TMLockTrxGuard g{TMLockTrxArgs(*trx)}; + lock_init_prdt_from_mbr( + &prdt, &cursor->rtr_info->mbr, mode, + trx->lock.lock_heap); + } if (rw_latch == RW_NO_LATCH && height != 0) { block->lock.s_lock(); diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index c7a13181590..a59a54676ed 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -1007,6 +1007,7 @@ both have sensible values. 
or NULL @param[in] mtr mini transaction @return whether the search succeeded */ +TRANSACTIONAL_TARGET bool btr_search_guess_on_hash( dict_index_t* index, @@ -1092,25 +1093,32 @@ fail: if (!ahi_latch) { buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get( block->page.id().fold()); - page_hash_latch&hash_lock = buf_pool.page_hash.lock_get(chain); - hash_lock.read_lock(); - - if (block->page.state() == BUF_BLOCK_REMOVE_HASH) { - /* Another thread is just freeing the block - from the LRU list of the buffer pool: do not - try to access this page. */ - hash_lock.read_unlock(); - goto fail; + bool fail; + { + transactional_shared_lock_guard g{ + buf_pool.page_hash.lock_get(chain)}; + + switch (block->page.state()) { + case BUF_BLOCK_REMOVE_HASH: + /* Another thread is just freeing the block + from the LRU list of the buffer pool: do not + try to access this page. */ + goto fail; + case BUF_BLOCK_FILE_PAGE: + break; + default: +#ifndef NO_ELISION + xend(); +#endif + ut_error; + } + + block->fix(); + fail = index != block->index + && index_id == block->index->id; } - const bool fail = index != block->index - && index_id == block->index->id; ut_a(!fail || block->index->freed()); - ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); - DBUG_ASSERT(fail || block->page.status != buf_page_t::FREED); - - buf_block_buf_fix_inc(block); - hash_lock.read_unlock(); block->page.set_accessed(); buf_page_make_young_if_needed(&block->page); @@ -1137,6 +1145,8 @@ got_no_latch: if (UNIV_UNLIKELY(fail)) { goto fail_and_release_page; } + + DBUG_ASSERT(block->page.status != buf_page_t::FREED); } else if (UNIV_UNLIKELY(index != block->index && index_id == block->index->id)) { ut_a(block->index->freed()); diff --git a/storage/innobase/buf/buf0block_hint.cc b/storage/innobase/buf/buf0block_hint.cc index 9fac76b77a5..00c968511b3 100644 --- a/storage/innobase/buf/buf0block_hint.cc +++ b/storage/innobase/buf/buf0block_hint.cc @@ -28,6 +28,7 @@ this program; if not, write to the Free Software 
Foundation, Inc., #include "buf0block_hint.h" namespace buf { +TRANSACTIONAL_TARGET void Block_hint::buffer_fix_block_if_still_valid() { /* To check if m_block belongs to the current buf_pool, we must @@ -47,14 +48,13 @@ void Block_hint::buffer_fix_block_if_still_valid() if (m_block) { auto &cell= buf_pool.page_hash.cell_get(m_page_id.fold()); - page_hash_latch &latch= buf_pool.page_hash.lock_get(cell); - latch.read_lock(); + transactional_shared_lock_guard g + {buf_pool.page_hash.lock_get(cell)}; if (buf_pool.is_uncompressed(m_block) && m_page_id == m_block->page.id() && m_block->page.state() == BUF_BLOCK_FILE_PAGE) - buf_block_buf_fix_inc(m_block); + m_block->fix(); else clear(); - latch.read_unlock(); } } } // namespace buf diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc index 1e5cff4959f..6f4b4554518 100644 --- a/storage/innobase/buf/buf0buddy.cc +++ b/storage/innobase/buf/buf0buddy.cc @@ -548,7 +548,10 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force) } page_hash_latch &hash_lock = buf_pool.page_hash.lock_get(cell); - hash_lock.write_lock(); + /* It does not make sense to use transactional_lock_guard here, + because the memcpy() of 1024 to 16384 bytes would likely make the + memory transaction too large. */ + hash_lock.lock(); if (bpage->can_relocate()) { /* Relocate the compressed page. 
*/ @@ -559,7 +562,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force) memcpy(dst, src, size); bpage->zip.data = reinterpret_cast(dst); - hash_lock.write_unlock(); + hash_lock.unlock(); buf_buddy_mem_invalid( reinterpret_cast(src), i); @@ -570,7 +573,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force) return(true); } - hash_lock.write_unlock(); + hash_lock.unlock(); return(false); } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 64aa6f873cc..d03a92cc7a5 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1339,7 +1339,11 @@ inline bool buf_pool_t::realloc(buf_block_t *block) const page_id_t id{block->page.id()}; hash_chain& chain = page_hash.cell_get(id.fold()); page_hash_latch& hash_lock = page_hash.lock_get(chain); - hash_lock.write_lock(); + /* It does not make sense to use transactional_lock_guard + here, because copying innodb_page_size (4096 to 65536) bytes + as well as other changes would likely make the memory + transaction too large. 
*/ + hash_lock.lock(); if (block->page.can_relocate()) { memcpy_aligned( @@ -1421,7 +1425,7 @@ inline bool buf_pool_t::realloc(buf_block_t *block) new_block = block; } - hash_lock.write_unlock(); + hash_lock.unlock(); buf_LRU_block_free_non_file_page(new_block); return(true); /* free_list was enough */ } @@ -1596,7 +1600,7 @@ inline void buf_pool_t::page_hash_table::write_lock_all() { for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1) { - reinterpret_cast(array[n]).write_lock(); + reinterpret_cast(array[n]).lock(); if (!n) break; } @@ -1607,7 +1611,7 @@ inline void buf_pool_t::page_hash_table::write_unlock_all() { for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1) { - reinterpret_cast(array[n]).write_unlock(); + reinterpret_cast(array[n]).unlock(); if (!n) break; } @@ -1742,6 +1746,8 @@ withdraw_retry: {found, withdraw_started, my_hrtime_coarse()}; withdraw_started = current_time; + /* This is going to exceed the maximum size of a + memory transaction. */ LockMutexGuard g{SRW_LOCK_CALL}; trx_sys.trx_list.for_each(f); } @@ -2115,7 +2121,7 @@ retry: return nullptr; } - page_hash.lock_get(chain).write_unlock(); + page_hash.lock_get(chain).unlock(); /* Allocate a watch[] and then try to insert it into the page_hash. */ mysql_mutex_lock(&mutex); @@ -2143,12 +2149,12 @@ retry: if (UNIV_LIKELY_NULL(bpage)) { w->set_state(BUF_BLOCK_NOT_USED); - page_hash.lock_get(chain).write_lock(); + page_hash.lock_get(chain).lock(); mysql_mutex_unlock(&mutex); goto retry; } - page_hash.lock_get(chain).write_lock(); + page_hash.lock_get(chain).lock(); ut_ad(!w->buf_fix_count_); w->buf_fix_count_= 1; buf_pool.page_hash.append(chain, w); @@ -2165,45 +2171,55 @@ retry: watch_set(id) must have returned nullptr before. 
@param id page identifier @param chain unlocked hash table chain */ +TRANSACTIONAL_TARGET void buf_pool_t::watch_unset(const page_id_t id, buf_pool_t::hash_chain &chain) { mysql_mutex_assert_not_owner(&mutex); - page_hash_latch &hash_lock= page_hash.lock_get(chain); - hash_lock.write_lock(); - /* The page must exist because watch_set() increments buf_fix_count. */ - buf_page_t *w= page_hash.get(id, chain); - const auto buf_fix_count= w->buf_fix_count(); - ut_ad(buf_fix_count); - const bool must_remove= buf_fix_count == 1 && watch_is_sentinel(*w); - if (!must_remove) - w->unfix(); - hash_lock.write_unlock(); - - if (must_remove) + buf_page_t *w; { - const auto old= w; - /* The following is based on buf_pool_t::watch_remove(). */ - mysql_mutex_lock(&mutex); + transactional_lock_guard g{page_hash.lock_get(chain)}; + /* The page must exist because watch_set() increments buf_fix_count. */ w= page_hash.get(id, chain); - hash_lock.write_lock(); + const auto buf_fix_count= w->buf_fix_count(); + ut_ad(buf_fix_count); + ut_ad(w->in_page_hash); + if (buf_fix_count != 1 || !watch_is_sentinel(*w)) + { + w->unfix(); + w= nullptr; + } + } + + if (!w) + return; + + const auto old= w; + /* The following is based on buf_pool_t::watch_remove(). */ + mysql_mutex_lock(&mutex); + w= page_hash.get(id, chain); + + { + transactional_lock_guard g + {buf_pool.page_hash.lock_get(chain)}; if (w->unfix() == 0 && w == old) { page_hash.remove(chain, w); - // Now that the watch is detached from page_hash, release it to watch[]. + // Now that w is detached from page_hash, release it to watch[]. ut_ad(w->id_ == id); ut_ad(!w->buf_fix_count()); ut_ad(w->state() == BUF_BLOCK_ZIP_PAGE); w->set_state(BUF_BLOCK_NOT_USED); } - mysql_mutex_unlock(&mutex); - hash_lock.write_unlock(); } + + mysql_mutex_unlock(&mutex); } /** Mark the page status as FREED for the given tablespace and page number. 
@param[in,out] space tablespace @param[in] page page number @param[in,out] mtr mini-transaction */ +TRANSACTIONAL_TARGET void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr) { ut_ad(mtr); @@ -2219,28 +2235,22 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr) ++buf_pool.stat.n_page_gets; const page_id_t page_id(space->id, page); buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); - page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); - hash_lock.read_lock(); - if (buf_block_t *block= reinterpret_cast - (buf_pool.page_hash.get(page_id, chain))) + buf_block_t *block; { - if (block->page.state() != BUF_BLOCK_FILE_PAGE) - /* FIXME: convert, but avoid buf_zip_decompress() */; - else - { - buf_block_buf_fix_inc(block); - ut_ad(block->page.buf_fix_count()); - hash_lock.read_unlock(); - - mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); - block->lock.x_lock(); - - block->page.status= buf_page_t::FREED; + transactional_shared_lock_guard g + {buf_pool.page_hash.lock_get(chain)}; + block= reinterpret_cast + (buf_pool.page_hash.get(page_id, chain)); + if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE) + /* FIXME: convert ROW_FORMAT=COMPRESSED, without buf_zip_decompress() */ return; - } + block->fix(); } + ut_ad(block->page.buf_fix_count()); - hash_lock.read_unlock(); + mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); + block->lock.x_lock(); + block->page.status= buf_page_t::FREED; } /** Get read access to a compressed page (usually of type @@ -2253,80 +2263,48 @@ the same set of mutexes or latches. 
@param[in] page_id page id @param[in] zip_size ROW_FORMAT=COMPRESSED page size @return pointer to the block */ +TRANSACTIONAL_TARGET buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size) { ut_ad(zip_size); ut_ad(ut_is_2pow(zip_size)); ++buf_pool.stat.n_page_gets; - bool discard_attempted= false; buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); buf_page_t *bpage; - for (;;) - { lookup: - hash_lock.read_lock(); - bpage= buf_pool.page_hash.get(page_id, chain); - if (bpage) - break; - hash_lock.read_unlock(); - - dberr_t err= buf_read_page(page_id, zip_size); - - if (UNIV_UNLIKELY(err != DB_SUCCESS)) + for (bool discard_attempted= false;;) + { { - ib::error() << "Reading compressed page " << page_id - << " failed with error: " << err; - goto err_exit; - } + transactional_shared_lock_guard g{hash_lock}; + bpage= buf_pool.page_hash.get(page_id, chain); + if (!bpage || buf_pool.watch_is_sentinel(*bpage)) + goto must_read_page; -#ifdef UNIV_DEBUG - if (!(++buf_dbg_counter % 5771)) buf_pool.validate(); -#endif /* UNIV_DEBUG */ - } - - if (!bpage->zip.data) - { - /* There is no compressed page. */ -err_exit: - hash_lock.read_unlock(); - return nullptr; - } + ut_ad(bpage->in_file()); + ut_ad(page_id == bpage->id()); - ut_ad(!buf_pool.watch_is_sentinel(*bpage)); + if (!bpage->zip.data) + /* There is no ROW_FORMAT=COMPRESSED page. */ + return nullptr; - switch (bpage->state()) { - case BUF_BLOCK_FILE_PAGE: - /* Discard the uncompressed page frame if possible. 
*/ - if (!discard_attempted) - { - discard_attempted= true; - hash_lock.read_unlock(); - mysql_mutex_lock(&buf_pool.mutex); - if (buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain)) - buf_LRU_free_page(bpage, false); - mysql_mutex_unlock(&buf_pool.mutex); - goto lookup; + if (discard_attempted || bpage->state() == BUF_BLOCK_ZIP_PAGE) + { + bpage->fix(); + break; + } } - /* fall through */ - case BUF_BLOCK_ZIP_PAGE: - bpage->fix(); - goto got_block; - default: - break; - } - ut_error; - goto err_exit; - -got_block: - bool must_read= bpage->io_fix() == BUF_IO_READ; - hash_lock.read_unlock(); + discard_attempted= true; + mysql_mutex_lock(&buf_pool.mutex); + if (buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain)) + buf_LRU_free_page(bpage, false); + mysql_mutex_unlock(&buf_pool.mutex); + } DBUG_ASSERT(bpage->status != buf_page_t::FREED); - bpage->set_accessed(); buf_page_make_young_if_needed(bpage); @@ -2336,12 +2314,19 @@ got_block: ut_ad(bpage->buf_fix_count()); ut_ad(bpage->in_file()); - if (must_read) - /* Let us wait until the read operation completes */ - while (bpage->io_fix() == BUF_IO_READ) - std::this_thread::sleep_for(WAIT_FOR_READ); - + /* Let us wait until the read operation completes */ + while (bpage->io_fix() == BUF_IO_READ) + std::this_thread::sleep_for(WAIT_FOR_READ); return bpage; + +must_read_page: + if (dberr_t err= buf_read_page(page_id, zip_size)) + { + ib::error() << "Reading compressed page " << page_id + << " failed with error: " << err; + return nullptr; + } + goto lookup; } /********************************************************************//** @@ -2493,6 +2478,7 @@ while reading the page from file then it makes sure that it does merging of change buffer changes while reading the page from file. 
@return pointer to the block or NULL */ +TRANSACTIONAL_TARGET buf_block_t* buf_page_get_low( const page_id_t page_id, @@ -2504,7 +2490,6 @@ buf_page_get_low( dberr_t* err, bool allow_ibuf_merge) { - buf_block_t* block; unsigned access_time; ulint retries = 0; @@ -2561,146 +2546,137 @@ buf_page_get_low( auto& chain= buf_pool.page_hash.cell_get(page_id.fold()); page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); loop: - buf_block_t* fix_block; - hash_lock.read_lock(); - - /* If the guess is a compressed page descriptor that - has been allocated by buf_page_alloc_descriptor(), - it may have been freed by buf_relocate(). */ - - if (guess && buf_pool.is_uncompressed(guess) - && page_id == guess->page.id() - && guess->page.state() == BUF_BLOCK_FILE_PAGE) { - ut_ad(!guess->page.in_zip_hash); - block = guess; - goto have_block; + buf_block_t* block = guess; + + if (block) { + transactional_shared_lock_guard g{hash_lock}; + if (buf_pool.is_uncompressed(block) + && page_id == block->page.id() + && block->page.state() == BUF_BLOCK_FILE_PAGE) { + ut_ad(!block->page.in_zip_hash); + block->fix(); + goto got_block; + } } guess = nullptr; + + /* A memory transaction would frequently be aborted here. */ + hash_lock.lock_shared(); block = reinterpret_cast( buf_pool.page_hash.get(page_id, chain)); + if (UNIV_LIKELY(block + && !buf_pool.watch_is_sentinel(block->page))) { + block->fix(); + hash_lock.unlock_shared(); + goto got_block; + } + hash_lock.unlock_shared(); - if (block && !buf_pool.watch_is_sentinel(block->page)) { -have_block: - fix_block = block; - } else { - hash_lock.read_unlock(); - fix_block = block = nullptr; - - /* Page not in buf_pool: needs to be read from file */ - if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { - hash_lock.write_lock(); - - if (buf_page_t *bpage= buf_pool.watch_set(page_id, - chain)) { - /* We can release hash_lock after we - increment the fix count to make - sure that no state change takes place. 
*/ - bpage->fix(); - hash_lock.write_unlock(); - block = reinterpret_cast(bpage); - fix_block = block; - goto got_block; - } - - hash_lock.write_unlock(); + /* Page not in buf_pool: needs to be read from file */ + switch (mode) { + case BUF_GET_IF_IN_POOL: + case BUF_PEEK_IF_IN_POOL: + case BUF_EVICT_IF_IN_POOL: + return nullptr; + case BUF_GET_IF_IN_POOL_OR_WATCH: + /* We cannot easily use a memory transaction here. */ + hash_lock.lock(); + block = reinterpret_cast + (buf_pool.watch_set(page_id, chain)); + if (block) { + /* buffer-fixing prevents block->page.state() + changes */ + block->fix(); } + hash_lock.unlock(); - switch (mode) { - case BUF_GET_IF_IN_POOL: - case BUF_GET_IF_IN_POOL_OR_WATCH: - case BUF_PEEK_IF_IN_POOL: - case BUF_EVICT_IF_IN_POOL: - return(NULL); + if (block) { + goto got_block; } - /* The call path is buf_read_page() -> - buf_read_page_low() (fil_space_t::io()) -> - buf_page_read_complete() -> - buf_decrypt_after_read(). Here fil_space_t* is used - and we decrypt -> buf_page_check_corrupt() where page - checksums are compared. Decryption, decompression as - well as error handling takes place at a lower level. - Here we only need to know whether the page really is - corrupted, or if an encrypted page with a valid - checksum cannot be decypted. */ - - dberr_t local_err = buf_read_page(page_id, zip_size); - - if (local_err == DB_SUCCESS) { - buf_read_ahead_random(page_id, zip_size, - ibuf_inside(mtr)); - - retries = 0; - } else if (mode == BUF_GET_POSSIBLY_FREED) { + return nullptr; + } + + /* The call path is buf_read_page() -> + buf_read_page_low() (fil_space_t::io()) -> + buf_page_read_complete() -> + buf_decrypt_after_read(). Here fil_space_t* is used + and we decrypt -> buf_page_check_corrupt() where page + checksums are compared. Decryption, decompression as + well as error handling takes place at a lower level. 
+ Here we only need to know whether the page really is + corrupted, or if an encrypted page with a valid + checksum cannot be decypted. */ + + if (dberr_t local_err = buf_read_page(page_id, zip_size)) { + if (mode == BUF_GET_POSSIBLY_FREED) { if (err) { *err = local_err; } - return NULL; + return nullptr; } else if (retries < BUF_PAGE_READ_MAX_RETRIES) { ++retries; - - DBUG_EXECUTE_IF( - "innodb_page_corruption_retries", - retries = BUF_PAGE_READ_MAX_RETRIES; - ); + DBUG_EXECUTE_IF("innodb_page_corruption_retries", + retries = BUF_PAGE_READ_MAX_RETRIES;); } else { if (err) { *err = local_err; } - - /* Pages whose encryption key is unavailable or used - key, encryption algorithm or encryption method is - incorrect are marked as encrypted in + /* Pages whose encryption key is unavailable or the + configured key, encryption algorithm or encryption + method are incorrect are marked as encrypted in buf_page_check_corrupt(). Unencrypted page could be corrupted in a way where the key_id field is nonzero. There is no checksum on field FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION. */ - if (local_err == DB_DECRYPTION_FAILED) { - return (NULL); - } - - if (local_err == DB_PAGE_CORRUPTED - && srv_force_recovery) { - return NULL; + switch (local_err) { + case DB_PAGE_CORRUPTED: + if (!srv_force_recovery) { + break; + } + /* fall through */ + case DB_DECRYPTION_FAILED: + return nullptr; + default: + break; } /* Try to set table as corrupted instead of asserting. 
*/ if (page_id.space() == TRX_SYS_SPACE) { } else if (page_id.space() == SRV_TMP_SPACE_ID) { - } else if (fil_space_t* space= fil_space_t::get( - page_id.space())) { + } else if (fil_space_t* space + = fil_space_t::get(page_id.space())) { bool set = dict_set_corrupted_by_space(space); space->release(); if (set) { - return NULL; + return nullptr; } } if (local_err == DB_IO_ERROR) { - return NULL; + return nullptr; } ib::fatal() << "Unable to read page " << page_id - << " into the buffer pool after " - << BUF_PAGE_READ_MAX_RETRIES - << ". The most probable cause" + << " into the buffer pool after " + << BUF_PAGE_READ_MAX_RETRIES + << ". The most probable cause" " of this error may be that the" " table has been corrupted." " See https://mariadb.com/kb/en/library/innodb-recovery-modes/"; } - -#ifdef UNIV_DEBUG - if (!(++buf_dbg_counter % 5771)) buf_pool.validate(); -#endif /* UNIV_DEBUG */ - goto loop; + } else { + buf_read_ahead_random(page_id, zip_size, ibuf_inside(mtr)); + retries = 0; } - fix_block->fix(); - hash_lock.read_unlock(); + ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate()); + goto loop; got_block: + ut_ad(!block->page.in_zip_hash); switch (mode) { default: ut_ad(block->zip_size() == zip_size); @@ -2708,23 +2684,23 @@ got_block: case BUF_GET_IF_IN_POOL: case BUF_PEEK_IF_IN_POOL: case BUF_EVICT_IF_IN_POOL: - if (fix_block->page.io_fix() == BUF_IO_READ) { + if (block->page.io_fix() == BUF_IO_READ) { /* The page is being read to buffer pool, but we cannot wait around for the read to complete. */ - fix_block->unfix(); + block->unfix(); return(NULL); } } - switch (UNIV_EXPECT(fix_block->page.state(), BUF_BLOCK_FILE_PAGE)) { + switch (UNIV_EXPECT(block->page.state(), BUF_BLOCK_FILE_PAGE)) { case BUF_BLOCK_FILE_PAGE: if (fsp_is_system_temporary(page_id.space()) && block->page.io_fix() != BUF_IO_NONE) { /* This suggests that the page is being flushed. Avoid returning reference to this page. Instead wait for the flush action to complete. 
*/ - fix_block->unfix(); + block->unfix(); std::this_thread::sleep_for( std::chrono::microseconds(WAIT_FOR_WRITE)); goto loop; @@ -2732,11 +2708,11 @@ got_block: if (UNIV_UNLIKELY(mode == BUF_EVICT_IF_IN_POOL)) { evict_from_pool: - ut_ad(!fix_block->page.oldest_modification()); + ut_ad(!block->page.oldest_modification()); mysql_mutex_lock(&buf_pool.mutex); - fix_block->unfix(); + block->unfix(); - if (!buf_LRU_free_page(&fix_block->page, true)) { + if (!buf_LRU_free_page(&block->page, true)) { ut_ad(0); } @@ -2759,7 +2735,7 @@ evict_from_pool: adaptive hash index. There cannot be an adaptive hash index for a compressed-only page, so do not bother decompressing the page. */ - fix_block->unfix(); + block->unfix(); return(NULL); } @@ -2773,7 +2749,7 @@ evict_from_pool: /* This condition often occurs when the buffer is not buffer-fixed, but I/O-fixed by buf_page_init_for_read(). */ - fix_block->unfix(); + block->unfix(); /* The block is buffer-fixed or I/O-fixed. Try again later. */ @@ -2786,16 +2762,21 @@ evict_from_pool: or relocated while we are attempting to allocate an uncompressed page. */ - block = buf_LRU_get_free_block(false); - buf_block_init_low(block); + buf_block_t *new_block = buf_LRU_get_free_block(false); + buf_block_init_low(new_block); mysql_mutex_lock(&buf_pool.mutex); - hash_lock.write_lock(); + page_hash_latch& hash_lock=buf_pool.page_hash.lock_get(chain); + + /* It does not make sense to use + transactional_lock_guard here, because buf_relocate() + would likely make a memory transaction too large. */ + hash_lock.lock(); /* Buffer-fixing prevents the page_hash from changing. 
*/ ut_ad(bpage == buf_pool.page_hash.get(page_id, chain)); - fix_block->unfix(); /* hash_lock protects us after this */ + block->unfix(); /* hash_lock protects us after this */ if (bpage->buf_fix_count() || bpage->io_fix() != BUF_IO_NONE) { /* The block was buffer-fixed or I/O-fixed while @@ -2804,15 +2785,15 @@ evict_from_pool: This should be extremely unlikely, for example, if buf_page_get_zip() was invoked. */ - hash_lock.write_unlock(); - buf_LRU_block_free_non_file_page(block); + hash_lock.unlock(); + buf_LRU_block_free_non_file_page(new_block); mysql_mutex_unlock(&buf_pool.mutex); /* Try again */ goto loop; } - fix_block = block; + block = new_block; /* Move the compressed page from bpage to block, and uncompress it. */ @@ -2843,7 +2824,7 @@ evict_from_pool: MEM_UNDEFINED(bpage, sizeof *bpage); mysql_mutex_unlock(&buf_pool.mutex); - hash_lock.write_unlock(); + hash_lock.unlock(); buf_pool.n_pend_unzip++; access_time = block->page.is_accessed(); @@ -2859,9 +2840,9 @@ evict_from_pool: buf_pool.mutex. */ if (!buf_zip_decompress(block, false)) { - fix_block->lock.x_unlock(); - fix_block->page.io_unfix(); - fix_block->unfix(); + block->lock.x_unlock(); + block->page.io_unfix(); + block->unfix(); --buf_pool.n_pend_unzip; if (err) { @@ -2870,16 +2851,14 @@ evict_from_pool: return NULL; } + block->page.io_unfix(); block->lock.x_unlock(); - fix_block->page.io_unfix(); --buf_pool.n_pend_unzip; - break; } - ut_ad(block == fix_block); - ut_ad(fix_block->page.buf_fix_count()); + ut_ad(block->page.buf_fix_count()); - ut_ad(fix_block->page.state() == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG re_evict: @@ -2892,15 +2871,17 @@ re_evict: mysql_mutex_lock(&buf_pool.mutex); - fix_block->unfix(); + block->unfix(); /* Blocks cannot be relocated or enter or exit the buf_pool while we are holding the buf_pool.mutex. 
*/ - const bool evicted = buf_LRU_free_page(&fix_block->page, true); + const bool evicted = buf_LRU_free_page(&block->page, true); space->release(); if (evicted) { - hash_lock.write_lock(); + page_hash_latch& hash_lock + = buf_pool.page_hash.lock_get(chain); + hash_lock.lock(); mysql_mutex_unlock(&buf_pool.mutex); /* We may set the watch, as it would have been set if the page were not in the @@ -2909,31 +2890,19 @@ re_evict: mode == BUF_GET_IF_IN_POOL_OR_WATCH ? buf_pool.watch_set(page_id, chain) : buf_pool.page_hash.get(page_id, chain)); - hash_lock.write_unlock(); - - if (block != NULL) { - /* Either the page has been read in or - a watch was set on that in the window - where we released the buf_pool.mutex - and before we acquire the hash_lock - above. Try again. */ - guess = block; - - goto loop; - } - + hash_lock.unlock(); return(NULL); } - fix_block->fix(); + block->fix(); mysql_mutex_unlock(&buf_pool.mutex); buf_flush_list(); buf_flush_wait_batch_end_acquiring_mutex(false); while (buf_flush_list_space(space)); os_aio_wait_until_no_pending_writes(); - if (fix_block->page.buf_fix_count() == 1 - && !fix_block->page.oldest_modification()) { + if (block->page.buf_fix_count() == 1 + && !block->page.oldest_modification()) { goto re_evict; } @@ -2941,7 +2910,7 @@ re_evict: } #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - ut_ad(fix_block->page.buf_fix_count()); + ut_ad(block->page.buf_fix_count()); /* While tablespace is reinited the indexes are already freed but the blocks related to it still resides in buffer pool. Trying to remove @@ -2952,25 +2921,25 @@ re_evict: "btr_search_drop_page_hash_when_freed". 
*/ ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL - || fix_block->page.status != buf_page_t::FREED); + || block->page.status != buf_page_t::FREED); - const bool not_first_access = fix_block->page.set_accessed(); + const bool not_first_access = block->page.set_accessed(); if (mode != BUF_PEEK_IF_IN_POOL) { - buf_page_make_young_if_needed(&fix_block->page); + buf_page_make_young_if_needed(&block->page); } #ifdef UNIV_DEBUG if (!(++buf_dbg_counter % 5771)) buf_pool.validate(); #endif /* UNIV_DEBUG */ - ut_ad(fix_block->page.state() == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); /* We have to wait here because the IO_READ state was set under the protection of the hash_lock and not block->lock. */ - buf_wait_for_read(fix_block); + buf_wait_for_read(block); - if (fix_block->page.id() != page_id) { - buf_block_buf_fix_dec(fix_block); + if (block->page.id() != page_id) { + buf_block_buf_fix_dec(block); if (err) { *err = DB_PAGE_CORRUPTED; @@ -2979,27 +2948,27 @@ re_evict: return NULL; } - if (fix_block->page.status != buf_page_t::FREED + if (block->page.status != buf_page_t::FREED && allow_ibuf_merge - && fil_page_get_type(fix_block->frame) == FIL_PAGE_INDEX - && page_is_leaf(fix_block->frame)) { - fix_block->lock.x_lock(); + && fil_page_get_type(block->frame) == FIL_PAGE_INDEX + && page_is_leaf(block->frame)) { + block->lock.x_lock(); - if (fix_block->page.ibuf_exist) { - fix_block->page.ibuf_exist = false; - ibuf_merge_or_delete_for_page(fix_block, page_id, + if (block->page.ibuf_exist) { + block->page.ibuf_exist = false; + ibuf_merge_or_delete_for_page(block, page_id, zip_size); } if (rw_latch == RW_X_LATCH) { - mtr->memo_push(fix_block, MTR_MEMO_PAGE_X_FIX); + mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); } else { - fix_block->lock.x_unlock(); + block->lock.x_unlock(); goto get_latch; } } else { get_latch: - mtr->page_lock(fix_block, rw_latch); + mtr->page_lock(block, rw_latch); } if (!not_first_access && mode != 
BUF_PEEK_IF_IN_POOL) { @@ -3009,7 +2978,7 @@ get_latch: buf_read_ahead_linear(page_id, zip_size, ibuf_inside(mtr)); } - return(fix_block); + return block; } /** Get access to a database page. Buffered redo log may be applied. @@ -3070,6 +3039,7 @@ buf_page_get_gen( This is the general function used to get optimistic access to a database page. @return TRUE if success */ +TRANSACTIONAL_TARGET ibool buf_page_optimistic_get( /*====================*/ @@ -3085,26 +3055,26 @@ buf_page_optimistic_get( ut_ad(mtr->is_active()); ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH); - if (UNIV_UNLIKELY(block->page.state() != BUF_BLOCK_FILE_PAGE - || block->page.io_fix() != BUF_IO_NONE)) { + if (have_transactional_memory) { + } else if (UNIV_UNLIKELY(block->page.state() != BUF_BLOCK_FILE_PAGE + || block->page.io_fix() != BUF_IO_NONE)) { return FALSE; } const page_id_t id{block->page.id()}; buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get(id.fold()); - page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); - hash_lock.read_lock(); - if (UNIV_UNLIKELY(id != block->page.id() - || block->page.state() != BUF_BLOCK_FILE_PAGE - || block->page.io_fix() != BUF_IO_NONE)) { - hash_lock.read_unlock(); - return(FALSE); + { + transactional_shared_lock_guard g{ + buf_pool.page_hash.lock_get(chain)}; + if (UNIV_UNLIKELY(id != block->page.id() + || block->page.state() != BUF_BLOCK_FILE_PAGE + || block->page.io_fix() != BUF_IO_NONE)) { + return FALSE; + } + block->fix(); } - buf_block_buf_fix_inc(block); - hash_lock.read_unlock(); - block->page.set_accessed(); buf_page_make_young_if_needed(&block->page); @@ -3165,28 +3135,27 @@ Suitable for using when holding the lock_sys latches (as it avoids deadlock). 
@param[in,out] mtr mini-transaction @return the block @retval nullptr if an S-latch cannot be granted immediately */ +TRANSACTIONAL_TARGET buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr) { ut_ad(mtr); ut_ad(mtr->is_active()); - buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); - page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); - hash_lock.read_lock(); - buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain); - if (!bpage || bpage->state() != BUF_BLOCK_FILE_PAGE) + buf_block_t *block; + { - hash_lock.read_unlock(); - return nullptr; + transactional_shared_lock_guard g + {buf_pool.page_hash.lock_get(chain)}; + block= reinterpret_cast + (buf_pool.page_hash.get(page_id, chain)); + if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE) + return nullptr; + block->fix(); } - buf_block_t *block= reinterpret_cast(bpage); - buf_block_buf_fix_inc(block); - hash_lock.read_unlock(); - if (!block->lock.s_lock_try()) { - buf_block_buf_fix_dec(block); + block->unfix(); return nullptr; } @@ -3195,9 +3164,9 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr) #ifdef UNIV_DEBUG if (!(++buf_dbg_counter % 5771)) buf_pool.validate(); #endif /* UNIV_DEBUG */ - ut_ad(bpage->buf_fix_count()); - ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE); - ut_ad(bpage->id() == page_id); + ut_ad(block->page.buf_fix_count()); + ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.id() == page_id); ++buf_pool.stat.n_page_gets; return block; @@ -3216,6 +3185,7 @@ void buf_block_t::initialise(const page_id_t page_id, ulint zip_size, page_zip_set_size(&page.zip, zip_size); } +TRANSACTIONAL_TARGET static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size, mtr_t *mtr, buf_block_t *free_block) { @@ -3269,10 +3239,13 @@ loop: break; case BUF_BLOCK_ZIP_PAGE: page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); - hash_lock.write_lock(); + /* It does not make sense to use 
transactional_lock_guard here, + because buf_relocate() would likely make the memory transaction + too large. */ + hash_lock.lock(); if (block->page.io_fix() != BUF_IO_NONE) { - hash_lock.write_unlock(); + hash_lock.unlock(); /* Wait for buf_page_write_complete() to release the I/O fix. */ timespec abstime; set_timespec_nsec(abstime, 1000000); @@ -3289,7 +3262,7 @@ loop: free_block->page.set_state(BUF_BLOCK_FILE_PAGE); buf_unzip_LRU_add_block(free_block, FALSE); - hash_lock.write_unlock(); + hash_lock.unlock(); buf_page_free_descriptor(&block->page); block= free_block; buf_block_buf_fix_inc(block); @@ -3325,24 +3298,20 @@ loop: /* The block must be put to the LRU list */ buf_LRU_add_block(&block->page, false); - page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); - hash_lock.write_lock(); - block->page.set_state(BUF_BLOCK_FILE_PAGE); - buf_pool.page_hash.append(chain, &block->page); + { + transactional_lock_guard g + {buf_pool.page_hash.lock_get(chain)}; + block->page.set_state(BUF_BLOCK_FILE_PAGE); + buf_pool.page_hash.append(chain, &block->page); + block->lock.x_lock(); + if (UNIV_UNLIKELY(zip_size)) + /* Prevent race conditions during buf_buddy_alloc(), which may + release and reacquire buf_pool.mutex, by IO-fixing and X-latching. */ + block->page.set_io_fix(BUF_IO_READ); + } - block->lock.x_lock(); if (UNIV_UNLIKELY(zip_size)) { - /* Prevent race conditions during buf_buddy_alloc(), which may - release and reacquire buf_pool.mutex, by IO-fixing and X-latching - the block. */ - block->page.set_io_fix(BUF_IO_READ); - hash_lock.write_unlock(); - - /* buf_pool.mutex may be released and reacquired by - buf_buddy_alloc(). We must defer this operation until - after the block descriptor has been added to - buf_pool.LRU and buf_pool.page_hash. 
*/ block->page.zip.data= buf_buddy_alloc(zip_size); /* To maintain the invariant block->in_unzip_LRU_list == @@ -3353,8 +3322,6 @@ loop: block->page.set_io_fix(BUF_IO_NONE); } - else - hash_lock.write_unlock(); mysql_mutex_unlock(&buf_pool.mutex); diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index 2b02633cbb9..77d367074b0 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -810,7 +810,9 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) execution of buf_page_get_low(). */ buf_pool_t::hash_chain& chain= buf_pool.page_hash.cell_get(id.fold()); page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); - hash_lock.write_lock(); + /* We cannot use transactional_lock_guard here, + because buf_buddy_relocate() in buf_buddy_free() could get stuck. */ + hash_lock.lock(); lsn_t oldest_modification = bpage->oldest_modification_acquire(); if (UNIV_UNLIKELY(!bpage->can_relocate())) { @@ -840,7 +842,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) } else if (oldest_modification && bpage->state() != BUF_BLOCK_FILE_PAGE) { func_exit: - hash_lock.write_unlock(); + hash_lock.unlock(); return(false); } else if (bpage->state() == BUF_BLOCK_FILE_PAGE) { @@ -954,9 +956,10 @@ func_exit: decompressing the block while we release hash_lock. 
*/ b->set_io_fix(BUF_IO_PIN); - hash_lock.write_unlock(); + goto release; } else if (!zip) { - hash_lock.write_unlock(); +release: + hash_lock.unlock(); } buf_block_t* block = reinterpret_cast(bpage); @@ -1170,7 +1173,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, ut_a(bpage->zip.ssize); ut_ad(!bpage->oldest_modification()); - hash_lock.write_unlock(); + hash_lock.unlock(); buf_pool_mutex_exit_forbid(); buf_buddy_free(bpage->zip.data, bpage->zip_size()); @@ -1214,7 +1217,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, and by the time we'll release it in the caller we'd have inserted the compressed only descriptor in the page_hash. */ - hash_lock.write_unlock(); + hash_lock.unlock(); if (bpage->zip.data) { /* Free the compressed page. */ @@ -1254,7 +1257,7 @@ ATTRIBUTE_COLD void buf_pool_t::corrupted_evict(buf_page_t *bpage) page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); mysql_mutex_lock(&mutex); - hash_lock.write_lock(); + hash_lock.lock(); ut_ad(bpage->io_fix() == BUF_IO_READ); ut_ad(!bpage->oldest_modification()); diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 55e5e4afba1..080f87adb0f 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -83,6 +83,7 @@ and the lock released later. 
requested (for ROW_FORMAT=COMPRESSED) @return pointer to the block @retval NULL in case of an error */ +TRANSACTIONAL_TARGET static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, ulint zip_size, bool unzip) { @@ -115,7 +116,6 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, } buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); - page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); mysql_mutex_lock(&buf_pool.mutex); @@ -136,21 +136,23 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, bpage= &block->page; /* Insert into the hash table of file pages */ - hash_lock.write_lock(); - - if (hash_page) { - /* Preserve the reference count. */ - auto buf_fix_count= hash_page->buf_fix_count(); - ut_a(buf_fix_count > 0); - block->page.add_buf_fix_count(buf_fix_count); - buf_pool.watch_remove(hash_page, chain); - } + transactional_lock_guard g + {buf_pool.page_hash.lock_get(chain)}; - block->page.set_io_fix(BUF_IO_READ); - block->page.set_state(BUF_BLOCK_FILE_PAGE); - buf_pool.page_hash.append(chain, &block->page); - hash_lock.write_unlock(); + if (hash_page) + { + /* Preserve the reference count. */ + auto buf_fix_count= hash_page->buf_fix_count(); + ut_a(buf_fix_count > 0); + block->page.add_buf_fix_count(buf_fix_count); + buf_pool.watch_remove(hash_page, chain); + } + + block->page.set_io_fix(BUF_IO_READ); + block->page.set_state(BUF_BLOCK_FILE_PAGE); + buf_pool.page_hash.append(chain, &block->page); + } /* The block must be put to the LRU list, to the old blocks */ buf_LRU_add_block(&block->page, true/* to old blocks */); @@ -204,20 +206,22 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id, bpage->init(BUF_BLOCK_ZIP_PAGE, page_id); - hash_lock.write_lock(); - - if (hash_page) { - /* Preserve the reference count. 
It can be 0 if - buf_pool_t::watch_unset() is executing concurrently, - waiting for buf_pool.mutex, which we are holding. */ - bpage->add_buf_fix_count(hash_page->buf_fix_count()); - buf_pool.watch_remove(hash_page, chain); - } + transactional_lock_guard g + {buf_pool.page_hash.lock_get(chain)}; + + if (hash_page) + { + /* Preserve the reference count. It can be 0 if + buf_pool_t::watch_unset() is executing concurrently, + waiting for buf_pool.mutex, which we are holding. */ + bpage->add_buf_fix_count(hash_page->buf_fix_count()); + buf_pool.watch_remove(hash_page, chain); + } - buf_pool.page_hash.append(chain, bpage); - bpage->set_io_fix(BUF_IO_READ); - hash_lock.write_unlock(); + buf_pool.page_hash.append(chain, bpage); + bpage->set_io_fix(BUF_IO_READ); + } /* The block must be put to the LRU list, to the old blocks. The zip size is already set into the page zip */ @@ -370,6 +374,7 @@ wants to access @return number of page read requests issued; NOTE that if we read ibuf pages, it may happen that the page at the given page number does not get read even if we return a positive value! 
*/ +TRANSACTIONAL_TARGET ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf) { @@ -404,13 +409,11 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf) for (page_id_t i= low; i < high; ++i) { buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold()); - page_hash_latch &latch= buf_pool.page_hash.lock_get(chain); - latch.read_lock(); - const buf_page_t *bpage= buf_pool.page_hash.get(i, chain); - bool found= bpage && bpage->is_accessed() && buf_page_peek_if_young(bpage); - latch.read_unlock(); - if (found && !--count) - goto read_ahead; + transactional_shared_lock_guard g + {buf_pool.page_hash.lock_get(chain)}; + if (const buf_page_t *bpage= buf_pool.page_hash.get(i, chain)) + if (bpage->is_accessed() && buf_page_peek_if_young(bpage) && !--count) + goto read_ahead; } no_read_ahead: @@ -552,6 +555,7 @@ which could result in a deadlock if the OS does not support asynchronous io. @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in] ibuf whether if we are inside ibuf routine @return number of page read requests issued */ +TRANSACTIONAL_TARGET ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf) { @@ -605,9 +609,18 @@ fail: for (page_id_t i= low; i != high_1; ++i) { buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold()); - page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); - hash_lock.read_lock(); + transactional_shared_lock_guard g + {buf_pool.page_hash.lock_get(chain)}; const buf_page_t* bpage= buf_pool.page_hash.get(i, chain); + if (!bpage) + { + if (i == page_id) + goto fail; +failed: + if (--count) + continue; + goto fail; + } if (i == page_id) { /* Read the natural predecessor and successor page addresses from @@ -615,12 +628,6 @@ fail: on the page, we do not acquire an s-latch on the page, this is to prevent deadlocks. The hash_lock is only protecting the buf_pool.page_hash for page i, not the bpage contents itself. 
*/ - if (!bpage) - { -hard_fail: - hash_lock.read_unlock(); - goto fail; - } const byte *f; switch (UNIV_EXPECT(bpage->state(), BUF_BLOCK_FILE_PAGE)) { case BUF_BLOCK_FILE_PAGE: @@ -630,38 +637,31 @@ hard_fail: f= bpage->zip.data; break; default: - goto hard_fail; + ut_ad("invalid state" == 0); + goto fail; } uint32_t prev= mach_read_from_4(my_assume_aligned<4>(f + FIL_PAGE_PREV)); uint32_t next= mach_read_from_4(my_assume_aligned<4>(f + FIL_PAGE_NEXT)); if (prev == FIL_NULL || next == FIL_NULL) - goto hard_fail; + goto fail; page_id_t id= page_id; if (descending && next - 1 == page_id.page_no()) id.set_page_no(prev); else if (!descending && prev + 1 == page_id.page_no()) id.set_page_no(next); else - goto hard_fail; /* Successor or predecessor not in the right order */ + goto fail; /* Successor or predecessor not in the right order */ new_low= id - (id.page_no() % buf_read_ahead_area); new_high_1= new_low + (buf_read_ahead_area - 1); if (id != new_low && id != new_high_1) /* This is not a border page of the area: return */ - goto hard_fail; + goto fail; if (new_high_1.page_no() > space->last_page_number()) /* The area is not whole */ - goto hard_fail; - } - else if (!bpage) - { -failed: - hash_lock.read_unlock(); - if (--count) - continue; - goto fail; + goto fail; } const unsigned accessed= bpage->is_accessed(); @@ -678,7 +678,6 @@ failed: prev_accessed= accessed; if (fail) goto failed; - hash_lock.read_unlock(); } /* If we got this far, read-ahead can be sensible: do it */ diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index 667a64f907a..c88227dbade 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -1381,6 +1381,8 @@ dberr_t dict_sys_t::create_or_check_sys_tables() trx_start_for_ddl(trx); { + /* Do not bother with transactional memory; this is only + executed at startup, with no conflicts present. 
*/ LockMutexGuard g{SRW_LOCK_CALL}; trx->mutex_lock(); lock_table_create(dict_sys.sys_tables, LOCK_X, trx); diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 6074398afd3..34080b22095 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -1181,6 +1181,7 @@ inline void dict_sys_t::add(dict_table_t* table) /** Test whether a table can be evicted from dict_sys.table_LRU. @param table table to be considered for eviction @return whether the table can be evicted */ +TRANSACTIONAL_TARGET static bool dict_table_can_be_evicted(dict_table_t *table) { ut_ad(dict_sys.locked()); @@ -2064,6 +2065,7 @@ dict_index_add_to_cache( /**********************************************************************//** Removes an index from the dictionary cache. */ +TRANSACTIONAL_TARGET static void dict_index_remove_from_cache_low( diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc index f067d43d6a3..41f32cf7240 100644 --- a/storage/innobase/gis/gis0sea.cc +++ b/storage/innobase/gis/gis0sea.cc @@ -78,6 +78,7 @@ rtr_adjust_parent_path( Find the next matching record. This function is used by search or record locating during index delete/update. @return true if there is suitable record found, otherwise false */ +TRANSACTIONAL_TARGET static bool rtr_pcur_getnext_from_path( @@ -387,7 +388,7 @@ rtr_pcur_getnext_from_path( trx_t* trx = thr_get_trx( btr_cur->rtr_info->thr); { - LockMutexGuard g{SRW_LOCK_CALL}; + TMLockTrxGuard g{TMLockTrxArgs(*trx)}; lock_init_prdt_from_mbr( &prdt, &btr_cur->rtr_info->mbr, mode, trx->lock.lock_heap); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 0b0c3b045ff..6729e7e1747 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -3185,6 +3185,7 @@ the query cache. 
@param[in] table table object @param[in] trx transaction object @return whether the storing or retrieving from the query cache is permitted */ +TRANSACTIONAL_TARGET static bool innobase_query_caching_table_check_low( dict_table_t* table, trx_t* trx) { @@ -3211,6 +3212,16 @@ static bool innobase_query_caching_table_check_low( return false; } +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (xbegin()) { + if (table->lock_mutex_is_locked()) + xabort(); + auto len = UT_LIST_GET_LEN(table->locks); + xend(); + return len == 0; + } +#endif + table->lock_mutex_lock(); auto len= UT_LIST_GET_LEN(table->locks); table->lock_mutex_unlock(); @@ -18562,7 +18573,9 @@ void lock_wait_wsrep_kill(trx_t *bf_trx, ulong thd_id, trx_id_t trx_id) trx_t *vtrx= thd_to_trx(vthd); if (vtrx) { - lock_sys.wr_lock(SRW_LOCK_CALL); + /* Do not bother with lock elision using transactional memory here; + this is rather complex code */ + LockMutexGuard g{SRW_LOCK_CALL}; mysql_mutex_lock(&lock_sys.wait_mutex); vtrx->mutex_lock(); /* victim transaction is either active or prepared, if it has already @@ -18607,7 +18620,6 @@ void lock_wait_wsrep_kill(trx_t *bf_trx, ulong thd_id, trx_id_t trx_id) WSREP_DEBUG("kill transaction skipped due to wsrep_aborter set"); } } - lock_sys.wr_unlock(); mysql_mutex_unlock(&lock_sys.wait_mutex); vtrx->mutex_unlock(); } diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 4177215cca7..fbec3bee38c 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -3145,7 +3145,7 @@ or clustered @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in,out] thr query thread @return DB_SUCCESS, DB_STRONG_FAIL or other error */ -static MY_ATTRIBUTE((warn_unused_result)) +static TRANSACTIONAL_TARGET MY_ATTRIBUTE((warn_unused_result)) dberr_t ibuf_insert_low( ulint mode, @@ -3470,6 +3470,7 @@ is clustered or unique. 
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in,out] thr query thread @return true if success */ +TRANSACTIONAL_TARGET bool ibuf_insert( ibuf_op_t op, diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h index 4339c895400..b45183a6428 100644 --- a/storage/innobase/include/btr0sea.h +++ b/storage/innobase/include/btr0sea.h @@ -347,11 +347,21 @@ struct btr_search_sys_t extern btr_search_sys_t btr_search_sys; /** @return number of leaf pages pointed to by the adaptive hash index */ -inline ulint dict_index_t::n_ahi_pages() const +TRANSACTIONAL_INLINE inline ulint dict_index_t::n_ahi_pages() const { if (!btr_search_enabled) return 0; srw_spin_lock *latch= &btr_search_sys.get_part(*this)->latch; +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (xbegin()) + { + if (latch->is_locked()) + xabort(); + ulint ref_count= search_info->ref_count; + xend(); + return ref_count; + } +#endif latch->rd_lock(SRW_LOCK_CALL); ulint ref_count= search_info->ref_count; latch->rd_unlock(); diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 30729063069..ad06b17466d 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -40,6 +40,7 @@ Created 11/5/1995 Heikki Tuuri #include "page0types.h" #include "log0log.h" #include "srv0srv.h" +#include "transactional_lock_guard.h" #include // Forward declaration @@ -1478,25 +1479,29 @@ public: public: /** @return whether the buffer pool contains a page - @tparam watch whether to allow watch_is_sentinel() - @param page_id page identifier - @param chain hash table chain for page_id.fold() */ - template + @tparam allow_watch whether to allow watch_is_sentinel() + @param page_id page identifier + @param chain hash table chain for page_id.fold() */ + template + TRANSACTIONAL_INLINE bool page_hash_contains(const page_id_t page_id, hash_chain &chain) { - page_hash_latch &latch= page_hash.lock_get(chain); - latch.read_lock(); 
+ transactional_shared_lock_guard g + {page_hash.lock_get(chain)}; buf_page_t *bpage= page_hash.get(page_id, chain); - if (!bpage || watch_is_sentinel(*bpage)) + if (bpage >= &watch[0] && bpage < &watch[UT_ARR_SIZE(watch)]) { - latch.read_unlock(); - return watch ? bpage : nullptr; + ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE); + ut_ad(!bpage->in_zip_hash); + ut_ad(!bpage->zip.data); + if (!allow_watch) + bpage= nullptr; + } + else if (bpage) + { + ut_ad(page_id == bpage->id()); + ut_ad(bpage->in_file()); } - - ut_ad(bpage->in_file()); - ut_ad(page_id == bpage->id()); - - latch.read_unlock(); return bpage; } @@ -1510,11 +1515,11 @@ public: page_hash.lock_get(page_hash.cell_get(bpage.id().fold())). is_locked()); #endif /* SAFE_MUTEX */ - ut_ad(bpage.in_file()); - if (&bpage < &watch[0] || &bpage >= &watch[UT_ARR_SIZE(watch)]) { - ut_ad(bpage.state() != BUF_BLOCK_ZIP_PAGE || bpage.zip.data); + ut_ad(bpage.state() == BUF_BLOCK_ZIP_PAGE + ? !!bpage.zip.data + : bpage.state() == BUF_BLOCK_FILE_PAGE); return false; } @@ -1528,16 +1533,14 @@ public: This may only be called after !watch_set() and before invoking watch_unset(). @param id page identifier @return whether the page was read to the buffer pool */ + TRANSACTIONAL_INLINE bool watch_occurred(const page_id_t id) { hash_chain &chain= page_hash.cell_get(id.fold()); - page_hash_latch &latch= page_hash.lock_get(chain); - latch.read_lock(); + transactional_shared_lock_guard g + {page_hash.lock_get(chain)}; /* The page must exist because watch_set() increments buf_fix_count. */ - buf_page_t *bpage= page_hash.get(id, chain); - const bool is_sentinel= watch_is_sentinel(*bpage); - latch.read_unlock(); - return !is_sentinel; + return !watch_is_sentinel(*page_hash.get(id, chain)); } /** Register a watch for a page identifier. 
The caller must hold an @@ -2000,14 +2003,14 @@ inline buf_page_t *buf_pool_t::page_hash_table::get(const page_id_t id, } #ifdef SUX_LOCK_GENERIC -inline void page_hash_latch::read_lock() +inline void page_hash_latch::lock_shared() { mysql_mutex_assert_not_owner(&buf_pool.mutex); if (!read_trylock()) read_lock_wait(); } -inline void page_hash_latch::write_lock() +inline void page_hash_latch::lock() { if (!write_trylock()) write_lock_wait(); diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h index 2f08220fe98..2cb92a5f1df 100644 --- a/storage/innobase/include/buf0types.h +++ b/storage/innobase/include/buf0types.h @@ -179,7 +179,7 @@ enum rw_lock_type_t #include "sux_lock.h" #ifdef SUX_LOCK_GENERIC -class page_hash_latch : public rw_lock +class page_hash_latch : private rw_lock { /** Wait for a shared lock */ void read_lock_wait(); @@ -187,33 +187,50 @@ class page_hash_latch : public rw_lock void write_lock_wait(); public: /** Acquire a shared lock */ - inline void read_lock(); + inline void lock_shared(); /** Acquire an exclusive lock */ - inline void write_lock(); + inline void lock(); + +#ifdef UNIV_DEBUG + /** @return whether an exclusive lock is being held by any thread */ + bool is_write_locked() const { return rw_lock::is_write_locked(); } +#endif + + /** @return whether any lock is being held by any thread */ + bool is_locked() const { return rw_lock::is_locked(); } + /** @return whether any lock is being held or waited for by any thread */ + bool is_locked_or_waiting() const { return rw_lock::is_locked_or_waiting(); } + + /** Release a shared lock */ + void unlock_shared() { read_unlock(); } + /** Release an exclusive lock */ + void unlock() { write_unlock(); } }; #elif defined _WIN32 || SIZEOF_SIZE_T >= 8 class page_hash_latch { - srw_spin_lock_low lock; + srw_spin_lock_low lk; public: - void read_lock() { lock.rd_lock(); } - void read_unlock() { lock.rd_unlock(); } - void write_lock() { lock.wr_lock(); } - void 
write_unlock() { lock.wr_unlock(); } - bool is_locked() const { return lock.is_locked(); } - bool is_write_locked() const { return lock.is_write_locked(); } + void lock_shared() { lk.rd_lock(); } + void unlock_shared() { lk.rd_unlock(); } + void lock() { lk.wr_lock(); } + void unlock() { lk.wr_unlock(); } + bool is_write_locked() const { return lk.is_write_locked(); } + bool is_locked() const { return lk.is_locked(); } + bool is_locked_or_waiting() const { return lk.is_locked_or_waiting(); } }; #else class page_hash_latch { - srw_spin_mutex lock; + srw_spin_mutex lk; public: - void read_lock() { write_lock(); } - void read_unlock() { write_unlock(); } - void write_lock() { lock.wr_lock(); } - void write_unlock() { lock.wr_unlock(); } - bool is_locked() const { return lock.is_locked(); } + void lock_shared() { lock(); } + void unlock_shared() { unlock(); } + void lock() { lk.wr_lock(); } + void unlock() { lk.wr_unlock(); } + bool is_locked() const { return lk.is_locked(); } bool is_write_locked() const { return is_locked(); } + bool is_locked_or_waiting() const { return is_locked(); } }; #endif diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 2a7b38f345c..986c767ff49 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -1979,11 +1979,15 @@ struct dict_table_t { ut_ad(lock_mutex_owner.exchange(0) == os_thread_get_curr_id()); lock_mutex.wr_unlock(); } +#ifndef SUX_LOCK_GENERIC + /** @return whether the lock mutex is held by some thread */ + bool lock_mutex_is_locked() const noexcept { return lock_mutex.is_locked(); } +#endif /* stats mutex lock currently defaults to lock_mutex but in the future, there could be a use-case to have separate mutex for stats. 
-  extra indirection (through inline so no performance hit) should -  help simplify code and increase long-term maintainability */ + extra indirection (through inline so no performance hit) should + help simplify code and increase long-term maintainability */ void stats_mutex_init() { lock_mutex_init(); } void stats_mutex_destroy() { lock_mutex_destroy(); } void stats_mutex_lock() { lock_mutex_lock(); } diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 5f051b8ffbe..50b9792cf2b 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -28,16 +28,15 @@ Created 5/7/1996 Heikki Tuuri #define lock0lock_h #include "buf0types.h" -#include "trx0types.h" +#include "trx0trx.h" #include "mtr0types.h" #include "rem0types.h" -#include "que0types.h" -#include "lock0types.h" #include "hash0hash.h" #include "srv0srv.h" #include "ut0vec.h" #include "gis0rtree.h" #include "lock0prdt.h" +#include "transactional_lock_guard.h" // Forward declaration class ReadView; @@ -62,8 +61,10 @@ lock_get_min_heap_no( /*=================*/ const buf_block_t* block); /*!< in: buffer block */ -/** Discard locks for an index */ -void lock_discard_for_index(const dict_index_t &index); +/** Discard locks for an index when purging DELETE FROM SYS_INDEXES +after an aborted CREATE INDEX operation. +@param index a stale index on which ADD INDEX operation was aborted */ +ATTRIBUTE_COLD void lock_discard_for_index(const dict_index_t &index); /*************************************************************//** Updates the lock table when we have reorganized a page. 
NOTE: we copy @@ -571,6 +572,9 @@ class lock_sys_t { friend struct LockGuard; friend struct LockMultiGuard; + friend struct TMLockGuard; + friend struct TMLockMutexGuard; + friend struct TMLockTrxGuard; /** Hash table latch */ struct hash_latch @@ -585,6 +589,11 @@ class lock_sys_t void acquire() { if (!try_acquire()) wait(); } /** Release a lock */ void release(); + /** @return whether any lock is being held or waited for by any thread */ + bool is_locked_or_waiting() const + { return rw_lock::is_locked_or_waiting(); } + /** @return whether this latch is possibly held by any thread */ + bool is_locked() const { return rw_lock::is_locked(); } #else { private: @@ -596,11 +605,11 @@ class lock_sys_t void acquire() { lock.wr_lock(); } /** Release a lock */ void release() { lock.wr_unlock(); } -#endif -#ifdef UNIV_DEBUG + /** @return whether any lock may be held by any thread */ + bool is_locked_or_waiting() const noexcept + { return lock.is_locked_or_waiting(); } /** @return whether this latch is possibly held by any thread */ - bool is_locked() const - { return memcmp(this, field_ref_zero, sizeof *this); } + bool is_locked() const noexcept { return lock.is_locked(); } #endif }; @@ -799,7 +808,14 @@ public: #ifdef UNIV_DEBUG /** @return whether the current thread is the lock_sys.latch writer */ bool is_writer() const - { return writer.load(std::memory_order_relaxed) == os_thread_get_curr_id(); } + { +# ifdef SUX_LOCK_GENERIC + return writer.load(std::memory_order_relaxed) == os_thread_get_curr_id(); +# else + return writer.load(std::memory_order_relaxed) == os_thread_get_curr_id() || + (xtest() && !latch.is_locked_or_waiting()); +# endif + } /** Assert that a lock shard is exclusively latched (by some thread) */ void assert_locked(const lock_t &lock) const; /** Assert that a table lock shard is exclusively latched by this thread */ @@ -836,13 +852,14 @@ public: void deadlock_check(); /** Cancel a waiting lock request. 
- @param lock waiting lock request - @param trx active transaction - @param check_victim whether to check trx->lock.was_chosen_as_deadlock_victim + @tparam check_victim whether to check for DB_DEADLOCK + @param lock waiting lock request + @param trx active transaction @retval DB_SUCCESS if no lock existed @retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set @retval DB_LOCK_WAIT if the lock was canceled */ - static dberr_t cancel(trx_t *trx, lock_t *lock, bool check_victim); + template<bool check_victim> + static dberr_t cancel(trx_t *trx, lock_t *lock); /** Cancel a waiting lock request (if any) when killing a transaction */ static void cancel(trx_t *trx); @@ -981,6 +998,149 @@ private: hash_cell_t *cell2_; }; +/** lock_sys.latch exclusive guard using transactional memory */ +struct TMLockMutexGuard +{ + TRANSACTIONAL_INLINE + TMLockMutexGuard(SRW_LOCK_ARGS(const char *file, unsigned line)) + { +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (xbegin()) + { + if (was_elided()) + return; + xabort(); + } +#endif + lock_sys.wr_lock(SRW_LOCK_ARGS(file, line)); + } + TRANSACTIONAL_INLINE + ~TMLockMutexGuard() + { +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (was_elided()) xend(); else +#endif + lock_sys.wr_unlock(); + } + +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + bool was_elided() const noexcept + { return !lock_sys.latch.is_locked_or_waiting(); } +#else + bool was_elided() const noexcept { return false; } +#endif +}; + +/** lock_sys latch guard for 1 page_id_t, using transactional memory */ +struct TMLockGuard +{ + TRANSACTIONAL_TARGET + TMLockGuard(lock_sys_t::hash_table &hash, const page_id_t id); + TRANSACTIONAL_INLINE ~TMLockGuard() + { +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (elided) + { + xend(); + return; + } +#endif + lock_sys_t::hash_table::latch(cell_)->release(); + /* Must be last, to avoid a race with lock_sys_t::hash_table::resize() */ + lock_sys.rd_unlock(); + } + /** @return the hash array cell */ + 
hash_cell_t &cell() const { return *cell_; } +private: + /** The hash array cell */ + hash_cell_t *cell_; +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + /** whether the latches were elided */ + bool elided; +#endif +}; + +/** guard for shared lock_sys.latch and trx_t::mutex using +transactional memory */ +struct TMLockTrxGuard +{ + trx_t &trx; + + TRANSACTIONAL_INLINE +#ifndef UNIV_PFS_RWLOCK + TMLockTrxGuard(trx_t &trx) : trx(trx) +# define TMLockTrxArgs(trx) trx +#else + TMLockTrxGuard(const char *file, unsigned line, trx_t &trx) : trx(trx) +# define TMLockTrxArgs(trx) SRW_LOCK_CALL, trx +#endif + { +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (xbegin()) + { + if (!lock_sys.latch.is_write_locked() && was_elided()) + return; + xabort(); + } +#endif + lock_sys.rd_lock(SRW_LOCK_ARGS(file, line)); + trx.mutex_lock(); + } + TRANSACTIONAL_INLINE + ~TMLockTrxGuard() + { +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (was_elided()) + { + xend(); + return; + } +#endif + lock_sys.rd_unlock(); + trx.mutex_unlock(); + } +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + bool was_elided() const noexcept { return !trx.mutex_is_locked(); } +#else + bool was_elided() const noexcept { return false; } +#endif +}; + +/** guard for trx_t::mutex using transactional memory */ +struct TMTrxGuard +{ + trx_t &trx; + + TRANSACTIONAL_INLINE TMTrxGuard(trx_t &trx) : trx(trx) + { +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (xbegin()) + { + if (was_elided()) + return; + xabort(); + } +#endif + trx.mutex_lock(); + } + TRANSACTIONAL_INLINE ~TMTrxGuard() + { +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (was_elided()) + { + xend(); + return; + } +#endif + trx.mutex_unlock(); + } +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + bool was_elided() const noexcept { return !trx.mutex_is_locked(); } +#else + bool was_elided() const noexcept { return false; } +#endif +}; + 
/*********************************************************************//** Creates a new record lock and inserts it to the lock queue. Does NOT check for deadlocks or lock compatibility! diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h index f0595598838..b5ec7a0d29e 100644 --- a/storage/innobase/include/lock0priv.h +++ b/storage/innobase/include/lock0priv.h @@ -459,7 +459,7 @@ lock_rec_get_n_bits( /**********************************************************************//** Sets the nth bit of a record lock to TRUE. */ -UNIV_INLINE +inline void lock_rec_set_nth_bit( /*=================*/ @@ -473,7 +473,12 @@ lock_rec_set_nth_bit( inline byte lock_rec_reset_nth_bit(lock_t* lock, ulint i) { ut_ad(!lock->is_table()); +#ifdef SUX_LOCK_GENERIC ut_ad(lock_sys.is_writer() || lock->trx->mutex_is_owner()); +#else + ut_ad(lock_sys.is_writer() || lock->trx->mutex_is_owner() + || (xtest() && !lock->trx->mutex_is_locked())); +#endif ut_ad(i < lock->un_member.rec_lock.n_bits); byte* b = reinterpret_cast<byte*>(&lock[1]) + (i >> 3); diff --git a/storage/innobase/include/lock0priv.ic b/storage/innobase/include/lock0priv.ic index c51304cd7ed..21e7c7c95dc 100644 --- a/storage/innobase/include/lock0priv.ic +++ b/storage/innobase/include/lock0priv.ic @@ -67,7 +67,7 @@ lock_rec_get_n_bits( /**********************************************************************//** Sets the nth bit of a record lock to TRUE. 
*/ -UNIV_INLINE +inline void lock_rec_set_nth_bit( /*=================*/ @@ -91,7 +91,12 @@ lock_rec_set_nth_bit( #if defined __GNUC__ && !defined __clang__ && __GNUC__ < 6 # pragma GCC diagnostic pop #endif +#ifdef SUX_LOCK_GENERIC ut_ad(lock_sys.is_writer() || lock->trx->mutex_is_owner()); +#else + ut_ad(lock_sys.is_writer() || lock->trx->mutex_is_owner() + || (xtest() && !lock->trx->mutex_is_locked())); +#endif lock->trx->lock.n_rec_locks++; } diff --git a/storage/innobase/include/rw_lock.h b/storage/innobase/include/rw_lock.h index 6447eea18ae..0ae052fabe2 100644 --- a/storage/innobase/include/rw_lock.h +++ b/storage/innobase/include/rw_lock.h @@ -222,17 +222,13 @@ public: } /** @return whether an exclusive lock is being held by any thread */ - bool is_write_locked() const - { return !!(lock.load(std::memory_order_relaxed) & WRITER); } + bool is_write_locked() const { return !!(value() & WRITER); } #ifdef SUX_LOCK_GENERIC /** @return whether an update lock is being held by any thread */ - bool is_update_locked() const - { return !!(lock.load(std::memory_order_relaxed) & UPDATER); } + bool is_update_locked() const { return !!(value() & UPDATER); } #endif /* SUX_LOCK_GENERIC */ /** @return whether any lock is being held or waited for by any thread */ - bool is_locked_or_waiting() const - { return lock.load(std::memory_order_relaxed) != 0; } + bool is_locked_or_waiting() const { return value() != 0; } /** @return whether any lock is being held by any thread */ - bool is_locked() const - { return (lock.load(std::memory_order_relaxed) & ~WRITER_WAITING) != 0; } + bool is_locked() const { return (value() & ~WRITER_WAITING) != 0; } }; diff --git a/storage/innobase/include/srw_lock.h b/storage/innobase/include/srw_lock.h index d69c9de268f..54d042419ca 100644 --- a/storage/innobase/include/srw_lock.h +++ b/storage/innobase/include/srw_lock.h @@ -60,10 +60,10 @@ class srw_mutex_impl final public: /** @return whether the mutex is being held or waited for */ bool 
is_locked_or_waiting() const - { return lock.load(std::memory_order_relaxed) != 0; } + { return lock.load(std::memory_order_acquire) != 0; } /** @return whether the mutex is being held by any thread */ bool is_locked() const - { return (lock.load(std::memory_order_relaxed) & HOLDER) != 0; } + { return (lock.load(std::memory_order_acquire) & HOLDER) != 0; } void init() { DBUG_ASSERT(!is_locked_or_waiting()); } void destroy() { DBUG_ASSERT(!is_locked_or_waiting()); } @@ -174,8 +174,7 @@ public: { return (readers.load(std::memory_order_relaxed) & WRITER) != 0; } # ifndef DBUG_OFF /** @return whether the lock is being held or waited for */ - bool is_vacant() const - { return !is_locked() && !writer.is_locked_or_waiting(); } + bool is_vacant() const { return !is_locked_or_waiting(); } # endif /* !DBUG_OFF */ bool rd_lock_try() @@ -275,10 +274,18 @@ public: } /** @return whether an exclusive lock may be held by any thread */ bool is_write_locked() const noexcept - { return readers.load(std::memory_order_relaxed) == WRITER; } + { return readers.load(std::memory_order_acquire) == WRITER; } /** @return whether any lock may be held by any thread */ bool is_locked() const noexcept - { return readers.load(std::memory_order_relaxed) != 0; } + { return readers.load(std::memory_order_acquire) != 0; } + /** @return whether any lock may be held by any thread */ + bool is_locked_or_waiting() const noexcept + { return is_locked() || writer.is_locked_or_waiting(); } + + void lock_shared() { rd_lock(); } + void unlock_shared() { rd_unlock(); } + void lock() { wr_lock(); } + void unlock() { wr_unlock(); } #endif }; @@ -291,29 +298,29 @@ class srw_lock_ friend srw_lock_impl; # endif # ifdef _WIN32 - SRWLOCK lock; + SRWLOCK lk; # else - rw_lock_t lock; + rw_lock_t lk; # endif void rd_wait(); void wr_wait(); public: - void init() { IF_WIN(,my_rwlock_init(&lock, nullptr)); } - void destroy() { IF_WIN(,rwlock_destroy(&lock)); } + void init() { IF_WIN(,my_rwlock_init(&lk, nullptr)); } + void 
destroy() { IF_WIN(,rwlock_destroy(&lk)); } inline void rd_lock(); inline void wr_lock(); bool rd_lock_try() - { return IF_WIN(TryAcquireSRWLockShared(&lock), !rw_tryrdlock(&lock)); } + { return IF_WIN(TryAcquireSRWLockShared(&lk), !rw_tryrdlock(&lk)); } void rd_unlock() - { IF_WIN(ReleaseSRWLockShared(&lock), rw_unlock(&lock)); } + { IF_WIN(ReleaseSRWLockShared(&lk), rw_unlock(&lk)); } bool wr_lock_try() - { return IF_WIN(TryAcquireSRWLockExclusive(&lock), !rw_trywrlock(&lock)); } + { return IF_WIN(TryAcquireSRWLockExclusive(&lk), !rw_trywrlock(&lk)); } void wr_unlock() - { IF_WIN(ReleaseSRWLockExclusive(&lock), rw_unlock(&lock)); } + { IF_WIN(ReleaseSRWLockExclusive(&lk), rw_unlock(&lk)); } #ifdef _WIN32 /** @return whether any lock may be held by any thread */ - bool is_locked_or_waiting() const noexcept { return (size_t&)(lock) != 0; } + bool is_locked_or_waiting() const noexcept { return (size_t&)(lk) != 0; } /** @return whether any lock may be held by any thread */ bool is_locked() const noexcept { return is_locked_or_waiting(); } /** @return whether an exclusive lock may be held by any thread */ @@ -322,6 +329,11 @@ public: // FIXME: this returns false positives for shared locks return is_locked(); } + + void lock_shared() { rd_lock(); } + void unlock_shared() { rd_unlock(); } + void lock() { wr_lock(); } + void unlock() { wr_unlock(); } #endif }; @@ -330,10 +342,10 @@ template<> void srw_lock_<true>::wr_wait(); template<> inline void srw_lock_<false>::rd_lock() -{ IF_WIN(AcquireSRWLockShared(&lock), rw_rdlock(&lock)); } +{ IF_WIN(AcquireSRWLockShared(&lk), rw_rdlock(&lk)); } template<> inline void srw_lock_<false>::wr_lock() -{ IF_WIN(AcquireSRWLockExclusive(&lock), rw_wrlock(&lock)); } +{ IF_WIN(AcquireSRWLockExclusive(&lk), rw_wrlock(&lk)); } template<> inline void srw_lock_<true>::rd_lock() { if (!rd_lock_try()) rd_wait(); } @@ -491,6 +503,15 @@ public: } bool rd_lock_try() { return lock.rd_lock_try(); } bool wr_lock_try() { return lock.wr_lock_try(); } +#ifndef SUX_LOCK_GENERIC 
+ /** @return whether any lock may be held or waited for by any thread */ + bool is_locked_or_waiting() const noexcept + { return lock.is_locked_or_waiting(); } + /** @return whether any lock may be held by any thread */ + bool is_locked() const noexcept { return lock.is_locked(); } + /** @return whether an exclusive lock may be held by any thread */ + bool is_write_locked() const noexcept { return lock.is_write_locked(); } +#endif }; typedef srw_lock_impl<false> srw_lock; diff --git a/storage/innobase/include/transactional_lock_guard.h b/storage/innobase/include/transactional_lock_guard.h new file mode 100644 index 00000000000..7ece27638fc --- /dev/null +++ b/storage/innobase/include/transactional_lock_guard.h @@ -0,0 +1,167 @@ +/***************************************************************************** + +Copyright (c) 2021, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +#pragma once + +#if defined __powerpc64__ && defined __clang__ && defined __linux__ +#elif defined __powerpc64__&&defined __GNUC__&&defined __linux__&&__GNUC__ > 4 +#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64) +#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) +# if __GNUC__ >= 8 +# elif defined __clang_major__ && __clang_major__ > 6 +# else +# define NO_ELISION +# endif +#else /* Transactional memory has not been implemented for this ISA */ +# define NO_ELISION +#endif + +#ifdef NO_ELISION +constexpr bool have_transactional_memory= false; +# ifdef UNIV_DEBUG +static inline bool xtest() { return false; } +# endif +# define TRANSACTIONAL_TARGET /* nothing */ +# define TRANSACTIONAL_INLINE /* nothing */ +#else +# if defined __i386__||defined __x86_64__||defined _M_IX86||defined _M_X64 +extern bool have_transactional_memory; +bool transactional_lock_enabled(); + +# include <immintrin.h> +# if defined __GNUC__ && !defined __INTEL_COMPILER +# define TRANSACTIONAL_TARGET __attribute__((target("rtm"))) +# define TRANSACTIONAL_INLINE __attribute__((target("rtm"),always_inline)) +# else +# define TRANSACTIONAL_TARGET /* nothing */ +# define TRANSACTIONAL_INLINE /* nothing */ +# endif + +TRANSACTIONAL_INLINE static inline bool xbegin() +{ + return have_transactional_memory && _xbegin() == _XBEGIN_STARTED; +} + +# ifdef UNIV_DEBUG +# ifdef __GNUC__ +/** @return whether a memory transaction is active */ +bool xtest(); +# else +static inline bool xtest() { return have_transactional_memory && _xtest(); } +# endif +# endif + +TRANSACTIONAL_INLINE static inline void xabort() { _xabort(0); } + +TRANSACTIONAL_INLINE static inline void xend() { _xend(); } +# elif defined 
__powerpc64__ +# include <htmxlintrin.h> +extern bool have_transactional_memory; +bool transactional_lock_enabled(); +# define TRANSACTIONAL_TARGET __attribute__((target("htm"))) +# define TRANSACTIONAL_INLINE __attribute__((target("htm"),always_inline)) + +TRANSACTIONAL_INLINE static inline bool xbegin() +{ + return have_transactional_memory && + __TM_simple_begin() == _HTM_TBEGIN_STARTED; +} + +# ifdef UNIV_DEBUG +bool xtest(); +# endif + +TRANSACTIONAL_INLINE static inline void xabort() { __TM_abort(); } + +TRANSACTIONAL_INLINE static inline void xend() { __TM_end(); } +# endif +#endif + +template<class mutex> +class transactional_lock_guard +{ + mutex &m; + +public: + TRANSACTIONAL_INLINE transactional_lock_guard(mutex &m) : m(m) + { +#ifndef NO_ELISION + if (xbegin()) + { + if (was_elided()) + return; + xabort(); + } +#endif + m.lock(); + } + transactional_lock_guard(const transactional_lock_guard &)= delete; + TRANSACTIONAL_INLINE ~transactional_lock_guard() + { +#ifndef NO_ELISION + if (was_elided()) xend(); else +#endif + m.unlock(); + } + +#ifndef NO_ELISION + bool was_elided() const noexcept { return !m.is_locked_or_waiting(); } +#else + bool was_elided() const noexcept { return false; } +#endif +}; + +template<class mutex> +class transactional_shared_lock_guard +{ + mutex &m; +#ifndef NO_ELISION + bool elided; +#else + static constexpr bool elided= false; +#endif + +public: + TRANSACTIONAL_INLINE transactional_shared_lock_guard(mutex &m) : m(m) + { +#ifndef NO_ELISION + if (xbegin()) + { + if (!m.is_write_locked()) + { + elided= true; + return; + } + xabort(); + } + elided= false; +#endif + m.lock_shared(); + } + transactional_shared_lock_guard(const transactional_shared_lock_guard &)= + delete; + TRANSACTIONAL_INLINE ~transactional_shared_lock_guard() + { +#ifndef NO_ELISION + if (was_elided()) xend(); else +#endif + m.unlock_shared(); + } + + bool was_elided() const noexcept { return elided; } +}; diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 
86e8b534f54..d2bf7075594 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -621,6 +621,9 @@ public: == os_thread_get_curr_id()); mutex.wr_unlock(); } +#ifndef SUX_LOCK_GENERIC + bool mutex_is_locked() const noexcept { return mutex.is_locked(); } +#endif #ifdef UNIV_DEBUG /** @return whether the current thread holds the mutex */ bool mutex_is_owner() const diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 33c827235be..234d215d1df 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -166,9 +166,7 @@ void lock_sys_t::assert_locked(const lock_t &lock) const void lock_sys_t::assert_locked(const dict_table_t &table) const { ut_ad(!table.is_temporary()); - - const os_thread_id_t current_thread= os_thread_get_curr_id(); - if (writer.load(std::memory_order_relaxed) == current_thread) + if (is_writer()) return; ut_ad(readers); ut_ad(table.lock_mutex_is_owner()); @@ -186,7 +184,7 @@ void lock_sys_t::hash_table::assert_locked(const page_id_t id) const /** Assert that a hash table cell is exclusively latched (by some thread) */ void lock_sys_t::assert_locked(const hash_cell_t &cell) const { - if (lock_sys.is_writer()) + if (is_writer()) return; ut_ad(lock_sys.readers); ut_ad(hash_table::latch(const_cast<hash_cell_t*>(&cell))->is_locked()); @@ -229,6 +227,28 @@ LockMultiGuard::~LockMultiGuard() lock_sys.rd_unlock(); } +TRANSACTIONAL_TARGET +TMLockGuard::TMLockGuard(lock_sys_t::hash_table &hash, page_id_t id) +{ + const auto id_fold= id.fold(); +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (xbegin()) + { + if (lock_sys.latch.is_write_locked()) + xabort(); + cell_= hash.cell_get(id_fold); + if (hash.latch(cell_)->is_locked()) + xabort(); + elided= true; + return; + } + elided= false; +#endif + lock_sys.rd_lock(SRW_LOCK_CALL); + cell_= hash.cell_get(id_fold); + hash.latch(cell_)->acquire(); +} + /** Pretty-print a table lock. 
@param[in,out] file output stream @param[in] lock table lock */ @@ -430,6 +450,8 @@ void lock_sys_t::rd_unlock() void lock_sys_t::resize(ulint n_cells) { ut_ad(this == &lock_sys); + /* Buffer pool resizing is rarely initiated by the user, and this + would exceed the maximum size of a memory transaction. */ LockMutexGuard g{SRW_LOCK_CALL}; rec_hash.resize(n_cells); prdt_hash.resize(n_cells); @@ -893,7 +915,8 @@ void lock_wait_wsrep_kill(trx_t *bf_trx, ulong thd_id, trx_id_t trx_id); /** Kill the holders of conflicting locks. @param trx brute-force applier transaction running in the current thread */ -ATTRIBUTE_COLD ATTRIBUTE_NOINLINE static void lock_wait_wsrep(trx_t *trx) +ATTRIBUTE_COLD ATTRIBUTE_NOINLINE +static void lock_wait_wsrep(trx_t *trx) { DBUG_ASSERT(wsrep_on(trx->mysql_thd)); if (!wsrep_thd_is_BF(trx->mysql_thd, false)) @@ -1136,7 +1159,7 @@ lock_rec_create_low( ulint n_bytes; ut_d(lock_sys.hash_get(type_mode).assert_locked(page_id)); - ut_ad(holds_trx_mutex == trx->mutex_is_owner()); + ut_ad(xtest() || holds_trx_mutex == trx->mutex_is_owner()); ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); ut_ad(!(type_mode & LOCK_TABLE)); ut_ad(trx->state != TRX_STATE_NOT_STARTED); @@ -1263,7 +1286,7 @@ lock_rec_enqueue_waiting( ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); trx_t* trx = thr_get_trx(thr); - ut_ad(trx->mutex_is_owner()); + ut_ad(xtest() || trx->mutex_is_owner()); ut_ad(!trx->dict_operation_lock_mode); if (trx->mysql_thd && thd_lock_wait_timeout(trx->mysql_thd) == 0) { @@ -1331,6 +1354,7 @@ can reuse a suitable record lock object already existing on the same page, just setting the appropriate bit in its bitmap. This is a low-level function which does NOT check for deadlocks or lock compatibility! 
@return lock where the bit was set */ +TRANSACTIONAL_TARGET static void lock_rec_add_to_queue( @@ -1349,7 +1373,7 @@ lock_rec_add_to_queue( transaction mutex */ { ut_d(lock_sys.hash_get(type_mode).assert_locked(id)); - ut_ad(caller_owns_trx_mutex == trx->mutex_is_owner()); + ut_ad(xtest() || caller_owns_trx_mutex == trx->mutex_is_owner()); ut_ad(index->is_primary() || dict_index_get_online_status(index) != ONLINE_INDEX_CREATION); ut_ad(!(type_mode & LOCK_TABLE)); @@ -1422,9 +1446,11 @@ lock_rec_add_to_queue( if (caller_owns_trx_mutex) { trx->mutex_unlock(); } - lock_trx->mutex_lock(); - lock_rec_set_nth_bit(lock, heap_no); - lock_trx->mutex_unlock(); + { + TMTrxGuard tg{*lock_trx}; + lock_rec_set_nth_bit(lock, heap_no); + } + if (caller_owns_trx_mutex) { trx->mutex_lock(); } @@ -1637,6 +1663,8 @@ static void lock_wait_rpl_report(trx_t *trx) if (!wait_lock) return; ut_ad(!(wait_lock->type_mode & LOCK_AUTO_INC)); + /* This would likely be too large to attempt to use a memory transaction, + even for wait_lock->is_table(). 
*/ if (!lock_sys.wr_lock_try()) { mysql_mutex_unlock(&lock_sys.wait_mutex); @@ -1833,13 +1861,13 @@ dberr_t lock_wait(que_thr_t *thr) if (row_lock_wait) lock_sys.wait_resume(trx->mysql_thd, suspend_time, my_hrtime_coarse()); -end_wait: if (lock_t *lock= trx->lock.wait_lock) { - lock_sys_t::cancel(trx, lock, false); + lock_sys_t::cancel<false>(trx, lock); lock_sys.deadlock_check(); } +end_wait: mysql_mutex_unlock(&lock_sys.wait_mutex); thd_wait_end(trx->mysql_thd); @@ -1993,6 +2021,7 @@ static void lock_rec_dequeue_from_page(lock_t *in_lock, bool owns_wait_mutex) /** Remove a record lock request, waiting or granted, on a discarded page @param hash hash table @param in_lock lock object */ +TRANSACTIONAL_TARGET void lock_rec_discard(lock_sys_t::hash_table &lock_hash, lock_t *in_lock) { ut_ad(!in_lock->is_table()); @@ -2000,13 +2029,15 @@ void lock_rec_discard(lock_sys_t::hash_table &lock_hash, lock_t *in_lock) HASH_DELETE(lock_t, hash, &lock_hash, in_lock->un_member.rec_lock.page_id.fold(), in_lock); - trx_t *trx= in_lock->trx; - trx->mutex_lock(); - ut_d(auto old_locks=) - in_lock->index->table->n_rec_locks--; + ut_d(uint32_t old_locks); + { + trx_t *trx= in_lock->trx; + TMTrxGuard tg{*trx}; + ut_d(old_locks=) + in_lock->index->table->n_rec_locks--; + UT_LIST_REMOVE(trx->lock.trx_locks, in_lock); + } ut_ad(old_locks); - UT_LIST_REMOVE(trx->lock.trx_locks, in_lock); - trx->mutex_unlock(); MONITOR_INC(MONITOR_RECLOCK_REMOVED); MONITOR_DEC(MONITOR_NUM_RECLOCK); } @@ -2030,11 +2061,15 @@ lock_rec_free_all_from_discard_page(page_id_t id, const hash_cell_t &cell, } } -/** Discard locks for an index */ -void lock_discard_for_index(const dict_index_t &index) +/** Discard locks for an index when purging DELETE FROM SYS_INDEXES +after an aborted CREATE INDEX operation. 
+@param index a stale index on which ADD INDEX operation was aborted */ +ATTRIBUTE_COLD void lock_discard_for_index(const dict_index_t &index) { ut_ad(!index.is_committed()); - lock_sys.wr_lock(SRW_LOCK_CALL); + /* This is very rarely executed code, and the size of the hash array + would exceed the maximum size of a memory transaction. */ + LockMutexGuard g{SRW_LOCK_CALL}; const ulint n= lock_sys.rec_hash.pad(lock_sys.rec_hash.n_cells); for (ulint i= 0; i < n; i++) { @@ -2052,7 +2087,6 @@ void lock_discard_for_index(const dict_index_t &index) lock= lock->hash; } } - lock_sys.wr_unlock(); } /*============= RECORD LOCK MOVING AND INHERITING ===================*/ @@ -2060,6 +2094,7 @@ void lock_discard_for_index(const dict_index_t &index) /*************************************************************//** Resets the lock bits for a single record. Releases transactions waiting for lock requests here. */ +TRANSACTIONAL_TARGET static void lock_rec_reset_and_release_wait(const hash_cell_t &cell, const page_id_t id, @@ -2072,10 +2107,8 @@ lock_rec_reset_and_release_wait(const hash_cell_t &cell, const page_id_t id, lock_rec_cancel(lock); else { - trx_t *lock_trx= lock->trx; - lock_trx->mutex_lock(); + TMTrxGuard tg{*lock->trx}; lock_rec_reset_nth_bit(lock, heap_no); - lock_trx->mutex_unlock(); } } } @@ -2157,6 +2190,7 @@ lock_rec_inherit_to_gap_if_gap_lock( /*************************************************************//** Moves the locks of a record to another record and resets the lock bits of the donating record. */ +TRANSACTIONAL_TARGET static void lock_rec_move( @@ -2243,6 +2277,7 @@ Updates the lock table when we have reorganized a page. NOTE: we copy also the locks set on the infimum of the page; the infimum may carry locks if an update of a record is occurring on the page, and its locks were temporarily stored on the infimum. 
*/ +TRANSACTIONAL_TARGET void lock_move_reorganize_page( /*======================*/ @@ -2260,12 +2295,14 @@ lock_move_reorganize_page( const page_id_t id{block->page.id()}; const auto id_fold= id.fold(); { - LockGuard g{lock_sys.rec_hash, id}; + TMLockGuard g{lock_sys.rec_hash, id}; if (!lock_sys_t::get_first(g.cell(), id)) return; } - /* We will modify arbitrary trx->lock.trx_locks. */ + /* We will modify arbitrary trx->lock.trx_locks. + Do not bother with a memory transaction; we are going + to allocate memory and copy a lot of data. */ LockMutexGuard g{SRW_LOCK_CALL}; hash_cell_t &cell= *lock_sys.rec_hash.cell_get(id_fold); @@ -2348,10 +2385,10 @@ lock_move_reorganize_page( } trx_t *lock_trx= lock->trx; - lock_trx->mutex_lock(); + lock_trx->mutex_lock(); - /* Clear the bit in old_lock. */ - if (old_heap_no < lock->un_member.rec_lock.n_bits && + /* Clear the bit in old_lock. */ + if (old_heap_no < lock->un_member.rec_lock.n_bits && lock_rec_reset_nth_bit(lock, old_heap_no)) { ut_ad(!page_rec_is_metadata(orec)); @@ -2362,7 +2399,7 @@ lock_move_reorganize_page( new_heap_no, lock->index, lock_trx, true); } - lock_trx->mutex_unlock(); + lock_trx->mutex_unlock(); if (new_heap_no == PAGE_HEAP_NO_SUPREMUM) { @@ -2389,6 +2426,7 @@ lock_move_reorganize_page( /*************************************************************//** Moves the explicit locks on user records to another page if a record list end is moved to another page. */ +TRANSACTIONAL_TARGET void lock_move_rec_list_end( /*===================*/ @@ -2405,6 +2443,7 @@ lock_move_rec_list_end( const page_id_t id{block->page.id()}; const page_id_t new_id{new_block->page.id()}; { + /* This would likely be too large for a memory transaction. 
*/ LockMultiGuard g{lock_sys.rec_hash, id, new_id}; /* Note: when we move locks from record to record, waiting locks @@ -2504,6 +2543,7 @@ lock_move_rec_list_end( /*************************************************************//** Moves the explicit locks on user records to another page if a record list start is moved to another page. */ +TRANSACTIONAL_TARGET void lock_move_rec_list_start( /*=====================*/ @@ -2529,6 +2569,7 @@ lock_move_rec_list_start( const page_id_t new_id{new_block->page.id()}; { + /* This would likely be too large for a memory transaction. */ LockMultiGuard g{lock_sys.rec_hash, id, new_id}; for (lock_t *lock= lock_sys_t::get_first(g.cell1(), id); lock; @@ -2616,6 +2657,7 @@ lock_move_rec_list_start( /*************************************************************//** Moves the explicit locks on user records to another page if a record list start is moved to another page. */ +TRANSACTIONAL_TARGET void lock_rtr_move_rec_list( /*===================*/ @@ -2638,6 +2680,7 @@ lock_rtr_move_rec_list( const page_id_t new_id{new_block->page.id()}; { + /* This would likely be too large for a memory transaction. */ LockMultiGuard g{lock_sys.rec_hash, id, new_id}; for (lock_t *lock= lock_sys_t::get_first(g.cell1(), id); lock; @@ -2712,6 +2755,7 @@ lock_update_split_right( const page_id_t l{left_block->page.id()}; const page_id_t r{right_block->page.id()}; + /* This would likely be too large for a memory transaction. */ LockMultiGuard g{lock_sys.rec_hash, l, r}; /* Move the locks on the supremum of the left page to the supremum @@ -2764,6 +2808,7 @@ lock_update_merge_right( const page_id_t l{left_block->page.id()}; const page_id_t r{right_block->page.id()}; + /* This would likely be too large for a memory transaction. */ LockMultiGuard g{lock_sys.rec_hash, l, r}; /* Inherit the locks from the supremum of the left page to the @@ -2790,6 +2835,7 @@ to be updated. 
*/ void lock_update_root_raise(const buf_block_t &block, const page_id_t root) { const page_id_t id{block.page.id()}; + /* This would likely be too large for a memory transaction. */ LockMultiGuard g{lock_sys.rec_hash, id, root}; /* Move the locks on the supremum of the root to the supremum of block */ lock_rec_move(g.cell1(), block, id, g.cell2(), root, @@ -2802,6 +2848,7 @@ void lock_update_root_raise(const buf_block_t &block, const page_id_t root) void lock_update_copy_and_discard(const buf_block_t &new_block, page_id_t old) { const page_id_t id{new_block.page.id()}; + /* This would likely be too large for a memory transaction. */ LockMultiGuard g{lock_sys.rec_hash, id, old}; /* Move the locks on the supremum of the old page to the supremum of new */ lock_rec_move(g.cell1(), new_block, id, g.cell2(), old, @@ -2838,6 +2885,7 @@ void lock_update_merge_left(const buf_block_t& left, const rec_t *orig_pred, const page_id_t l{left.page.id()}; + /* This would likely be too large for a memory transaction. */ LockMultiGuard g{lock_sys.rec_hash, l, right}; const rec_t *left_next_rec= page_rec_get_next_const(orig_pred); @@ -2883,6 +2931,7 @@ lock_rec_reset_and_inherit_gap_locks( donating record */ { const page_id_t heir{heir_block.page.id()}; + /* This is a rare operation and likely too large for a memory transaction. */ LockMultiGuard g{lock_sys.rec_hash, heir, donor}; lock_rec_reset_and_release_wait(g.cell1(), heir, heir_heap_no); lock_rec_inherit_to_gap(g.cell1(), heir, g.cell2(), donor, heir_block.frame, @@ -2906,6 +2955,7 @@ lock_update_discard( ulint heap_no; const page_id_t heir(heir_block->page.id()); const page_id_t page_id(block->page.id()); + /* This would likely be too large for a memory transaction. 
*/ LockMultiGuard g{lock_sys.rec_hash, heir, page_id}; if (lock_sys_t::get_first(g.cell2(), page_id)) { @@ -3379,6 +3429,37 @@ lock_table_other_has_incompatible( return(NULL); } +/** Acquire or enqueue a table lock */ +static dberr_t lock_table_low(dict_table_t *table, lock_mode mode, + que_thr_t *thr, trx_t *trx) +{ + lock_t *wait_for= + lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode); + dberr_t err= DB_SUCCESS; + + trx->mutex_lock(); + + if (wait_for) + err= lock_table_enqueue_waiting(mode, table, thr, wait_for); + else + lock_table_create(table, mode, trx, nullptr); + + trx->mutex_unlock(); + + return err; +} + +#ifdef WITH_WSREP +/** Acquire or enqueue a table lock in Galera replication mode. */ +ATTRIBUTE_NOINLINE +static dberr_t lock_table_wsrep(dict_table_t *table, lock_mode mode, + que_thr_t *thr, trx_t *trx) +{ + LockMutexGuard g{SRW_LOCK_CALL}; + return lock_table_low(table, mode, thr, trx); +} +#endif + /*********************************************************************//** Locks the specified database table in the mode given. If the lock cannot be granted immediately, the query thread is put to wait. @@ -3392,8 +3473,6 @@ lock_table( que_thr_t* thr) /*!< in: query thread */ { trx_t* trx; - dberr_t err; - lock_t* wait_for; if (table->is_temporary()) { return DB_SUCCESS; @@ -3403,7 +3482,7 @@ lock_table( /* Look for equal or stronger locks the same trx already has on the table. No need to acquire LockMutexGuard here - because only this transacton can add/access table locks + because only this transaction can add/access table locks to/from trx_t::table_locks. 
*/ if (lock_table_has(trx, table, mode) || srv_read_only_mode) { @@ -3422,46 +3501,18 @@ lock_table( trx_set_rw_mode(trx); } - err = DB_SUCCESS; - #ifdef WITH_WSREP if (trx->is_wsrep()) { - lock_sys.wr_lock(SRW_LOCK_CALL); - } else { - lock_sys.rd_lock(SRW_LOCK_CALL); - table->lock_mutex_lock(); + return lock_table_wsrep(table, mode, thr, trx); } -#else +#endif lock_sys.rd_lock(SRW_LOCK_CALL); table->lock_mutex_lock(); -#endif - - /* We have to check if the new lock is compatible with any locks - other transactions have in the table lock queue. */ - - wait_for = lock_table_other_has_incompatible( - trx, LOCK_WAIT, table, mode); - - trx->mutex_lock(); - - if (wait_for) { - err = lock_table_enqueue_waiting(mode, table, thr, wait_for); - } else { - lock_table_create(table, mode, trx, wait_for); - } - -#ifdef WITH_WSREP - if (trx->is_wsrep()) { - lock_sys.wr_unlock(); - trx->mutex_unlock(); - return err; - } -#endif + dberr_t err = lock_table_low(table, mode, thr, trx); table->lock_mutex_unlock(); lock_sys.rd_unlock(); - trx->mutex_unlock(); - return(err); + return err; } /** Create a table lock object for a resurrected transaction. @@ -3477,6 +3528,8 @@ void lock_table_resurrect(dict_table_t *table, trx_t *trx, lock_mode mode) return; { + /* This is executed at server startup while no connections + are allowed. Do not bother with lock elision. */ LockMutexGuard g{SRW_LOCK_CALL}; ut_ad(!lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)); @@ -3650,6 +3703,7 @@ dberr_t lock_sys_tables(trx_t *trx) Removes a granted record lock of a transaction from the queue and grants locks to other transactions waiting in the queue if they now are entitled to a lock. 
*/ +TRANSACTIONAL_TARGET void lock_rec_unlock( /*============*/ @@ -3700,9 +3754,10 @@ lock_rec_unlock( released: ut_a(!lock->is_waiting()); - trx->mutex_lock(); - lock_rec_reset_nth_bit(lock, heap_no); - trx->mutex_unlock(); + { + TMTrxGuard tg{*trx}; + lock_rec_reset_nth_bit(lock, heap_no); + } /* Check if we can now grant waiting lock requests */ @@ -3730,7 +3785,7 @@ released: /** Release the explicit locks of a committing transaction, and release possible other transactions waiting because of these locks. @return whether the operation succeeded */ -static bool lock_release_try(trx_t *trx) +TRANSACTIONAL_TARGET static bool lock_release_try(trx_t *trx) { /* At this point, trx->lock.trx_locks cannot be modified by other threads, because our transaction has been committed. @@ -3746,6 +3801,10 @@ static bool lock_release_try(trx_t *trx) bool all_released= true; restart: ulint count= 1000; + /* We will not attempt hardware lock elision (memory transaction) + here. Both lock_rec_dequeue_from_page() and lock_table_dequeue() + would likely lead to a memory transaction abort due to a system call, to + wake up a waiting transaction. */ lock_sys.rd_lock(SRW_LOCK_CALL); trx->mutex_lock(); @@ -3824,6 +3883,8 @@ void lock_release(trx_t *trx) /* Fall back to acquiring lock_sys.latch in exclusive mode */ restart: count= 1000; + /* There is probably no point to try lock elision here; + in lock_release_try() it is different. */ lock_sys.wr_lock(SRW_LOCK_CALL); trx->mutex_lock(); @@ -4002,10 +4063,14 @@ void lock_release_on_prepare(trx_t *trx) } /** Release locks on a table whose creation is being rolled back */ -ATTRIBUTE_COLD void lock_release_on_rollback(trx_t *trx, dict_table_t *table) +ATTRIBUTE_COLD +void lock_release_on_rollback(trx_t *trx, dict_table_t *table) { trx->mod_tables.erase(table); + /* This is very rarely executed code, in the rare case that a + CREATE TABLE operation is being rolled back. 
Theoretically, + we might try to remove the locks in multiple memory transactions. */ lock_sys.wr_lock(SRW_LOCK_CALL); trx->mutex_lock(); @@ -4211,6 +4276,7 @@ http://bugs.mysql.com/36942 */ /*********************************************************************//** Calculates the number of record lock structs in the record lock hash table. @return number of record locks */ +TRANSACTIONAL_TARGET static ulint lock_get_n_rec_locks() { ulint n_locks = 0; @@ -4244,6 +4310,9 @@ lock_print_info_summary( FILE* file, /*!< in: file where to print */ ibool nowait) /*!< in: whether to wait for lock_sys.latch */ { + /* Here, lock elision does not make sense, because + for the output we are going to invoke system calls, + which would interrupt a memory transaction. */ if (!nowait) { lock_sys.wr_lock(SRW_LOCK_CALL); } else if (!lock_sys.wr_lock_try()) { @@ -4799,7 +4868,6 @@ static void lock_rec_block_validate(const page_id_t page_id) } } - static my_bool lock_validate_table_locks(rw_trx_hash_element_t *element, void*) { lock_sys.assert_locked(); @@ -4853,6 +4921,7 @@ be suspended for some reason; if not, then puts the transaction and the query thread to the lock wait state and inserts a waiting request for a gap x-lock to the lock queue. 
@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */ +TRANSACTIONAL_TARGET dberr_t lock_rec_insert_check_and_lock( /*===========================*/ @@ -5043,7 +5112,6 @@ static my_bool lock_rec_other_trx_holds_expl_callback( @param[in] rec user record @param[in] id page identifier */ - static void lock_rec_other_trx_holds_expl(trx_t *caller_trx, trx_t *trx, const rec_t *rec, const page_id_t id) @@ -5572,6 +5640,7 @@ static void lock_cancel_waiting_and_release(lock_t *lock) trx->mutex_unlock(); } #ifdef WITH_WSREP +TRANSACTIONAL_TARGET void lock_sys_t::cancel_lock_wait_for_trx(trx_t *trx) { lock_sys.wr_lock(SRW_LOCK_CALL); @@ -5588,19 +5657,21 @@ void lock_sys_t::cancel_lock_wait_for_trx(trx_t *trx) #endif /* WITH_WSREP */ /** Cancel a waiting lock request. -@param lock waiting lock request -@param trx active transaction -@param check_victim whether to check trx->lock.was_chosen_as_deadlock_victim +@tparam check_victim whether to check for DB_DEADLOCK +@param lock waiting lock request +@param trx active transaction @retval DB_SUCCESS if no lock existed @retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set @retval DB_LOCK_WAIT if the lock was canceled */ -dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock, bool check_victim) +template +dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock) { mysql_mutex_assert_owner(&lock_sys.wait_mutex); ut_ad(trx->lock.wait_lock == lock); ut_ad(trx->state == TRX_STATE_ACTIVE); dberr_t err= DB_SUCCESS; - + /* This would be too large for a memory transaction, except in the + DB_DEADLOCK case, which was already tested in lock_trx_handle_wait(). 
*/ if (lock->is_table()) { if (!lock_sys.rd_lock_try()) @@ -5697,7 +5768,7 @@ void lock_sys_t::cancel(trx_t *trx) if (!trx->dict_operation) { trx->error_state= DB_INTERRUPTED; - cancel(trx, lock, false); + cancel(trx, lock); } } lock_sys.deadlock_check(); @@ -5750,7 +5821,7 @@ dberr_t lock_trx_handle_wait(trx_t *trx) if (trx->lock.was_chosen_as_deadlock_victim) err= DB_DEADLOCK; else if (lock_t *wait_lock= trx->lock.wait_lock) - err= lock_sys_t::cancel(trx, wait_lock, true); + err= lock_sys_t::cancel(trx, wait_lock); lock_sys.deadlock_check(); mysql_mutex_unlock(&lock_sys.wait_mutex); return err; @@ -5799,13 +5870,27 @@ static my_bool lock_table_locks_lookup(rw_trx_hash_element_t *element, /** Check if there are any locks on a table. @return true if table has either table or record locks. */ +TRANSACTIONAL_TARGET bool lock_table_has_locks(dict_table_t *table) { if (table->n_rec_locks) return true; - table->lock_mutex_lock(); - auto len= UT_LIST_GET_LEN(table->locks); - table->lock_mutex_unlock(); + ulint len; +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (xbegin()) + { + if (table->lock_mutex_is_locked()) + xabort(); + len= UT_LIST_GET_LEN(table->locks); + xend(); + } + else +#endif + { + table->lock_mutex_lock(); + len= UT_LIST_GET_LEN(table->locks); + table->lock_mutex_unlock(); + } if (len) return true; #ifdef UNIV_DEBUG @@ -5985,7 +6070,7 @@ namespace Deadlock static trx_t *report(trx_t *const trx, bool current_trx) { mysql_mutex_assert_owner(&lock_sys.wait_mutex); - ut_ad(lock_sys.is_writer() == !current_trx); + ut_ad(xtest() || lock_sys.is_writer() == !current_trx); /* Normally, trx should be a direct part of the deadlock cycle. However, if innodb_deadlock_detect had been OFF in the @@ -6018,6 +6103,9 @@ namespace Deadlock undo_no_t victim_weight= ~0ULL; unsigned victim_pos= 0, trx_pos= 0; + /* Here, lock elision does not make sense, because + for the output we are going to invoke system calls, + which would interrupt a memory transaction. 
*/ if (current_trx && !lock_sys.wr_lock_try()) { mysql_mutex_unlock(&lock_sys.wait_mutex); @@ -6166,18 +6254,22 @@ static bool Deadlock::check_and_resolve(trx_t *trx) return false; if (lock_t *wait_lock= trx->lock.wait_lock) - lock_sys_t::cancel(trx, wait_lock, false); + lock_sys_t::cancel(trx, wait_lock); lock_sys.deadlock_check(); return true; } /** Check for deadlocks while holding only lock_sys.wait_mutex. */ +TRANSACTIONAL_TARGET void lock_sys_t::deadlock_check() { ut_ad(!is_writer()); mysql_mutex_assert_owner(&wait_mutex); bool acquired= false; +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + bool elided= false; +#endif if (Deadlock::to_be_checked) { @@ -6186,7 +6278,16 @@ void lock_sys_t::deadlock_check() auto i= Deadlock::to_check.begin(); if (i == Deadlock::to_check.end()) break; - if (!acquired) + if (acquired); +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + else if (xbegin()) + { + if (latch.is_locked_or_waiting()) + xabort(); + acquired= elided= true; + } +#endif + else { acquired= wr_lock_try(); if (!acquired) @@ -6206,6 +6307,10 @@ void lock_sys_t::deadlock_check() Deadlock::to_be_checked= false; } ut_ad(Deadlock::to_check.empty()); +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (elided) + return; +#endif if (acquired) wr_unlock(); } @@ -6225,6 +6330,7 @@ void lock_update_split_and_merge( const page_id_t l{left_block->page.id()}; const page_id_t r{right_block->page.id()}; + /* This would likely be too large for a memory transaction. */ LockMultiGuard g{lock_sys.rec_hash, l, r}; const rec_t *left_next_rec= page_rec_get_next_const(orig_pred); ut_ad(!page_rec_is_metadata(left_next_rec)); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index d2e29cddadf..5822524bda9 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2848,6 +2848,7 @@ func_exit: /** Reads in pages which have hashed log records, from an area around a given page number. 
@param[in] page_id page id */ +TRANSACTIONAL_TARGET static void recv_read_in_area(page_id_t page_id) { uint32_t page_nos[RECV_READ_AHEAD_AREA]; diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index b67c1212271..9b2ea9db542 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -695,6 +695,7 @@ row_ins_set_detailed( Acquires dict_foreign_err_mutex, rewinds dict_foreign_err_file and displays information about the given transaction. The caller must release dict_foreign_err_mutex. */ +TRANSACTIONAL_TARGET static void row_ins_foreign_trx_print( @@ -708,7 +709,7 @@ row_ins_foreign_trx_print( ut_ad(!srv_read_only_mode); { - LockMutexGuard g{SRW_LOCK_CALL}; + TMLockMutexGuard g{SRW_LOCK_CALL}; n_rec_locks = trx->lock.n_rec_locks; n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks); heap_size = mem_heap_get_size(trx->lock.lock_heap); diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index a26a862e1ab..d1fbd6c3cc2 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -69,6 +69,9 @@ Created 10/8/1995 Heikki Tuuri #include "fil0pagecompress.h" #include "trx0types.h" #include +#include "log.h" + +#include "transactional_lock_guard.h" #include /* The following is the maximum allowed duration of a lock wait. */ @@ -693,13 +696,15 @@ srv_free(void) /*********************************************************************//** Boots the InnoDB server. 
*/ -void -srv_boot(void) -/*==========*/ +void srv_boot() { - srv_thread_pool_init(); - trx_pool_init(); - srv_init(); +#ifndef NO_ELISION + if (transactional_lock_enabled()) + sql_print_information("InnoDB: Using transactional memory"); +#endif + srv_thread_pool_init(); + trx_pool_init(); + srv_init(); } /******************************************************************//** diff --git a/storage/innobase/sync/srw_lock.cc b/storage/innobase/sync/srw_lock.cc index 82f8d615477..b54191d91b0 100644 --- a/storage/innobase/sync/srw_lock.cc +++ b/storage/innobase/sync/srw_lock.cc @@ -19,6 +19,73 @@ this program; if not, write to the Free Software Foundation, Inc., #include "srw_lock.h" #include "srv0srv.h" #include "my_cpu.h" +#include "transactional_lock_guard.h" + +#ifdef NO_ELISION +#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64) +# include +bool have_transactional_memory; +bool transactional_lock_enabled() +{ + int regs[4]; + __cpuid(regs, 0); + if (regs[0] < 7) + return false; + __cpuidex(regs, 7, 0); + /* Restricted Transactional Memory (RTM) */ + have_transactional_memory= regs[1] & 1U << 11; + return have_transactional_memory; +} +#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) +# include +bool have_transactional_memory; +bool transactional_lock_enabled() +{ + if (__get_cpuid_max(0, nullptr) < 7) + return false; + unsigned eax, ebx, ecx, edx; + __cpuid_count(7, 0, eax, ebx, ecx, edx); + /* Restricted Transactional Memory (RTM) */ + have_transactional_memory= ebx & 1U << 11; + return have_transactional_memory; +} + +# ifdef UNIV_DEBUG +TRANSACTIONAL_TARGET +bool xtest() { return have_transactional_memory && _xtest(); } +# endif +#elif defined __powerpc64__ +# ifdef __linux__ +# include + +# ifndef PPC_FEATURE2_HTM_NOSC +# define PPC_FEATURE2_HTM_NOSC 0x01000000 +# endif +# ifndef PPC_FEATURE2_HTM_NO_SUSPEND +# define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000 +# endif + +# ifndef AT_HWCAP2 +# define AT_HWCAP2 26 +# endif +# endif +bool 
have_transactional_memory; +bool transactional_lock_enabled() +{ +# ifdef __linux__ + return getauxval(AT_HWCAP2) & + (PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM_NO_SUSPEND); +# endif +} + +# ifdef UNIV_DEBUG +TRANSACTIONAL_TARGET bool xtest() +{ + return have_transactional_memory && + _HTM_STATE (__builtin_ttest ()) == _HTM_TRANSACTIONAL; +} +# endif +#endif /** @return the parameter for srw_pause() */ static inline unsigned srw_pause_delay() @@ -477,7 +544,7 @@ template<> void srw_lock_::rd_wait() return; } - IF_WIN(AcquireSRWLockShared(&lock), rw_rdlock(&lock)); + IF_WIN(AcquireSRWLockShared(&lk), rw_rdlock(&lk)); } template<> void srw_lock_::wr_wait() @@ -491,7 +558,7 @@ template<> void srw_lock_::wr_wait() return; } - IF_WIN(AcquireSRWLockExclusive(&lock), rw_wrlock(&lock)); + IF_WIN(AcquireSRWLockExclusive(&lk), rw_wrlock(&lk)); } #endif diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 3dd1b093cf6..9d9f9057628 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -77,7 +77,7 @@ TrxUndoRsegsIterator::TrxUndoRsegsIterator() /** Sets the next rseg to purge in purge_sys. Executed in the purge coordinator thread. 
@return whether anything is to be purged */ -inline bool TrxUndoRsegsIterator::set_next() +TRANSACTIONAL_INLINE inline bool TrxUndoRsegsIterator::set_next() { mysql_mutex_lock(&purge_sys.pq_mutex); @@ -110,23 +110,38 @@ inline bool TrxUndoRsegsIterator::set_next() purge_sys.rseg = *m_iter++; mysql_mutex_unlock(&purge_sys.pq_mutex); - purge_sys.rseg->latch.rd_lock(); - ut_a(purge_sys.rseg->last_page_no != FIL_NULL); - ut_ad(purge_sys.rseg->last_trx_no() == m_rsegs.trx_no); - - /* We assume in purge of externally stored fields that space id is - in the range of UNDO tablespace space ids */ + /* We assume in purge of externally stored fields that space + id is in the range of UNDO tablespace space ids */ ut_ad(purge_sys.rseg->space->id == TRX_SYS_SPACE || srv_is_undo_tablespace(purge_sys.rseg->space->id)); - ut_a(purge_sys.tail.trx_no <= purge_sys.rseg->last_trx_no()); + trx_id_t last_trx_no, tail_trx_no; + { +#ifdef SUX_LOCK_GENERIC + purge_sys.rseg->latch.rd_lock(); +#else + transactional_shared_lock_guard rg + {purge_sys.rseg->latch}; +#endif + last_trx_no = purge_sys.rseg->last_trx_no(); + tail_trx_no = purge_sys.tail.trx_no; - purge_sys.tail.trx_no = purge_sys.rseg->last_trx_no(); - purge_sys.hdr_offset = purge_sys.rseg->last_offset(); - purge_sys.hdr_page_no = purge_sys.rseg->last_page_no; + purge_sys.tail.trx_no = last_trx_no; + purge_sys.hdr_offset = purge_sys.rseg->last_offset(); + purge_sys.hdr_page_no = purge_sys.rseg->last_page_no; + +#ifdef SUX_LOCK_GENERIC + purge_sys.rseg->latch.rd_unlock(); +#endif + } + + /* Only the purge coordinator task will access + purge_sys.rseg_iter or purge_sys.hdr_page_no. */ + ut_ad(last_trx_no == m_rsegs.trx_no); + ut_a(purge_sys.hdr_page_no != FIL_NULL); + ut_a(tail_trx_no <= last_trx_no); - purge_sys.rseg->latch.rd_unlock(); return(true); } @@ -550,7 +565,7 @@ __attribute__((optimize(0))) Removes unnecessary history data from rollback segments. 
NOTE that when this function is called, the caller must not have any latches on undo log pages! */ -static void trx_purge_truncate_history() +TRANSACTIONAL_TARGET static void trx_purge_truncate_history() { ut_ad(purge_sys.head <= purge_sys.tail); purge_sys_t::iterator &head= purge_sys.head.trx_no @@ -617,12 +632,18 @@ static void trx_purge_truncate_history() { if (rseg.space != &space) continue; +#ifdef SUX_LOCK_GENERIC rseg.latch.rd_lock(); +#else + transactional_shared_lock_guard g{rseg.latch}; +#endif ut_ad(rseg.skip_allocation()); if (rseg.is_referenced()) { not_free: +#ifdef SUX_LOCK_GENERIC rseg.latch.rd_unlock(); +#endif return; } @@ -645,7 +666,9 @@ not_free: goto not_free; } +#ifdef SUX_LOCK_GENERIC rseg.latch.rd_unlock(); +#endif } ib::info() << "Truncating " << file->name; @@ -938,10 +961,7 @@ Chooses the next undo log to purge and updates the info in purge_sys. This function is used to initialize purge_sys when the next record to purge is not known, and also to update the purge system info on the next record when purge has handled the whole undo log for a transaction. 
*/ -static -void -trx_purge_choose_next_log(void) -/*===========================*/ +TRANSACTIONAL_TARGET static void trx_purge_choose_next_log() { ut_ad(!purge_sys.next_stored); diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc index 08e05edb896..45bd36d9669 100644 --- a/storage/innobase/trx/trx0rec.cc +++ b/storage/innobase/trx/trx0rec.cc @@ -1948,16 +1948,30 @@ dberr_t trx_undo_report_rename(trx_t* trx, const dict_table_t* table) return err; } -ATTRIBUTE_COLD ATTRIBUTE_NOINLINE +TRANSACTIONAL_TARGET ATTRIBUTE_COLD ATTRIBUTE_NOINLINE /** @return whether the transaction holds an exclusive lock on a table */ static bool trx_has_lock_x(const trx_t &trx, dict_table_t& table) { if (table.is_temporary()) return true; - table.lock_mutex_lock(); - const auto n= table.n_lock_x_or_s; - table.lock_mutex_unlock(); + uint32_t n; + +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (xbegin()) + { + if (table.lock_mutex_is_locked()) + xabort(); + n= table.n_lock_x_or_s; + xend(); + } + else +#endif + { + table.lock_mutex_lock(); + n= table.n_lock_x_or_s; + table.lock_mutex_unlock(); + } /* This thread is executing trx. No other thread can modify our table locks (only record locks might be created, in an implicit-to-explicit conversion). diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 61e1fd50cd5..18c93d5a8cc 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -456,7 +456,7 @@ void trx_t::free() } /** Transition to committed state, to release implicit locks. */ -inline void trx_t::commit_state() +TRANSACTIONAL_INLINE inline void trx_t::commit_state() { ut_ad(state == TRX_STATE_PREPARED || state == TRX_STATE_PREPARED_RECOVERED @@ -473,9 +473,8 @@ inline void trx_t::commit_state() makes modifications to the database, will get an lsn larger than the committing transaction T. In the case where the log flush fails, and T never gets committed, also T2 will never get committed. 
*/ - mutex.wr_lock(); + TMTrxGuard tg{*this}; state= TRX_STATE_COMMITTED_IN_MEMORY; - mutex.wr_unlock(); ut_ad(id || !is_referenced()); } @@ -498,8 +497,7 @@ inline void trx_t::release_locks() } /** At shutdown, frees a transaction object. */ -void -trx_free_at_shutdown(trx_t *trx) +TRANSACTIONAL_TARGET void trx_free_at_shutdown(trx_t *trx) { ut_ad(trx->is_recovered); ut_a(trx_state_eq(trx, TRX_STATE_PREPARED) @@ -1228,7 +1226,7 @@ void trx_t::evict_table(table_id_t table_id, bool reset_only) } /** Mark a transaction committed in the main memory data structures. */ -inline void trx_t::commit_in_memory(const mtr_t *mtr) +TRANSACTIONAL_INLINE inline void trx_t::commit_in_memory(const mtr_t *mtr) { must_flush_log_later= false; read_view.close(); @@ -1395,7 +1393,7 @@ void trx_t::commit_cleanup() /** Commit the transaction in a mini-transaction. @param mtr mini-transaction (if there are any persistent modifications) */ -void trx_t::commit_low(mtr_t *mtr) +TRANSACTIONAL_TARGET void trx_t::commit_low(mtr_t *mtr) { ut_ad(!mtr || mtr->is_active()); ut_d(bool aborted = in_rollback && error_state == DB_DEADLOCK); @@ -1771,6 +1769,7 @@ trx_print_latched( /**********************************************************************//** Prints info about a transaction. Acquires and releases lock_sys.latch. */ +TRANSACTIONAL_TARGET void trx_print( /*======*/ @@ -1781,7 +1780,7 @@ trx_print( { ulint n_rec_locks, n_trx_locks, heap_size; { - LockMutexGuard g{SRW_LOCK_CALL}; + TMLockMutexGuard g{SRW_LOCK_CALL}; n_rec_locks= trx->lock.n_rec_locks; n_trx_locks= UT_LIST_GET_LEN(trx->lock.trx_locks); heap_size= mem_heap_get_size(trx->lock.lock_heap); @@ -1833,6 +1832,7 @@ static lsn_t trx_prepare_low(trx_t *trx) /****************************************************************//** Prepares a transaction. 
*/ +TRANSACTIONAL_TARGET static void trx_prepare( @@ -1848,9 +1848,10 @@ trx_prepare( DBUG_EXECUTE_IF("ib_trx_crash_during_xa_prepare_step", DBUG_SUICIDE();); ut_a(trx->state == TRX_STATE_ACTIVE); - trx->mutex_lock(); - trx->state = TRX_STATE_PREPARED; - trx->mutex_unlock(); + { + TMTrxGuard tg{*trx}; + trx->state = TRX_STATE_PREPARED; + } if (lsn) { /* Depending on the my.cnf options, we may now write the log -- cgit v1.2.1