summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.bzrignore5
-rwxr-xr-xBUILD/SETUP.sh2
-rwxr-xr-xBUILD/compile-amd64-debug-wsrep11
-rwxr-xr-xBUILD/compile-amd64-wsrep9
-rwxr-xr-xBUILD/compile-pentium-debug-wsrep12
-rwxr-xr-xBUILD/compile-pentium-wsrep11
-rwxr-xr-xBUILD/compile-pentium64-wsrep28
-rw-r--r--CMakeLists.txt12
-rw-r--r--Docs/README-wsrep487
-rw-r--r--cmake/configure.pl10
-rw-r--r--cmake/cpack_rpm.cmake33
-rw-r--r--cmake/package_name.cmake2
-rw-r--r--cmake/wsrep.cmake59
-rw-r--r--include/thr_lock.h13
-rw-r--r--mysys/default.c12
-rw-r--r--mysys/thr_lock.c121
-rw-r--r--scripts/CMakeLists.txt4
-rw-r--r--scripts/mysqld_multi.sh130
-rw-r--r--scripts/mysqld_safe.sh98
-rw-r--r--scripts/wsrep_sst_common.sh106
-rw-r--r--scripts/wsrep_sst_mysqldump.sh121
-rw-r--r--scripts/wsrep_sst_rsync.sh210
-rw-r--r--scripts/wsrep_sst_xtrabackup.sh222
-rw-r--r--sql/CMakeLists.txt20
-rw-r--r--sql/events.cc12
-rw-r--r--sql/handler.cc130
-rw-r--r--sql/handler.h16
-rw-r--r--sql/item_func.cc12
-rw-r--r--sql/lock.cc50
-rw-r--r--sql/log.cc247
-rw-r--r--sql/log.h26
-rw-r--r--sql/log_event.cc158
-rw-r--r--sql/mdl.cc136
-rw-r--r--sql/mdl.h3
-rw-r--r--sql/mysqld.cc782
-rw-r--r--sql/mysqld.h17
-rw-r--r--sql/protocol.cc8
-rw-r--r--sql/set_var.h6
-rw-r--r--sql/slave.cc25
-rw-r--r--sql/sp.cc34
-rw-r--r--sql/sql_acl.cc38
-rw-r--r--sql/sql_alter.cc20
-rw-r--r--sql/sql_base.cc50
-rw-r--r--sql/sql_builtin.cc.in9
-rw-r--r--sql/sql_class.cc292
-rw-r--r--sql/sql_class.h65
-rw-r--r--sql/sql_connect.cc39
-rw-r--r--sql/sql_delete.cc12
-rw-r--r--sql/sql_insert.cc31
-rw-r--r--sql/sql_lex.cc11
-rw-r--r--sql/sql_parse.cc1135
-rw-r--r--sql/sql_parse.h16
-rw-r--r--sql/sql_plugin.cc6
-rw-r--r--sql/sql_reload.cc13
-rw-r--r--sql/sql_repl.cc14
-rw-r--r--sql/sql_show.cc6
-rw-r--r--sql/sql_table.cc11
-rw-r--r--sql/sql_trigger.cc52
-rw-r--r--sql/sql_truncate.cc10
-rw-r--r--sql/sql_update.cc12
-rw-r--r--sql/sys_vars.cc222
-rw-r--r--sql/transaction.cc48
-rw-r--r--sql/wsrep_check_opts.cc392
-rw-r--r--sql/wsrep_hton.cc449
-rw-r--r--sql/wsrep_mysqld.cc1311
-rw-r--r--sql/wsrep_mysqld.h383
-rw-r--r--sql/wsrep_notify.cc107
-rw-r--r--sql/wsrep_priv.h233
-rw-r--r--sql/wsrep_sst.cc1001
-rw-r--r--sql/wsrep_utils.cc468
-rw-r--r--sql/wsrep_var.cc548
-rw-r--r--storage/innobase/dict/dict0dict.c21
-rw-r--r--storage/innobase/handler/ha_innodb.cc1175
-rw-r--r--storage/innobase/handler/ha_innodb.h41
-rw-r--r--storage/innobase/handler/handler0alter.cc4
-rw-r--r--storage/innobase/include/dict0mem.h3
-rw-r--r--storage/innobase/include/ha_prototypes.h12
-rw-r--r--storage/innobase/include/lock0lock.h1
-rw-r--r--storage/innobase/include/rem0rec.h8
-rw-r--r--storage/innobase/include/srv0srv.h4
-rw-r--r--storage/innobase/include/trx0sys.h30
-rw-r--r--storage/innobase/lock/lock0lock.c301
-rw-r--r--storage/innobase/os/os0file.c23
-rw-r--r--storage/innobase/rem/rem0rec.c123
-rw-r--r--storage/innobase/row/row0ins.c49
-rw-r--r--storage/innobase/row/row0upd.c70
-rw-r--r--storage/innobase/srv/srv0srv.c110
-rw-r--r--storage/innobase/trx/trx0roll.c27
-rw-r--r--storage/innobase/trx/trx0sys.c87
-rw-r--r--storage/innobase/trx/trx0trx.c19
-rw-r--r--storage/xtradb/dict/dict0dict.c21
-rw-r--r--storage/xtradb/handler/ha_innodb.cc1150
-rw-r--r--storage/xtradb/handler/ha_innodb.h38
-rw-r--r--storage/xtradb/handler/handler0alter.cc4
-rw-r--r--storage/xtradb/include/dict0mem.h3
-rw-r--r--storage/xtradb/include/ha_prototypes.h13
-rw-r--r--storage/xtradb/include/lock0lock.h1
-rw-r--r--storage/xtradb/include/rem0rec.h8
-rw-r--r--storage/xtradb/include/srv0srv.h4
-rw-r--r--storage/xtradb/include/trx0sys.h30
-rw-r--r--storage/xtradb/lock/lock0lock.c292
-rw-r--r--storage/xtradb/os/os0file.c23
-rw-r--r--storage/xtradb/rem/rem0rec.c123
-rw-r--r--storage/xtradb/row/row0ins.c49
-rw-r--r--storage/xtradb/row/row0upd.c70
-rw-r--r--storage/xtradb/srv/srv0srv.c110
-rw-r--r--storage/xtradb/trx/trx0roll.c27
-rw-r--r--storage/xtradb/trx/trx0sys.c87
-rw-r--r--storage/xtradb/trx/trx0trx.c19
-rw-r--r--support-files/CMakeLists.txt8
-rwxr-xr-xsupport-files/build-tags9
-rw-r--r--support-files/mysql.spec.sh65
-rw-r--r--support-files/wsrep.cnf.sh129
-rw-r--r--support-files/wsrep_notify.sh102
-rw-r--r--wsrep/CMakeLists.txt24
-rw-r--r--wsrep/Makefile.am2
-rw-r--r--wsrep/wsrep_api.h875
-rw-r--r--wsrep/wsrep_dummy.c368
-rw-r--r--wsrep/wsrep_loader.c199
-rw-r--r--wsrep/wsrep_uuid.c78
120 files changed, 16321 insertions, 122 deletions
diff --git a/.bzrignore b/.bzrignore
index 74c81c48e05..5bafcee9b38 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -969,6 +969,8 @@ support-files/mysql.server
support-files/mysql.spec
support-files/mysqld_multi.server
support-files/ndb-config-2-node.ini
+support-files/wsrep.cnf
+support-files/wsrep_notify
TAGS
test/ndbapi/bank/bankCreator
test/ndbapi/bank/bankMakeGL
@@ -1092,6 +1094,9 @@ sql/share/slovak
sql/share/spanish
sql/share/swedish
sql/share/ukrainian
+scripts/wsrep_sst_mysqldump
+scripts/wsrep_sst_rsync
+scripts/wsrep_sst_xtrabackup
CPackConfig.cmake
CPackSourceConfig.cmake
Docs/INFO_BIN
diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh
index 9f552f1ca5e..b59c8a1a6e1 100755
--- a/BUILD/SETUP.sh
+++ b/BUILD/SETUP.sh
@@ -205,7 +205,7 @@ all_configs="$SSL_LIBRARY --with-plugins=max --with-plugin-ndbcluster --with-emb
alpha_cflags="$check_cpu_cflags -Wa,-m$cpu_flag"
amd64_cflags="$check_cpu_cflags"
amd64_cxxflags="" # If dropping '--with-big-tables', add here "-DBIG_TABLES"
-pentium_cflags="$check_cpu_cflags"
+pentium_cflags="$check_cpu_cflags -m32"
pentium64_cflags="$check_cpu_cflags -m64"
ppc_cflags="$check_cpu_cflags"
sparc_cflags=""
diff --git a/BUILD/compile-amd64-debug-wsrep b/BUILD/compile-amd64-debug-wsrep
new file mode 100755
index 00000000000..995a8afcca9
--- /dev/null
+++ b/BUILD/compile-amd64-debug-wsrep
@@ -0,0 +1,11 @@
+#! /bin/sh
+
+path=`dirname $0`
+. "$path/SETUP.sh"
+
+extra_flags="$amd64_cflags $debug_cflags -g -O0 $wsrep_cflags"
+c_warnings="$c_warnings $debug_extra_warnings"
+cxx_warnings="$cxx_warnings $debug_extra_warnings"
+extra_configs="$amd64_configs $debug_configs $wsrep_configs --with-wsrep"
+
+. "$path/FINISH.sh"
diff --git a/BUILD/compile-amd64-wsrep b/BUILD/compile-amd64-wsrep
new file mode 100755
index 00000000000..57dfbdd6da2
--- /dev/null
+++ b/BUILD/compile-amd64-wsrep
@@ -0,0 +1,9 @@
+#! /bin/sh
+
+path=`dirname $0`
+. "$path/SETUP.sh"
+
+extra_flags="$amd64_cflags $fast_cflags -g $wsrep_cflags"
+extra_configs="$amd64_configs $wsrep_configs --with-wsrep"
+
+. "$path/FINISH.sh"
diff --git a/BUILD/compile-pentium-debug-wsrep b/BUILD/compile-pentium-debug-wsrep
new file mode 100755
index 00000000000..ee68e3fd0c1
--- /dev/null
+++ b/BUILD/compile-pentium-debug-wsrep
@@ -0,0 +1,12 @@
+#! /bin/sh -x
+
+path=`dirname $0`
+set -- "$@" --with-debug=full
+. "$path/SETUP.sh"
+
+extra_flags="$pentium_cflags $debug_cflags -g -O0 $wsrep_cflags"
+c_warnings="$c_warnings $debug_extra_warnings"
+cxx_warnings="$cxx_warnings $debug_extra_warnings"
+extra_configs="$pentium_configs $debug_configs $wsrep_configs --with-wsrep"
+
+. "$path/FINISH.sh"
diff --git a/BUILD/compile-pentium-wsrep b/BUILD/compile-pentium-wsrep
new file mode 100755
index 00000000000..eeb14310e4e
--- /dev/null
+++ b/BUILD/compile-pentium-wsrep
@@ -0,0 +1,11 @@
+#! /bin/sh
+
+path=`dirname $0`
+. "$path/SETUP.sh"
+
+extra_flags="$pentium_cflags $fast_cflags $wsrep_cflags"
+extra_configs="$pentium_configs $wsrep_configs --with-wsrep"
+
+#strip=yes
+
+. "$path/FINISH.sh"
diff --git a/BUILD/compile-pentium64-wsrep b/BUILD/compile-pentium64-wsrep
new file mode 100755
index 00000000000..0bccb34e753
--- /dev/null
+++ b/BUILD/compile-pentium64-wsrep
@@ -0,0 +1,28 @@
+#! /bin/sh
+
+# Copyright (C) 2006, 2007 MySQL AB
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public
+# License as published by the Free Software Foundation; version 2
+# of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this library; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA
+
+path=`dirname $0`
+. "$path/SETUP.sh"
+
+extra_flags="$pentium64_cflags $fast_cflags -g $wsrep_cflags"
+extra_configs="$pentium_configs $static_link $wsrep_configs --with-wsrep"
+CC="$CC --pipe"
+strip=yes
+
+. "$path/FINISH.sh"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ed19a9929a7..2f8ac5a2fe0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -151,6 +151,7 @@ INCLUDE(ctest)
INCLUDE(plugin)
INCLUDE(install_macros)
INCLUDE(mysql_add_executable)
+INCLUDE(wsrep)
# Handle options
OPTION(DISABLE_SHARED
@@ -190,6 +191,12 @@ MARK_AS_ADVANCED(ENABLED_LOCAL_INFILE)
OPTION(WITH_FAST_MUTEXES "Compile with fast mutexes" OFF)
MARK_AS_ADVANCED(WITH_FAST_MUTEXES)
+OPTION(WITH_INNODB_DISALLOW_WRITES "InnoDB freeze writes patch from Google" ${WITH_WSREP})
+IF (WITH_INNODB_DISALLOW_WRITES)
+ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWITH_INNODB_DISALLOW_WRITES")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWITH_INNODB_DISALLOW_WRITES")
+ENDIF()
+
# Set DBUG_OFF and other optional release-only flags for non-debug project types
FOREACH(BUILD_TYPE RELEASE RELWITHDEBINFO MINSIZEREL)
FOREACH(LANG C CXX)
@@ -298,6 +305,9 @@ ADD_SUBDIRECTORY(vio)
ADD_SUBDIRECTORY(regex)
ADD_SUBDIRECTORY(mysys)
ADD_SUBDIRECTORY(libmysql)
+IF(WITH_WSREP)
+ADD_SUBDIRECTORY(wsrep)
+ENDIF()
ADD_SUBDIRECTORY(client)
ADD_SUBDIRECTORY(extra)
ADD_SUBDIRECTORY(libservices)
@@ -368,7 +378,7 @@ OPTIONAL
INSTALL(FILES README DESTINATION ${INSTALL_DOCREADMEDIR} COMPONENT Readme)
INSTALL(FILES ${CMAKE_BINARY_DIR}/Docs/INFO_SRC ${CMAKE_BINARY_DIR}/Docs/INFO_BIN DESTINATION ${INSTALL_DOCDIR})
IF(UNIX)
- INSTALL(FILES Docs/INSTALL-BINARY DESTINATION ${INSTALL_DOCREADMEDIR} COMPONENT Readme)
+ INSTALL(FILES Docs/INSTALL-BINARY Docs/README-wsrep DESTINATION ${INSTALL_DOCREADMEDIR} COMPONENT Readme)
ENDIF()
INCLUDE(CPack)
diff --git a/Docs/README-wsrep b/Docs/README-wsrep
new file mode 100644
index 00000000000..025379764b2
--- /dev/null
+++ b/Docs/README-wsrep
@@ -0,0 +1,487 @@
+Codership Oy
+http://www.codership.com
+<info@codership.com>
+
+DISCLAIMER
+
+THIS SOFTWARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+IN NO EVENT SHALL CODERSHIP OY BE HELD LIABLE TO ANY PARTY FOR ANY DAMAGES
+RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE.
+
+Trademark Information.
+
+MySQL is a trademark or registered trademark of Oracle and/or its affiliates.
+Other trademarks are the property of their respective owners.
+
+Licensing Information.
+
+Please see file COPYING that came with this distribution
+
+Source code can be found at
+wsrep API: https://launchpad.net/wsrep
+MySQL patch: https://launchpad.net/codership-mysql
+
+
+ABOUT THIS DOCUMENT
+
+This document covers installation and configuration issues specific to this
+wsrep-patched MySQL distribution by Codership. It does not cover the use or
+administration of MySQL server per se. The reader is assumed to know how to
+install, configure, administer and use standard MySQL server version 5.1.xx.
+
+
+ MYSQL-5.5.x/wsrep-23.x
+
+CONTENTS:
+=========
+1. WHAT IS WSREP PATCH FOR MYSQL
+2. INSTALLATION
+3. FIRST TIME SETUP
+ 3.1 CONFIGURATION FILES
+ 3.2 DATABASE PRIVILEGES
+ 3.3 CHECK AND CORRECT FIREWALL SETTINGS
+ 3.4 SELINUX
+ 3.5 APPARMOR
+ 3.6 CONNECT TO CLUSTER
+4. UPGRADING FROM MySQL 5.1.x
+5. CONFIGURATION OPTIONS
+ 5.1 MANDATORY MYSQL OPTIONS
+ 5.2 WSREP OPTIONS
+6. ONLINE SCHEMA UPGRADE
+ 6.1 TOTAL ORDER ISOLATION (TOI)
+ 6.2 ROLLING SCHEMA UPGRADE (RSU)
+7. LIMITATIONS
+
+
+1. WHAT IS WSREP PATCH FOR MYSQL/INNODB
+
+Wsrep API developed by Codership Oy is a modern generic (database-agnostic)
+replication API for transactional databases with a goal to make database
+replication/logging subsystem completely modular and pluggable. It is developed
+with flexibility and completeness in mind to satisfy broad range of modern
+replication scenarios. It is equally suitable for synchronous and asynchronous,
+master-slave and multi-master replication.
+
+wsrep stands for Write Set REPlication.
+
+Wsrep patch for MySQL/InnoDB allows MySQL server to load and use various wsrep
+API implementations ("wsrep providers") with different qualities of service.
+Without wsrep provider MySQL-wsrep server will function like a regular
+standalone server.
+
+
+2. INSTALLATION
+
+In the examples below mysql authentication options are omitted for brevity.
+
+2.1 Download and install mysql-wsrep package.
+
+Download binary package for your Linux distribution from
+https://launchpad.net/codership-mysql/
+
+2.1.1 On Debian and Debian-derived distributions.
+
+Upgrade from mysql-server-5.0 to mysql-wsrep is not supported yet, please
+upgrade to mysql-server-5.1 first.
+
+If you're installing over an existing mysql installation, mysql-server-wsrep
+will conflict with mysql-server-5.1 package, so remove it first:
+
+$ sudo apt-get remove mysql-server-5.1 mysql-server-core-5.1
+
+mysql-server-wsrep requires psmisc and mysql-client-5.1.47 (or later).
+MySQL 5.1 packages can be found from backports repositories.
+For further information about configuring and using Debian or Ubuntu
+backports, see:
+
+* http://backports.debian.org
+
+* https://help.ubuntu.com/community/UbuntuBackports
+
+For example, installation of required packages on Debian Lenny:
+
+$ sudo apt-get install psmisc
+$ sudo apt-get -t lenny-backports install mysql-client-5.1
+
+Now you should be able to install mysql-wsrep package:
+
+$ sudo dpkg -i <mysql-server-wsrep DEB>
+
+2.1.2 On CentOS and similar RPM-based distributions.
+
+If you're migrating from existing MySQL installation, there are two variants:
+
+ a) If you're already using official MySQL-server-community 5.1.x RPM from
+ Oracle:
+
+ # rpm -e mysql-server
+
+ b) If you're upgrading from the stock mysql-5.0.77 on CentOS:
+
+ 1) Make sure that the following packages are not installed:
+ # rpm --nodeps --allmatches -e mysql-server mysql-test mysql-bench
+
+ 2) Install *official* MySQL-shared-compat-5.1.x from
+ http://dev.mysql.com/downloads/mysql/5.1.html
+
+Actual installation:
+
+ # rpm -Uvh <MySQL-server-wsrep RPM>
+
+ If this fails due to unsatisfied dependencies, install missing packages
+ (e.g. yum install perl-DBI) and retry.
+
+Additional packages to consider (if not yet installed):
+ * galera (multi-master replication provider, https://launchpad.net/galera)
+ * MySQL-client-community (for connecting to server and mysqldump-based SST)
+ * rsync (for rsync-based SST)
+ * xtrabackup and nc (for xtrabackup-based SST)
+
+2.2 Upgrade system tables.
+
+If you're upgrading a previous MySQL installation, it might be advisable to
+upgrade system tables. To do that start mysqld and run mysql_upgrade command.
+Consult MySQL documentation in case of errors. Normally they are not critical
+and can be ignored unless specific functionality is needed.
+
+
+3. FIRST TIME SETUP
+
+Unless you're upgrading an already installed mysql-wsrep package, you will need
+to set up a few things to prepare server for operation.
+
+3.1 CONFIGURATION FILES
+
+* Make sure system-wide my.cnf does not bind mysqld to 127.0.0.1. That is, if
+ you have the following line in [mysqld] section, comment it out:
+
+ #bind-address = 127.0.0.1
+
+* Make sure system-wide my.cnf contains "!includedir /etc/mysql/conf.d/" line.
+
+* Edit /etc/mysql/conf.d/wsrep.cnf and set wsrep_provider option by specifying
+ a path to provider library. If you don't have a provider, leave it as it is.
+
+* When a new node joins the cluster it'll have to receive a state snapshot from
+ one of the peers. This requires a privileged MySQL account with access from
+ the rest of the cluster. Edit /etc/mysql/conf.d/wsrep.cnf and set mysql
+ login/password pair for SST, for example:
+
+ wsrep_sst_auth=wsrep_sst:wspass
+
+* See CONFIGURATION section below about other configuration parameters that you
+ might want to change at this point.
+
+3.2 DATABASE PRIVILEGES
+
+Restart MySQL server and connect to it as root to grant privileges to SST
+account (empty users confuse MySQL authentication matching rules, we need to
+delete them too):
+
+$ mysql -e "SET wsrep_on=OFF; DELETE FROM mysql.user WHERE user='';"
+$ mysql -e "SET wsrep_on=OFF; GRANT ALL ON *.* TO wsrep_sst@'%' IDENTIFIED BY 'wspass'";
+
+3.3 CHECK AND CORRECT FIREWALL SETTINGS.
+
+MySQL-wsrep server needs to be accessible from other cluster members through
+its client listening socket and through wsrep provider socket. See your
+distribution and wsrep provider documentation for details. For example on
+CentOS you might need to do something along these lines:
+
+# iptables --insert RH-Firewall-1-INPUT 1 --proto tcp --source <my IP>/24 --destination <my IP>/32 --dport 3306 -j ACCEPT
+# iptables --insert RH-Firewall-1-INPUT 1 --proto tcp --source <my IP>/24 --destination <my IP>/32 --dport 4567 -j ACCEPT
+
+If there is a NAT firewall between the nodes, it must be configured to allow
+direct connections between the nodes (e.g. via port forwarding).
+
+3.4 SELINUX
+
+If you have SELinux enabled, it may block mysqld from doing required operations.
+You'll need to either disable it or configure to allow mysqld to run external
+programs and open listen sockets at unprivileged ports (i.e. things that
+an unprivileged user can do). See SELinux documentation about it.
+
+To quickly disable SELinux:
+1) run 'setenforce 0' as root.
+2) set 'SELINUX=permissive' in /etc/selinux/config
+
+3.5 APPARMOR
+
+AppArmor automatically comes with Ubuntu and may also prevent mysqld to from
+opening additional ports or run scripts. See AppArmor documentation about its
+configuration. To disable AppArmor for mysqld:
+
+$ cd /etc/apparmor.d/disable/
+$ sudo ln -s /etc/apparmor.d/usr.sbin.mysqld
+$ sudo service apparmor restart
+
+
+3.6 CONNECT TO CLUSTER
+
+Now you're ready to connect to cluster by setting wsrep_cluster_address variable
+and monitor status of wsrep provider:
+
+mysql> SET GLOBAL wsrep_cluster_address='<cluster address string>';
+mysql> SHOW STATUS LIKE 'wsrep%';
+
+
+4 UPGRADING FROM MySQL 5.1.x
+
+!!! THESE INSTRUCTIONS ARE PRELIMINARY AND INCOMPLETE !!!
+
+1) BEFORE UPGRADE (while running 5.1.x):
+ - comment out 'wsrep_provider' setting from configuration files
+ (my.cnf and/or wsrep.cnf)
+ - If performing a rolling upgrade on a running cluster, set
+ wsrep_sst_method=mysqldump.
+ You might also need to configure wsrep_sst_receive_address and
+ wsrep_sst_auth appropriately. mysqldump is the only way to transfer data
+ from 5.1.x to 5.5.x reliably.
+ - remove innodb_plugin settings from configuration files.
+
+2) Perform upgrade as usual:
+ http://dev.mysql.com/doc/refman/5.5/en/upgrading-from-previous-series.html
+ Don't forget to run 'mysql_upgrade' command.
+
+3) AFTER UPGRADING individual node:
+ - uncomment 'wsrep_provider' line in configuration file.
+ - restart the server and join the cluster.
+
+4) AFTER UPGRADING the whole cluster:
+ - revert to usual wsrep SST settings if not 'mysqldump'.
+
+
+5. CONFIGURATION OPTIONS
+
+5.1 MANDATORY MYSQL OPTIONS
+
+binlog_format=ROW
+ This option is required to use row-level replication as opposed to
+ statement-level. For performance and consistency considerations don't change
+ that. As a side effect, binlog, if turned on, can be ROW only. In future this
+ option won't have special meaning.
+
+innodb_autoinc_lock_mode=2
+ This is a required parameter. Without it INSERTs into tables with
+ AUTO_INCREMENT column may fail.
+ autoinc lock modes 0 and 1 can cause unresolved deadlock, and make
+ system unresponsive.
+
+innodb_locks_unsafe_for_binlog=1
+ This option is required for parallel applying.
+
+5.2 WSREP OPTIONS
+
+All options are optional except for wsrep_provider, wsrep_cluster_address, and
+wsrep_sst_auth.
+
+wsrep_provider=none
+ A full path to the library that implements WSREP interface. If none is
+ specified, the server behaves like a regular mysqld.
+
+wsrep_provider_options=
+ Provider-specific option string. Check wsrep provider documentation or
+ http://www.codership.com/wiki
+
+wsrep_cluster_address=
+ Provider-specific cluster address string. This is used to connect a node to
+ the desired cluster. This option can be given either on mysqld startup or set
+ during runtime. See wsrep provider documentation for possible values.
+
+wsrep_cluster_name="my_wsrep_cluster"
+ Logical cluster name, must be the same for all nodes of the cluster.
+
+wsrep_node_address=
+ An option to explicitly specify the network address of the node in the form
+ <address>[:port] if autoguessing for some reason does not produce desirable
+ results (multiple network interfaces, NAT, etc.)
+ If not explicitly overridden by wsrep_sst_receive_address, the <address> part
+ will be used to listen for SST (see below). And the whole <address>[:port]
+ will be passed to wsrep provider to be used as a base address in its
+ communications.
+
+wsrep_node_name=
+ Human readable node name (for easier log reading only). Defaults to hostname.
+
+wsrep_slave_threads=1
+ Number of threads dedicated to processing of writesets from other nodes.
+ For best performance should be few per CPU core.
+
+wsrep_dbug_option
+ Options for the built-in DBUG library (independent from what MySQL uses).
+ Empty by default. Not currently in use.
+
+wsrep_debug=0
+ Enable debug-level logging.
+
+wsrep_convert_LOCK_to_trx=0
+ Implicitly convert locking sessions into transactions inside mysqld. By
+ itself it does not mean support for locking sessions, but it prevents the
+ database from going into logically inconsistent state. Note however, that
+ loading large database dump with LOCK statements might result in abnormally
+ large transactions and cause an out-of-memory condition
+
+wsrep_retry_autocommit=1
+ Retry autocommit queries and single statement transactions should they fail
+ certification test. This is analogous to rescheduling an autocommit query
+ should it go into deadlock with other transactions in the database lock
+ manager.
+
+wsrep_auto_increment_control=1
+ Automatically adjust auto_increment_increment and auto_increment_offset
+ variables based on the number of nodes in the cluster. Significantly reduces
+ certification conflict rate for INSERTS.
+
+wsrep_drupal_282555_workaround=1
+ MySQL seems to have an obscure bug when INSERT into table with
+ AUTO_INCREMENT column with NULL value for that column can fail with a
+ duplicate key error. When this option is on, it retries such INSERTs.
+ Required for stable Drupal operation. Documented at:
+ http://bugs.mysql.com/bug.php?id=41984
+ http://drupal.org/node/282555
+
+wsrep_causal_reads=0
+ Enforce strict READ COMMITTED semantics on reads and transactions. May
+ result in additional latencies. It is a session variable.
+
+wsrep_OSU_method=TOI
+ Online Schema Upgrade (OSU) can be performed with two alternative methods:
+ Total Order Isolation (TOI) runs DDL statement in all cluster nodes in
+ same total order sequence locking the affected table for the duration of the
+ operation. This may result in the whole cluster being blocked for the
+ duration of the operation.
+ Rolling Schema Upgrade (RSU) executes the DDL statement only locally, thus
+ blocking only one cluster node. During the DDL processing, the node
+ is not replicating and may be unable to process replication events (due to
+ table lock). Once DDL operation is complete, the node will catch up and sync
+ with the cluster to become fully operational again. The DDL statement or
+ its effects are not replicated, so it is user's responsibility to manually
+ perform this operation on each of the nodes.
+
+wsrep_forced_binlog_format=none
+ Force every transaction to use given binlog format. When this variable is
+ set to something else than NONE, all transactions will use the given forced
+ format, regardless of what the client session has specified in binlog_format.
+ Valid choices for wsrep_forced_binlog_format are: ROW, STATEMENT, MIXED and
+ special value NONE, meaning that there is no forced binlog format in effect.
+ This variable was intruduced to support STATEMENT format replication during
+ rolling schema upgrade processing. However, in most cases ROW replication
+ is valid for asymmetrict schema replication.
+
+State snapshot transfer options.
+
+When a new node joins the cluster it has to synchronize its initial state with
+the other cluster members by transferring state snapshot from one of them.
+The options below govern how this happens and should be set up before attempting
+to join or start a cluster.
+
+wsrep_sst_method=mysqldump
+ What method to use to copy database state to a newly joined node. Supported
+ methods:
+ - mysqldump: slow (except for small datasets) but most tested.
+ - rsync: much faster on large datasets.
+ - rsync_wan: same as rsync but with deltaxfer to minimize network traffic.
+ - xtrabackup: very fast and practically non-blocking SST method based on
+ Percona's xtrabackup tool.
+
+ (for xtrabackup to work the following settings must be present in my.cnf
+ on all nodes:
+ [mysqld]
+ wsrep_sst_auth=root:<root password>
+ datadir=<path to data dir>
+ [client]
+ socket=<path to socket>
+ )
+
+wsrep_sst_receive_address=
+ Address (hostname:port) at which this node wants to receive state snapshot.
+ Defaults to mysqld bind address, and if that is not specified (0.0.0.0) -
+ to the first IP of eth0 + mysqld bind port.
+ NOTE: check that your firewall allows connections to this address from other
+ cluster nodes.
+
+wsrep_sst_auth=
+ Authentication information needed for state transfer. Depends on the state
+ transfer method. For mysqldump-based SST it is
+ <mysql_root_user>:<mysql_root_password>
+ and should be the same on all nodes - it is used to authenticate with both
+ state snapshot receiver and state snapshot donor.
+
+wsrep_sst_donor=
+ A name of the node which should serve as state snapshot donor. This allows
+ to control which node will serve state snapshot request. By default the
+ most suitable node is chosen by wsrep provider. This is the same as given in
+ wsrep_node_name.
+
+
+6. ONLINE SCHEMA UPGRADE
+
+ Schema upgrades mean any data definition statements (DDL statemnents) run
+ for the database. They change the database structure and are non-
+ transactional.
+
+ Release 22.3 brings a new method for performing schema upgrades. User can
+ now choose whether to use the traditional total order isolation or new
+ rolling schema upgrade method. The OSU method choice is done by global
+ parameter: 'wsrep_OSU_method'.
+
+6.1 Total Order Isolation (TOI)
+
+ With earlier releases, DDL processing happened always by Total Order
+ Isolation (TOI) method. With TOI, the DDL was scheduled to be processed in
+ same transaction seqeuncing 'slot' in each cluster node.
+ The processing is secured by locking the affected table from any other use.
+ With TOI method, the whole cluster has part of the database locked for the
+ duration of the DDL processing.
+
+6.2 Rolling Schema Upgrade (RSU)
+
+ Rolling schema upgrade is new DDL processing method, where DDL will be
+ processed locally for the node. The node is disconnected of the replication
+ for the duration of the DDL processing, so that there is only DDL statement
+ processing in the node and it does not block the rest of the cluster. When
+ the DDL processing is complete, the node applies delayed replication events
+ and synchronizes back with the cluster.
+ The DDL can then be executed cluster-wide by running the same DDL statement
+ for each node in turn. When this rolling schema upgrade proceeds, part of
+ the cluster will have old schema structure and part of the cluster will have
+ new schema structure.
+
+
+7. LIMITATIONS
+
+1) Currently replication works only with InnoDB storage engine. Any writes to
+ tables of other types, including system (mysql.*) tables are not replicated.
+ However, DDL statements are replicated in statement level, and changes
+ to mysql.* tables will get replicated that way.
+ So, you can safely issue: CREATE USER...,
+ but issuing: INSERT INTO mysql.user..., will not be replicated.
+
+2) DELETE operation is unsupported on tables without primary key. Also rows in
+ tables without primary key may appear in different order on different nodes.
+ As a result SELECT...LIMIT... may return slightly different sets.
+
+3) Unsupported queries:
+ * LOCK/UNLOCK TABLES cannot be supported in multi-master setups.
+ * lock functions (GET_LOCK(), RELEASE_LOCK()... )
+
+4) Query log cannot be directed to table. If you enable query logging,
+ you must forward the log to a file:
+ log_output = FILE
+ Use general_log and general_log_file to choose query logging and the
+ log file name
+
+5) Maximum allowed transaction size is defined by wsrep_max_ws_rows and
+ wsrep_max_ws_size. Anything bigger (e.g. huge LOAD DATA) will be rejected.
+
+6) Due to cluster level optimistic concurrency control, transaction issuing
+ COMMIT may still be aborted at that stage. There can be two transactions.
+ writing to same rows and committing in separate cluster nodes, and only one
+ of the them can successfully commit. The failing one will be aborted.
+ For cluster level aborts, MySQL/galera cluster gives back deadlock error.
+ code (Error: 1213 SQLSTATE: 40001 (ER_LOCK_DEADLOCK)).
+
+7) XA transactions can not be supported due to possible rollback on commit.
+
diff --git a/cmake/configure.pl b/cmake/configure.pl
index 3a7d187c0be..a542092c417 100644
--- a/cmake/configure.pl
+++ b/cmake/configure.pl
@@ -206,6 +206,16 @@ foreach my $option (@ARGV)
$cmakeargs = $cmakeargs." -DMYSQL_DATADIR=".substr($option,14);
next;
}
+ if ($option =~ /layout=/)
+ {
+ $cmakeargs = $cmakeargs." -DINSTALL_LAYOUT=".substr($option,7);
+ next;
+ }
+ if ($option =~ /with-unix-socket-path=/)
+ {
+ $cmakeargs = $cmakeargs." -DMYSQL_UNIX_ADDR=".substr($option,22);
+ next;
+ }
if ($option =~ /mysql-maintainer-mode/)
{
$cmakeargs = $cmakeargs." -DMYSQL_MAINTAINER_MODE=" .
diff --git a/cmake/cpack_rpm.cmake b/cmake/cpack_rpm.cmake
index f5017d6c984..da48afad00c 100644
--- a/cmake/cpack_rpm.cmake
+++ b/cmake/cpack_rpm.cmake
@@ -23,11 +23,9 @@ SET(CPACK_COMPONENT_SHAREDLIBRARIES_GROUP "shared")
SET(CPACK_COMPONENT_COMMON_GROUP "common")
SET(CPACK_COMPONENT_COMPAT_GROUP "compat")
SET(CPACK_COMPONENTS_ALL Server ManPagesServer IniFiles Server_Scripts
- SupportFiles Development ManPagesDevelopment
- ManPagesTest Readme ManPagesClient Test
- Common Client SharedLibraries)
+ SupportFiles Readme)
-SET(CPACK_RPM_PACKAGE_NAME "MariaDB")
+SET(CPACK_RPM_PACKAGE_NAME "MariaDB-Galera")
SET(CPACK_PACKAGE_FILE_NAME "${CPACK_RPM_PACKAGE_NAME}-${VERSION}-${RPM}-${CMAKE_SYSTEM_PROCESSOR}")
SET(CPACK_RPM_PACKAGE_RELEASE 1) # FIX: add distribution name here
@@ -83,6 +81,7 @@ SET(CPACK_RPM_devel_PACKAGE_PROVIDES "MariaDB-devel MySQL-devel mysql-devel")
SET(CPACK_RPM_server_PACKAGE_OBSOLETES "MariaDB mysql mysql-server MySQL-server MySQL-OurDelta-server")
SET(CPACK_RPM_server_PACKAGE_PROVIDES "MariaDB MariaDB-server MySQL-server config(MariaDB-server) msqlormysql mysql mysql-server")
+SET(CPACK_RPM_server_PACKAGE_REQUIRES "${CPACK_RPM_PACKAGE_REQUIRES} galera")
SET(CPACK_RPM_server_PRE_INSTALL_SCRIPT_FILE ${CMAKE_SOURCE_DIR}/support-files/rpm/server-prein.sh)
SET(CPACK_RPM_server_PRE_UNINSTALL_SCRIPT_FILE ${CMAKE_SOURCE_DIR}/support-files/rpm/server-preun.sh)
SET(CPACK_RPM_server_POST_INSTALL_SCRIPT_FILE ${CMAKE_SOURCE_DIR}/support-files/rpm/server-postin.sh)
@@ -99,32 +98,6 @@ SET(CPACK_RPM_test_PACKAGE_PROVIDES "MariaDB-test MySQL-test mysql-test")
# workaround for lots of perl dependencies added by rpmbuild
SET(CPACK_RPM_test_PACKAGE_PROVIDES "${CPACK_RPM_test_PACKAGE_PROVIDES} perl(lib::mtr_gcov.pl) perl(lib::mtr_gprof.pl) perl(lib::mtr_io.pl) perl(lib::mtr_misc.pl) perl(lib::mtr_process.pl) perl(lib::v1/mtr_cases.pl) perl(lib::v1/mtr_gcov.pl) perl(lib::v1/mtr_gprof.pl) perl(lib::v1/mtr_im.pl) perl(lib::v1/mtr_io.pl) perl(lib::v1/mtr_match.pl) perl(lib::v1/mtr_misc.pl) perl(lib::v1/mtr_process.pl) perl(lib::v1/mtr_report.pl) perl(lib::v1/mtr_stress.pl) perl(lib::v1/mtr_timer.pl) perl(lib::v1/mtr_unique.pl) perl(mtr_misc.pl)")
-# If we want to build build MariaDB-shared-compat,
-# extract compat libraries from MariaDB-shared-5.3 rpm
-FILE(GLOB compat_rpm RELATIVE ${CMAKE_SOURCE_DIR}
- "${CMAKE_SOURCE_DIR}/../MariaDB-shared-5.3.*.rpm")
-IF (compat_rpm)
- MESSAGE("Using ${compat_rpm} to build MariaDB-compat")
- INSTALL(CODE "EXECUTE_PROCESS(
- COMMAND rpm2cpio ${CMAKE_SOURCE_DIR}/${compat_rpm}
- COMMAND cpio --extract --make-directories */libmysqlclient*.so.* -
- WORKING_DIRECTORY \$ENV{DESTDIR})
- EXECUTE_PROCESS(
- COMMAND chmod -R a+rX .
- WORKING_DIRECTORY \$ENV{DESTDIR})"
- COMPONENT Compat)
- SET(CPACK_COMPONENTS_ALL ${CPACK_COMPONENTS_ALL} Compat)
-
- # RHEL6/CentOS6 install Postfix by default, and it requires
- # libmysqlclient.so.16 that pulls in mysql-libs-5.1.x
- # And the latter conflicts with our rpms.
- # Make sure that for these distribuions all our rpms require
- # MariaDB-compat, that will replace mysql-libs-5.1
- IF(RPM MATCHES "(rhel|centos)6")
- SET(CPACK_RPM_common_PACKAGE_REQUIRES "MariaDB-compat")
- ENDIF()
-ENDIF(compat_rpm)
-
SET(CPACK_RPM_compat_PACKAGE_REQUIRES "/bin/sh") # to mask CPACK_RPM_PACKAGE_REQUIRES
SET(CPACK_RPM_compat_PACKAGE_PROVIDES "mysql-libs = 5.3.5") # exact version doesn't matter as long as it greater than 5.1
SET(CPACK_RPM_compat_PACKAGE_OBSOLETES "mysql-libs < 5.3.5")
diff --git a/cmake/package_name.cmake b/cmake/package_name.cmake
index 5c99a110e7c..37f619e7177 100644
--- a/cmake/package_name.cmake
+++ b/cmake/package_name.cmake
@@ -120,7 +120,7 @@ IF(NOT VERSION)
ELSEIF(MYSQL_SERVER_SUFFIX)
SET(PRODUCT_TAG "${MYSQL_SERVER_SUFFIX}") # Already has a leading dash
ELSE()
- SET(PRODUCT_TAG)
+ SET(PRODUCT_TAG "-galera")
ENDIF()
IF("${VERSION}" MATCHES "-ndb-")
diff --git a/cmake/wsrep.cmake b/cmake/wsrep.cmake
new file mode 100644
index 00000000000..b0c1dce34dc
--- /dev/null
+++ b/cmake/wsrep.cmake
@@ -0,0 +1,59 @@
+# Copyright (c) 2011, Codership Oy <info@codership.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+# We need to generate a proper spec file even without --with-wsrep flag,
+# so WSREP_VERSION is produced regardless
+
+# Set the patch version
+SET(WSREP_PATCH_VERSION "7rc1")
+
+# Obtain patch revision number
+SET(WSREP_PATCH_REVNO $ENV{WSREP_REV})
+IF(NOT WSREP_PATCH_REVNO)
+ EXECUTE_PROCESS(
+ COMMAND bzr revno
+ OUTPUT_VARIABLE WSREP_PATCH_REVNO
+ RESULT_VARIABLE RESULT
+ )
+STRING(REGEX REPLACE "(\r?\n)+$" "" WSREP_PATCH_REVNO "${WSREP_PATCH_REVNO}")
+#FILE(WRITE "wsrep_config" "Debug: WSREP_PATCH_REVNO result: ${RESULT}\n")
+ENDIF()
+IF(NOT WSREP_PATCH_REVNO)
+ SET(WSREP_PATCH_REVNO "XXXX")
+ENDIF()
+
+# Obtain wsrep API version
+EXECUTE_PROCESS(
+ COMMAND sh -c "grep WSREP_INTERFACE_VERSION ${MySQL_SOURCE_DIR}/wsrep/wsrep_api.h | cut -d '\"' -f 2"
+ OUTPUT_VARIABLE WSREP_API_VERSION
+ RESULT_VARIABLE RESULT
+)
+#FILE(WRITE "wsrep_config" "Debug: WSREP_API_VERSION result: ${RESULT}\n")
+STRING(REGEX REPLACE "(\r?\n)+$" "" WSREP_API_VERSION "${WSREP_API_VERSION}")
+
+SET(WSREP_VERSION
+ "${WSREP_API_VERSION}.${WSREP_PATCH_VERSION}.r${WSREP_PATCH_REVNO}"
+)
+
+OPTION(WITH_WSREP "WSREP replication API (to use, e.g. Galera Replication library)" ON)
+IF (WITH_WSREP)
+ SET(WSREP_C_FLAGS "-DWITH_WSREP -DWSREP_PROC_INFO -DMYSQL_MAX_VARIABLE_VALUE_LEN=2048")
+ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WSREP_C_FLAGS}")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WSREP_C_FLAGS}")
+ SET(COMPILATION_COMMENT "${COMPILATION_COMMENT}, wsrep_${WSREP_VERSION}")
+ SET(WITH_EMBEDDED_SERVER OFF CACHE INTERNAL "" FORCE)
+ENDIF()
+
+#
diff --git a/include/thr_lock.h b/include/thr_lock.h
index 3f7a5ca988f..4551a3160ff 100644
--- a/include/thr_lock.h
+++ b/include/thr_lock.h
@@ -20,6 +20,15 @@
#ifdef __cplusplus
extern "C" {
#endif
+#ifdef WITH_WSREP
+#include <my_sys.h>
+ typedef int (* wsrep_thd_is_brute_force_fun)(void *);
+ typedef int (* wsrep_abort_thd_fun)(void *, void *, my_bool);
+ typedef int (* wsrep_on_fun)(void *);
+ void wsrep_thr_lock_init(
+ wsrep_thd_is_brute_force_fun bf_fun, wsrep_abort_thd_fun abort_fun,
+ my_bool debug, my_bool convert_LOCK_to_trx, wsrep_on_fun on_fun);
+#endif
#include <my_pthread.h>
#include <my_list.h>
@@ -95,6 +104,10 @@ typedef struct st_thr_lock_info
{
pthread_t thread;
my_thread_id thread_id;
+#ifdef WITH_WSREP
+ void *mysql_thd; // THD pointer
+ my_bool in_lock_tables; // true, if inside locking session
+#endif
} THR_LOCK_INFO;
diff --git a/mysys/default.c b/mysys/default.c
index c7ac0d89462..0ba7ed494af 100644
--- a/mysys/default.c
+++ b/mysys/default.c
@@ -87,6 +87,12 @@ static char my_defaults_extra_file_buffer[FN_REFLEN];
static my_bool defaults_already_read= FALSE;
+#ifdef WITH_WSREP
+/* The only purpose of this global array is to hold full name of my.cnf
+ * which seems to be otherwise unavailable */
+char wsrep_defaults_file[FN_REFLEN + 10]={0,};
+#endif /* WITH_WREP */
+
/* Which directories are searched for options (and in which order) */
#define MAX_DEFAULT_DIRS 6
@@ -803,6 +809,12 @@ static int search_default_file_with_ext(Process_option_func opt_handler,
if (!(fp= mysql_file_fopen(key_file_cnf, name, O_RDONLY, MYF(0))))
return 1; /* Ignore wrong files */
+#ifdef WITH_WSREP
+ /* make sure we do this only once - for top-level file */
+ if ('\0' == wsrep_defaults_file[0])
+ strncpy(wsrep_defaults_file, name, sizeof(wsrep_defaults_file) - 1);
+#endif /* WITH_WSREP */
+
while (mysql_file_fgets(buff, sizeof(buff) - 1, fp))
{
line++;
diff --git a/mysys/thr_lock.c b/mysys/thr_lock.c
index a7cbfa07db2..7c10b0abe8c 100644
--- a/mysys/thr_lock.c
+++ b/mysys/thr_lock.c
@@ -94,7 +94,24 @@ be any number of TL_WRITE_CONCURRENT_INSERT locks aktive at the same time.
my_bool thr_lock_inited=0;
ulong locks_immediate = 0L, locks_waited = 0L;
enum thr_lock_type thr_upgraded_concurrent_insert_lock = TL_WRITE;
-
+#ifdef WITH_WSREP
+static wsrep_thd_is_brute_force_fun wsrep_thd_is_brute_force= NULL;
+static wsrep_abort_thd_fun wsrep_abort_thd= NULL;
+static my_bool wsrep_debug;
+static my_bool wsrep_convert_LOCK_to_trx;
+static wsrep_on_fun wsrep_on = NULL;
+
+void wsrep_thr_lock_init(
+ wsrep_thd_is_brute_force_fun bf_fun, wsrep_abort_thd_fun abort_fun,
+ my_bool debug, my_bool convert_LOCK_to_trx, wsrep_on_fun on_fun
+) {
+ wsrep_thd_is_brute_force = bf_fun;
+ wsrep_abort_thd = abort_fun;
+ wsrep_debug = debug;
+ wsrep_convert_LOCK_to_trx= convert_LOCK_to_trx;
+ wsrep_on = on_fun;
+}
+#endif
/* The following constants are only for debug output */
#define MAX_THREADS 1000
#define MAX_LOCKS 1000
@@ -1147,6 +1164,108 @@ static void sort_locks(THR_LOCK_DATA **data,uint count)
}
}
+#ifdef WITH_WSREP
+/*
+ * If brute force applier would need to wait for a thr lock,
+ * it needs to make sure that it will get the lock without (too much)
+ * delay.
+ * We identify here the owners of blocking locks and ask them to
+ * abort. We then put our lock request in the first place in the
+ * wait queue. When lock holders abort (one by one) the lock release
+ * algorithm should grant the lock to us. We rely on this and proceed
+ * to wait_for_locks().
+ * wsrep_break_locks() should be called in all the cases, where lock
+ * wait would happen.
+ *
+ * TODO: current implementation might not cover all possible lock wait
+ * situations. This needs an review still.
+ * TODO: lock release, might favor some other lock (instead our bf).
+ * This needs an condition to check for bf locks first.
+ * TODO: we still have a debug fprintf, this should be removed
+ */
+static inline my_bool
+wsrep_break_lock(
+ THR_LOCK_DATA *data, struct st_lock_list *lock_queue1,
+ struct st_lock_list *lock_queue2, struct st_lock_list *wait_queue)
+{
+ if (wsrep_on(data->owner->mysql_thd) &&
+ wsrep_thd_is_brute_force &&
+ wsrep_thd_is_brute_force(data->owner->mysql_thd))
+ {
+ THR_LOCK_DATA *holder;
+
+ /* if locking session conversion to transaction has been enabled,
+ we know that this conflicting lock must be read lock and furthermore,
+ lock holder is read-only. It is safe to wait for him.
+ */
+#ifdef TODO
+ if (wsrep_convert_LOCK_to_trx &&
+ (THD*)(data->owner->mysql_thd)->in_lock_tables)
+ {
+ if (wsrep_debug)
+ fprintf(stderr,"WSREP wsrep_break_lock read lock untouched\n");
+ return FALSE;
+ }
+#endif
+ if (wsrep_debug)
+ fprintf(stderr,"WSREP wsrep_break_lock aborting locks\n");
+
+ /* aborting lock holder(s) here */
+ for (holder=(lock_queue1) ? lock_queue1->data : NULL;
+ holder;
+ holder=holder->next)
+ {
+ if (!wsrep_thd_is_brute_force(holder->owner->mysql_thd))
+ {
+ wsrep_abort_thd(data->owner->mysql_thd,
+ holder->owner->mysql_thd, FALSE);
+ }
+ else
+ {
+ if (wsrep_debug)
+ fprintf(stderr,"WSREP wsrep_break_lock skipping BF lock conflict\n");
+ return FALSE;
+ }
+ }
+ for (holder=(lock_queue2) ? lock_queue2->data : NULL;
+ holder;
+ holder=holder->next)
+ {
+ if (!wsrep_thd_is_brute_force(holder->owner->mysql_thd))
+ {
+ wsrep_abort_thd(data->owner->mysql_thd,
+ holder->owner->mysql_thd, FALSE);
+ }
+ else
+ {
+ if (wsrep_debug)
+ fprintf(stderr,"WSREP wsrep_break_lock skipping BF lock conflict\n");
+ return FALSE;
+ }
+ }
+
+ /* Add our lock to the head of the wait queue */
+ if (*(wait_queue->last)==wait_queue->data)
+ {
+ wait_queue->last=&data->next;
+ assert(wait_queue->data==0);
+ }
+ else
+ {
+ assert(wait_queue->data!=0);
+ wait_queue->data->prev=&data->next;
+ }
+ data->next=wait_queue->data;
+ data->prev=&wait_queue->data;
+ wait_queue->data=data;
+ data->cond=get_cond();
+
+ statistic_increment(locks_immediate,&THR_LOCK_lock);
+ return TRUE;
+ }
+ return FALSE;
+}
+#endif
enum enum_thr_lock_result
thr_multi_lock(THR_LOCK_DATA **data, uint count, THR_LOCK_INFO *owner,
diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt
index 91a47bf3d48..cb1fc540d2c 100644
--- a/scripts/CMakeLists.txt
+++ b/scripts/CMakeLists.txt
@@ -311,6 +311,9 @@ IF(WIN32)
INSTALL_SCRIPT(${CMAKE_CURRENT_BINARY_DIR}/${file}.pl COMPONENT Server_Scripts)
ENDFOREACH()
ELSE()
+ IF(WITH_WSREP)
+ SET(WSREP_BINARIES wsrep_sst_common wsrep_sst_mysqldump wsrep_sst_rsync wsrep_sst_xtrabackup)
+ ENDIF()
# On Unix, most of the files end up in the bin directory
SET(BIN_SCRIPTS
msql2mysql
@@ -327,6 +330,7 @@ ELSE()
mysqldumpslow
mysqld_multi
mysqld_safe
+ ${WSREP_BINARIES}
)
FOREACH(file ${BIN_SCRIPTS})
IF(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${file}.sh)
diff --git a/scripts/mysqld_multi.sh b/scripts/mysqld_multi.sh
index 99ce187fcd4..7bf66da99e7 100644
--- a/scripts/mysqld_multi.sh
+++ b/scripts/mysqld_multi.sh
@@ -38,7 +38,7 @@ use Getopt::Long;
use POSIX qw(strftime getcwd);
$|=1;
-$VER="2.16";
+$VER="2.20";
my @defaults_options; # Leading --no-defaults, --defaults-file, etc.
@@ -116,6 +116,7 @@ sub main
print "will be disabled\nand some will be enabled.\n\n";
}
+ init_log() if (!defined($opt_log));
$groupids = $ARGV[1];
if ($opt_version)
{
@@ -141,7 +142,6 @@ sub main
!($ARGV[0] =~ m/^stop$/i) &&
!($ARGV[0] =~ m/^report$/i)));
- init_log() if (!defined($opt_log));
if (!$opt_no_log)
{
w2log("$my_progname log file version $VER; run: ",
@@ -184,9 +184,9 @@ sub main
}
}
-#
-# Quote word for shell
-#
+####
+#### Quote word for shell
+####
sub quote_shell_word
{
@@ -196,6 +196,10 @@ sub quote_shell_word
return $option;
}
+####
+#### get options for a group
+####
+
sub defaults_for_group
{
my ($group) = @_;
@@ -297,8 +301,12 @@ sub report_mysqlds
sub start_mysqlds()
{
- my (@groups, $com, $tmp, $i, @options, $j, $mysqld_found, $info_sent);
+ my (@groups, $com, $com2, $tmp, $i, @options, $j, $mysqld_found,
+ $info_sent, $curdir, $mysql_install_db, $srcdir, $basedir,
+ $datadir);
+ $mysql_install_db= undef();
+ $srcdir= undef();
if (!$opt_no_log)
{
w2log("\nStarting MySQL servers\n","$opt_log",0,0);
@@ -312,8 +320,9 @@ sub start_mysqlds()
{
@options = defaults_for_group($groups[$i]);
- $basedir_found= 0; # The default
- $mysqld_found= 1; # The default
+ my $basedir_found= 0; # The default
+ my $datadir_found= 0; # The default
+ my $mysqld_found= 1; # The default
$mysqld_found= 0 if (!length($mysqld));
$com= "$mysqld";
for ($j = 0, $tmp= ""; defined($options[$j]); $j++)
@@ -328,6 +337,47 @@ sub start_mysqlds()
$com= $options[$j];
$mysqld_found= 1;
}
+ elsif ("--mysql-install-db=" eq substr($options[$j], 0, 19))
+ {
+ # mysql_install_db related option
+
+ $options[$j]=~ s/\-\-mysql\-install\-db\=//;
+ $mysql_install_db= $options[$j];
+ }
+ elsif ("--srcdir=" eq substr($options[$j], 0, 9))
+ {
+ # mysql_install_db related option
+
+ $options[$j]=~ s/\-\-srcdir\=//;
+ $srcdir= $options[$j];
+ }
+ elsif ("--include-config=" eq substr($options[$j], 0, 17))
+ {
+ $options[$j]=~ s/\-\-include\-config\=//;
+ $com2= "my_print_defaults --config-file=$options[$j] $groups[$i]";
+
+ # we need to reorder the array so that options in extra config file
+ # come in the middle. Needed if someone wants to overwrite an option
+
+ my ($k, @tmp_array);
+ for ($k= $j + 1; defined($options[$k]); $k++)
+ {
+ push (@tmp_array, $options[$k]);
+ undef($options[$k]);
+ }
+ pop(@options); # pop out last null array element
+ push (@options, `$com2`);
+ chomp(@options); # new lines away
+ push (@options, @tmp_array);
+ }
+ elsif ("--datadir=" eq substr($options[$j], 0, 10))
+ {
+ $datadir= $options[$j];
+ $datadir =~ s/^--datadir=//;
+ $datadir_found= 1;
+ $options[$j]= quote_shell_word($options[$j]);
+ $tmp.= " $options[$j]";
+ }
elsif ("--basedir=" eq substr($options[$j], 0, 10))
{
$basedir= $options[$j];
@@ -351,6 +401,25 @@ sub start_mysqlds()
print "wanted mysqld binary.\n\n";
$info_sent= 1;
}
+ if (defined($mysql_install_db))
+ {
+ $com2= "$mysql_install_db";
+ $com2.= " --srcdir=$srcdir" if (defined($srcdir));
+ $com2.= " --datadir=$datadir" if ($datadir_found);
+
+ if (-d "$datadir/mysql")
+ {
+ my $wstr= "WARNING: $datadir/mysql already exists. Not going to\n";
+ $wstr.= "run $mysql_install_db on $datadir\n";
+ print $wstr if ($opt_verbose);
+ w2log($wstr, 0, 0);
+ }
+ else
+ {
+ w2log("Installing databases on $datadir..\n", 0, 0);
+ `$com2`;
+ }
+ }
$com.= $tmp;
$com.= " >> $opt_log 2>&1" if (!$opt_no_log);
$com.= " &";
@@ -420,7 +489,7 @@ sub stop_mysqlds()
sub get_mysqladmin_options
{
my ($i, @groups)= @_;
- my ($mysqladmin_found, $com, $tmp, $j);
+ my ($mysqladmin_found, $com, $com2, $tmp, $j);
@options = defaults_for_group($groups[$i]);
@@ -443,6 +512,25 @@ sub get_mysqladmin_options
$com= $options[$j];
$mysqladmin_found= 1;
}
+ elsif ("--include-config=" eq substr($options[$j], 0, 17))
+ {
+ $options[$j]=~ s/\-\-include\-config\=//;
+ $com2= "my_print_defaults --config-file=$options[$j] $groups[$i]";
+
+ # we need to reorder the array so that options in extra config file
+ # come in the middle. Needed if someone wants to overwrite an option
+
+ my ($k, @tmp_array);
+ for ($k= $j + 1; defined($options[$k]); $k++)
+ {
+ push (@tmp_array, $options[$k]);
+ undef($options[$k]);
+ }
+ pop(@options); # pop out last null array element
+ push (@options, `$com2`);
+ chomp(@options); # new lines away
+ push (@options, @tmp_array);
+ }
elsif ((($options[$j] =~ m/^(\-\-socket\=)(.*)$/) && !$opt_tcp_ip) ||
($options[$j] =~ m/^(\-\-port\=)(.*)$/))
{
@@ -462,8 +550,11 @@ sub get_mysqladmin_options
return $com;
}
-# Return a list of option files which can be opened. Similar, but not
-# identical, to behavior of my_search_option_files()
+####
+#### Return a list of option files which can be opened. Similar, but not
+#### identical, to behavior of my_search_option_files()
+####
+
sub list_defaults_files
{
my %opt;
@@ -485,9 +576,11 @@ sub list_defaults_files
($ENV{HOME} ? "$ENV{HOME}/.my.cnf" : undef));
}
+####
+#### Takes a specification of GNRs (see --help), and returns a list of matching
+#### groups which actually are mentioned in a relevant config file
+####
-# Takes a specification of GNRs (see --help), and returns a list of matching
-# groups which actually are mentioned in a relevant config file
sub find_groups
{
my ($raw_gids) = @_;
@@ -802,6 +895,17 @@ Using: @{[join ' ', @defaults_options]}
question. This will be recognised as a special option and
will not be passed to the mysqld. This will allow one to
start different mysqld versions with mysqld_multi.
+--include-config= An optional config file inside a group [mysqld#] which
+ the program is currently reading. It will read any extra
+ options of that group from additional file and insert them
+ where this option was found. Note that the group name
+ [mysqld#] must be the same in order for options to be found.
+--mysql-install-db=...
+ mysql_install_db script to be used for creating internal
+ databases. Used when installing datadir for the first time.
+ This option will be skipped with info in the log file if
+ 'mysql' database already exists in the given datadir.
+--srcdir=... srcdir argument for mysql_install_db script. Optional.
--no-log Print to stdout instead of the log file. By default the log
file is turned on.
--password=... Password for mysqladmin user.
diff --git a/scripts/mysqld_safe.sh b/scripts/mysqld_safe.sh
index aaf1936afe1..8371b8eec46 100644
--- a/scripts/mysqld_safe.sh
+++ b/scripts/mysqld_safe.sh
@@ -125,7 +125,7 @@ log_notice () {
}
eval_log_error () {
- cmd="$1"
+ local cmd="$1"
case $logging in
file) cmd="$cmd >> "`shell_quote_string "$err_log"`" 2>&1" ;;
syslog)
@@ -161,6 +161,63 @@ shell_quote_string() {
echo "$1" | sed -e 's,\([^a-zA-Z0-9/_.=-]\),\\\1,g'
}
+wsrep_pick_url() {
+ [ $# -eq 0 ] && return 0
+
+ log_error "WSREP: 'wsrep_urls' is DEPRECATED! Use wsrep_cluster_address to specify multiple addresses instead."
+
+ if ! which nc >/dev/null; then
+ log_error "ERROR: nc tool not found in PATH! Make sure you have it installed."
+ return 1
+ fi
+
+ local url
+ # Assuming URL in the form scheme://host:port
+ # If host and port are not NULL, the liveness of URL is assumed to be tested
+ # If port part is absent, the url is returned literally and unconditionally
+ # If every URL has port but none is reachable, nothing is returned
+ for url in `echo $@ | sed s/,/\ /g` 0; do
+ local host=`echo $url | cut -d \: -f 2 | sed s/^\\\/\\\///`
+ local port=`echo $url | cut -d \: -f 3`
+ [ -z "$port" ] && break
+ nc -z "$host" $port >/dev/null && break
+ done
+
+ if [ "$url" == "0" ]; then
+ log_error "ERROR: none of the URLs in '$@' is reachable."
+ return 1
+ fi
+
+ echo $url
+}
+
+# Run mysqld with --wsrep-recover and parse recovered position from log.
+# Position will be stored in wsrep_start_position_opt global.
+wsrep_recovery() {
+ local mysqld_cmd="$@"
+ wr_logfile=$(mktemp)
+ [ "$EUID" = "0" ] && chown $user $wr_logfile
+ chmod 600 $wr_logfile
+ log_notice "WSREP: Running position recovery with --log_error=$wr_logfile"
+ $mysqld_cmd --log_error=$wr_logfile --wsrep-recover
+ rp=$(grep "WSREP: Recovered position:" $wr_logfile)
+ if [ -z "$rp" ]; then
+ skipped=$(grep WSREP $wr_logfile | grep "skipping position recovery")
+ if [ -z "$skipped" ]; then
+ log_error "WSREP: Failed to recover position: " \
+ `cat $wr_logfile`;
+ else
+ log_notice "WSREP: Position recovery skipped"
+ fi
+ else
+ start_pos=$(echo $rp | sed 's/.*WSREP\:\ Recovered\ position://' \
+ | sed 's/^[ \t]*//')
+ wsrep_start_position_opt="--wsrep_start_position=$start_pos"
+ log_notice "WSREP: Recovered position $start_pos"
+ fi
+ rm $wr_logfile
+}
+
parse_arguments() {
# We only need to pass arguments through to the server if we don't
# handle them here. So, we collect unrecognized options (passed on
@@ -221,7 +278,13 @@ parse_arguments() {
--skip-syslog) want_syslog=0 ;;
--syslog-tag=*) syslog_tag="$val" ;;
--timezone=*) TZ="$val"; export TZ; ;;
-
+ --wsrep[-_]urls=*) wsrep_urls="$val"; ;;
+ --wsrep[-_]provider=*)
+ if test -n "$val" && test "$val" != "none"
+ then
+ wsrep_restart=1
+ fi
+ ;;
--help) usage ;;
*)
@@ -757,7 +820,8 @@ do
done
cmd="$cmd $args"
# Avoid 'nohup: ignoring input' warning
-test -n "$NOHUP_NICENESS" && cmd="$cmd < /dev/null"
+nohup_redir=""
+test -n "$NOHUP_NICENESS" && nohup_redir=" < /dev/null"
log_notice "Starting $MYSQLD daemon with databases from $DATADIR"
@@ -768,13 +832,25 @@ max_fast_restarts=5
# flag whether a usable sleep command exists
have_sleep=1
+# maximum number of wsrep restarts
+max_wsrep_restarts=0
+
while true
do
rm -f $safe_mysql_unix_port "$pid_file" # Some extra safety
+ [ -n "$wsrep_urls" ] && url=`wsrep_pick_url $wsrep_urls` # check connect address
+
start_time=`date +%M%S`
- eval_log_error "$cmd"
+ if [ -z "$url" ]
+ then
+ wsrep_recovery "$cmd"
+ eval_log_error "$cmd $wsrep_start_position_opt $nohup_redir"
+ else
+ wsrep_recovery "$cmd"
+ eval_log_error "$cmd $wsrep_start_position_opt --wsrep_cluster_address=$url $nohup_redir"
+ fi
end_time=`date +%M%S`
@@ -838,6 +914,20 @@ do
I=`expr $I + 1`
done
fi
+
+ if [ -n "$wsrep_restart" ]
+ then
+ if [ $wsrep_restart -le $max_wsrep_restarts ]
+ then
+ wsrep_restart=`expr $wsrep_restart + 1`
+ log_notice "WSREP: sleeping 15 seconds before restart"
+ sleep 15
+ else
+ log_notice "WSREP: not restarting wsrep node automatically"
+ break
+ fi
+ fi
+
log_notice "mysqld restarted"
done
diff --git a/scripts/wsrep_sst_common.sh b/scripts/wsrep_sst_common.sh
new file mode 100644
index 00000000000..5c292465ecc
--- /dev/null
+++ b/scripts/wsrep_sst_common.sh
@@ -0,0 +1,106 @@
+# Copyright (C) 2010 Codership Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston
+# MA 02110-1301 USA.
+
+# This is a common command line parser to be sourced by other SST scripts
+
+set -u
+
+WSREP_SST_OPT_BYPASS=0
+
+while [ $# -gt 0 ]; do
+case "$1" in
+ '--address')
+ readonly WSREP_SST_OPT_ADDR="$2"
+ shift
+ ;;
+ '--auth')
+ readonly WSREP_SST_OPT_AUTH="$2"
+ shift
+ ;;
+ '--bypass')
+ WSREP_SST_OPT_BYPASS=1
+ ;;
+ '--datadir')
+ readonly WSREP_SST_OPT_DATA="$2"
+ shift
+ ;;
+ '--defaults-file')
+ readonly WSREP_SST_OPT_CONF="$2"
+ shift
+ ;;
+ '--host')
+ readonly WSREP_SST_OPT_HOST="$2"
+ shift
+ ;;
+ '--local-port')
+ readonly WSREP_SST_OPT_LPORT="$2"
+ shift
+ ;;
+ '--parent')
+ readonly WSREP_SST_OPT_PARENT="$2"
+ shift
+ ;;
+ '--password')
+ readonly WSREP_SST_OPT_PSWD="$2"
+ shift
+ ;;
+ '--port')
+ readonly WSREP_SST_OPT_PORT="$2"
+ shift
+ ;;
+ '--role')
+ readonly WSREP_SST_OPT_ROLE="$2"
+ shift
+ ;;
+ '--socket')
+ readonly WSREP_SST_OPT_SOCKET="$2"
+ shift
+ ;;
+ '--user')
+ readonly WSREP_SST_OPT_USER="$2"
+ shift
+ ;;
+ '--gtid')
+ readonly WSREP_SST_OPT_GTID="$2"
+ shift
+ ;;
+ *) # must be command
+ # usage
+ # exit 1
+ ;;
+esac
+shift
+done
+readonly WSREP_SST_OPT_BYPASS
+
+wsrep_log()
+{
+ # echo everything to stderr so that it gets into common error log
+ # deliberately made to look different from the rest of the log
+ local readonly tst="$(date +%Y%m%d\ %H:%M:%S.%N | cut -b -21)"
+ echo "WSREP_SST: $* ($tst)" >>/dev/stderr
+}
+
+wsrep_log_error()
+{
+ wsrep_log "[ERROR] $*"
+}
+
+wsrep_log_info()
+{
+ wsrep_log "[INFO] $*"
+}
+
diff --git a/scripts/wsrep_sst_mysqldump.sh b/scripts/wsrep_sst_mysqldump.sh
new file mode 100644
index 00000000000..120533edc4e
--- /dev/null
+++ b/scripts/wsrep_sst_mysqldump.sh
@@ -0,0 +1,121 @@
+#!/bin/sh -e
+# Copyright (C) 2009 Codership Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston
+# MA 02110-1301 USA.
+
+# This is a reference script for mysqldump-based state snapshot tansfer
+
+. $(dirname $0)/wsrep_sst_common
+
+EINVAL=22
+
+local_ip()
+{
+ PATH=$PATH:/usr/sbin:/usr/bin:/sbin:/bin
+
+ [ "$1" = "127.0.0.1" ] && return 0
+ [ "$1" = "localhost" ] && return 0
+ [ "$1" = "$(hostname -s)" ] && return 0
+ [ "$1" = "$(hostname -f)" ] && return 0
+ [ "$1" = "$(hostname -d)" ] && return 0
+
+ # Now if ip program is not found in the path, we can't return 0 since
+ # it would block any address. Thankfully grep should fail in this case
+ ip route get "$1" | grep local >/dev/null && return 0
+
+ return 1
+}
+
+if test -z "$WSREP_SST_OPT_USER"; then err "USER cannot be nil"; exit $EINVAL; fi
+if test -z "$WSREP_SST_OPT_HOST"; then err "HOST cannot be nil"; exit $EINVAL; fi
+if test -z "$WSREP_SST_OPT_PORT"; then err "PORT cannot be nil"; exit $EINVAL; fi
+if test -z "$WSREP_SST_OPT_LPORT"; then err "LPORT cannot be nil"; exit $EINVAL; fi
+if test -z "$WSREP_SST_OPT_SOCKET";then err "SOCKET cannot be nil";exit $EINVAL; fi
+if test -z "$WSREP_SST_OPT_GTID"; then err "GTID cannot be nil"; exit $EINVAL; fi
+
+if local_ip $WSREP_SST_OPT_HOST && \
+ [ "$WSREP_SST_OPT_PORT" = "$WSREP_SST_OPT_LPORT" ]
+then
+ wsrep_log_error \
+ "destination address '$WSREP_SST_OPT_HOST:$WSREP_SST_OPT_PORT' matches source address."
+ exit $EINVAL
+fi
+
+# Check client version
+if ! mysql --version | grep 'Distrib 5.5' >/dev/null
+then
+ mysql --version >&2
+ err "this operation requires MySQL client version 5.5.x"
+ exit $EINVAL
+fi
+
+AUTH="-u$WSREP_SST_OPT_USER"
+if test -n "$WSREP_SST_OPT_PSWD"; then AUTH="$AUTH -p$WSREP_SST_OPT_PSWD"; fi
+
+STOP_WSREP="SET wsrep_on=OFF;"
+
+# NOTE: we don't use --routines here because we're dumping mysql.proc table
+MYSQLDUMP="mysqldump $AUTH -S$WSREP_SST_OPT_SOCKET \
+--add-drop-database --add-drop-table --skip-add-locks --create-options \
+--disable-keys --extended-insert --skip-lock-tables --quick --set-charset \
+--skip-comments --flush-privileges --all-databases"
+
+# mysqldump cannot restore CSV tables, fix this issue
+CSV_TABLES_FIX="
+set sql_mode='';
+
+USE mysql;
+
+SET @str = IF (@@have_csv = 'YES', 'CREATE TABLE IF NOT EXISTS general_log (event_time TIMESTAMP NOT NULL, user_host MEDIUMTEXT NOT NULL, thread_id INTEGER NOT NULL, server_id INTEGER UNSIGNED NOT NULL, command_type VARCHAR(64) NOT NULL,argument MEDIUMTEXT NOT NULL) engine=CSV CHARACTER SET utf8 comment=\"General log\"', 'SET @dummy = 0');
+
+PREPARE stmt FROM @str;
+EXECUTE stmt;
+DROP PREPARE stmt;
+
+SET @str = IF (@@have_csv = 'YES', 'CREATE TABLE IF NOT EXISTS slow_log (start_time TIMESTAMP NOT NULL, user_host MEDIUMTEXT NOT NULL, query_time TIME NOT NULL, lock_time TIME NOT NULL, rows_sent INTEGER NOT NULL, rows_examined INTEGER NOT NULL, db VARCHAR(512) NOT NULL, last_insert_id INTEGER NOT NULL, insert_id INTEGER NOT NULL, server_id INTEGER UNSIGNED NOT NULL, sql_text MEDIUMTEXT NOT NULL) engine=CSV CHARACTER SET utf8 comment=\"Slow log\"', 'SET @dummy = 0');
+
+PREPARE stmt FROM @str;
+EXECUTE stmt;
+DROP PREPARE stmt;"
+
+SET_START_POSITION="SET GLOBAL wsrep_start_position='$WSREP_SST_OPT_GTID';"
+
+MYSQL="mysql $AUTH -h$WSREP_SST_OPT_HOST -P$WSREP_SST_OPT_PORT "\
+"--disable-reconnect --connect_timeout=10"
+
+# need to disable logging when loading the dump
+# reason is that dump contains ALTER TABLE for log tables, and
+# this causes an error if logging is enabled
+GENERAL_LOG_OPT=`$MYSQL --skip-column-names -e"$STOP_WSREP SELECT @@GENERAL_LOG"`
+SLOW_LOG_OPT=`$MYSQL --skip-column-names -e"$STOP_WSREP SELECT @@SLOW_QUERY_LOG"`
+$MYSQL -e"$STOP_WSREP SET GLOBAL GENERAL_LOG=OFF"
+$MYSQL -e"$STOP_WSREP SET GLOBAL SLOW_QUERY_LOG=OFF"
+
+# commands to restore log settings
+RESTORE_GENERAL_LOG="SET GLOBAL GENERAL_LOG=$GENERAL_LOG_OPT;"
+RESTORE_SLOW_QUERY_LOG="SET GLOBAL SLOW_QUERY_LOG=$SLOW_LOG_OPT;"
+
+if [ $WSREP_SST_OPT_BYPASS -eq 0 ]
+then
+ (echo $STOP_WSREP && $MYSQLDUMP && echo $CSV_TABLES_FIX \
+ && echo $RESTORE_GENERAL_LOG && echo $RESTORE_SLOW_QUERY_LOG \
+ && echo $SET_START_POSITION \
+ || echo "SST failed to complete;") | $MYSQL
+else
+ wsrep_log_info "Bypassing state dump."
+ echo $SET_START_POSITION | $MYSQL
+fi
+
+#
diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh
new file mode 100644
index 00000000000..d510477535c
--- /dev/null
+++ b/scripts/wsrep_sst_rsync.sh
@@ -0,0 +1,210 @@
+#!/bin/bash -ue
+
+# Copyright (C) 2010 Codership Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston
+# MA 02110-1301 USA.
+
+# This is a reference script for rsync-based state snapshot tansfer
+
+RSYNC_PID=
+RSYNC_CONF=
+
+. $(dirname $0)/wsrep_sst_common
+
+cleanup_joiner()
+{
+ wsrep_log_info "Joiner cleanup."
+ local PID=$(cat "$RSYNC_PID" 2>/dev/null || echo 0)
+ [ "0" != "$PID" ] && kill $PID && sleep 0.5 && kill -9 $PID >/dev/null 2>&1 \
+ || :
+ rm -rf "$RSYNC_CONF"
+ rm -rf "$MAGIC_FILE"
+ rm -rf "$RSYNC_PID"
+ echo " done." >&2
+}
+
+check_pid()
+{
+ local pid_file=$1
+ [ -r "$pid_file" ] && ps -p $(cat $pid_file) >/dev/null 2>&1
+}
+
+check_pid_and_port()
+{
+ local pid_file=$1
+ local rsync_pid=$(cat $pid_file)
+ local rsync_port=$2
+
+ check_pid $pid_file && \
+ netstat -anpt 2>/dev/null | \
+ grep LISTEN | grep \:$rsync_port | grep $rsync_pid/rsync >/dev/null
+}
+
+MAGIC_FILE="$WSREP_SST_OPT_DATA/rsync_sst_complete"
+rm -rf "$MAGIC_FILE"
+
+if [ "$WSREP_SST_OPT_ROLE" = "donor" ]
+then
+
+ if [ $WSREP_SST_OPT_BYPASS -eq 0 ]
+ then
+
+ FLUSHED="$WSREP_SST_OPT_DATA/tables_flushed"
+ rm -rf "$FLUSHED"
+
+ # Use deltaxfer only for WAN
+ inv=$(basename $0)
+ [ "$inv" = "wsrep_sst_rsync_wan" ] && WHOLE_FILE_OPT="" \
+ || WHOLE_FILE_OPT="--whole-file"
+
+ echo "flush tables"
+
+ # wait for tables flushed and state ID written to the file
+ while [ ! -r "$FLUSHED" ] && ! grep -q ':' "$FLUSHED" >/dev/null 2>&1
+ do
+ sleep 0.2
+ done
+
+ STATE="$(cat $FLUSHED)"
+ rm -rf "$FLUSHED"
+
+ sync
+
+ # Old filter - include everything except selected
+ # FILTER=(--exclude '*.err' --exclude '*.pid' --exclude '*.sock' \
+ # --exclude '*.conf' --exclude core --exclude 'galera.*' \
+ # --exclude grastate.txt --exclude '*.pem' \
+ # --exclude '*.[0-9][0-9][0-9][0-9][0-9][0-9]' --exclude '*.index')
+
+ # New filter - exclude everything except dirs (schemas) and innodb files
+ FILTER=(-f '+ /ibdata*' -f '+ /ib_logfile*' -f '+ */' -f '-! */*')
+
+ RC=0
+ rsync --archive --no-times --ignore-times --inplace --delete --quiet \
+ $WHOLE_FILE_OPT "${FILTER[@]}" "$WSREP_SST_OPT_DATA" \
+ rsync://$WSREP_SST_OPT_ADDR || RC=$?
+
+ [ $RC -ne 0 ] && echo "rsync returned code $RC:" >> /dev/stderr
+
+ case $RC in
+ 0) RC=0 # Success
+ ;;
+ 12) RC=71 # EPROTO
+ wsrep_log_error \
+ "rsync server on the other end has incompatible protocol. " \
+ "Make sure you have the same version of rsync on all nodes."
+ ;;
+ 22) RC=12 # ENOMEM
+ ;;
+ *) RC=255 # unknown error
+ ;;
+ esac
+
+ [ $RC -ne 0 ] && exit $RC
+
+ else # BYPASS
+ wsrep_log_info "Bypassing state dump."
+ STATE="$WSREP_SST_OPT_GTID"
+ fi
+
+ echo "continue" # now server can resume updating data
+
+ echo "$STATE" > "$MAGIC_FILE"
+ rsync -aqc "$MAGIC_FILE" rsync://$WSREP_SST_OPT_ADDR
+
+ echo "done $STATE"
+
+elif [ "$WSREP_SST_OPT_ROLE" = "joiner" ]
+then
+ MYSQLD_PID=$WSREP_SST_OPT_PARENT
+
+ MODULE="rsync_sst"
+
+ RSYNC_PID="$WSREP_SST_OPT_DATA/$MODULE.pid"
+
+ if check_pid $RSYNC_PID
+ then
+ echo "rsync daemon already running."
+ exit 114 # EALREADY
+ fi
+ rm -rf "$RSYNC_PID"
+
+ ADDR=$WSREP_SST_OPT_ADDR
+ RSYNC_PORT=$(echo $ADDR | awk -F ':' '{ print $2 }')
+ if [ -z "$RSYNC_PORT" ]
+ then
+ RSYNC_PORT=4444
+ ADDR="$(echo $ADDR | awk -F ':' '{ print $1 }'):$RSYNC_PORT"
+ fi
+
+ trap "exit 32" HUP PIPE
+ trap "exit 3" INT TERM
+ trap cleanup_joiner EXIT
+
+ MYUID=$(id -u)
+ MYGID=$(id -g)
+ RSYNC_CONF="$WSREP_SST_OPT_DATA/$MODULE.conf"
+
+cat << EOF > "$RSYNC_CONF"
+pid file = $RSYNC_PID
+use chroot = no
+[$MODULE]
+ path = $WSREP_SST_OPT_DATA
+ read only = no
+ timeout = 300
+ uid = $MYUID
+ gid = $MYGID
+EOF
+
+# rm -rf "$DATA"/ib_logfile* # we don't want old logs around
+
+ # listen at all interfaces (for firewalled setups)
+ rsync --daemon --port $RSYNC_PORT --config "$RSYNC_CONF"
+
+ until check_pid_and_port $RSYNC_PID $RSYNC_PORT
+ do
+ sleep 0.2
+ done
+
+ echo "ready $ADDR/$MODULE"
+
+ # wait for SST to complete by monitoring magic file
+ while [ ! -r "$MAGIC_FILE" ] && check_pid "$RSYNC_PID" && \
+ ps -p $MYSQLD_PID >/dev/null
+ do
+ sleep 1
+ done
+
+ if ! ps -p $MYSQLD_PID >/dev/null
+ then
+ echo "Parent mysqld process (PID:$MYSQLD_PID) terminated unexpectedly." >&2
+ exit 32
+ fi
+
+ if [ -r "$MAGIC_FILE" ]
+ then
+ cat "$MAGIC_FILE" # output UUID:seqno
+ else
+ # this message should cause joiner to abort
+ echo "rsync process ended without creating '$MAGIC_FILE'"
+ fi
+
+# cleanup_joiner
+else
+ echo "Unrecognized role: '$WSREP_SST_OPT_ROLE'"
+ exit 22 # EINVAL
+fi
+
+exit 0
diff --git a/scripts/wsrep_sst_xtrabackup.sh b/scripts/wsrep_sst_xtrabackup.sh
new file mode 100644
index 00000000000..08552f50a44
--- /dev/null
+++ b/scripts/wsrep_sst_xtrabackup.sh
@@ -0,0 +1,222 @@
+#!/bin/bash -ue
+
+# Copyright (C) 2011 Percona Inc
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston
+# MA 02110-1301 USA.
+
+# This is a reference script for Percona XtraBackup-based state snapshot tansfer
+
+TMPDIR="/tmp"
+
+. $(dirname $0)/wsrep_sst_common
+
+cleanup_joiner()
+{
+#set -x
+ local PID=$(ps -aef |grep nc| grep $NC_PORT | awk '{ print $2 }')
+ wsrep_log_info "Killing nc pid $PID"
+ [ -n "$PID" -a "0" != "$PID" ] && kill $PID && (kill $PID && kill -9 $PID) || :
+ rm -f "$MAGIC_FILE"
+#set +x
+}
+
+check_pid()
+{
+ local pid_file="$1"
+ [ -r "$pid_file" ] && ps -p $(cat "$pid_file") >/dev/null 2>&1
+}
+
+kill_xtrabackup()
+{
+#set -x
+ local PID=$(cat $XTRABACKUP_PID)
+ [ -n "$PID" -a "0" != "$PID" ] && kill $PID && (kill $PID && kill -9 $PID) || :
+ rm -f "$XTRABACKUP_PID"
+#set +x
+}
+
+# waits ~10 seconds for nc to open the port and then reports ready
+# (regardless of timeout)
+wait_for_nc()
+{
+ local PORT=$1
+ local ADDR=$2
+ local MODULE=$3
+ for i in $(seq 1 50)
+ do
+ netstat -nptl 2>/dev/null | grep '/nc\s*$' | awk '{ print $4 }' | \
+ sed 's/.*://' | grep \^${PORT}\$ >/dev/null && break
+ sleep 0.2
+ done
+ echo "ready ${ADDR}/${MODULE}"
+}
+
+INNOBACKUPEX_BIN=innobackupex
+INNOBACKUPEX_ARGS=""
+NC_BIN=nc
+
+for TOOL_BIN in INNOBACKUPEX_BIN NC_BIN ; do
+ if ! which ${!TOOL_BIN} > /dev/null 2>&1
+ then
+ echo "Can't find ${!TOOL_BIN} in the path"
+ exit 22 # EINVAL
+ fi
+done
+
+#ROLE=$1
+#ADDR=$2
+readonly AUTH=(${WSREP_SST_OPT_AUTH//:/ })
+readonly DATA="${WSREP_SST_OPT_DATA}"
+#CONF=$5
+
+INFO_FILE="xtrabackup_galera_info"
+IST_FILE="xtrabackup_ist"
+
+MAGIC_FILE="${DATA}/${INFO_FILE}"
+rm -f "${MAGIC_FILE}"
+
+if [ "$WSREP_SST_OPT_ROLE" = "donor" ]
+then
+
+# UUID=$6
+# SEQNO=$7
+# BYPASS=$8
+
+ NC_PORT=$(echo $WSREP_SST_OPT_ADDR | awk -F '[:/]' '{ print $2 }')
+ REMOTEIP=$(echo $WSREP_SST_OPT_ADDR | awk -F ':' '{ print $1 }')
+
+ if [ $WSREP_SST_OPT_BYPASS -eq 0 ]
+ then
+
+ INNOBACKUPEX_ARGS="--galera-info --tmpdir=${TMPDIR} --stream=tar
+ --defaults-file=${WSREP_SST_OPT_CONF}
+ --socket=${WSREP_SST_OPT_SOCKET}"
+
+ if [ "${AUTH[0]}" != "(null)" ]; then
+ INNOBACKUPEX_ARGS="${INNOBACKUPEX_ARGS} --user=${AUTH[0]}"
+ fi
+
+ if [ ${#AUTH[*]} -eq 2 ]; then
+ INNOBACKUPEX_ARGS="${INNOBACKUPEX_ARGS} --password=${AUTH[1]}"
+ fi
+
+ set +e
+
+ # This file and variable seems to have no effect and probably should be deleted
+ XTRABACKUP_PID=$(mktemp --tmpdir wsrep_sst_xtrabackupXXXX.pid)
+
+ ${INNOBACKUPEX_BIN} ${INNOBACKUPEX_ARGS} ${TMPDIR} \
+ 2> ${DATA}/innobackup.backup.log | \
+ ${NC_BIN} ${REMOTEIP} ${NC_PORT}
+
+ RC=( "${PIPESTATUS[@]}" )
+ set -e
+
+ if [ ${RC[0]} -ne 0 ]; then
+ wsrep_log_error "${INNOBACKUPEX_BIN} finished with error: ${RC[0]}. " \
+ "Check ${DATA}/innobackup.backup.log"
+ exit 22
+ elif [ ${RC[1]} -ne 0 ]; then
+ wsrep_log_error "${NC_BIN} finished with error: ${RC[1]}"
+ exit 22
+ fi
+
+ if check_pid "${XTRABACKUP_PID}"
+ then
+ wsrep_log_error "xtrabackup process is still running. Killing... "
+ kill_xtrabackup
+ exit 22
+ fi
+
+ rm -f "${XTRABACKUP_PID}"
+
+ else # BYPASS
+ STATE="${WSREP_SST_OPT_GTID}"
+ echo "continue" # now server can resume updating data
+ echo "${STATE}" > "${MAGIC_FILE}"
+ echo "1" > "${DATA}/${IST_FILE}"
+ (cd ${DATA}; tar cf - ${INFO_FILE} ${IST_FILE}) | ${NC_BIN} ${REMOTEIP} ${NC_PORT}
+ rm -f ${DATA}/${IST_FILE}
+ fi
+
+ echo "done ${WSREP_SST_OPT_GTID}"
+
+elif [ "${WSREP_SST_OPT_ROLE}" = "joiner" ]
+then
+ MODULE="xtrabackup_sst"
+
+ rm -f ${DATA}/xtrabackup_*
+
+ ADDR=${WSREP_SST_OPT_ADDR}
+ NC_PORT=$(echo ${ADDR} | awk -F ':' '{ print $2 }')
+ if [ -z "${NC_PORT}" ]
+ then
+ NC_PORT=4444
+ ADDR="$(echo ${ADDR} | awk -F ':' '{ print $1 }'):${NC_PORT}"
+ fi
+
+ wait_for_nc ${NC_PORT} ${ADDR} ${MODULE} &
+
+# trap "exit 32" HUP PIPE
+# trap "exit 3" INT TERM
+ trap cleanup_joiner HUP PIPE INT TERM
+
+ set +e
+ ${NC_BIN} -dl ${NC_PORT} | tar xfi - -C ${DATA} 1>&2
+ RC=( "${PIPESTATUS[@]}" )
+ set -e
+
+ wait %% # join wait_for_nc thread
+
+ if [ ${RC[0]} -ne 0 -o ${RC[1]} -ne 0 ];
+ then
+ wsrep_log_error "Error while getting st data from donor node: " \
+ "${RC[0]}, ${RC[1]}"
+ exit 32
+ fi
+
+ if [ ! -r "${MAGIC_FILE}" ]
+ then
+ # this message should cause joiner to abort
+ wsrep_log_error "xtrabackup process ended without creating '${MAGIC_FILE}'"
+ exit 32
+ fi
+
+ if ! ps -p ${WSREP_SST_OPT_PARENT} >/dev/null
+ then
+ wsrep_log_error "Parent mysqld process (PID:${WSREP_SST_OPT_PARENT}) terminated unexpectedly." >&2
+ exit 32
+ fi
+
+ if [ ! -r "${IST_FILE}" ]
+ then
+ rm -f ${DATA}/ib_logfile*
+ ${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} --apply-log \
+ --ibbackup=xtrabackup ${DATA} 1>&2 2> ${DATA}/innobackup.prepare.log
+ if [ $? -ne 0 ];
+ then
+ wsrep_log_error "${INNOBACKUPEX_BIN} finished with errors. Check ${DATA}/innobackup.prepare.log" >&2
+ exit 22
+ fi
+ fi
+
+ cat "${MAGIC_FILE}" # output UUID:seqno
+
+else
+ wsrep_log_error "Unrecognized role: ${WSREP_SST_OPT_ROLE}"
+ exit 22 # EINVAL
+fi
+
+exit 0
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index ecf91fcf043..85008f1eaea 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -13,6 +13,10 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+IF(WITH_WSREP)
+ SET(WSREP_INCLUDES ${CMAKE_SOURCE_DIR}/wsrep)
+ENDIF()
+
INCLUDE_DIRECTORIES(
${CMAKE_SOURCE_DIR}/include
${CMAKE_SOURCE_DIR}/sql
@@ -20,6 +24,7 @@ ${CMAKE_SOURCE_DIR}/regex
${ZLIB_INCLUDE_DIR}
${SSL_INCLUDE_DIRS}
${CMAKE_BINARY_DIR}/sql
+${WSREP_INCLUDES}
)
SET(GEN_SOURCES
@@ -35,6 +40,18 @@ ADD_DEFINITIONS(-DMYSQL_SERVER -DHAVE_EVENT_SCHEDULER -DHAVE_POOL_OF_THREADS)
IF(SSL_DEFINES)
ADD_DEFINITIONS(${SSL_DEFINES})
ENDIF()
+IF(WITH_WSREP)
+ SET(WSREP_SOURCES
+ wsrep_check_opts.cc
+ wsrep_hton.cc
+ wsrep_mysqld.cc
+ wsrep_notify.cc
+ wsrep_sst.cc
+ wsrep_utils.cc
+ wsrep_var.cc
+ )
+ SET(WSREP_LIB wsrep)
+ENDIF()
SET (SQL_SOURCE
../sql-common/client.c derror.cc des_key_file.cc
@@ -85,6 +102,7 @@ SET (SQL_SOURCE
gcalc_slicescan.cc gcalc_tools.cc
threadpool_common.cc
../sql-common/mysql_async.c
+ ${WSREP_SOURCES}
${GEN_SOURCES}
${MYSYS_LIBWRAP_SOURCE}
)
@@ -104,6 +122,7 @@ DTRACE_INSTRUMENT(sql)
TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS}
mysys dbug strings vio regex
${LIBWRAP} ${LIBCRYPT} ${LIBDL}
+ ${WSREP_LIB}
${SSL_LIBRARIES})
IF(WIN32)
@@ -199,7 +218,6 @@ IF (NOT ${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR})
ENDIF()
ENDIF()
-
INCLUDE(${CMAKE_SOURCE_DIR}/cmake/bison.cmake)
RUN_BISON(
${CMAKE_CURRENT_SOURCE_DIR}/sql_yacc.yy
diff --git a/sql/events.cc b/sql/events.cc
index 8b4bab9e3a6..208914a4e6d 100644
--- a/sql/events.cc
+++ b/sql/events.cc
@@ -1145,7 +1145,19 @@ end:
close_mysql_tables(thd);
DBUG_RETURN(ret);
}
+#ifdef WITH_WSREP
+int wsrep_create_event_query(THD *thd, uchar** buf, uint* buf_len)
+{
+ String log_query;
+ if (create_query_string(thd, &log_query))
+ {
+ WSREP_WARN("events create string failed: %s", thd->query());
+ return 1;
+ }
+ return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len);
+}
+#endif /* WITH_WSREP */
/**
@} (End of group Event_Scheduler)
*/
diff --git a/sql/handler.cc b/sql/handler.cc
index 356ae330201..97c79291a59 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -52,6 +52,9 @@
#include "../storage/maria/ha_maria.h"
#endif
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif
/*
While we have legacy_db_type, we have this array to
check for dups and to find handlerton from legacy_db_type.
@@ -1189,7 +1192,11 @@ int ha_commit_trans(THD *thd, bool all)
{
/* Free resources and perform other cleanup even for 'empty' transactions. */
if (is_real_trans)
- thd->transaction.cleanup();
+#ifdef WITH_WSREP
+ thd->transaction.cleanup(thd);
+#else
+ thd->transaction.cleanup();
+#endif /* WITH_WSREP */
DBUG_RETURN(0);
}
@@ -1217,7 +1224,12 @@ int ha_commit_trans(THD *thd, bool all)
mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
MDL_EXPLICIT);
+#ifdef WITH_WSREP
+ if (!WSREP(thd) &&
+ thd->mdl_context.acquire_lock(&mdl_request,
+#else
if (thd->mdl_context.acquire_lock(&mdl_request,
+#endif /* WITH_WSREP */
thd->variables.lock_wait_timeout))
{
ha_rollback_trans(thd, all);
@@ -1264,6 +1276,19 @@ int ha_commit_trans(THD *thd, bool all)
err= ht->prepare(ht, thd, all);
status_var_increment(thd->status_var.ha_prepare_count);
if (err)
+#ifdef WITH_WSREP
+ if (WSREP(thd) && ht->db_type== DB_TYPE_WSREP)
+ {
+ error= 1;
+ /* avoid sending error, if we need to replay */
+ if (thd->wsrep_conflict_state!= MUST_REPLAY)
+ {
+ my_error(ER_LOCK_DEADLOCK, MYF(0), err);
+ }
+ }
+ else
+ /* not wsrep hton, bail to native mysql behavior */
+#endif
my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
if (err)
@@ -1275,6 +1300,13 @@ int ha_commit_trans(THD *thd, bool all)
DEBUG_SYNC(thd, "ha_commit_trans_after_prepare");
DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
+#ifdef WITH_WSREP
+ if (!error && wsrep_is_wsrep_xid(&thd->transaction.xid_state.xid))
+ {
+ // xid was rewritten by wsrep
+ xid= wsrep_xid_seqno(&thd->transaction.xid_state.xid);
+ }
+#endif // WITH_WSREP
if (!is_real_trans)
{
error= commit_one_phase_2(thd, all, trans, is_real_trans);
@@ -1361,6 +1393,18 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
int error= 0;
Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
DBUG_ENTER("commit_one_phase_2");
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ char info[64]= { 0, };
+ snprintf (info, sizeof(info) - 1, "ha_commit_one_phase(%lld)",
+ (long long)thd->wsrep_trx_seqno);
+#else
+ const char info[]="ha_commit_one_phase()";
+#endif /* WSREP_PROC_INFO */
+ char* tmp_info= NULL;
+ if (WSREP(thd)) tmp_info= (char *)thd_proc_info(thd, info);
+#endif /* WITH_WSREP */
+
if (ha_info)
{
for (; ha_info; ha_info= ha_info_next)
@@ -1389,7 +1433,14 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
}
/* Free resources and perform other cleanup even for 'empty' transactions. */
if (is_real_trans)
+#ifdef WITH_WSREP
+ thd->transaction.cleanup(thd);
+#else
thd->transaction.cleanup();
+#endif /* WITH_WSREP */
+#ifdef WITH_WSREP
+ if (WSREP(thd)) thd_proc_info(thd, tmp_info);
+#endif /* WITH_WSREP */
DBUG_RETURN(error);
}
@@ -1464,7 +1515,11 @@ int ha_rollback_trans(THD *thd, bool all)
}
/* Always cleanup. Even if nht==0. There may be savepoints. */
if (is_real_trans)
+#ifdef WITH_WSREP
+ thd->transaction.cleanup(thd);
+#else
thd->transaction.cleanup();
+#endif /* WITH_WSREP */
if (all)
thd->transaction_rollback_request= FALSE;
@@ -1629,7 +1684,13 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
got, hton_name(hton)->str);
for (int i=0; i < got; i ++)
{
+#ifdef WITH_WSREP
+ my_xid x=(wsrep_is_wsrep_xid(&info->list[i]) ?
+ wsrep_xid_seqno(&info->list[i]) :
+ info->list[i].get_my_xid());
+#else
my_xid x=info->list[i].get_my_xid();
+#endif /* WITH_WSREP */
if (!x) // not "mine" - that is generated by external TM
{
#ifndef DBUG_OFF
@@ -2645,7 +2706,12 @@ int handler::update_auto_increment()
variables->auto_increment_increment);
auto_inc_intervals_count++;
/* Row-based replication does not need to store intervals in binlog */
+#ifdef WITH_WSREP
+ if (((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()) &&
+ !thd->is_current_stmt_binlog_format_row())
+#else
if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row())
+#endif /* WITH_WSREP */
thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
auto_inc_interval_for_cur_row.values(),
variables->auto_increment_increment);
@@ -4871,7 +4937,11 @@ static bool check_table_binlog_row_based(THD *thd, TABLE *table)
return (thd->is_current_stmt_binlog_format_row() &&
table->s->cached_row_logging_check &&
(thd->variables.option_bits & OPTION_BIN_LOG) &&
+#ifdef WITH_WSREP
+ ((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()));
+#else
mysql_bin_log.is_open());
+#endif
}
@@ -5194,6 +5264,64 @@ void signal_log_not_needed(struct handlerton, char *log_file)
}
+#ifdef WITH_WSREP
+/**
+ @details
+ This function makes the storage engine to force the victim transaction
+ to abort. Currently, only innodb has this functionality, but any SE
+ implementing the wsrep API should provide this service to support
+ multi-master operation.
+
+ @param bf_thd brute force THD asking for the abort
+ @param victim_thd victim THD to be aborted
+
+ @return
+ always 0
+*/
+
+int ha_wsrep_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal)
+{
+ DBUG_ENTER("ha_wsrep_abort_transaction");
+ if (!WSREP(bf_thd) &&
+ !(wsrep_OSU_method_options == WSREP_OSU_RSU &&
+ bf_thd->wsrep_exec_mode == TOTAL_ORDER)) {
+ DBUG_RETURN(0);
+ }
+
+ handlerton *hton= installed_htons[DB_TYPE_INNODB];
+ if (hton && hton->wsrep_abort_transaction)
+ {
+ hton->wsrep_abort_transaction(hton, bf_thd, victim_thd, signal);
+ }
+ else
+ {
+ WSREP_WARN("cannot abort InnoDB transaction");
+ }
+
+ DBUG_RETURN(0);
+}
+
+void ha_wsrep_fake_trx_id(THD *thd)
+{
+ DBUG_ENTER("ha_wsrep_fake_trx_id");
+ if (!WSREP(thd))
+ {
+ DBUG_VOID_RETURN;
+ }
+
+ handlerton *hton= installed_htons[DB_TYPE_INNODB];
+ if (hton && hton->wsrep_fake_trx_id)
+ {
+ hton->wsrep_fake_trx_id(hton, thd);
+ }
+ else
+ {
+ WSREP_WARN("cannot get get fake InnoDB transaction ID");
+ }
+
+ DBUG_VOID_RETURN;
+}
+#endif /* WITH_WSREP */
#ifdef TRANS_LOG_MGM_EXAMPLE_CODE
/*
Example of transaction log management functions based on assumption that logs
diff --git a/sql/handler.h b/sql/handler.h
index 4a6fc2f1bd5..da847e2677c 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -355,6 +355,7 @@ enum legacy_db_type
DB_TYPE_MARIA,
/** Performance schema engine. */
DB_TYPE_PERFORMANCE_SCHEMA,
+ DB_TYPE_WSREP,
DB_TYPE_FIRST_DYNAMIC=42,
DB_TYPE_DEFAULT=127 // Must be last
};
@@ -1042,6 +1043,11 @@ struct handlerton
const char *wild, bool dir, List<LEX_STRING> *files);
int (*table_exists_in_engine)(handlerton *hton, THD* thd, const char *db,
const char *name);
+ int (*wsrep_abort_transaction)(handlerton *hton, THD *bf_thd,
+ THD *victim_thd, my_bool signal);
+ int (*wsrep_set_checkpoint)(handlerton *hton, const XID* xid);
+ int (*wsrep_get_checkpoint)(handlerton *hton, XID* xid);
+ void (*wsrep_fake_trx_id)(handlerton *hton, THD *thd);
uint32 license; /* Flag for Engine License */
/*
@@ -2929,6 +2935,9 @@ bool key_uses_partial_cols(TABLE *table, uint keyno);
extern const char *ha_row_type[];
extern MYSQL_PLUGIN_IMPORT const char *tx_isolation_names[];
extern MYSQL_PLUGIN_IMPORT const char *binlog_format_names[];
+#ifdef WITH_WSREP
+extern MYSQL_PLUGIN_IMPORT const char *wsrep_binlog_format_names[];
+#endif /* WITH_WSREP */
extern TYPELIB tx_isolation_typelib;
extern const char *myisam_stats_method_names[];
extern ulong total_ha, total_ha_2pc;
@@ -3024,6 +3033,10 @@ int ha_enable_transaction(THD *thd, bool on);
int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv);
int ha_savepoint(THD *thd, SAVEPOINT *sv);
int ha_release_savepoint(THD *thd, SAVEPOINT *sv);
+#ifdef WITH_WSREP
+int ha_wsrep_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal);
+void ha_wsrep_fake_trx_id(THD *thd);
+#endif /* WITH_WSREP */
/* these are called by storage engines */
void trans_register_ha(THD *thd, bool all, handlerton *ht);
@@ -3054,6 +3067,9 @@ int ha_binlog_end(THD *thd);
#define ha_binlog_wait(a) do {} while (0)
#define ha_binlog_end(a) do {} while (0)
#endif
+#ifdef WITH_WSREP
+void wsrep_brute_force_aborts();
+#endif
const char *get_canonical_filename(handler *file, const char *path,
char *tmp_path);
diff --git a/sql/item_func.cc b/sql/item_func.cc
index 441eb37d701..2ee39b7e329 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -2583,7 +2583,19 @@ void Item_func_rand::seed_random(Item *arg)
TODO: do not do reinit 'rand' for every execute of PS/SP if
args[0] is a constant.
*/
+#ifdef WITH_WSREP
+ uint32 tmp;
+ if (WSREP(current_thd))
+ {
+ if (current_thd->wsrep_exec_mode==REPL_RECV)
+ tmp= current_thd->wsrep_rand;
+ else
+ tmp= current_thd->wsrep_rand= (uint32) arg->val_int();
+ } else
+ tmp= (uint32) arg->val_int();
+#else
uint32 tmp= (uint32) arg->val_int();
+#endif /* WITH_WSREP */
my_rnd_init(rand, (uint32) (tmp*0x10001L+55555555L),
(uint32) (tmp*0x10000001L));
}
diff --git a/sql/lock.cc b/sql/lock.cc
index 3a1a6d41ce3..c8ca26561e6 100644
--- a/sql/lock.cc
+++ b/sql/lock.cc
@@ -84,6 +84,10 @@
#include <hash.h>
#include <assert.h>
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif /* WITH_WSREP */
+
/**
@defgroup Locking Locking
@{
@@ -314,6 +318,9 @@ bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags)
/* Copy the lock data array. thr_multi_lock() reorders its contents. */
memcpy(sql_lock->locks + sql_lock->lock_count, sql_lock->locks,
sql_lock->lock_count * sizeof(*sql_lock->locks));
+#ifdef WITH_WSREP
+ thd->lock_info.in_lock_tables= thd->in_lock_tables;
+#endif /* Lock on the copied half of the lock data array. */
/* Lock on the copied half of the lock data array. */
rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks +
sql_lock->lock_count,
@@ -323,7 +330,11 @@ bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags)
(void) unlock_external(thd, sql_lock->table, sql_lock->table_count);
end:
+#ifdef WITH_WSREP
+ thd_proc_info(thd, "mysql_lock_tables(): unlocking tables II");
+#else /* WITH_WSREP */
thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
if (thd->killed)
{
@@ -336,6 +347,9 @@ end:
my_error(rc, MYF(0));
thd->set_time_after_lock();
+#ifdef WITH_WSREP
+ thd_proc_info(thd, "exit mysqld_lock_tables()");
+#endif /* WITH_WSREP */
DBUG_RETURN(rc);
}
@@ -1054,11 +1068,15 @@ void Global_read_lock::unlock_global_read_lock(THD *thd)
{
thd->mdl_context.release_lock(m_mdl_blocks_commits_lock);
m_mdl_blocks_commits_lock= NULL;
+#ifdef WITH_WSREP
+ wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED;
+ wsrep->resume(wsrep);
+#endif /* WITH_WSREP */
}
thd->mdl_context.release_lock(m_mdl_global_shared_lock);
m_mdl_global_shared_lock= NULL;
m_state= GRL_NONE;
-
+
DBUG_VOID_RETURN;
}
@@ -1086,6 +1104,20 @@ bool Global_read_lock::make_global_read_lock_block_commit(THD *thd)
If we didn't succeed lock_global_read_lock(), or if we already suceeded
make_global_read_lock_block_commit(), do nothing.
*/
+
+#ifdef WITH_WSREP
+ if (m_mdl_blocks_commits_lock)
+ {
+ WSREP_DEBUG("GRL was in block commit mode when entering "
+ "make_global_read_lock_block_commit");
+ thd->mdl_context.release_lock(m_mdl_blocks_commits_lock);
+ m_mdl_blocks_commits_lock= NULL;
+ wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED;
+ wsrep->resume(wsrep);
+ m_state= GRL_ACQUIRED;
+ }
+#endif /* WITH_WSREP */
+
if (m_state != GRL_ACQUIRED)
DBUG_RETURN(0);
@@ -1098,6 +1130,22 @@ bool Global_read_lock::make_global_read_lock_block_commit(THD *thd)
m_mdl_blocks_commits_lock= mdl_request.ticket;
m_state= GRL_ACQUIRED_AND_BLOCKS_COMMIT;
+#ifdef WITH_WSREP
+ long long ret = wsrep->pause(wsrep);
+ if (ret >= 0)
+ {
+ wsrep_locked_seqno= ret;
+ }
+ else if (ret != -ENOSYS) /* -ENOSYS - no provider */
+ {
+ WSREP_ERROR("Failed to pause provider: %lld (%s)", -ret, strerror(-ret));
+
+ /* m_mdl_blocks_commits_lock is always NULL here */
+ wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED;
+ my_error(ER_LOCK_DEADLOCK, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+#endif /* WITH_WSREP */
DBUG_RETURN(FALSE);
}
diff --git a/sql/log.cc b/sql/log.cc
index 70e74de5a31..964120e6b51 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -51,6 +51,9 @@
#include "sql_plugin.h"
#include "rpl_handler.h"
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif /* WITH_WSREP */
#include "debug_sync.h"
#include "sql_show.h"
@@ -487,6 +490,9 @@ private:
};
handlerton *binlog_hton;
+#ifdef WITH_WSREP
+extern handlerton *wsrep_hton;
+#endif
bool LOGGER::is_log_table_enabled(uint log_table_type)
{
@@ -501,6 +507,134 @@ bool LOGGER::is_log_table_enabled(uint log_table_type)
}
}
+#ifdef WITH_WSREP
+IO_CACHE * get_trans_log(THD * thd)
+{
+ binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*)
+ thd_get_ha_data(thd, binlog_hton);
+ if (cache_mngr)
+ {
+ return cache_mngr->get_binlog_cache_log(true);
+ }
+ else
+ {
+ WSREP_DEBUG("binlog cache not initialized, conn :%ld", thd->thread_id);
+ return NULL;
+ }
+}
+
+
+bool wsrep_trans_cache_is_empty(THD *thd)
+{
+ bool res= TRUE;
+
+ if (thd_sql_command((const THD*) thd) != SQLCOM_SELECT)
+ res= FALSE;
+ else
+ {
+ binlog_cache_mngr *const cache_mngr=
+ (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
+ if (cache_mngr)
+ {
+ res= cache_mngr->trx_cache.empty();
+ }
+ }
+ return res;
+}
+
+void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end)
+{
+ thd->binlog_flush_pending_rows_event(stmt_end);
+}
+void thd_binlog_trx_reset(THD * thd)
+{
+ /*
+ todo: fix autocommit select to not call the caller
+ */
+ if (thd_get_ha_data(thd, binlog_hton) != NULL)
+ {
+ binlog_cache_mngr *const cache_mngr=
+ (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
+ if (cache_mngr) cache_mngr->reset(TRUE, TRUE);
+ }
+ thd->clear_binlog_table_maps();
+}
+
+void thd_binlog_rollback_stmt(THD * thd)
+{
+ WSREP_DEBUG("thd_binlog_rollback_stmt :%ld", thd->thread_id);
+ binlog_cache_mngr *const cache_mngr=
+ (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
+ if (cache_mngr) cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
+}
+/*
+ Write the contents of a cache to memory buffer.
+
+ This function quite the same as MYSQL_BIN_LOG::write_cache(),
+ with the exception that here we write in buffer instead of log file.
+ */
+
+int wsrep_write_cache(IO_CACHE *cache, uchar **buf, uint *buf_len)
+{
+
+ if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
+ return ER_ERROR_ON_WRITE;
+ uint length= my_b_bytes_in_cache(cache);
+ long long total_length = 0;
+ uchar *buf_ptr = NULL;
+
+ do
+ {
+ /* bail out if buffer grows too large
+ This is a temporary fix to avoid flooding replication
+ TODO: remove this check for 0.7.4 release
+ */
+ if (total_length > wsrep_max_ws_size)
+ {
+ WSREP_WARN("transaction size limit (%lld) exceeded: %lld",
+ wsrep_max_ws_size, total_length);
+ if (reinit_io_cache(cache, WRITE_CACHE, 0, 0, 0))
+ {
+ WSREP_WARN("failed to initialize io-cache");
+ }
+ if (buf_ptr) my_free(*buf);
+ *buf_len = 0;
+ return ER_ERROR_ON_WRITE;
+ }
+ if (total_length > 0)
+ {
+ *buf_len += length;
+ *buf = (uchar *)my_realloc(*buf, total_length+length, MYF(0));
+ if (!*buf)
+ {
+ WSREP_ERROR("io cache write problem: %d %d", *buf_len, length);
+ return ER_ERROR_ON_WRITE;
+ }
+ buf_ptr = *buf+total_length;
+ }
+ else
+ {
+ if (buf_ptr != NULL)
+ {
+ WSREP_ERROR("io cache alloc error: %d %d", *buf_len, length);
+ my_free(*buf);
+ }
+ if (length > 0)
+ {
+ *buf = (uchar *) my_malloc(length, MYF(0));
+ buf_ptr = *buf;
+ *buf_len = length;
+ }
+ }
+ total_length += length;
+
+ memcpy(buf_ptr, cache->read_pos, length);
+ cache->read_pos=cache->read_end;
+ } while ((cache->file >= 0) && (length= my_b_fill(cache)));
+
+ return 0;
+}
+#endif
/* Check if a given table is opened log table */
int check_if_log_table(size_t db_len, const char *db, size_t table_name_len,
@@ -1535,7 +1669,11 @@ binlog_trans_log_savepos(THD *thd, my_off_t *pos)
thd->binlog_setup_trx_data();
binlog_cache_mngr *const cache_mngr=
(binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
+#ifdef WITH_WSREP
+ DBUG_ASSERT((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open());
+#else
DBUG_ASSERT(mysql_bin_log.is_open());
+#endif
*pos= cache_mngr->trx_cache.get_byte_position();
DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos));
DBUG_VOID_RETURN;
@@ -1583,7 +1721,16 @@ binlog_trans_log_truncate(THD *thd, my_off_t pos)
int binlog_init(void *p)
{
binlog_hton= (handlerton *)p;
+#ifdef WITH_WSREP
+ if (WSREP_ON)
+ binlog_hton->state= SHOW_OPTION_YES;
+ else
+ {
+#endif /* WITH_WSREP */
binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
binlog_hton->db_type=DB_TYPE_BINLOG;
binlog_hton->savepoint_offset= sizeof(my_off_t);
binlog_hton->close_connection= binlog_close_connection;
@@ -1684,6 +1831,13 @@ static inline int
binlog_commit_flush_stmt_cache(THD *thd, bool all,
binlog_cache_mngr *cache_mngr)
{
+#ifdef WITH_WSREP
+ if (thd->wsrep_mysql_replicated > 0)
+ {
+ WSREP_DEBUG("avoiding binlog_commit_flush_trx_cache: %d", thd->wsrep_mysql_replicated);
+ return 0;
+ }
+#endif
Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
FALSE, TRUE, TRUE, 0);
return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE));
@@ -1839,6 +1993,9 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all)
DBUG_ENTER("binlog_commit");
binlog_cache_mngr *const cache_mngr=
(binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
+#ifdef WITH_WSREP
+ if (!cache_mngr) DBUG_RETURN(0);
+#endif /* WITH_WSREP */
DBUG_PRINT("debug",
("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
@@ -1895,6 +2052,9 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
int error= 0;
binlog_cache_mngr *const cache_mngr=
(binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
+#ifdef WITH_WSREP
+ if (!cache_mngr) DBUG_RETURN(0);
+#endif /* WITH_WSREP */
DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
YESNO(all),
@@ -1923,8 +2083,12 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
cache_mngr->reset(false, true);
DBUG_RETURN(error);
}
-
+#ifdef WITH_WSREP
+ if (!wsrep_emulate_bin_log &&
+ mysql_bin_log.check_write_error(thd))
+#else
if (mysql_bin_log.check_write_error(thd))
+#endif
{
/*
"all == true" means that a "rollback statement" triggered the error and
@@ -1954,12 +2118,12 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
if (ending_trans(thd, all) &&
((thd->variables.option_bits & OPTION_KEEP_LOG) ||
(trans_has_updated_non_trans_table(thd) &&
- thd->variables.binlog_format == BINLOG_FORMAT_STMT) ||
+ WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_STMT) ||
(cache_mngr->trx_cache.changes_to_non_trans_temp_table() &&
- thd->variables.binlog_format == BINLOG_FORMAT_MIXED) ||
+ WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_MIXED) ||
(trans_has_updated_non_trans_table(thd) &&
ending_single_stmt_trans(thd,all) &&
- thd->variables.binlog_format == BINLOG_FORMAT_MIXED)))
+ WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_MIXED)))
error= binlog_rollback_flush_trx_cache(thd, all, cache_mngr);
/*
Truncate the cache if:
@@ -1973,9 +2137,9 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
else if (ending_trans(thd, all) ||
(!(thd->variables.option_bits & OPTION_KEEP_LOG) &&
(!stmt_has_updated_non_trans_table(thd) ||
- thd->variables.binlog_format != BINLOG_FORMAT_STMT) &&
+ WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_STMT) &&
(!cache_mngr->trx_cache.changes_to_non_trans_temp_table() ||
- thd->variables.binlog_format != BINLOG_FORMAT_MIXED)))
+ WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_MIXED)))
error= binlog_truncate_trx_cache(thd, cache_mngr, all);
}
@@ -2069,7 +2233,9 @@ static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
binlog_trans_log_savepos(thd, (my_off_t*) sv);
/* Write it to the binary log */
-
+#ifdef WITH_WSREP
+ if (wsrep_emulate_bin_log) DBUG_RETURN(0);
+#endif /* WITH_WSREP */
String log_query;
if (log_query.append(STRING_WITH_LEN("SAVEPOINT ")) ||
append_identifier(thd, &log_query,
@@ -2090,7 +2256,12 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
non-transactional table. Otherwise, truncate the binlog cache starting
from the SAVEPOINT command.
*/
+#ifdef WITH_WSREP
+ if (!wsrep_emulate_bin_log &&
+ unlikely(trans_has_updated_non_trans_table(thd) ||
+#else
if (unlikely(trans_has_updated_non_trans_table(thd) ||
+#endif
(thd->variables.option_bits & OPTION_KEEP_LOG)))
{
String log_query;
@@ -4747,6 +4918,7 @@ int THD::binlog_setup_trx_data()
DBUG_RETURN(0);
}
+
/*
Function to start a statement and optionally a transaction for the
binary log.
@@ -4866,7 +5038,12 @@ int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
table->s->table_map_id));
/* Pre-conditions */
+#ifdef WITH_WSREP
+ DBUG_ASSERT(is_current_stmt_binlog_format_row() &&
+ (WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open()));
+#else
DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
+#endif
DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
Table_map_log_event
@@ -5003,7 +5180,11 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
bool is_transactional)
{
DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
+#ifdef WITH_WSREP
+ DBUG_ASSERT(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open());
+#else
DBUG_ASSERT(mysql_bin_log.is_open());
+#endif
DBUG_PRINT("enter", ("event: 0x%lx", (long) event));
int error= 0;
@@ -5083,7 +5264,11 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
mostly called if is_open() *was* true a few instructions before, but it
could have changed since.
*/
+#ifdef WITH_WSREP
+ if ((WSREP(thd) && wsrep_emulate_bin_log) || is_open())
+#else
if (likely(is_open()))
+#endif
{
my_off_t UNINIT_VAR(my_org_b_tell);
#ifdef HAVE_REPLICATION
@@ -5264,6 +5449,35 @@ err:
}
}
+#ifdef WITH_WSREP
+ if (WSREP(thd) && wsrep_incremental_data_collection &&
+ (wsrep_emulate_bin_log || mysql_bin_log.is_open()))
+ {
+ DBUG_ASSERT(thd->wsrep_trx_handle.trx_id != (unsigned long)-1);
+ if (!error)
+ {
+ IO_CACHE* cache= get_trans_log(thd);
+ uchar* buf= NULL;
+ uint buf_len= 0;
+
+ if (wsrep_emulate_bin_log)
+ thd->binlog_flush_pending_rows_event(false);
+ error= wsrep_write_cache(cache, &buf, &buf_len);
+ if (!error && buf_len > 0)
+ {
+ wsrep_status_t rc= wsrep->append_data(wsrep,
+ &thd->wsrep_trx_handle,
+ buf, buf_len);
+ if (rc != WSREP_OK)
+ {
+ sql_print_warning("WSREP: append_data() returned %d", rc);
+ error= 1;
+ }
+ }
+ if (buf_len) my_free(buf);
+ }
+ }
+#endif /* WITH_WSREP */
DBUG_RETURN(error);
}
@@ -5356,6 +5570,14 @@ int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
{
int error= 0;
DBUG_ENTER("MYSQL_BIN_LOG::rotate");
+#ifdef WITH_WSREP
+ if (WSREP_ON && wsrep_to_isolation)
+ {
+ WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d",
+ wsrep_to_isolation);
+ DBUG_RETURN(0);
+ }
+#endif
//todo: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log);
*check_purge= false;
@@ -5824,6 +6046,9 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
group_commit_entry entry;
DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
+#ifdef WITH_WSREP
+ if (wsrep_emulate_bin_log) DBUG_RETURN(0);
+#endif /* WITH_WSREP */
entry.thd= thd;
entry.cache_mngr= cache_mngr;
entry.error= 0;
@@ -7464,7 +7689,13 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
binlog_cache_mngr *cache_mngr=
(binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
-
+#ifdef WITH_WSREP
+ if (!cache_mngr)
+ {
+ WSREP_DEBUG("Skipping empty log_xid: %s", thd->query());
+ DBUG_RETURN(1);
+ }
+#endif /* WITH_WSREP */
cache_mngr->using_xa= TRUE;
cache_mngr->xa_xid= xid;
err= binlog_commit_flush_xid_caches(thd, cache_mngr, all, xid);
diff --git a/sql/log.h b/sql/log.h
index e388df61b38..0e189a789db 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -94,7 +94,7 @@ public:
int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered, bool need_commit_ordered)
{
- DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */);
+ //DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */);
return 1;
}
int unlog(ulong cookie, my_xid xid) { return 0; }
@@ -273,6 +273,12 @@ enum enum_log_state { LOG_OPENED, LOG_CLOSED, LOG_TO_BE_OPENED };
(mmap+fsync is two times faster than write+fsync)
*/
+#ifdef WITH_WSREP
+extern my_bool wsrep_emulate_bin_log;
+Log_event* wsrep_read_log_event(
+ char **arg_buf, size_t *arg_buf_len,
+ const Format_description_log_event *description_event);
+#endif
class MYSQL_LOG
{
public:
@@ -853,12 +859,30 @@ public:
};
enum enum_binlog_format {
+ /*
+ statement-based except for cases where only row-based can work (UUID()
+ etc):
+ */
BINLOG_FORMAT_MIXED= 0, ///< statement if safe, otherwise row - autodetected
BINLOG_FORMAT_STMT= 1, ///< statement-based
BINLOG_FORMAT_ROW= 2, ///< row-based
BINLOG_FORMAT_UNSPEC=3 ///< thd_binlog_format() returns it when binlog is closed
};
+#ifdef WITH_WSREP
+IO_CACHE * get_trans_log(THD * thd);
+bool wsrep_trans_cache_is_empty(THD *thd);
+void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end);
+void thd_binlog_trx_reset(THD * thd);
+void thd_binlog_rollback_stmt(THD * thd);
+int wsrep_write_cache(IO_CACHE *cache, uchar **buf, uint *buf_len);
+
+#define WSREP_FORMAT(my_format) \
+ ((wsrep_forced_binlog_format != BINLOG_FORMAT_UNSPEC) ? \
+ wsrep_forced_binlog_format : my_format)
+#else
+#define WSREP_FORMAT(my_format) my_format
+#endif
int query_error_code(THD *thd, bool not_killed);
uint purge_log_get_error_code(int res);
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 6fc116d2b92..edb89fb85fe 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -48,6 +48,9 @@
#include <my_dir.h>
#include "sql_show.h"
+#if WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif
#endif /* MYSQL_CLIENT */
#include <base64.h>
@@ -2786,7 +2789,9 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg,
master_data_written(0)
{
time_t end_time;
-
+#ifdef WITH_WSREP
+ thd->wsrep_PA_safe= false;
+#endif /* WITH_WSREP */
memset(&user, 0, sizeof(user));
memset(&host, 0, sizeof(host));
@@ -7141,7 +7146,14 @@ err:
end_io_cache(&file);
if (fd >= 0)
mysql_file_close(fd, MYF(0));
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ thd_proc_info(thd, "exit Create_file_log_event::do_apply_event()");
+ else
+ thd_proc_info(thd, 0);
+#else /* WITH_WSREP */
thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
return error != 0;
}
#endif /* defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) */
@@ -7312,7 +7324,14 @@ int Append_block_log_event::do_apply_event(Relay_log_info const *rli)
err:
if (fd >= 0)
mysql_file_close(fd, MYF(0));
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ thd_proc_info(thd, "exit Append_block_log_event::do_apply_event()");
+ else
+ thd_proc_info(thd, 0);
+#else /* WITH_WSREP */
thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
DBUG_RETURN(error);
}
#endif
@@ -8256,10 +8275,21 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS;
/* A small test to verify that objects have consistent types */
DBUG_ASSERT(sizeof(thd->variables.option_bits) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS));
-
if (open_and_lock_tables(thd, rli->tables_to_lock, FALSE, 0))
{
uint actual_error= thd->stmt_da->sql_errno();
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ WSREP_WARN("BF applier failed to open_and_lock_tables: %u, fatal: %d "
+ "wsrep = (exec_mode: %d conflict_state: %d seqno: %lld)",
+ thd->stmt_da->sql_errno(),
+ thd->is_fatal_error,
+ thd->wsrep_exec_mode,
+ thd->wsrep_conflict_state,
+ (long long)thd->wsrep_trx_seqno);
+ }
+#endif
if (thd->is_slave_error || thd->is_fatal_error)
{
/*
@@ -9367,8 +9397,12 @@ check_table_map(Relay_log_info const *rli, RPL_TABLE_LIST *table_list)
{
DBUG_ENTER("check_table_map");
enum_tbl_map_status res= OK_TO_PROCESS;
-
+#ifdef WITH_WSREP
+ if ((rli->sql_thd->slave_thread /* filtering is for slave only */ ||
+ (WSREP(rli->sql_thd) && rli->sql_thd->wsrep_applier)) &&
+#else
if (rli->sql_thd->slave_thread /* filtering is for slave only */ &&
+#endif /* WITH_WSREP */
(!rpl_filter->db_ok(table_list->db) ||
(rpl_filter->is_on() && !rpl_filter->tables_ok("", table_list))))
res= FILTERED_OUT;
@@ -10025,8 +10059,23 @@ int
Write_rows_log_event::do_exec_row(const Relay_log_info *const rli)
{
DBUG_ASSERT(m_table != NULL);
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ char info[64];
+ info[sizeof(info) - 1] = '\0';
+ snprintf(info, sizeof(info) - 1, "Write_rows_log_event::write_row(%lld)",
+ (long long) thd->wsrep_trx_seqno);
+ const char* tmp = (WSREP(thd)) ? thd_proc_info(thd, info) : NULL;
+#else
+ const char* tmp = (WSREP(thd)) ?
+ thd_proc_info(thd,"Write_rows_log_event::write_row()") : NULL;
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
int error= write_row(rli, slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT);
+#ifdef WITH_WSREP
+ if (WSREP(thd)) thd_proc_info(thd, tmp);
+#endif /* WITH_WSREP */
if (error && !thd->is_error())
{
DBUG_ASSERT(0);
@@ -10685,14 +10734,39 @@ int Delete_rows_log_event::do_exec_row(const Relay_log_info *const rli)
int error;
DBUG_ASSERT(m_table != NULL);
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ char info[64];
+ info[sizeof(info) - 1] = '\0';
+ snprintf(info, sizeof(info) - 1, "Delete_rows_log_event::find_row(%lld)",
+ (long long) thd->wsrep_trx_seqno);
+ const char* tmp = (WSREP(thd)) ? thd_proc_info(thd, info) : NULL;
+#else
+ const char* tmp = (WSREP(thd)) ?
+ thd_proc_info(thd,"Delete_rows_log_event::find_row()") : NULL;
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
if (!(error= find_row(rli)))
{
/*
Delete the record found, located in record[0]
*/
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ snprintf(info, sizeof(info) - 1,
+ "Delete_rows_log_event::ha_delete_row(%lld)",
+ (long long) thd->wsrep_trx_seqno);
+ if (WSREP(thd)) thd_proc_info(thd, info);
+#else
+ if (WSREP(thd)) thd_proc_info(thd,"Delete_rows_log_event::ha_delete_row()");
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
error= m_table->file->ha_delete_row(m_table->record[0]);
m_table->file->ha_index_or_rnd_end();
}
+#ifdef WITH_WSREP
+ if (WSREP(thd)) thd_proc_info(thd, tmp);
+#endif /* WITH_WSREP */
return error;
}
@@ -10812,6 +10886,18 @@ Update_rows_log_event::do_exec_row(const Relay_log_info *const rli)
{
DBUG_ASSERT(m_table != NULL);
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ char info[64];
+ info[sizeof(info) - 1] = '\0';
+ snprintf(info, sizeof(info) - 1, "Update_rows_log_event::find_row(%lld)",
+ (long long) thd->wsrep_trx_seqno);
+ const char* tmp = (WSREP(thd)) ? thd_proc_info(thd, info) : NULL;
+#else
+ const char* tmp = (WSREP(thd)) ?
+ thd_proc_info(thd,"Update_rows_log_event::find_row()") : NULL;
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
int error= find_row(rli);
if (error)
{
@@ -10838,6 +10924,17 @@ Update_rows_log_event::do_exec_row(const Relay_log_info *const rli)
store_record(m_table,record[1]);
m_curr_row= m_curr_row_end;
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ snprintf(info, sizeof(info) - 1,
+ "Update_rows_log_event::unpack_current_row(%lld)",
+ (long long) thd->wsrep_trx_seqno);
+ if (WSREP(thd)) thd_proc_info(thd, info);
+#else
+ if (WSREP(thd))
+ thd_proc_info(thd,"Update_rows_log_event::unpack_current_row()");
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
/* this also updates m_curr_row_end */
if ((error= unpack_current_row(rli)))
goto err;
@@ -10856,10 +10953,23 @@ Update_rows_log_event::do_exec_row(const Relay_log_info *const rli)
DBUG_DUMP("new values", m_table->record[0], m_table->s->reclength);
#endif
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ snprintf(info, sizeof(info) - 1,
+ "Update_rows_log_event::ha_update_row(%lld)",
+ (long long) thd->wsrep_trx_seqno);
+ if (WSREP(thd)) thd_proc_info(thd, info);
+#else
+ if (WSREP(thd)) thd_proc_info(thd,"Update_rows_log_event::ha_update_row()");
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
error= m_table->file->ha_update_row(m_table->record[1], m_table->record[0]);
if (error == HA_ERR_RECORD_IS_THE_SAME)
error= 0;
+#ifdef WITH_WSREP
+ if (WSREP(thd)) thd_proc_info(thd, tmp);
+#endif /* WITH_WSREP */
err:
m_table->file->ha_index_or_rnd_end();
return error;
@@ -10944,6 +11054,48 @@ void Incident_log_event::pack_info(THD *thd, Protocol *protocol)
protocol->store(buf, bytes, &my_charset_bin);
}
#endif
+#if WITH_WSREP && !defined(MYSQL_CLIENT)
+Format_description_log_event *wsrep_format_desc; // TODO: free them at the end
+/*
+ read the first event from (*buf). The size of the (*buf) is (*buf_len).
+ At the end (*buf) is shitfed to point to the following event or NULL and
+ (*buf_len) will be changed to account just being read bytes of the 1st event.
+*/
+#define WSREP_MAX_ALLOWED_PACKET 1024*1024*1024 // current protocol max
+
+Log_event* wsrep_read_log_event(
+ char **arg_buf, size_t *arg_buf_len,
+ const Format_description_log_event *description_event)
+{
+ DBUG_ENTER("wsrep_read_log_event");
+ char *head= (*arg_buf);
+
+ uint data_len = uint4korr(head + EVENT_LEN_OFFSET);
+ char *buf= (*arg_buf);
+ const char *error= 0;
+ Log_event *res= 0;
+
+ if (data_len > WSREP_MAX_ALLOWED_PACKET)
+ {
+ error = "Event too big";
+ goto err;
+ }
+
+ res= Log_event::read_log_event(buf, data_len, &error, description_event, FALSE);
+
+err:
+ if (!res)
+ {
+ DBUG_ASSERT(error != 0);
+ sql_print_error("Error in Log_event::read_log_event(): "
+ "'%s', data_len: %d, event_type: %d",
+ error,data_len,head[EVENT_TYPE_OFFSET]);
+ }
+ (*arg_buf)+= data_len;
+ (*arg_buf_len)-= data_len;
+ DBUG_RETURN(res);
+}
+#endif
#ifdef MYSQL_CLIENT
diff --git a/sql/mdl.cc b/sql/mdl.cc
index 5e297051377..e2c633b486e 100644
--- a/sql/mdl.cc
+++ b/sql/mdl.cc
@@ -20,7 +20,17 @@
#include <mysqld_error.h>
#include <mysql/plugin.h>
#include <mysql/service_thd_wait.h>
-
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+extern "C" my_thread_id wsrep_thd_thread_id(THD *thd);
+extern "C" char *wsrep_thd_query(THD *thd);
+void sql_print_information(const char *format, ...)
+ ATTRIBUTE_FORMAT(printf, 1, 2);
+
+extern bool
+wsrep_grant_mdl_exception(MDL_context *requestor_ctx,
+ MDL_ticket *ticket);
+#endif /* WITH_WSREP */
#ifdef HAVE_PSI_INTERFACE
static PSI_mutex_key key_MDL_map_mutex;
static PSI_mutex_key key_MDL_wait_LOCK_wait_status;
@@ -1254,11 +1264,54 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket)
called by other threads.
*/
DBUG_ASSERT(ticket->get_lock());
+#ifdef WITH_WSREP
+ if ((this == &(ticket->get_lock()->m_waiting)) &&
+ wsrep_thd_is_brute_force((void *)(ticket->get_ctx()->get_thd())))
+ {
+ Ticket_iterator itw(ticket->get_lock()->m_waiting);
+ Ticket_iterator itg(ticket->get_lock()->m_granted);
+
+ MDL_ticket *waiting, *granted;
+ MDL_ticket *prev=NULL;
+ bool added= false;
+
+ while ((waiting= itw++) && !added)
+ {
+ if (!wsrep_thd_is_brute_force((void *)(waiting->get_ctx()->get_thd())))
+ {
+ WSREP_DEBUG("MDL add_ticket inserted before: %lu %s",
+ wsrep_thd_thread_id(waiting->get_ctx()->get_thd()),
+ wsrep_thd_query(waiting->get_ctx()->get_thd()));
+ m_list.insert_after(prev, ticket);
+ added= true;
+ }
+ prev= waiting;
+ }
+ if (!added) m_list.push_back(ticket);
+
+ while ((granted= itg++))
+ {
+ if (granted->get_ctx() != ticket->get_ctx() &&
+ granted->is_incompatible_when_granted(ticket->get_type()))
+ {
+ if (!wsrep_grant_mdl_exception(ticket->get_ctx(), granted))
+ {
+ WSREP_DEBUG("MDL victim killed at add_ticket");
+ }
+ }
+ }
+ }
+ else
+ {
+#endif /* WITH_WSREP */
/*
Add ticket to the *back* of the queue to ensure fairness
among requests with the same priority.
*/
m_list.push_back(ticket);
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
m_bitmap|= MDL_BIT(ticket->get_type());
}
@@ -1564,7 +1617,6 @@ MDL_object_lock::m_waiting_incompatible[MDL_TYPE_END] =
0
};
-
/**
Check if request for the metadata lock can be satisfied given its
current state.
@@ -1589,6 +1641,9 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg,
bool can_grant= FALSE;
bitmap_t waiting_incompat_map= incompatible_waiting_types_bitmap()[type_arg];
bitmap_t granted_incompat_map= incompatible_granted_types_bitmap()[type_arg];
+#ifdef WITH_WSREP
+ bool wsrep_can_grant= TRUE;
+#endif /* WITH_WSREP */
/*
New lock request can be satisfied iff:
@@ -1611,12 +1666,59 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg,
{
if (ticket->get_ctx() != requestor_ctx &&
ticket->is_incompatible_when_granted(type_arg))
+#ifdef WITH_WSREP
+ {
+ if (wsrep_thd_is_brute_force((void *)(requestor_ctx->get_thd())) &&
+ key.mdl_namespace() == MDL_key::GLOBAL)
+ {
+ WSREP_DEBUG("global lock granted for BF: %lu %s",
+ wsrep_thd_thread_id(requestor_ctx->get_thd()),
+ wsrep_thd_query(requestor_ctx->get_thd()));
+ can_grant = true;
+ }
+ else if (!wsrep_grant_mdl_exception(requestor_ctx, ticket))
+ {
+ wsrep_can_grant= FALSE;
+ if (wsrep_log_conflicts)
+ {
+ MDL_lock * lock = ticket->get_lock();
+ WSREP_INFO(
+ "MDL conflict db=%s table=%s ticket=%d solved by %s",
+ lock->key.db_name(), lock->key.name(), ticket->get_type(), "abort"
+ );
+ }
+ }
+ else
+ {
+ can_grant= TRUE;
+ }
+ }
+#else
break;
+#endif /* WITH_WSREP */
}
+#ifdef WITH_WSREP
+ if ((ticket == NULL) && wsrep_can_grant)
+#else
if (ticket == NULL) /* Incompatible locks are our own. */
+#endif /* WITH_WSREP */
+
can_grant= TRUE;
}
}
+#ifdef WITH_WSREP
+ else
+ {
+ if (wsrep_thd_is_brute_force((void *)(requestor_ctx->get_thd())) &&
+ key.mdl_namespace() == MDL_key::GLOBAL)
+ {
+ WSREP_DEBUG("global lock granted for BF (waiting queue): %lu %s",
+ wsrep_thd_thread_id(requestor_ctx->get_thd()),
+ wsrep_thd_query(requestor_ctx->get_thd()));
+ can_grant = true;
+ }
+ }
+#endif /* WITH_WSREP */
return can_grant;
}
@@ -2913,3 +3015,33 @@ void MDL_context::set_transaction_duration_for_all_locks()
ticket->m_duration= MDL_TRANSACTION;
#endif
}
+#ifdef WITH_WSREP
+void MDL_ticket::wsrep_report(bool debug)
+{
+ if (debug)
+ {
+ WSREP_DEBUG("MDL ticket: type: %s space: %s db: %s name: %s (%s)",
+ (get_type() == MDL_INTENTION_EXCLUSIVE) ? "intention exclusive" :
+ ((get_type() == MDL_SHARED) ? "shared" :
+ ((get_type() == MDL_SHARED_HIGH_PRIO ? "shared high prio" :
+ ((get_type() == MDL_SHARED_READ) ? "shared read" :
+ ((get_type() == MDL_SHARED_WRITE) ? "shared write" :
+ ((get_type() == MDL_SHARED_NO_WRITE) ? "shared no write" :
+ ((get_type() == MDL_SHARED_NO_READ_WRITE) ? "shared no read write" :
+ ((get_type() == MDL_EXCLUSIVE) ? "exclusive" :
+ "UNKNOWN")))))))),
+ (m_lock->key.mdl_namespace() == MDL_key::GLOBAL) ? "GLOBAL" :
+ ((m_lock->key.mdl_namespace() == MDL_key::SCHEMA) ? "SCHEMA" :
+ ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "TABLE" :
+ ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "FUNCTION" :
+ ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "PROCEDURE" :
+ ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "TRIGGER" :
+ ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "EVENT" :
+ ((m_lock->key.mdl_namespace() == MDL_key::COMMIT) ? "COMMIT" :
+ (char *)"UNKNOWN"))))))),
+ m_lock->key.db_name(),
+ m_lock->key.name(),
+ m_lock->key.get_wait_state_name());
+ }
+}
+#endif /* WITH_WSREP */
diff --git a/sql/mdl.h b/sql/mdl.h
index af7d75c1297..0f99b759c0e 100644
--- a/sql/mdl.h
+++ b/sql/mdl.h
@@ -475,6 +475,9 @@ public:
MDL_ticket *next_in_lock;
MDL_ticket **prev_in_lock;
public:
+#ifdef WITH_WSREP
+ void wsrep_report(bool debug);
+#endif /* WITH_WSREP */
bool has_pending_conflicting_lock() const;
MDL_context *get_ctx() const { return m_ctx; }
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 3e4212f1aa9..b441c31b28d 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -72,6 +72,10 @@
#include "scheduler.h"
#include <waiting_threads.h>
#include "debug_sync.h"
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+ulong wsrep_running_threads = 0; // # of currently running wsrep threads
+#endif
#include "sql_callback.h"
#include "threadpool.h"
@@ -362,6 +366,9 @@ static DYNAMIC_ARRAY all_options;
/* Global variables */
+#ifdef WITH_WSREP
+ulong my_bind_addr;
+#endif /* WITH_WSREP */
bool opt_bin_log, opt_bin_log_used=0, opt_ignore_builtin_innodb= 0;
my_bool opt_log, opt_slow_log, debug_assert_if_crashed_table= 0, opt_help= 0, opt_abort;
ulonglong log_output_options;
@@ -454,6 +461,10 @@ ulong opt_binlog_rows_event_max_size;
my_bool opt_master_verify_checksum= 0;
my_bool opt_slave_sql_verify_checksum= 1;
const char *binlog_format_names[]= {"MIXED", "STATEMENT", "ROW", NullS};
+#ifdef WITH_WSREP
+const char *wsrep_binlog_format_names[]=
+ {"MIXED", "STATEMENT", "ROW", "NONE", NullS};
+#endif /*WITH_WSREP */
#ifdef HAVE_INITGROUPS
volatile sig_atomic_t calling_initgroups= 0; /**< Used in SIGSEGV handler. */
#endif
@@ -690,6 +701,22 @@ pthread_attr_t connection_attrib;
mysql_mutex_t LOCK_server_started;
mysql_cond_t COND_server_started;
+#ifdef WITH_WSREP
+mysql_mutex_t LOCK_wsrep_ready;
+mysql_cond_t COND_wsrep_ready;
+mysql_mutex_t LOCK_wsrep_sst;
+mysql_cond_t COND_wsrep_sst;
+mysql_mutex_t LOCK_wsrep_sst_init;
+mysql_cond_t COND_wsrep_sst_init;
+mysql_mutex_t LOCK_wsrep_rollback;
+mysql_cond_t COND_wsrep_rollback;
+wsrep_aborting_thd_t wsrep_aborting_thd= NULL;
+mysql_mutex_t LOCK_wsrep_replaying;
+mysql_cond_t COND_wsrep_replaying;
+mysql_mutex_t LOCK_wsrep_slave_threads;
+int wsrep_replaying= 0;
+static void wsrep_close_threads(THD* thd);
+#endif
int mysqld_server_started= 0;
File_parser_dummy_hook file_parser_dummy_hook;
@@ -758,6 +785,12 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_prep_xids,
key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count,
key_PARTITION_LOCK_auto_inc;
+#ifdef WITH_WSREP
+PSI_mutex_key key_LOCK_wsrep_rollback, key_LOCK_wsrep_thd,
+ key_LOCK_wsrep_replaying, key_LOCK_wsrep_ready, key_LOCK_wsrep_sst,
+ key_LOCK_wsrep_sst_thread, key_LOCK_wsrep_sst_init,
+ key_LOCK_wsrep_slave_threads;
+#endif
PSI_mutex_key key_RELAYLOG_LOCK_index;
PSI_mutex_key key_LOCK_stats,
@@ -825,6 +858,17 @@ static PSI_mutex_info all_server_mutexes[]=
{ &key_LOCK_commit_ordered, "LOCK_commit_ordered", PSI_FLAG_GLOBAL},
{ &key_LOG_INFO_lock, "LOG_INFO::lock", 0},
{ &key_LOCK_thread_count, "LOCK_thread_count", PSI_FLAG_GLOBAL},
+#ifdef WITH_WSREP
+ { &key_LOCK_wsrep_ready, "LOCK_wsrep_ready", PSI_FLAG_GLOBAL},
+ { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL},
+ { &key_LOCK_wsrep_sst_thread, "wsrep_sst_thread", 0},
+ { &key_LOCK_wsrep_sst_init, "LOCK_wsrep_sst_init", PSI_FLAG_GLOBAL},
+ { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL},
+ { &key_LOCK_wsrep_rollback, "LOCK_wsrep_rollback", PSI_FLAG_GLOBAL},
+ { &key_LOCK_wsrep_thd, "THD::LOCK_wsrep_thd", 0},
+ { &key_LOCK_wsrep_replaying, "LOCK_wsrep_replaying", PSI_FLAG_GLOBAL},
+ { &key_LOCK_wsrep_slave_threads, "LOCK_wsrep_slave_threads", PSI_FLAG_GLOBAL},
+#endif
{ &key_PARTITION_LOCK_auto_inc, "HA_DATA_PARTITION::LOCK_auto_inc", 0}
};
@@ -862,6 +906,11 @@ PSI_cond_key key_BINLOG_COND_prep_xids, key_BINLOG_update_cond,
key_TABLE_SHARE_cond, key_user_level_lock_cond,
key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache,
key_BINLOG_COND_queue_busy;
+#ifdef WITH_WSREP
+PSI_cond_key key_COND_wsrep_rollback, key_COND_wsrep_thd,
+ key_COND_wsrep_replaying, key_COND_wsrep_ready, key_COND_wsrep_sst,
+ key_COND_wsrep_sst_init, key_COND_wsrep_sst_thread;
+#endif /* WITH_WSREP */
PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready;
PSI_cond_key key_RELAYLOG_COND_queue_busy;
PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy;
@@ -903,6 +952,15 @@ static PSI_cond_info all_server_conds[]=
{ &key_user_level_lock_cond, "User_level_lock::cond", 0},
{ &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL},
{ &key_COND_thread_cache, "COND_thread_cache", PSI_FLAG_GLOBAL},
+#ifdef WITH_WSREP
+ { &key_COND_wsrep_ready, "COND_wsrep_ready", PSI_FLAG_GLOBAL},
+ { &key_COND_wsrep_sst, "COND_wsrep_sst", PSI_FLAG_GLOBAL},
+ { &key_COND_wsrep_sst_init, "COND_wsrep_sst_init", PSI_FLAG_GLOBAL},
+ { &key_COND_wsrep_sst_thread, "wsrep_sst_thread", 0},
+ { &key_COND_wsrep_rollback, "COND_wsrep_rollback", PSI_FLAG_GLOBAL},
+ { &key_COND_wsrep_thd, "THD::COND_wsrep_thd", 0},
+ { &key_COND_wsrep_replaying, "COND_wsrep_replaying", PSI_FLAG_GLOBAL},
+#endif
{ &key_COND_flush_thread_cache, "COND_flush_thread_cache", PSI_FLAG_GLOBAL}
};
@@ -1428,6 +1486,11 @@ static void close_connections(void)
if (tmp->slave_thread)
continue;
+#ifdef WITH_WSREP
+ /* skip wsrep system threads as well */
+ if (WSREP(tmp) && (tmp->wsrep_exec_mode==REPL_RECV || tmp->wsrep_applier))
+ continue;
+#endif
tmp->killed= KILL_SERVER_HARD;
MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp));
mysql_mutex_lock(&tmp->LOCK_thd_data);
@@ -1491,6 +1554,33 @@ static void close_connections(void)
close_connection(tmp,ER_SERVER_SHUTDOWN);
}
#endif
+#ifdef WITH_WSREP
+ /*
+ * TODO: this code block may turn out redundant. wsrep->disconnect()
+ * should terminate slave threads gracefully, and we don't need
+ * to signal them here.
+ * The code here makes sure mysqld will not hang during shutdown
+ * even if wsrep provider has problems in shutting down.
+ */
+ if (WSREP(tmp) && tmp->wsrep_exec_mode==REPL_RECV)
+ {
+ sql_print_information("closing wsrep system thread");
+ tmp->killed= KILL_CONNECTION;
+ MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp));
+ if (tmp->mysys_var)
+ {
+ tmp->mysys_var->abort=1;
+ mysql_mutex_lock(&tmp->mysys_var->mutex);
+ if (tmp->mysys_var->current_cond)
+ {
+ mysql_mutex_lock(tmp->mysys_var->current_mutex);
+ mysql_cond_broadcast(tmp->mysys_var->current_cond);
+ mysql_mutex_unlock(tmp->mysys_var->current_mutex);
+ }
+ mysql_mutex_unlock(&tmp->mysys_var->mutex);
+ }
+ }
+#endif
DBUG_PRINT("quit",("Unlocking LOCK_thread_count"));
mysql_mutex_unlock(&LOCK_thread_count);
}
@@ -1651,8 +1741,14 @@ static void __cdecl kill_server(int sig_ptr)
}
}
#endif
+#ifdef WITH_WSREP
+ if (WSREP_ON) wsrep_stop_replication(NULL);
+#endif
close_connections();
+#ifdef WITH_WSREP
+ if (WSREP_ON) wsrep_deinit();
+#endif
if (sig != MYSQL_KILL_SIGNAL &&
sig != 0)
unireg_abort(1); /* purecov: inspected */
@@ -1747,6 +1843,21 @@ extern "C" void unireg_abort(int exit_code)
usage();
if (exit_code)
sql_print_error("Aborting\n");
+#ifdef WITH_WSREP
+ if (wsrep)
+ {
+ /* This is an abort situation, we cannot expect to gracefully close all
+ * wsrep threads here, we can only diconnect from service */
+ wsrep_close_client_connections(FALSE);
+ shutdown_in_progress= 1;
+ THD* thd(0);
+ wsrep->disconnect(wsrep);
+ WSREP_INFO("Service disconnected.");
+ wsrep_close_threads(thd); /* this won't close all threads */
+ sleep(1); /* so give some time to exit for those which can */
+ WSREP_INFO("Some threads may fail to exit.");
+ }
+#endif // WITH_WSREP
clean_up(!opt_abort && (exit_code || !opt_bootstrap)); /* purecov: inspected */
DBUG_PRINT("quit",("done with cleanup in unireg_abort"));
mysqld_exit(exit_code);
@@ -1948,6 +2059,19 @@ static void clean_up_mutexes()
mysql_cond_destroy(&COND_thread_count);
mysql_cond_destroy(&COND_thread_cache);
mysql_cond_destroy(&COND_flush_thread_cache);
+#ifdef WITH_WSREP
+ (void) mysql_mutex_destroy(&LOCK_wsrep_ready);
+ (void) mysql_cond_destroy(&COND_wsrep_ready);
+ (void) mysql_mutex_destroy(&LOCK_wsrep_sst);
+ (void) mysql_cond_destroy(&COND_wsrep_sst);
+ (void) mysql_mutex_destroy(&LOCK_wsrep_sst_init);
+ (void) mysql_cond_destroy(&COND_wsrep_sst_init);
+ (void) mysql_mutex_destroy(&LOCK_wsrep_rollback);
+ (void) mysql_cond_destroy(&COND_wsrep_rollback);
+ (void) mysql_mutex_destroy(&LOCK_wsrep_replaying);
+ (void) mysql_cond_destroy(&COND_wsrep_replaying);
+ (void) mysql_mutex_destroy(&LOCK_wsrep_slave_threads);
+#endif
mysql_mutex_destroy(&LOCK_server_started);
mysql_cond_destroy(&COND_server_started);
mysql_mutex_destroy(&LOCK_prepare_ordered);
@@ -2376,7 +2500,11 @@ static void network_init(void)
@note
For the connection that is doing shutdown, this is called twice
*/
+#ifdef WITH_WSREP
+void close_connection(THD *thd, uint sql_errno, bool lock)
+#else
void close_connection(THD *thd, uint sql_errno)
+#endif
{
DBUG_ENTER("close_connection");
@@ -3511,7 +3639,13 @@ static int init_common_variables()
}
else
opt_log_basename= glob_hostname;
-
+#ifdef WITH_WSREP
+ if (0 == wsrep_node_name || 0 == wsrep_node_name[0])
+ {
+ my_free((void *)wsrep_node_name);
+ wsrep_node_name= my_strdup(glob_hostname, MYF(MY_WME));
+ }
+#endif /* WITH_WSREP */
if (!*pidfile_name)
{
strmake(pidfile_name, opt_log_basename, sizeof(pidfile_name)-5);
@@ -3570,7 +3704,11 @@ static int init_common_variables()
compile_time_assert(sizeof(com_status_vars)/sizeof(com_status_vars[0]) - 1 ==
SQLCOM_END + 8);
#endif
-
+#ifdef WITH_WSREP
+ /* This is a protection against mutually incompatible option values. */
+ if (WSREP_ON && wsrep_check_opts (remaining_argc, remaining_argv))
+ return 1;
+#endif /* WITH_WSREP */
if (get_options(&remaining_argc, &remaining_argv))
return 1;
set_server_version();
@@ -3963,6 +4101,25 @@ static int init_thread_environment()
sql_print_error("Can't create thread-keys");
return 1;
}
+#ifdef WITH_WSREP
+ mysql_mutex_init(key_LOCK_wsrep_ready,
+ &LOCK_wsrep_ready, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_wsrep_ready, &COND_wsrep_ready, NULL);
+ mysql_mutex_init(key_LOCK_wsrep_sst,
+ &LOCK_wsrep_sst, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_wsrep_sst, &COND_wsrep_sst, NULL);
+ mysql_mutex_init(key_LOCK_wsrep_sst_init,
+ &LOCK_wsrep_sst_init, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_wsrep_sst_init, &COND_wsrep_sst_init, NULL);
+ mysql_mutex_init(key_LOCK_wsrep_rollback,
+ &LOCK_wsrep_rollback, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_wsrep_rollback, &COND_wsrep_rollback, NULL);
+ mysql_mutex_init(key_LOCK_wsrep_replaying,
+ &LOCK_wsrep_replaying, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_wsrep_replaying, &COND_wsrep_replaying, NULL);
+ mysql_mutex_init(key_LOCK_wsrep_slave_threads,
+ &LOCK_wsrep_slave_threads, MY_MUTEX_INIT_FAST);
+#endif
return 0;
}
@@ -4202,10 +4359,18 @@ static int init_server_components()
/* need to configure logging before initializing storage engines */
if (!opt_bin_log_used)
{
+#ifdef WITH_WSREP
+ if (!WSREP_ON && opt_log_slave_updates)
+#else
if (opt_log_slave_updates)
+#endif
sql_print_warning("You need to use --log-bin to make "
"--log-slave-updates work.");
+#ifdef WITH_WSREP
+ if (!WSREP_ON && binlog_format_used)
+#else
if (binlog_format_used)
+#endif
sql_print_warning("You need to use --log-bin to make "
"--binlog-format work.");
}
@@ -4231,6 +4396,39 @@ will be ignored as the --log-bin option is not defined.");
}
#endif
+#ifdef WITH_WSREP /* WSREP BEFORE SE */
+ if (!wsrep_recovery)
+ {
+ if (opt_bootstrap) // bootsrap option given - disable wsrep functionality
+ {
+ wsrep_provider_init(WSREP_NONE);
+ if (wsrep_init()) unireg_abort(1);
+ }
+ else // full wsrep initialization
+ {
+ // add basedir/bin to PATH to resolve wsrep script names
+ char* const tmp_path((char*)alloca(strlen(mysql_home) +
+ strlen("/bin") + 1));
+ if (tmp_path)
+ {
+ strcpy(tmp_path, mysql_home);
+ strcat(tmp_path, "/bin");
+ wsrep_prepend_PATH(tmp_path);
+ }
+ else
+ {
+ WSREP_ERROR("Could not append %s/bin to PATH", mysql_home);
+ }
+
+ if (wsrep_before_SE())
+ {
+ set_ports(); // this is also called in network_init() later but we need
+ // to know mysqld_port now - lp:1071882
+ wsrep_init_startup(true);
+ }
+ }
+ }
+#endif /* WITH_WSREP */
if (opt_bin_log)
{
/* Reports an error and aborts, if the --log-bin's path
@@ -4436,11 +4634,30 @@ a file name for --log-bin-index option", opt_binlog_index_name);
internal_tmp_table_max_key_segments= myisam_max_key_segments();
#endif
+#ifdef WITH_WSREP
+ if (!opt_bin_log)
+ {
+ wsrep_emulate_bin_log= 1;
+ }
+#endif
+
tc_log= (total_ha_2pc > 1 ? (opt_bin_log ?
(TC_LOG *) &mysql_bin_log :
+#ifdef WITH_WSREP
+ (WSREP_ON ?
+ (TC_LOG *) &tc_log_dummy :
+ (TC_LOG *) &tc_log_mmap)) :
+#else
(TC_LOG *) &tc_log_mmap) :
- (TC_LOG *) &tc_log_dummy);
-
+#endif
+ (TC_LOG *) &tc_log_dummy);
+#ifdef WITH_WSREP
+ WSREP_DEBUG("Initial TC log open: %s",
+ (tc_log == &mysql_bin_log) ? "binlog" :
+ (tc_log == &tc_log_mmap) ? "mmap" :
+ (tc_log == &tc_log_dummy) ? "dummy" : "unknown"
+ );
+#endif
if (tc_log->open(opt_bin_log ? opt_bin_logname : opt_tc_log_file))
{
sql_print_error("Can't init tc log");
@@ -4497,8 +4714,6 @@ a file name for --log-bin-index option", opt_binlog_index_name);
init_global_client_stats();
DBUG_RETURN(0);
}
-
-
#ifndef EMBEDDED_LIBRARY
static void create_shutdown_thread()
@@ -4517,6 +4732,482 @@ static void create_shutdown_thread()
#endif /* EMBEDDED_LIBRARY */
+#ifdef WITH_WSREP
+typedef void (*wsrep_thd_processor_fun)(THD *);
+
+pthread_handler_t start_wsrep_THD(void *arg)
+{
+ THD *thd;
+ wsrep_thd_processor_fun processor= (wsrep_thd_processor_fun)arg;
+
+ if (my_thread_init())
+ {
+ WSREP_ERROR("Could not initialize thread");
+ return(NULL);
+ }
+
+ if (!(thd= new THD(true)))
+ {
+ return(NULL);
+ }
+ mysql_mutex_lock(&LOCK_thread_count);
+ thd->thread_id=thread_id++;
+
+ thd->real_id=pthread_self(); // Keep purify happy
+ thread_count++;
+ thread_created++;
+ threads.append(thd);
+
+ my_net_init(&thd->net,(st_vio*) 0);
+
+ DBUG_PRINT("wsrep",(("creating thread %lld"), (long long)thd->thread_id));
+ thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer();
+ (void) mysql_mutex_unlock(&LOCK_thread_count);
+
+ /* from bootstrap()... */
+ thd->bootstrap=1;
+ thd->max_client_packet_length= thd->net.max_packet;
+ thd->security_ctx->master_access= ~(ulong)0;
+
+ /* from handle_one_connection... */
+ pthread_detach_this_thread();
+
+ mysql_thread_set_psi_id(thd->thread_id);
+ thd->thr_create_utime= microsecond_interval_timer();
+ if (MYSQL_CALLBACK_ELSE(thread_scheduler, init_new_connection_thread, (), 0))
+ {
+ close_connection(thd, ER_OUT_OF_RESOURCES, 1);
+ statistic_increment(aborted_connects,&LOCK_status);
+ MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0));
+
+ return(NULL);
+ }
+
+// </5.1.17>
+ /*
+ handle_one_connection() is normally the only way a thread would
+ start and would always be on the very high end of the stack ,
+ therefore, the thread stack always starts at the address of the
+ first local variable of handle_one_connection, which is thd. We
+ need to know the start of the stack so that we could check for
+ stack overruns.
+ */
+ DBUG_PRINT("wsrep", ("handle_one_connection called by thread %lld\n",
+ (long long)thd->thread_id));
+ /* now that we've called my_thread_init(), it is safe to call DBUG_* */
+
+ thd->thread_stack= (char*) &thd;
+ if (thd->store_globals())
+ {
+ close_connection(thd, ER_OUT_OF_RESOURCES, 1);
+ statistic_increment(aborted_connects,&LOCK_status);
+ MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0));
+ delete thd;
+
+ return(NULL);
+ }
+
+ thd->system_thread= SYSTEM_THREAD_SLAVE_SQL;
+ thd->security_ctx->skip_grants();
+
+ /* handle_one_connection() again... */
+ //thd->version= refresh_version;
+ thd->proc_info= 0;
+ thd->command= COM_SLEEP;
+ thd->set_time();
+ thd->init_for_queries();
+
+ mysql_mutex_lock(&LOCK_connection_count);
+ ++connection_count;
+ mysql_mutex_unlock(&LOCK_connection_count);
+
+ mysql_mutex_lock(&LOCK_thread_count);
+ wsrep_running_threads++;
+ mysql_cond_signal(&COND_thread_count);
+ mysql_mutex_unlock(&LOCK_thread_count);
+
+ processor(thd);
+
+ close_connection(thd, 0, 1);
+
+ mysql_mutex_lock(&LOCK_thread_count);
+ wsrep_running_threads--;
+ mysql_cond_signal(&COND_thread_count);
+ mysql_mutex_unlock(&LOCK_thread_count);
+
+ // Note: We can't call THD destructor without crashing
+ // if plugins have not been initialized. However, in most of the
+ // cases this means that pre SE initialization SST failed and
+ // we are going to exit anyway.
+ if (plugins_are_initialized)
+ {
+ net_end(&thd->net);
+ MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 1));
+ }
+ else
+ {
+ // TODO: lightweight cleanup to get rid of:
+ // 'Error in my_thread_global_end(): 2 threads didn't exit'
+ // at server shutdown
+ }
+ return(NULL);
+}
+
+void wsrep_create_rollbacker()
+{
+ if (wsrep_provider && strcasecmp(wsrep_provider, "none"))
+ {
+ pthread_t hThread;
+ /* create rollbacker */
+ if (pthread_create( &hThread, &connection_attrib,
+ start_wsrep_THD, (void*)wsrep_rollback_process))
+ WSREP_WARN("Can't create thread to manage wsrep rollback");
+ }
+}
+
+void wsrep_create_appliers(long threads)
+{
+ if (!wsrep_connected)
+ {
+ /* see wsrep_replication_start() for the logic */
+ if (wsrep_cluster_address && strlen(wsrep_cluster_address) &&
+ wsrep_provider && strcasecmp(wsrep_provider, "none"))
+ {
+ WSREP_ERROR("Trying to launch slave threads before creating "
+ "connection at '%s'", wsrep_cluster_address);
+ assert(0);
+ }
+ return;
+ }
+
+ long wsrep_threads=0;
+ pthread_t hThread;
+ while (wsrep_threads++ < threads) {
+ if (pthread_create(
+ &hThread, &connection_attrib,
+ start_wsrep_THD, (void*)wsrep_replication_process))
+ WSREP_WARN("Can't create thread to manage wsrep replication");
+ }
+}
+/**/
+static bool abort_replicated(THD *thd)
+{
+ bool ret_code= false;
+ if (thd->wsrep_query_state== QUERY_COMMITTING)
+ {
+ if (wsrep_debug) WSREP_INFO("aborting replicated trx: %lu", thd->real_id);
+
+ (void)wsrep_abort_thd(thd, thd, TRUE);
+ ret_code= true;
+ }
+ return ret_code;
+}
+/**/
+static inline bool is_client_connection(THD *thd)
+{
+#if REMOVE
+// REMOVE THIS LATER (lp:777201). Below we had to add an explicit check for
+// wsrep_applier since wsrep_exec_mode didn't seem to always work
+if (thd->wsrep_applier && thd->wsrep_exec_mode != REPL_RECV)
+WSREP_WARN("applier has wsrep_exec_mode = %d", thd->wsrep_exec_mode);
+
+ if ( thd->slave_thread || /* declared as mysql slave */
+ thd->system_thread || /* declared as system thread */
+ !thd->vio_ok() || /* server internal thread */
+ thd->wsrep_exec_mode==REPL_RECV || /* applier or replaying thread */
+ thd->wsrep_applier || /* wsrep slave applier */
+ !thd->variables.wsrep_on) /* client, but fenced outside wsrep */
+ return false;
+
+ return true;
+#else
+ return (thd->wsrep_client_thread && thd->variables.wsrep_on);
+#endif /* REMOVE */
+}
+
+static inline bool is_replaying_connection(THD *thd)
+{
+ bool ret;
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ ret= (thd->wsrep_conflict_state == REPLAYING) ? true : false;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ return ret;
+}
+
+static inline bool is_committing_connection(THD *thd)
+{
+ bool ret;
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ ret= (thd->wsrep_query_state == QUERY_COMMITTING) ? true : false;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ return ret;
+}
+
+static bool have_client_connections()
+{
+ THD *tmp;
+
+ I_List_iterator<THD> it(threads);
+ while ((tmp=it++))
+ {
+ DBUG_PRINT("quit",("Informing thread %ld that it's time to die",
+ tmp->thread_id));
+ if (is_client_connection(tmp) && tmp->killed == KILL_CONNECTION)
+ {
+ (void)abort_replicated(tmp);
+ return true;
+ }
+ }
+ return false;
+}
+
+/*
+ returns the number of wsrep appliers running.
+ However, the caller (thd parameter) is not taken in account
+ */
+static int have_wsrep_appliers(THD *thd)
+{
+ int ret= 0;
+ THD *tmp;
+
+ I_List_iterator<THD> it(threads);
+ while ((tmp=it++))
+ {
+ ret+= (tmp != thd && tmp->wsrep_applier);
+ }
+ return ret;
+}
+
+static void wsrep_close_thread(THD *thd)
+{
+ thd->killed= KILL_CONNECTION;
+ MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (thd));
+ if (thd->mysys_var)
+ {
+ thd->mysys_var->abort=1;
+ mysql_mutex_lock(&thd->mysys_var->mutex);
+ if (thd->mysys_var->current_cond)
+ {
+ mysql_mutex_lock(thd->mysys_var->current_mutex);
+ mysql_cond_broadcast(thd->mysys_var->current_cond);
+ mysql_mutex_unlock(thd->mysys_var->current_mutex);
+ }
+ mysql_mutex_unlock(&thd->mysys_var->mutex);
+ }
+}
+
+static my_bool have_committing_connections()
+{
+ THD *tmp;
+ mysql_mutex_lock(&LOCK_thread_count); // For unlink from list
+
+ I_List_iterator<THD> it(threads);
+ while ((tmp=it++))
+ {
+ if (!is_client_connection(tmp))
+ continue;
+
+ if (is_committing_connection(tmp))
+ {
+ mysql_mutex_unlock(&LOCK_thread_count);
+ return TRUE;
+ }
+ }
+ mysql_mutex_unlock(&LOCK_thread_count);
+ return FALSE;
+}
+
+int wsrep_wait_committing_connections_close(int wait_time)
+{
+ int sleep_time= 100;
+
+ while (have_committing_connections() && wait_time > 0)
+ {
+ WSREP_DEBUG("wait for committing transaction to close: %d", wait_time);
+ my_sleep(sleep_time);
+ wait_time -= sleep_time;
+ }
+ if (have_committing_connections())
+ {
+ return 1;
+ }
+ return 0;
+}
+
+void wsrep_close_client_connections(my_bool wait_to_end)
+{
+ /*
+ First signal all threads that it's time to die
+ */
+
+ THD *tmp;
+ mysql_mutex_lock(&LOCK_thread_count); // For unlink from list
+
+ bool kill_cached_threads_saved= kill_cached_threads;
+ kill_cached_threads= true; // prevent future threads caching
+ mysql_cond_broadcast(&COND_thread_cache); // tell cached threads to die
+
+ I_List_iterator<THD> it(threads);
+ while ((tmp=it++))
+ {
+ DBUG_PRINT("quit",("Informing thread %ld that it's time to die",
+ tmp->thread_id));
+ /* We skip slave threads & scheduler on this first loop through. */
+ if (!is_client_connection(tmp))
+ continue;
+
+ if (is_replaying_connection(tmp))
+ {
+ tmp->killed= KILL_CONNECTION;
+ continue;
+ }
+
+ /* replicated transactions must be skipped */
+ if (abort_replicated(tmp))
+ continue;
+
+ WSREP_DEBUG("closing connection %ld", tmp->thread_id);
+ wsrep_close_thread(tmp);
+ }
+ mysql_mutex_unlock(&LOCK_thread_count);
+
+ if (thread_count)
+ sleep(2); // Give threads time to die
+
+ mysql_mutex_lock(&LOCK_thread_count);
+ /*
+ Force remaining threads to die by closing the connection to the client
+ */
+
+ I_List_iterator<THD> it2(threads);
+ while ((tmp=it2++))
+ {
+#ifndef __bsdi__ // Bug in BSDI kernel
+ if (is_client_connection(tmp) &&
+ !abort_replicated(tmp) &&
+ !is_replaying_connection(tmp))
+ {
+ WSREP_INFO("killing local connection: %ld",tmp->thread_id);
+ close_connection(tmp,0,0);
+ }
+#endif
+ }
+
+ DBUG_PRINT("quit",("Waiting for threads to die (count=%u)",thread_count));
+ if (wsrep_debug)
+ WSREP_INFO("waiting for client connections to close: %u", thread_count);
+
+ while (wait_to_end && have_client_connections())
+ {
+ mysql_cond_wait(&COND_thread_count, &LOCK_thread_count);
+ DBUG_PRINT("quit",("One thread died (count=%u)", thread_count));
+ }
+
+ kill_cached_threads= kill_cached_threads_saved;
+
+ mysql_mutex_unlock(&LOCK_thread_count);
+
+ /* All client connection threads have now been aborted */
+}
+
+void wsrep_close_applier(THD *thd)
+{
+ WSREP_DEBUG("closing applier %ld", thd->thread_id);
+ wsrep_close_thread(thd);
+}
+
+static void wsrep_close_threads(THD *thd)
+{
+ THD *tmp;
+ mysql_mutex_lock(&LOCK_thread_count); // For unlink from list
+
+ I_List_iterator<THD> it(threads);
+ while ((tmp=it++))
+ {
+ DBUG_PRINT("quit",("Informing thread %ld that it's time to die",
+ tmp->thread_id));
+ /* We skip slave threads & scheduler on this first loop through. */
+ if (tmp->wsrep_applier && tmp != thd)
+ {
+ WSREP_DEBUG("closing wsrep thread %ld", tmp->thread_id);
+ wsrep_close_thread (tmp);
+ }
+ }
+
+ mysql_mutex_unlock(&LOCK_thread_count);
+}
+
+void wsrep_close_applier_threads(int count)
+{
+ THD *tmp;
+ mysql_mutex_lock(&LOCK_thread_count); // For unlink from list
+
+ I_List_iterator<THD> it(threads);
+ while ((tmp=it++) && count)
+ {
+ DBUG_PRINT("quit",("Informing thread %ld that it's time to die",
+ tmp->thread_id));
+ /* We skip slave threads & scheduler on this first loop through. */
+ if (tmp->wsrep_applier)
+ {
+ WSREP_DEBUG("closing wsrep applier thread %ld", tmp->thread_id);
+ tmp->wsrep_applier_closing= TRUE;
+ count--;
+ }
+ }
+
+ mysql_mutex_unlock(&LOCK_thread_count);
+}
+
+void wsrep_wait_appliers_close(THD *thd)
+{
+ /* Wait for wsrep appliers to gracefully exit */
+ mysql_mutex_lock(&LOCK_thread_count);
+ while (have_wsrep_appliers(thd) > 1)
+ // 1 is for rollbacker thread which needs to be killed explicitly.
+ // This gotta be fixed in a more elegant manner if we gonna have arbitrary
+ // number of non-applier wsrep threads.
+ {
+ mysql_cond_wait(&COND_thread_count,&LOCK_thread_count);
+ DBUG_PRINT("quit",("One applier died (count=%u)",thread_count));
+ }
+ mysql_mutex_unlock(&LOCK_thread_count);
+ /* Now kill remaining wsrep threads: rollbacker */
+ wsrep_close_threads (thd);
+ /* and wait for them to die */
+ mysql_mutex_lock(&LOCK_thread_count);
+ while (have_wsrep_appliers(thd) > 0)
+ {
+ mysql_cond_wait(&COND_thread_count,&LOCK_thread_count);
+ DBUG_PRINT("quit",("One thread died (count=%u)",thread_count));
+ }
+ mysql_mutex_unlock(&LOCK_thread_count);
+
+ /* All wsrep applier threads have now been aborted. However, if this thread
+ is also applier, we are still running...
+ */
+}
+
+void wsrep_kill_mysql(THD *thd)
+{
+ if (mysqld_server_started)
+ {
+ if (!shutdown_in_progress)
+ {
+ WSREP_INFO("starting shutdown");
+ kill_mysql();
+ }
+ }
+ else
+ {
+ unireg_abort(1);
+ }
+}
+#endif /* WITH_WSREP */
#if (defined(_WIN32) || defined(HAVE_SMEM)) && !defined(EMBEDDED_LIBRARY)
static void handle_connections_methods()
@@ -4945,6 +5636,38 @@ int mysqld_main(int argc, char **argv)
if (Events::init(opt_noacl || opt_bootstrap))
unireg_abort(1);
+#ifdef WITH_WSREP /* WSREP AFTER SE */
+ if (wsrep_recovery)
+ {
+ select_thread_in_use= 0;
+ wsrep_recover();
+ unireg_abort(0);
+ }
+
+ if (opt_bootstrap)
+ {
+ /*! bootstrap wsrep init was taken care of above */
+ }
+ else
+ {
+ wsrep_SE_initialized();
+
+ if (wsrep_before_SE())
+ {
+ /*! in case of no SST wsrep waits in view handler callback */
+ wsrep_SE_init_grab();
+ wsrep_SE_init_done();
+ /*! in case of SST wsrep waits for wsrep->sst_received */
+ wsrep_sst_continue();
+ }
+ else
+ {
+ wsrep_init_startup (false);
+ }
+
+ wsrep_create_appliers(wsrep_slave_threads - 1);
+ }
+#endif /* WITH_WSREP */
if (opt_bootstrap)
{
select_thread_in_use= 0; // Allow 'kill' to work
@@ -5002,6 +5725,9 @@ int mysqld_main(int argc, char **argv)
#ifdef EXTRA_DEBUG2
sql_print_error("Before Lock_thread_count");
#endif
+#ifdef WITH_WSREP
+ WSREP_DEBUG("Before Lock_thread_count");
+#endif
mysql_mutex_lock(&LOCK_thread_count);
DBUG_PRINT("quit", ("Got thread_count mutex"));
select_thread_in_use=0; // For close_connections
@@ -5268,6 +5994,9 @@ static void bootstrap(MYSQL_FILE *file)
DBUG_ENTER("bootstrap");
THD *thd= new THD;
+#ifdef WITH_WSREP
+ thd->variables.wsrep_on= 0;
+#endif
thd->bootstrap=1;
my_net_init(&thd->net,(st_vio*) 0);
thd->max_client_packet_length= thd->net.max_packet;
@@ -5392,7 +6121,11 @@ void create_thread_to_handle_connection(THD *thd)
my_snprintf(error_message_buff, sizeof(error_message_buff),
ER_THD(thd, ER_CANT_CREATE_THREAD), error);
net_send_error(thd, ER_CANT_CREATE_THREAD, error_message_buff, NULL);
+#ifdef WITH_WSREP
+ close_connection(thd, ER_OUT_OF_RESOURCES ,0);
+#else
close_connection(thd, ER_OUT_OF_RESOURCES);
+#endif /* WITH_WSREP */
mysql_mutex_lock(&LOCK_thread_count);
delete thd;
mysql_mutex_unlock(&LOCK_thread_count);
@@ -5435,7 +6168,11 @@ static void create_new_thread(THD *thd)
mysql_mutex_unlock(&LOCK_connection_count);
DBUG_PRINT("error",("Too many connections"));
+ #ifdef WITH_WSREP
+ close_connection(thd, ER_CON_COUNT_ERROR, 1);
+#else
close_connection(thd, ER_CON_COUNT_ERROR);
+#endif /* WITH_WSREP */
statistic_increment(denied_connections, &LOCK_status);
delete thd;
DBUG_VOID_RETURN;
@@ -5818,7 +6555,11 @@ pthread_handler_t handle_connections_namedpipes(void *arg)
if (!(thd->net.vio= vio_new_win32pipe(hConnectedPipe)) ||
my_net_init(&thd->net, thd->net.vio))
{
+#ifdef WITH_WSREP
+ close_connection(thd, ER_OUT_OF_RESOURCES, 1);
+#else
close_connection(thd, ER_OUT_OF_RESOURCES);
+#endif
delete thd;
continue;
}
@@ -6013,7 +6754,11 @@ pthread_handler_t handle_connections_shared_memory(void *arg)
event_conn_closed)) ||
my_net_init(&thd->net, thd->net.vio))
{
+#ifdef WITH_WSREP
+ close_connection(thd, ER_OUT_OF_RESOURCES, 1);
+#else
close_connection(thd, ER_OUT_OF_RESOURCES);
+#endif
errmsg= 0;
goto errorconn;
}
@@ -7113,6 +7858,19 @@ SHOW_VAR status_vars[]= {
#ifdef ENABLED_PROFILING
{"Uptime_since_flush_status",(char*) &show_flushstatustime, SHOW_FUNC},
#endif
+#ifdef WITH_WSREP
+ {"wsrep_connected", (char*) &wsrep_connected, SHOW_BOOL},
+ {"wsrep_ready", (char*) &wsrep_ready, SHOW_BOOL},
+ {"wsrep_cluster_state_uuid", (char*) &wsrep_cluster_state_uuid,SHOW_CHAR_PTR},
+ {"wsrep_cluster_conf_id", (char*) &wsrep_cluster_conf_id, SHOW_LONGLONG},
+ {"wsrep_cluster_status", (char*) &wsrep_cluster_status, SHOW_CHAR_PTR},
+ {"wsrep_cluster_size", (char*) &wsrep_cluster_size, SHOW_LONG},
+ {"wsrep_local_index", (char*) &wsrep_local_index, SHOW_LONG},
+ {"wsrep_provider_name", (char*) &wsrep_provider_name, SHOW_CHAR_PTR},
+ {"wsrep_provider_version", (char*) &wsrep_provider_version, SHOW_CHAR_PTR},
+ {"wsrep_provider_vendor", (char*) &wsrep_provider_vendor, SHOW_CHAR_PTR},
+ {"wsrep", (char*) &wsrep_show_status, SHOW_FUNC},
+#endif
{NullS, NullS, SHOW_LONG}
};
@@ -7430,6 +8188,10 @@ static int mysql_init_variables(void)
tmpenv = DEFAULT_MYSQL_HOME;
(void) strmake(mysql_home, tmpenv, sizeof(mysql_home)-1);
#endif
+#ifdef WITH_WSREP
+ if (WSREP_ON && wsrep_init_vars())
+ return 1;
+#endif
return 0;
}
@@ -7707,6 +8469,14 @@ mysqld_get_one_option(int optid,
case OPT_LOWER_CASE_TABLE_NAMES:
lower_case_table_names_used= 1;
break;
+#ifdef WITH_WSREP
+ case OPT_WSREP_START_POSITION:
+ wsrep_start_position_init (argument);
+ break;
+ case OPT_WSREP_SST_AUTH:
+ wsrep_sst_auth_init (argument);
+ break;
+#endif
#if defined(ENABLED_DEBUG_SYNC)
case OPT_DEBUG_SYNC_TIMEOUT:
/*
diff --git a/sql/mysqld.h b/sql/mysqld.h
index 554c662e90f..02db7f9d959 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -53,7 +53,11 @@ typedef Bitmap<((MAX_INDEXES+7)/8*8)> key_map; /* Used for finding keys */
some places */
/* Function prototypes */
void kill_mysql(void);
+#ifdef WITH_WSREP
+void close_connection(THD *thd, uint sql_errno= 0, bool lock=1);
+#else
void close_connection(THD *thd, uint sql_errno= 0);
+#endif
void handle_connection_in_main_thread(THD *thd);
void create_thread_to_handle_connection(THD *thd);
void unlink_thd(THD *thd);
@@ -221,6 +225,10 @@ extern pthread_key(MEM_ROOT**,THR_MALLOC);
extern PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active,
key_LOCK_pool;
#endif /* HAVE_MMAP */
+#ifdef WITH_WSREP
+extern PSI_mutex_key key_LOCK_wsrep_thd;
+extern PSI_cond_key key_COND_wsrep_thd;
+#endif /* HAVE_MMAP */
#ifdef HAVE_OPENSSL
extern PSI_mutex_key key_LOCK_des_key_file;
@@ -410,6 +418,14 @@ enum options_mysqld
OPT_SSL_KEY,
OPT_UPDATE_LOG,
OPT_WANT_CORE,
+#ifdef WITH_WSREP
+ OPT_WSREP_PROVIDER,
+ OPT_WSREP_PROVIDER_OPTIONS,
+ OPT_WSREP_CLUSTER_ADDRESS,
+ OPT_WSREP_START_POSITION,
+ OPT_WSREP_SST_AUTH,
+ OPT_WSREP_RECOVER,
+#endif
OPT_which_is_always_the_last
};
#endif
@@ -558,4 +574,5 @@ extern uint internal_tmp_table_max_key_segments;
extern uint volatile global_disable_checkpoint;
extern my_bool opt_help;
+
#endif /* MYSQLD_INCLUDED */
diff --git a/sql/protocol.cc b/sql/protocol.cc
index 3af7dc88b88..afa20661928 100644
--- a/sql/protocol.cc
+++ b/sql/protocol.cc
@@ -485,6 +485,14 @@ static uchar *net_store_length_fast(uchar *packet, uint length)
void Protocol::end_statement()
{
+#ifdef WITH_WSREP
+ /*sanity check, can be removed before 1.0 release */
+ if (WSREP(thd) && thd->wsrep_conflict_state== REPLAYING)
+ {
+ WSREP_ERROR("attempting net_end_statement while replaying");
+ return;
+ }
+#endif
DBUG_ENTER("Protocol::end_statement");
DBUG_ASSERT(! thd->stmt_da->is_sent);
bool error= FALSE;
diff --git a/sql/set_var.h b/sql/set_var.h
index f36c0f8b88f..ec6d2c28e6a 100644
--- a/sql/set_var.h
+++ b/sql/set_var.h
@@ -235,6 +235,9 @@ public:
int check(THD *thd);
int update(THD *thd);
int light_check(THD *thd);
+#ifdef WITH_WSREP
+ int wsrep_store_variable(THD *thd);
+#endif
};
@@ -315,6 +318,9 @@ extern sys_var *Sys_autocommit_ptr;
CHARSET_INFO *get_old_charset_by_name(const char *old_name);
+#ifdef WITH_WSREP
+int sql_set_wsrep_variables(THD *thd, List<set_var_base> *var_list);
+#endif
int sys_var_init();
int sys_var_add_options(DYNAMIC_ARRAY *long_options, int parse_flags);
void sys_var_end(void);
diff --git a/sql/slave.cc b/sql/slave.cc
index 7b7fd12e267..7e082971d6c 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -53,6 +53,9 @@
// Create_file_log_event,
// Format_description_log_event
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif
#ifdef HAVE_REPLICATION
#include "rpl_tblmap.h"
@@ -2597,6 +2600,23 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli)
ev->thd = thd; // because up to this point, ev->thd == 0
int reason= ev->shall_skip(rli);
+#ifdef WITH_WSREP
+ if (WSREP_ON && (ev->get_type_code() == XID_EVENT ||
+ (ev->get_type_code() == QUERY_EVENT && thd->wsrep_mysql_replicated > 0 &&
+ (!strncasecmp(((Query_log_event*)ev)->query , "BEGIN", 5) ||
+ !strncasecmp(((Query_log_event*)ev)->query , "COMMIT", 6) ))))
+ {
+ if (++thd->wsrep_mysql_replicated < (int)wsrep_mysql_replication_bundle)
+ {
+ WSREP_DEBUG("skipping wsrep commit %d", thd->wsrep_mysql_replicated);
+ reason = Log_event::EVENT_SKIP_IGNORE;
+ }
+ else
+ {
+ thd->wsrep_mysql_replicated = 0;
+ }
+ }
+#endif
if (reason == Log_event::EVENT_SKIP_COUNT)
sql_slave_skip_counter= --rli->slave_skip_counter;
mysql_mutex_unlock(&rli->data_lock);
@@ -3545,6 +3565,11 @@ pthread_handler_t handle_slave_sql(void *arg)
#endif
DBUG_ASSERT(rli->sql_thd == thd);
+#ifdef WITH_WSREP
+ thd->wsrep_exec_mode= LOCAL_STATE;
+ /* synchronize with wsrep replication */
+ if (WSREP_ON) wsrep_ready_wait();
+#endif
DBUG_PRINT("master_info",("log_file_name: %s position: %s",
rli->group_master_log_name,
llstr(rli->group_master_log_pos,llbuff)));
diff --git a/sql/sp.cc b/sql/sp.cc
index 93cd64b4104..b3cbfa7865e 100644
--- a/sql/sp.cc
+++ b/sql/sp.cc
@@ -2278,3 +2278,37 @@ sp_load_for_information_schema(THD *thd, TABLE *proc_table, String *db,
thd->lex= old_lex;
return sp;
}
+#ifdef WITH_WSREP
+int wsrep_create_sp(THD *thd, uchar** buf, uint* buf_len)
+{
+ String log_query;
+ sp_head *sp = thd->lex->sphead;
+ ulong saved_mode= thd->variables.sql_mode;
+ String retstr(64);
+ retstr.set_charset(system_charset_info);
+
+ log_query.set_charset(system_charset_info);
+
+ if (sp->m_type == TYPE_ENUM_FUNCTION)
+ {
+ sp_returns_type(thd, retstr, sp);
+ }
+
+ if (!create_string(thd, &log_query,
+ sp->m_type,
+ (sp->m_explicit_name ? sp->m_db.str : NULL),
+ (sp->m_explicit_name ? sp->m_db.length : 0),
+ sp->m_name.str, sp->m_name.length,
+ sp->m_params.str, sp->m_params.length,
+ retstr.c_ptr(), retstr.length(),
+ sp->m_body.str, sp->m_body.length,
+ sp->m_chistics, &(thd->lex->definer->user),
+ &(thd->lex->definer->host),
+ saved_mode))
+ {
+ WSREP_WARN("SP create string failed: %s", thd->query());
+ return 1;
+ }
+ return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len);
+}
+#endif /* WITH_WSREP */
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index b8e7d891e05..b35ab76f3d2 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -1836,6 +1836,9 @@ int check_change_password(THD *thd, const char *host, const char *user,
return(1);
}
if (!thd->slave_thread &&
+#ifdef WITH_WSREP
+ (!WSREP(thd) || !thd->wsrep_applier) &&
+#endif /* WITH_WSREP */
(strcmp(thd->security_ctx->user, user) ||
my_strcasecmp(system_charset_info, host,
thd->security_ctx->priv_host)))
@@ -1843,7 +1846,12 @@ int check_change_password(THD *thd, const char *host, const char *user,
if (check_access(thd, UPDATE_ACL, "mysql", NULL, NULL, 1, 0))
return(1);
}
+#ifdef WITH_WSREP
+ if ((!WSREP(thd) || !thd->wsrep_applier) &&
+ !thd->slave_thread && !thd->security_ctx->user[0])
+#else
if (!thd->slave_thread && !thd->security_ctx->user[0])
+#endif /* WITH_WSREP */
{
my_message(ER_PASSWORD_ANONYMOUS_USER, ER(ER_PASSWORD_ANONYMOUS_USER),
MYF(0));
@@ -1882,10 +1890,13 @@ bool change_password(THD *thd, const char *host, const char *user,
TABLE *table;
/* Buffer should be extended when password length is extended. */
char buff[512];
- ulong query_length;
+ ulong query_length=0;
bool save_binlog_row_based;
uint new_password_len= (uint) strlen(new_password);
bool result= 1;
+#ifdef WITH_WSREP
+ const CSET_STRING query_save = thd->query_string;
+#endif /* WITH_WSREP */
DBUG_ENTER("change_password");
DBUG_PRINT("enter",("host: '%s' user: '%s' new_password: '%s'",
host,user,new_password));
@@ -1893,6 +1904,18 @@ bool change_password(THD *thd, const char *host, const char *user,
if (check_change_password(thd, host, user, new_password, new_password_len))
DBUG_RETURN(1);
+#ifdef WITH_WSREP
+ if (WSREP(thd) && !thd->wsrep_applier)
+ {
+ query_length= sprintf(buff, "SET PASSWORD FOR '%-.120s'@'%-.120s'='%-.120s'",
+ user ? user : "",
+ host ? host : "",
+ new_password);
+ thd->set_query_inner(buff, query_length, system_charset_info);
+
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, (char*)"user", NULL);
+ }
+#endif /* WITH_WSREP */
tables.init_one_table("mysql", 5, "user", 4, "user", TL_WRITE);
@@ -1971,13 +1994,26 @@ bool change_password(THD *thd, const char *host, const char *user,
}
end:
close_mysql_tables(thd);
+#ifdef WITH_WSREP
+ if (WSREP(thd) && !thd->wsrep_applier)
+ {
+ WSREP_TO_ISOLATION_END;
+ thd->query_string = query_save;
+ thd->wsrep_exec_mode = LOCAL_STATE;
+ }
+#endif /* WITH_WSREP */
/* Restore the state of binlog format */
DBUG_ASSERT(!thd->is_current_stmt_binlog_format_row());
if (save_binlog_row_based)
thd->set_current_stmt_binlog_format_row();
DBUG_RETURN(result);
+#ifdef WITH_WSREP
+ error:
+ WSREP_ERROR("Replication of SET PASSWORD failed: %s", buff);
+ DBUG_RETURN(result);
+#endif /* WITH_WSREP */
}
diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc
index c6c02773286..00691633aa8 100644
--- a/sql/sql_alter.cc
+++ b/sql/sql_alter.cc
@@ -17,7 +17,9 @@
#include "sql_table.h" // mysql_alter_table,
// mysql_exchange_partition
#include "sql_alter.h"
-
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif /* WITH_WSREP */
bool Alter_table_statement::execute(THD *thd)
{
LEX *lex= thd->lex;
@@ -97,6 +99,20 @@ bool Alter_table_statement::execute(THD *thd)
thd->enable_slow_log= opt_log_slow_admin_statements;
+#ifdef WITH_WSREP
+ TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl);
+
+ if ((!thd->is_current_stmt_binlog_format_row() ||
+ !find_temporary_table(thd, first_table)) &&
+ wsrep_to_isolation_begin(thd,
+ lex->name.str ? select_lex->db : NULL,
+ lex->name.str ? lex->name.str : NULL,
+ first_table))
+ {
+ WSREP_WARN("ALTER TABLE isolation failure");
+ DBUG_RETURN(TRUE);
+ }
+#endif /* WITH_WSREP */
result= mysql_alter_table(thd, select_lex->db, lex->name.str,
&create_info,
first_table,
@@ -105,5 +121,7 @@ bool Alter_table_statement::execute(THD *thd)
select_lex->order_list.first,
lex->ignore, lex->online);
+#ifdef WITH_WSREP
+#endif /* WITH_WSREP */
DBUG_RETURN(result);
}
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index d46560e57ba..ac247e11e88 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -60,6 +60,10 @@
#include <io.h>
#endif
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+
+#endif // WITH_WSREP
bool
No_such_table_error_handler::handle_condition(THD *,
@@ -4218,7 +4222,7 @@ thr_lock_type read_lock_type_for_table(THD *thd,
*/
bool log_on= mysql_bin_log.is_open() && thd->variables.sql_log_bin;
ulong binlog_format= thd->variables.binlog_format;
- if ((log_on == FALSE) || (binlog_format == BINLOG_FORMAT_ROW) ||
+ if ((log_on == FALSE) || (WSREP_FORMAT(binlog_format) == BINLOG_FORMAT_ROW) ||
(table_list->table->s->table_category == TABLE_CATEGORY_LOG) ||
(table_list->table->s->table_category == TABLE_CATEGORY_PERFORMANCE) ||
!(is_update_query(prelocking_ctx->sql_command) ||
@@ -5074,9 +5078,32 @@ restart:
}
}
}
+#ifdef WITH_WSREP
+ if ((thd->lex->sql_command== SQLCOM_INSERT ||
+ thd->lex->sql_command== SQLCOM_INSERT_SELECT ||
+ thd->lex->sql_command== SQLCOM_REPLACE ||
+ thd->lex->sql_command== SQLCOM_REPLACE_SELECT ||
+ thd->lex->sql_command== SQLCOM_UPDATE ||
+ thd->lex->sql_command== SQLCOM_UPDATE_MULTI ||
+ thd->lex->sql_command== SQLCOM_LOAD ||
+ thd->lex->sql_command== SQLCOM_DELETE) &&
+ wsrep_replicate_myisam &&
+ (*start)->table && (*start)->table->file->ht->db_type == DB_TYPE_MYISAM)
+ {
+ WSREP_TO_ISOLATION_BEGIN(NULL, NULL, (*start));
+ }
+ error:
+#endif
err:
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ thd_proc_info(thd, "exit open_tables()");
+ else
+ thd_proc_info(thd, 0);
+#else /* WITH_WSREP */
thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
free_root(&new_frm_mem, MYF(0)); // Free pre-alloced block
if (error && *table_to_open)
@@ -5525,7 +5552,14 @@ end:
trans_rollback_stmt(thd);
close_thread_tables(thd);
}
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ thd_proc_info(thd, "End opening table");
+ else
+ thd_proc_info(thd, 0);
+#else /* WITH_WSREP */
thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
DBUG_RETURN(table);
}
@@ -5788,7 +5822,7 @@ bool lock_tables(THD *thd, TABLE_LIST *tables, uint count,
We can solve these problems in mixed mode by switching to binlogging
if at least one updated table is used by sub-statement
*/
- if (thd->variables.binlog_format != BINLOG_FORMAT_ROW && tables &&
+ if (WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_ROW && tables &&
has_write_table_with_auto_increment(thd->lex->first_not_own_table()))
thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS);
}
@@ -9243,7 +9277,19 @@ bool mysql_notify_thread_having_shared_lock(THD *thd, THD *in_use,
(e.g. see partitioning code).
*/
if (!thd_table->needs_reopen())
+#ifdef WITH_WSREP
+ {
+ signalled|= mysql_lock_abort_for_thread(thd, thd_table);
+ if (thd && WSREP(thd) && wsrep_thd_is_brute_force((void *)thd))
+ {
+ WSREP_DEBUG("remove_table_from_cache: %llu",
+ (unsigned long long) thd->real_id);
+ wsrep_abort_thd((void *)thd, (void *)in_use, FALSE);
+ }
+ }
+#else
signalled|= mysql_lock_abort_for_thread(thd, thd_table);
+#endif
}
mysql_mutex_unlock(&in_use->LOCK_thd_data);
}
diff --git a/sql/sql_builtin.cc.in b/sql/sql_builtin.cc.in
index 63850650ac9..2de475b0a76 100644
--- a/sql/sql_builtin.cc.in
+++ b/sql/sql_builtin.cc.in
@@ -25,7 +25,11 @@ extern
#endif
builtin_maria_plugin
@mysql_mandatory_plugins@ @mysql_optional_plugins@
- builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin;
+ builtin_maria_binlog_plugin,
+#ifdef WITH_WSREP
+ builtin_wsrep_plugin@mysql_plugin_defs@,
+#endif /* WITH_WSREP */
+ builtin_maria_mysql_password_plugin;
struct st_maria_plugin *mysql_optional_plugins[]=
{
@@ -35,5 +39,8 @@ struct st_maria_plugin *mysql_optional_plugins[]=
struct st_maria_plugin *mysql_mandatory_plugins[]=
{
builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin,
+#ifdef WITH_WSREP
+ builtin_wsrep_plugin@mysql_plugin_defs@,
+#endif /* WITH_WSREP */
@mysql_mandatory_plugins@ 0
};
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 81f021d9306..68f85ea579f 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -62,6 +62,9 @@
#include "debug_sync.h"
#include "sql_parse.h" // is_update_query
#include "sql_callback.h"
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif
#include "sql_connect.h"
/*
@@ -765,6 +768,172 @@ char *thd_security_context(THD *thd, char *buffer, unsigned int length,
return buffer;
}
+#ifdef WITH_WSREP
+extern "C" int wsrep_on(void *thd)
+{
+ return (int)(WSREP(((THD*)thd)));
+}
+extern "C" bool wsrep_thd_is_wsrep_on(THD *thd)
+{
+ return thd->variables.wsrep_on;
+}
+
+extern "C" bool wsrep_consistency_check(void *thd)
+{
+ return ((THD*)thd)->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING;
+}
+
+extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode)
+{
+ thd->wsrep_exec_mode= mode;
+}
+extern "C" void wsrep_thd_set_query_state(
+ THD *thd, enum wsrep_query_state state)
+{
+ thd->wsrep_query_state= state;
+}
+extern "C" void wsrep_thd_set_conflict_state(
+ THD *thd, enum wsrep_conflict_state state)
+{
+ thd->wsrep_conflict_state= state;
+}
+
+
+extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd)
+{
+ return thd->wsrep_exec_mode;
+}
+
+extern "C" const char *wsrep_thd_exec_mode_str(THD *thd)
+{
+ return
+ (!thd) ? "void" :
+ (thd->wsrep_exec_mode == LOCAL_STATE) ? "local" :
+ (thd->wsrep_exec_mode == REPL_RECV) ? "applier" :
+ (thd->wsrep_exec_mode == TOTAL_ORDER) ? "total order" :
+ (thd->wsrep_exec_mode == LOCAL_COMMIT) ? "local commit" : "void";
+}
+
+extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd)
+{
+ return thd->wsrep_query_state;
+}
+
+extern "C" const char *wsrep_thd_query_state_str(THD *thd)
+{
+ return
+ (!thd) ? "void" :
+ (thd->wsrep_query_state == QUERY_IDLE) ? "idle" :
+ (thd->wsrep_query_state == QUERY_EXEC) ? "executing" :
+ (thd->wsrep_query_state == QUERY_COMMITTING) ? "committing" :
+ (thd->wsrep_query_state == QUERY_EXITING) ? "exiting" :
+ (thd->wsrep_query_state == QUERY_ROLLINGBACK) ? "rolling back" : "void";
+}
+
+extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd)
+{
+ return thd->wsrep_conflict_state;
+}
+extern "C" const char *wsrep_thd_conflict_state_str(THD *thd)
+{
+ return
+ (!thd) ? "void" :
+ (thd->wsrep_conflict_state == NO_CONFLICT) ? "no conflict" :
+ (thd->wsrep_conflict_state == MUST_ABORT) ? "must abort" :
+ (thd->wsrep_conflict_state == ABORTING) ? "aborting" :
+ (thd->wsrep_conflict_state == MUST_REPLAY) ? "must replay" :
+ (thd->wsrep_conflict_state == REPLAYING) ? "replaying" :
+ (thd->wsrep_conflict_state == RETRY_AUTOCOMMIT) ? "retrying" :
+ (thd->wsrep_conflict_state == CERT_FAILURE) ? "cert failure" : "void";
+}
+
+extern "C" wsrep_trx_handle_t* wsrep_thd_trx_handle(THD *thd)
+{
+ return &thd->wsrep_trx_handle;
+}
+
+extern "C"void wsrep_thd_LOCK(THD *thd)
+{
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+}
+extern "C"void wsrep_thd_UNLOCK(THD *thd)
+{
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+}
+extern "C" time_t wsrep_thd_query_start(THD *thd)
+{
+ return thd->query_start();
+}
+extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd)
+{
+ return thd->wsrep_rand;
+}
+extern "C" my_thread_id wsrep_thd_thread_id(THD *thd)
+{
+ return thd->thread_id;
+}
+extern "C" wsrep_seqno_t wsrep_thd_trx_seqno(THD *thd)
+{
+ return (thd) ? thd->wsrep_trx_seqno : -1;
+}
+extern "C" query_id_t wsrep_thd_query_id(THD *thd)
+{
+ return thd->query_id;
+}
+extern "C" char *wsrep_thd_query(THD *thd)
+{
+ return (thd) ? thd->query() : NULL;
+}
+extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd)
+{
+ return thd->wsrep_last_query_id;
+}
+extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id)
+{
+ thd->wsrep_last_query_id= id;
+}
+extern "C" void wsrep_thd_awake(THD *thd, my_bool signal)
+{
+ if (signal)
+ {
+ mysql_mutex_lock(&thd->LOCK_thd_data);
+ thd->awake(KILL_QUERY);
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
+ }
+ else
+ {
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+ mysql_cond_broadcast(&COND_wsrep_replaying);
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+ }
+}
+
+extern "C" int
+wsrep_trx_order_before(void *thd1, void *thd2)
+{
+ if (((THD*)thd1)->wsrep_trx_seqno < ((THD*)thd2)->wsrep_trx_seqno) {
+ WSREP_DEBUG("BF conflict, order: %lld %lld\n",
+ (long long)((THD*)thd1)->wsrep_trx_seqno,
+ (long long)((THD*)thd2)->wsrep_trx_seqno);
+ return 1;
+ }
+ WSREP_DEBUG("waiting for BF, trx order: %lld %lld\n",
+ (long long)((THD*)thd1)->wsrep_trx_seqno,
+ (long long)((THD*)thd2)->wsrep_trx_seqno);
+ return 0;
+}
+extern "C" int
+wsrep_trx_is_aborting(void *thd_ptr)
+{
+ if (thd_ptr) {
+ if ((((THD *)thd_ptr)->wsrep_conflict_state == MUST_ABORT) ||
+ (((THD *)thd_ptr)->wsrep_conflict_state == ABORTING)) {
+ return 1;
+ }
+ }
+ return 0;
+}
+#endif
/**
Implementation of Drop_table_error_handler::handle_condition().
@@ -793,7 +962,11 @@ bool Drop_table_error_handler::handle_condition(THD *thd,
}
+#ifdef WITH_WSREP
+THD::THD(bool is_applier)
+#else
THD::THD()
+#endif
:Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION,
/* statement id */ 0),
rli_fake(0), rli_slave(NULL),
@@ -820,6 +993,11 @@ THD::THD()
bootstrap(0),
derived_tables_processing(FALSE),
spcont(NULL),
+#ifdef WITH_WSREP
+ wsrep_applier(is_applier),
+ wsrep_applier_closing(FALSE),
+ wsrep_client_thread(0),
+#endif
m_parser_state(NULL),
#if defined(ENABLED_DEBUG_SYNC)
debug_sync_control(0),
@@ -919,6 +1097,23 @@ THD::THD()
command=COM_CONNECT;
*scramble= '\0';
+#ifdef WITH_WSREP
+ mysql_mutex_init(key_LOCK_wsrep_thd, &LOCK_wsrep_thd, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_wsrep_thd, &COND_wsrep_thd, NULL);
+ wsrep_trx_handle.trx_id = -1;
+ wsrep_trx_handle.opaque = NULL;
+ //wsrep_retry_autocommit= ::wsrep_retry_autocommit;
+ wsrep_retry_counter = 0;
+ wsrep_PA_safe = true;
+ wsrep_seqno_changed = false;
+ wsrep_retry_query = NULL;
+ wsrep_retry_query_len = 0;
+ wsrep_retry_command = COM_CONNECT;
+ wsrep_consistency_check = NO_CONSISTENCY_CHECK;
+ wsrep_status_vars = 0;
+ wsrep_mysql_replicated = 0;
+
+#endif
/* Call to init() below requires fully initialized Open_tables_state. */
reset_open_tables_state(this);
@@ -951,6 +1146,13 @@ THD::THD()
my_rnd_init(&rand, tmp + (ulong) &rand, tmp + (ulong) ::global_query_id);
substitute_null_with_insert_id = FALSE;
thr_lock_info_init(&lock_info); /* safety: will be reset after start */
+#ifdef WITH_WSREP
+ lock_info.mysql_thd= (void *)this;
+ lock_info.in_lock_tables= false;
+#ifdef WSREP_PROC_INFO
+ wsrep_info[sizeof(wsrep_info) - 1] = '\0'; /* make sure it is 0-terminated */
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
m_internal_handler= NULL;
m_binlog_invoker= FALSE;
@@ -1252,7 +1454,20 @@ void THD::init(void)
reset_current_stmt_binlog_format_row();
bzero((char *) &status_var, sizeof(status_var));
bzero((char *) &org_status_var, sizeof(org_status_var));
-
+#ifdef WITH_WSREP
+ wsrep_exec_mode= wsrep_applier ? REPL_RECV : LOCAL_STATE;
+ wsrep_conflict_state= NO_CONFLICT;
+ wsrep_query_state= QUERY_IDLE;
+ wsrep_last_query_id= 0;
+ wsrep_trx_seqno= 0;
+ wsrep_converted_lock_session= false;
+ wsrep_retry_counter= 0;
+ wsrep_rli= NULL;
+ wsrep_PA_safe= true;
+ wsrep_seqno_changed= false;
+ wsrep_consistency_check = NO_CONSISTENCY_CHECK;
+ wsrep_mysql_replicated = 0;
+#endif
if (variables.sql_log_bin)
variables.option_bits|= OPTION_BIN_LOG;
else
@@ -1446,6 +1661,13 @@ THD::~THD()
mysql_mutex_unlock(&LOCK_thd_data);
add_to_status(&global_status_var, &status_var);
+#ifdef WITH_WSREP
+ mysql_mutex_lock(&LOCK_wsrep_thd);
+ mysql_mutex_unlock(&LOCK_wsrep_thd);
+ mysql_mutex_destroy(&LOCK_wsrep_thd);
+ if (wsrep_rli) delete wsrep_rli;
+ if (wsrep_status_vars) wsrep->stats_free(wsrep, wsrep_status_vars);
+#endif
/* Close connection */
#ifndef EMBEDDED_LIBRARY
if (net.vio)
@@ -1862,6 +2084,13 @@ void THD::cleanup_after_query()
/* reset table map for multi-table update */
table_map_for_update= 0;
m_binlog_invoker= FALSE;
+#ifdef WITH_WSREP
+ if (TOTAL_ORDER == wsrep_exec_mode)
+ {
+ wsrep_exec_mode = LOCAL_STATE;
+ }
+ //wsrep_trx_seqno = 0;
+#endif /* WITH_WSREP */
#ifndef EMBEDDED_LIBRARY
if (rli_slave)
@@ -2282,6 +2511,13 @@ bool sql_exchange::escaped_given(void)
bool select_send::send_result_set_metadata(List<Item> &list, uint flags)
{
bool res;
+#ifdef WITH_WSREP
+ if (WSREP(thd) && thd->wsrep_retry_query)
+ {
+ WSREP_DEBUG("skipping select metadata");
+ return FALSE;
+ }
+#endif /* WITH_WSREP */
if (!(res= thd->protocol->send_result_set_metadata(&list, flags)))
is_result_set_started= 1;
return res;
@@ -4020,8 +4256,13 @@ extern "C" int thd_non_transactional_update(const MYSQL_THD thd)
extern "C" int thd_binlog_format(const MYSQL_THD thd)
{
+#ifdef WITH_WSREP
+ if (((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()) &&
+ (thd->variables.option_bits & OPTION_BIN_LOG))
+#else
if (mysql_bin_log.is_open() && (thd->variables.option_bits & OPTION_BIN_LOG))
- return (int) thd->variables.binlog_format;
+#endif
+ return (int) WSREP_FORMAT(thd->variables.binlog_format);
else
return BINLOG_FORMAT_UNSPEC;
}
@@ -4576,7 +4817,7 @@ int THD::decide_logging_format(TABLE_LIST *tables)
binlog by filtering rules.
*/
if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) &&
- !(variables.binlog_format == BINLOG_FORMAT_STMT &&
+ !(WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT &&
!binlog_filter->db_ok(db)))
{
/*
@@ -4740,7 +4981,7 @@ int THD::decide_logging_format(TABLE_LIST *tables)
*/
my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0));
}
- else if (variables.binlog_format == BINLOG_FORMAT_ROW &&
+ else if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_ROW &&
sqlcom_can_generate_row_events(this))
{
/*
@@ -4769,7 +5010,7 @@ int THD::decide_logging_format(TABLE_LIST *tables)
else
{
/* binlog_format = STATEMENT */
- if (variables.binlog_format == BINLOG_FORMAT_STMT)
+ if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT)
{
if (lex->is_stmt_row_injection())
{
@@ -4786,7 +5027,14 @@ int THD::decide_logging_format(TABLE_LIST *tables)
5. Error: Cannot modify table that uses a storage engine
limited to row-logging when binlog_format = STATEMENT
*/
+#ifdef WITH_WSREP
+ if (!WSREP(this) || wsrep_exec_mode == LOCAL_STATE)
+ {
+#endif /* WITH_WSREP */
my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), "");
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
}
else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0)
{
@@ -4834,7 +5082,7 @@ int THD::decide_logging_format(TABLE_LIST *tables)
"and binlog_filter->db_ok(db) = %d",
mysql_bin_log.is_open(),
(variables.option_bits & OPTION_BIN_LOG),
- variables.binlog_format,
+ WSREP_FORMAT(variables.binlog_format),
binlog_filter->db_ok(db)));
#endif
@@ -5088,7 +5336,13 @@ int THD::binlog_write_row(TABLE* table, bool is_trans,
MY_BITMAP const* cols, size_t colcnt,
uchar const *record)
{
+#ifdef WITH_WSREP
+ DBUG_ASSERT(is_current_stmt_binlog_format_row() &&
+ ((WSREP(this) && wsrep_emulate_bin_log) ||
+ mysql_bin_log.is_open()));
+#else
DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
+#endif
/*
Pack records into format for transfer. We are allocating more
@@ -5118,7 +5372,13 @@ int THD::binlog_update_row(TABLE* table, bool is_trans,
const uchar *before_record,
const uchar *after_record)
{
+#ifdef WITH_WSREP
+ DBUG_ASSERT(is_current_stmt_binlog_format_row() &&
+ ((WSREP(this) && wsrep_emulate_bin_log)
+ || mysql_bin_log.is_open()));
+#else
DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
+#endif
size_t const before_maxlen = max_row_length(table, before_record);
size_t const after_maxlen = max_row_length(table, after_record);
@@ -5163,7 +5423,13 @@ int THD::binlog_delete_row(TABLE* table, bool is_trans,
MY_BITMAP const* cols, size_t colcnt,
uchar const *record)
{
+#ifdef WITH_WSREP
+ DBUG_ASSERT(is_current_stmt_binlog_format_row() &&
+ ((WSREP(this) && wsrep_emulate_bin_log)
+ || mysql_bin_log.is_open()));
+#else
DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
+#endif
/*
Pack records into format for transfer. We are allocating more
@@ -5194,7 +5460,11 @@ int THD::binlog_remove_pending_rows_event(bool clear_maps,
{
DBUG_ENTER("THD::binlog_remove_pending_rows_event");
+#ifdef WITH_WSREP
+ if (!(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open()))
+#else
if (!mysql_bin_log.is_open())
+#endif
DBUG_RETURN(0);
mysql_bin_log.remove_pending_rows_event(this, is_transactional);
@@ -5213,7 +5483,11 @@ int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional)
mode: it might be the case that we left row-based mode before
flushing anything (e.g., if we have explicitly locked tables).
*/
+#ifdef WITH_WSREP
+ if (!(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open()))
+#else
if (!mysql_bin_log.is_open())
+#endif
DBUG_RETURN(0);
/*
@@ -5461,8 +5735,12 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
DBUG_ENTER("THD::binlog_query");
DBUG_PRINT("enter", ("qtype: %s query: '%-.*s'",
show_query_type(qtype), (int) query_len, query_arg));
+#ifdef WITH_WSREP
+ DBUG_ASSERT(query_arg && (WSREP_EMULATE_BINLOG(this)
+ || mysql_bin_log.is_open()));
+#else
DBUG_ASSERT(query_arg && mysql_bin_log.is_open());
-
+#endif
/*
If we are not in prelocked mode, mysql_unlock_tables() will be
called after this binlog_query(), so we have to flush the pending
diff --git a/sql/sql_class.h b/sql/sql_class.h
index f50d1442a3f..451624e902c 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -45,6 +45,15 @@
THR_LOCK_INFO */
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+struct wsrep_thd_shadow {
+ ulonglong options;
+ enum wsrep_exec_mode wsrep_exec_mode;
+ Vio *vio;
+ ulong tx_isolation;
+};
+#endif
class Reprepare_observer;
class Relay_log_info;
@@ -573,6 +582,11 @@ typedef struct system_variables
ulong wt_timeout_short, wt_deadlock_search_depth_short;
ulong wt_timeout_long, wt_deadlock_search_depth_long;
+#ifdef WITH_WSREP
+ my_bool wsrep_on;
+ my_bool wsrep_causal_reads;
+ ulong wsrep_retry_autocommit;
+#endif
double long_query_time_double;
} SV;
@@ -968,6 +982,9 @@ struct st_savepoint {
/** State of metadata locks before this savepoint was set. */
MDL_savepoint mdl_savepoint;
};
+#ifdef WITH_WSREP
+void wsrep_cleanup_transaction(THD *thd); // THD.transactions.cleanup calls it
+#endif
enum xa_states {XA_NOTR=0, XA_ACTIVE, XA_IDLE, XA_PREPARED, XA_ROLLBACK_ONLY};
extern const char *xa_state_names[];
@@ -1786,7 +1803,7 @@ public:
int is_current_stmt_binlog_format_row() const {
DBUG_ASSERT(current_stmt_binlog_format == BINLOG_FORMAT_STMT ||
current_stmt_binlog_format == BINLOG_FORMAT_ROW);
- return current_stmt_binlog_format == BINLOG_FORMAT_ROW;
+ return (WSREP_FORMAT((ulong)current_stmt_binlog_format) == BINLOG_FORMAT_ROW);
}
private:
@@ -1844,7 +1861,11 @@ public:
*/
CHANGED_TABLE_LIST* changed_tables;
MEM_ROOT mem_root; // Transaction-life memory allocation pool
+#ifdef WITH_WSREP
+ void cleanup(THD *thd)
+#else
void cleanup()
+#endif
{
DBUG_ENTER("thd::cleanup");
changed_tables= 0;
@@ -1858,6 +1879,11 @@ public:
if (!xid_state.rm_error)
xid_state.xid.null();
free_root(&mem_root,MYF(MY_KEEP_PREALLOC));
+#ifdef WITH_WSREP
+ // Todo: convert into a plugin method
+ // wsrep's post-commit. LOCAL_COMMIT designates wsrep's commit was ok
+ if (WSREP(thd)) wsrep_cleanup_transaction(thd);
+#endif /* WITH_WSREP */
DBUG_VOID_RETURN;
}
my_bool is_active()
@@ -2321,6 +2347,35 @@ public:
query_id_t first_query_id;
} binlog_evt_union;
+#ifdef WITH_WSREP
+ const bool wsrep_applier; /* dedicated slave applier thread */
+ bool wsrep_applier_closing; /* applier marked to close */
+ bool wsrep_client_thread; /* to identify client threads*/
+ enum wsrep_exec_mode wsrep_exec_mode;
+ query_id_t wsrep_last_query_id;
+ enum wsrep_query_state wsrep_query_state;
+ enum wsrep_conflict_state wsrep_conflict_state;
+ mysql_mutex_t LOCK_wsrep_thd;
+ mysql_cond_t COND_wsrep_thd;
+ wsrep_seqno_t wsrep_trx_seqno;
+ uint32 wsrep_rand;
+ Relay_log_info* wsrep_rli;
+ bool wsrep_converted_lock_session;
+ wsrep_trx_handle_t wsrep_trx_handle;
+ bool wsrep_seqno_changed;
+#ifdef WSREP_PROC_INFO
+ char wsrep_info[128]; /* string for dynamic proc info */
+#endif /* WSREP_PROC_INFO */
+ ulong wsrep_retry_counter; // of autocommit
+ bool wsrep_PA_safe;
+ char* wsrep_retry_query;
+ size_t wsrep_retry_query_len;
+ enum enum_server_command wsrep_retry_command;
+ enum wsrep_consistency_check_mode
+ wsrep_consistency_check;
+ wsrep_stats_var* wsrep_status_vars;
+ int wsrep_mysql_replicated;
+#endif /* WITH_WSREP */
/**
Internal parser state.
Note that since the parser is not re-entrant, we keep only one parser
@@ -2352,7 +2407,11 @@ public:
/* Debug Sync facility. See debug_sync.cc. */
struct st_debug_sync_control *debug_sync_control;
#endif /* defined(ENABLED_DEBUG_SYNC) */
+#ifdef WITH_WSREP
+ THD(bool is_applier = false);
+#else
THD();
+#endif
~THD();
void init(void);
@@ -2748,7 +2807,7 @@ public:
tests fail and so force them to propagate the
lex->binlog_row_based_if_mixed upwards to the caller.
*/
- if ((variables.binlog_format == BINLOG_FORMAT_MIXED) &&
+ if ((WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_MIXED) &&
(in_sub_stmt == 0))
set_current_stmt_binlog_format_row();
@@ -2790,7 +2849,7 @@ public:
show_system_thread(system_thread)));
if (in_sub_stmt == 0)
{
- if (variables.binlog_format == BINLOG_FORMAT_ROW)
+ if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_ROW)
set_current_stmt_binlog_format_row();
else if (temporary_tables == NULL)
clear_current_stmt_binlog_format_row();
diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc
index 81a85f10a16..c0cdddeb561 100644
--- a/sql/sql_connect.cc
+++ b/sql/sql_connect.cc
@@ -44,6 +44,9 @@ HASH global_index_stats;
extern mysql_mutex_t LOCK_global_user_client_stats;
extern mysql_mutex_t LOCK_global_table_stats;
extern mysql_mutex_t LOCK_global_index_stats;
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif
/*
Get structure for logging connection data for the current user
@@ -977,7 +980,11 @@ bool setup_connection_thread_globals(THD *thd)
{
if (thd->store_globals())
{
+#ifdef WITH_WSREP
+ close_connection(thd, ER_OUT_OF_RESOURCES, 1);
+#else
close_connection(thd, ER_OUT_OF_RESOURCES);
+#endif
statistic_increment(aborted_connects,&LOCK_status);
MYSQL_CALLBACK(thd->scheduler, end_thread, (thd, 0));
return 1; // Error
@@ -1050,6 +1057,17 @@ bool login_connection(THD *thd)
void end_connection(THD *thd)
{
NET *net= &thd->net;
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ wsrep_status_t rcode= wsrep->free_connection(wsrep, thd->thread_id);
+ if (rcode) {
+ WSREP_WARN("wsrep failed to free connection context: %lu, code: %d",
+ thd->thread_id, rcode);
+ }
+ }
+ thd->wsrep_client_thread= 0;
+#endif
plugin_thdvar_cleanup(thd);
if (thd->user_connect)
@@ -1182,6 +1200,9 @@ bool thd_prepare_connection(THD *thd)
(char *) thd->security_ctx->host_or_ip);
prepare_new_connection_state(thd);
+#ifdef WITH_WSREP
+ thd->wsrep_client_thread= 1;
+#endif /* WITH_WSREP */
return FALSE;
}
@@ -1205,7 +1226,11 @@ void do_handle_one_connection(THD *thd_arg)
if (MYSQL_CALLBACK_ELSE(thd->scheduler, init_new_connection_thread, (), 0))
{
+#ifdef WITH_WSREP
+ close_connection(thd, ER_OUT_OF_RESOURCES, 1);
+#else
close_connection(thd, ER_OUT_OF_RESOURCES);
+#endif
statistic_increment(aborted_connects,&LOCK_status);
MYSQL_CALLBACK(thd->scheduler, end_thread, (thd, 0));
return;
@@ -1254,9 +1279,21 @@ void do_handle_one_connection(THD *thd_arg)
break;
}
end_connection(thd);
-
+
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_query_state= QUERY_EXITING;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+#endif
end_thread:
+#ifdef WITH_WSREP
+ close_connection(thd, 0, 1);
+#else
close_connection(thd);
+#endif
if (thd->userstat_running)
update_global_user_stats(thd, create_user, time(NULL));
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 3c245807b47..a3d68f54c9c 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -419,7 +419,11 @@ cleanup:
/* See similar binlogging code in sql_update.cc, for comments */
if ((error < 0) || thd->transaction.stmt.modified_non_trans_table)
{
+#ifdef WITH_WSREP
+ if ((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()))
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= 0;
if (error < 0)
@@ -870,7 +874,11 @@ void multi_delete::abort_result_set()
/*
there is only side effects; to binlog with the error
*/
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
/* possible error of writing binary log is ignored deliberately */
@@ -1046,7 +1054,11 @@ bool multi_delete::send_eof()
}
if ((local_error == 0) || thd->transaction.stmt.modified_non_trans_table)
{
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= 0;
if (local_error == 0)
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index 3c3b9f85727..4bf116eaebe 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -1049,7 +1049,11 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
thd->transaction.stmt.modified_non_trans_table ||
was_insert_delayed)
{
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= 0;
if (error <= 0)
@@ -3159,6 +3163,11 @@ bool Delayed_insert::handle_inserts(void)
mysql_cond_broadcast(&cond_client); // If waiting clients
}
}
+#ifdef WITH_WSREP
+ if (WSREP((&thd)))
+ thd_proc_info(&thd, "insert done");
+ else
+#endif /* WITH_WSREP */
thd_proc_info(&thd, 0);
mysql_mutex_unlock(&mutex);
@@ -3614,8 +3623,14 @@ bool select_insert::send_eof()
DBUG_PRINT("enter", ("trans_table=%d, table_type='%s'",
trans_table, table->file->table_type()));
+#ifdef WITH_WSREP
+ error= (thd->wsrep_conflict_state == MUST_ABORT) ? -1 :
+ (thd->locked_tables_mode <= LTM_LOCK_TABLES ?
+ table->file->ha_end_bulk_insert() : 0);
+#else
error= (thd->locked_tables_mode <= LTM_LOCK_TABLES ?
table->file->ha_end_bulk_insert() : 0);
+#endif /* WITH_WSREP */
if (!error && thd->is_error())
error= thd->stmt_da->sql_errno();
@@ -3643,8 +3658,13 @@ bool select_insert::send_eof()
events are in the transaction cache and will be written when
ha_autocommit_or_rollback() is issued below.
*/
+#ifdef WITH_WSREP
+ if ((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) &&
+ (!error || thd->transaction.stmt.modified_non_trans_table))
+#else
if (mysql_bin_log.is_open() &&
(!error || thd->transaction.stmt.modified_non_trans_table))
+#endif
{
int errcode= 0;
if (!error)
@@ -3727,7 +3747,11 @@ void select_insert::abort_result_set() {
if (!can_rollback_data())
thd->transaction.all.modified_non_trans_table= TRUE;
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
/* error of writing binary log is ignored */
@@ -4118,7 +4142,11 @@ select_create::binlog_show_create_table(TABLE **tables, uint count)
/* show_database */ TRUE);
DBUG_ASSERT(result == 0); /* store_create_info() always return 0 */
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif /* WITH_WSREP */
{
int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
result= thd->binlog_query(THD::STMT_QUERY_TYPE,
@@ -4128,6 +4156,9 @@ select_create::binlog_show_create_table(TABLE **tables, uint count)
/* suppress_use */ FALSE,
errcode);
}
+#ifdef WITH_WSREP
+ ha_wsrep_fake_trx_id(thd);
+#endif
return result;
}
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index 13622cb420a..c40bc0467ed 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -1553,6 +1553,17 @@ int lex_one_token(void *arg, void *yythd)
}
else
{
+#ifdef WITH_WSREP
+ if (version == 99997 && thd->wsrep_exec_mode == LOCAL_STATE)
+ {
+ WSREP_DEBUG("consistency check: %s", thd->query());
+ thd->wsrep_consistency_check= CONSISTENCY_CHECK_DECLARED;
+ lip->yySkipn(5);
+ lip->set_echo(TRUE);
+ state=MY_LEX_START;
+ break; /* Do not treat contents as a comment. */
+ }
+#endif /* WITH_WSREP */
/*
Patch and skip the conditional comment to avoid it
being propagated infinitely (eg. to a slave).
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 5dac052b749..4bf1183e9da 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -102,6 +102,16 @@
#include "../storage/maria/ha_maria.h"
#endif
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#include "rpl_rli.h"
+static void wsrep_client_rollback(THD *thd);
+
+extern Format_description_log_event *wsrep_format_desc;
+
+static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,
+ Parser_state *parser_state);
+#endif /* WITH_WSREP */
/**
@defgroup Runtime_Environment Runtime Environment
@{
@@ -435,6 +445,13 @@ bool is_log_table_write_query(enum enum_sql_command command)
return (sql_command_flags[command] & CF_WRITE_LOGS_COMMAND) != 0;
}
+#ifdef WITH_WSREP
+bool is_show_query(enum enum_sql_command command)
+{
+ DBUG_ASSERT(command >= 0 && command <= SQLCOM_END);
+ return (sql_command_flags[command] & CF_STATUS_COMMAND) != 0;
+}
+#endif
void execute_init_command(THD *thd, LEX_STRING *init_command,
mysql_rwlock_t *var_lock)
{
@@ -616,8 +633,12 @@ void do_handle_bootstrap(THD *thd)
if (my_thread_init() || thd->store_globals())
{
#ifndef EMBEDDED_LIBRARY
+#ifdef WITH_WSREP
+ close_connection(thd, ER_OUT_OF_RESOURCES, 1);
+#else
close_connection(thd, ER_OUT_OF_RESOURCES);
#endif
+#endif
thd->fatal_error();
goto end;
}
@@ -691,7 +712,18 @@ bool do_command(THD *thd)
NET *net= &thd->net;
enum enum_server_command command;
DBUG_ENTER("do_command");
-
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_query_state= QUERY_IDLE;
+ if (thd->wsrep_conflict_state==MUST_ABORT)
+ {
+ wsrep_client_rollback(thd);
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+#endif
/*
indicator of uninitialized lex => normal flow of errors handling
(see my_message_sql)
@@ -737,11 +769,48 @@ bool do_command(THD *thd)
*/
DEBUG_SYNC(thd, "before_do_command_net_read");
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ packet_length= my_net_read(net);
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ /* these THD's are aborted or are aborting during being idle */
+ if (thd->wsrep_conflict_state == ABORTING)
+ {
+ while (thd->wsrep_conflict_state == ABORTING) {
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ my_sleep(1000);
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ }
+ thd->store_globals();
+ }
+ else if (thd->wsrep_conflict_state == ABORTED)
+ {
+ thd->store_globals();
+ }
+
+ thd->wsrep_query_state= QUERY_EXEC;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+ if ((WSREP(thd) && packet_length == packet_error) ||
+ (!WSREP(thd) && (packet_length= my_net_read(net)) == packet_error))
+#else
if ((packet_length= my_net_read(net)) == packet_error)
+#endif
{
DBUG_PRINT("info",("Got error %d reading command from socket %s",
net->error,
vio_description(net->vio)));
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if (thd->wsrep_conflict_state == MUST_ABORT)
+ {
+ DBUG_PRINT("wsrep",("aborted for wsrep rollback: %lu", thd->real_id));
+ wsrep_client_rollback(thd);
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+#endif
/* Check if we can continue without closing the connection */
@@ -787,12 +856,54 @@ bool do_command(THD *thd)
vio_description(net->vio), command,
command_name[command].str));
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ /*
+ * bail out if DB snapshot has not been installed. We however,
+ * allow queries "SET" and "SHOW", they are trapped later in execute_command
+ */
+ if (thd->variables.wsrep_on && !thd->wsrep_applier && !wsrep_ready &&
+ command != COM_QUERY &&
+ command != COM_PING &&
+ command != COM_QUIT &&
+ command != COM_PROCESS_INFO &&
+ command != COM_PROCESS_KILL &&
+ command != COM_SET_OPTION &&
+ command != COM_SHUTDOWN &&
+ command != COM_SLEEP &&
+ command != COM_STATISTICS &&
+ command != COM_TIME &&
+ command != COM_END
+ ) {
+ my_error(ER_UNKNOWN_COM_ERROR, MYF(0),
+ "WSREP has not yet prepared node for application use");
+ thd->protocol->end_statement();
+ return_value= FALSE;
+ goto out;
+ }
+ }
+#endif
/* Restore read timeout value */
my_net_set_read_timeout(net, thd->variables.net_read_timeout);
DBUG_ASSERT(packet_length);
return_value= dispatch_command(command, thd, packet+1, (uint) (packet_length-1));
-
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT)
+ {
+ return_value= dispatch_command(command, thd, thd->wsrep_retry_query,
+ thd->wsrep_retry_query_len);
+ }
+ }
+ if (thd->wsrep_retry_query && thd->wsrep_conflict_state != REPLAYING)
+ {
+ my_free(thd->wsrep_retry_query);
+ thd->wsrep_retry_query = NULL;
+ thd->wsrep_retry_query_len = 0;
+ thd->wsrep_retry_command = COM_CONNECT;
+ }
+#endif
out:
DBUG_RETURN(return_value);
}
@@ -866,6 +977,33 @@ static my_bool deny_updates_if_read_only_option(THD *thd,
DBUG_RETURN(FALSE);
}
+#ifdef WITH_WSREP
+static my_bool wsrep_read_only_option(THD *thd, TABLE_LIST *all_tables)
+{
+ int opt_readonly_saved = opt_readonly;
+ ulong flag_saved = (ulong)(thd->security_ctx->master_access & SUPER_ACL);
+
+ opt_readonly = 0;
+ thd->security_ctx->master_access &= ~SUPER_ACL;
+
+ my_bool ret = !deny_updates_if_read_only_option(thd, all_tables);
+
+ opt_readonly = opt_readonly_saved;
+ thd->security_ctx->master_access |= flag_saved;
+
+ return ret;
+}
+
+static void wsrep_copy_query(THD *thd)
+{
+ thd->wsrep_retry_command = thd->command;
+ thd->wsrep_retry_query_len = thd->query_length();
+ thd->wsrep_retry_query = (char *)my_malloc(
+ thd->wsrep_retry_query_len + 1, MYF(0));
+ strncpy(thd->wsrep_retry_query, thd->query(), thd->wsrep_retry_query_len);
+ thd->wsrep_retry_query[thd->wsrep_retry_query_len] = '\0';
+}
+#endif /* WITH_WSREP */
/**
Perform one connection-level (COM_XXXX) command.
@@ -895,6 +1033,42 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
DBUG_ENTER("dispatch_command");
DBUG_PRINT("info", ("command: %d", command));
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ if (!thd->in_multi_stmt_transaction_mode())
+ {
+ thd->wsrep_PA_safe= true;
+ }
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_query_state= QUERY_EXEC;
+ if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT)
+ {
+ thd->wsrep_conflict_state= NO_CONFLICT;
+ }
+ if (thd->wsrep_conflict_state== MUST_ABORT)
+ {
+ wsrep_client_rollback(thd);
+ }
+ if (thd->wsrep_conflict_state== ABORTED)
+ {
+ my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction");
+ WSREP_DEBUG("Deadlock error for: %s", thd->query());
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ thd->killed = NOT_KILLED;
+ thd->mysys_var->abort = 0;
+ thd->wsrep_conflict_state = NO_CONFLICT;
+ thd->wsrep_retry_counter = 0;
+ /*
+ Increment threads running to compensate dec_thread_running() called
+ after dispatch_end label.
+ */
+ inc_thread_running();
+ goto dispatch_end;
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+#endif /* WITH_WSREP */
#if defined(ENABLED_PROFILING)
thd->profiling.start_new_query();
#endif
@@ -1052,7 +1226,11 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
if (parser_state.init(thd, thd->query(), thd->query_length()))
break;
+#ifdef WITH_WSREP
+ wsrep_mysql_parse(thd, thd->query(), thd->query_length(), &parser_state);
+#else
mysql_parse(thd, thd->query(), thd->query_length(), &parser_state);
+#endif
while (!thd->killed && (parser_state.m_lip.found_semicolon != NULL) &&
! thd->is_error())
@@ -1103,10 +1281,19 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
Count each statement from the client.
*/
statistic_increment(thd->status_var.questions, &LOCK_status);
+#ifdef WITH_WSREP
+ if (!WSREP(thd))
+ thd->set_time(); /* Reset the query start time. */
+#else
thd->set_time(); /* Reset the query start time. */
+#endif
parser_state.reset(beginning_of_next_stmt, length);
/* TODO: set thd->lex->sql_command to SQLCOM_END here */
+#ifdef WITH_WSREP
+ wsrep_mysql_parse(thd, beginning_of_next_stmt, length, &parser_state);
+#else
mysql_parse(thd, beginning_of_next_stmt, length, &parser_state);
+#endif
}
DBUG_PRINT("info",("query ready"));
@@ -1421,6 +1608,23 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
break;
}
+#ifdef WITH_WSREP
+ dispatch_end:
+
+ if (WSREP(thd)) {
+ /* wsrep BF abort in query exec phase */
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if ((thd->wsrep_conflict_state != REPLAYING) &&
+ (thd->wsrep_conflict_state != RETRY_AUTOCOMMIT))
+ {
+ thd->update_server_status();
+ thd->protocol->end_statement();
+ query_cache_end_of_result(thd);
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ } else { /* if (WSREP(thd))... */
+#endif /* WITH_WSREP */
DBUG_ASSERT(thd->derived_tables == NULL &&
(thd->open_tables == NULL ||
(thd->locked_tables_mode == LTM_LOCK_TABLES)));
@@ -1430,6 +1634,9 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
thd->update_server_status();
thd->protocol->end_statement();
query_cache_end_of_result(thd);
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
if (!thd->is_error() && !thd->killed_errno())
mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_RESULT, 0, 0);
@@ -1446,7 +1653,16 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
thd->reset_query();
thd->command=COM_SLEEP;
dec_thread_running();
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ thd_proc_info(thd, "sleeping");
+ } else {
+#endif /* WITH_WSREP */
thd_proc_info(thd, 0);
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
+
thd->packet.shrink(thd->variables.net_buffer_length); // Reclaim some memory
free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC));
@@ -2088,7 +2304,66 @@ mysql_execute_command(THD *thd)
#ifdef HAVE_REPLICATION
} /* endif unlikely slave */
#endif
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ /*
+ change LOCK TABLE WRITE to transaction
+ */
+ if (lex->sql_command== SQLCOM_LOCK_TABLES && wsrep_convert_LOCK_to_trx)
+ {
+ for (TABLE_LIST *table= all_tables; table; table= table->next_global)
+ {
+ if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
+ {
+ lex->sql_command= SQLCOM_BEGIN;
+ thd->wsrep_converted_lock_session= true;
+ break;
+ }
+ }
+ }
+ if (lex->sql_command== SQLCOM_UNLOCK_TABLES &&
+ thd->wsrep_converted_lock_session)
+ {
+ thd->wsrep_converted_lock_session= false;
+ lex->sql_command= SQLCOM_COMMIT;
+ lex->tx_release= TVL_NO;
+ }
+ /*
+ * bail out if DB snapshot has not been installed. We however,
+ * allow SET and SHOW queries
+ */
+ if (thd->variables.wsrep_on && !thd->wsrep_applier && !wsrep_ready &&
+ lex->sql_command != SQLCOM_SET_OPTION &&
+ !is_show_query(lex->sql_command))
+ {
+#if DIRTY_HACK
+ /* Dirty hack for lp:1002714 - trying to recognize mysqldump connection
+ * and allow it to continue. Actuall mysqldump_magic_str may be longer
+ * and is obviously version dependent and may be issued by any client
+ * connection after which connection becomes non-replicating. */
+ static char const mysqldump_magic_str[]=
+"SELECT LOGFILE_GROUP_NAME, FILE_NAME, TOTAL_EXTENTS, INITIAL_SIZE, ENGINE, EXTRA FROM INFORMATION_SCHEMA.FILES WHERE FILE_TYPE = 'UNDO LOG' AND FILE_NAME IS NOT NULL";
+ static const size_t mysqldump_magic_str_len= sizeof(mysqldump_magic_str) -1;
+ if (SQLCOM_SELECT != lex->sql_command ||
+ thd->query_length() < mysqldump_magic_str_len ||
+ strncmp(thd->query(), mysqldump_magic_str, mysqldump_magic_str_len))
+ {
+#endif /* DIRTY_HACK */
+ my_error(ER_UNKNOWN_COM_ERROR, MYF(0),
+ "WSREP has not yet prepared node for application use");
+ goto error;
+#if DIRTY_HACK
+ }
+ else
+ {
+ /* mysqldump connection, allow all further queries to pass */
+ thd->variables.wsrep_on= FALSE;
+ }
+#endif /* DIRTY_HACK */
+ }
+ }
+#endif /* WITH_WSREP */
status_var_increment(thd->status_var.com_stat[lex->sql_command]);
thd->progress.report_to_client= test(sql_command_flags[lex->sql_command] &
CF_REPORT_PROGRESS);
@@ -2131,6 +2406,9 @@ mysql_execute_command(THD *thd)
#endif
case SQLCOM_SHOW_STATUS_PROC:
case SQLCOM_SHOW_STATUS_FUNC:
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_causal_wait(thd)) goto error;
+#endif /* WITH_WSREP */
if ((res= check_table_access(thd, SELECT_ACL, all_tables, FALSE,
UINT_MAX, FALSE)))
goto error;
@@ -2138,6 +2416,9 @@ mysql_execute_command(THD *thd)
break;
case SQLCOM_SHOW_STATUS:
{
+#ifdef WITH_WSREP
+ if (lex->sql_command == SQLCOM_SHOW_STATUS) wsrep_free_status(thd);
+#endif /* WITH_WSREP */
execute_show_status(thd, all_tables);
break;
}
@@ -2149,17 +2430,27 @@ mysql_execute_command(THD *thd)
case SQLCOM_SHOW_PLUGINS:
case SQLCOM_SHOW_FIELDS:
case SQLCOM_SHOW_KEYS:
+#ifndef WITH_WSREP
case SQLCOM_SHOW_VARIABLES:
case SQLCOM_SHOW_CHARSETS:
case SQLCOM_SHOW_COLLATIONS:
case SQLCOM_SHOW_STORAGE_ENGINES:
case SQLCOM_SHOW_PROFILE:
+#endif /* WITH_WSREP */
case SQLCOM_SHOW_CLIENT_STATS:
case SQLCOM_SHOW_USER_STATS:
case SQLCOM_SHOW_TABLE_STATS:
case SQLCOM_SHOW_INDEX_STATS:
case SQLCOM_SELECT:
- {
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_causal_wait(thd)) goto error;
+ case SQLCOM_SHOW_VARIABLES:
+ case SQLCOM_SHOW_CHARSETS:
+ case SQLCOM_SHOW_COLLATIONS:
+ case SQLCOM_SHOW_STORAGE_ENGINES:
+ case SQLCOM_SHOW_PROFILE:
+#endif /* WITH_WSREP */
+ {
thd->status_var.last_query_cost= 0.0;
/*
@@ -2469,7 +2760,7 @@ case SQLCOM_PREPARE:
*/
if (thd->query_name_consts &&
mysql_bin_log.is_open() &&
- thd->variables.binlog_format == BINLOG_FORMAT_STMT &&
+ WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_STMT &&
!mysql_bin_log.is_query_in_union(thd, thd->query_id))
{
List_iterator_fast<Item> it(select_lex->item_list);
@@ -2573,6 +2864,12 @@ case SQLCOM_PREPARE:
if (create_info.options & HA_LEX_CREATE_TMP_TABLE)
thd->variables.option_bits|= OPTION_KEEP_LOG;
/* regular create */
+#ifdef WITH_WSREP
+ if (!thd->is_current_stmt_binlog_format_row() ||
+ !(create_info.options & HA_LEX_CREATE_TMP_TABLE))
+ WSREP_TO_ISOLATION_BEGIN(create_table->db, create_table->table_name,
+ NULL)
+#endif /* WITH_WSREP */
if (create_info.options & HA_LEX_CREATE_TABLE_LIKE)
{
/* CREATE TABLE ... LIKE ... */
@@ -2614,6 +2911,7 @@ end_with_restore_list:
DBUG_ASSERT(first_table == all_tables && first_table != 0);
if (check_one_table_access(thd, INDEX_ACL, all_tables))
goto error; /* purecov: inspected */
+ WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL)
/*
Currently CREATE INDEX or DROP INDEX cause a full table rebuild
and thus classify as slow administrative statements just like
@@ -2671,6 +2969,7 @@ end_with_restore_list:
case SQLCOM_RENAME_TABLE:
{
+ WSREP_TO_ISOLATION_BEGIN(0, 0, first_table)
if (execute_rename_table(thd, first_table, all_tables))
goto error;
break;
@@ -2698,6 +2997,10 @@ end_with_restore_list:
goto error;
#else
{
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_causal_wait(thd)) goto error;
+#endif /* WITH_WSREP */
+
/*
Access check:
SHOW CREATE TABLE require any privileges on the table level (ie
@@ -2753,6 +3056,10 @@ end_with_restore_list:
case SQLCOM_CHECKSUM:
{
DBUG_ASSERT(first_table == all_tables && first_table != 0);
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_causal_wait(thd)) goto error;
+#endif /* WITH_WSREP */
+
if (check_table_access(thd, SELECT_ACL, all_tables,
FALSE, UINT_MAX, FALSE))
goto error; /* purecov: inspected */
@@ -2948,6 +3255,15 @@ end_with_restore_list:
DBUG_ASSERT(first_table == all_tables && first_table != 0);
if ((res= insert_precheck(thd, all_tables)))
break;
+#ifdef WITH_WSREP
+ if (lex->sql_command == SQLCOM_INSERT_SELECT &&
+ thd->wsrep_consistency_check == CONSISTENCY_CHECK_DECLARED)
+ {
+ thd->wsrep_consistency_check = CONSISTENCY_CHECK_RUNNING;
+ WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL);
+ }
+
+#endif
/*
INSERT...SELECT...ON DUPLICATE KEY UPDATE/REPLACE SELECT/
INSERT...IGNORE...SELECT can be unsafe, unless ORDER BY PRIMARY KEY
@@ -3111,6 +3427,17 @@ end_with_restore_list:
/* So that DROP TEMPORARY TABLE gets to binlog at commit/rollback */
thd->variables.option_bits|= OPTION_KEEP_LOG;
}
+#ifdef WITH_WSREP
+ for (TABLE_LIST *table= all_tables; table; table= table->next_global)
+ {
+ if (!thd->is_current_stmt_binlog_format_row() ||
+ !find_temporary_table(thd, table))
+ {
+ WSREP_TO_ISOLATION_BEGIN(NULL, NULL, all_tables);
+ break;
+ }
+ }
+#endif /* WITH_WSREP */
/* DDL and binlog write order are protected by metadata locks. */
res= mysql_rm_table(thd, first_table, lex->drop_if_exists,
lex->drop_temporary);
@@ -3154,7 +3481,6 @@ end_with_restore_list:
if (!mysql_change_db(thd, &db_str, FALSE))
my_ok(thd);
-
break;
}
@@ -3297,6 +3623,7 @@ end_with_restore_list:
#endif
if (check_access(thd, CREATE_ACL, lex->name.str, NULL, NULL, 1, 0))
break;
+ WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL, NULL)
res= mysql_create_db(thd,(lower_case_table_names == 2 ? alias :
lex->name.str), &create_info, 0);
break;
@@ -3326,6 +3653,7 @@ end_with_restore_list:
#endif
if (check_access(thd, DROP_ACL, lex->name.str, NULL, NULL, 1, 0))
break;
+ WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL, NULL)
res= mysql_rm_db(thd, lex->name.str, lex->drop_if_exists, 0);
break;
}
@@ -3354,6 +3682,7 @@ end_with_restore_list:
res= 1;
break;
}
+ WSREP_TO_ISOLATION_BEGIN(db->str, NULL, NULL)
res= mysql_upgrade_db(thd, db);
if (!res)
my_ok(thd);
@@ -3386,6 +3715,7 @@ end_with_restore_list:
#endif
if (check_access(thd, ALTER_ACL, db->str, NULL, NULL, 1, 0))
break;
+ WSREP_TO_ISOLATION_BEGIN(db->str, NULL, NULL)
res= mysql_alter_db(thd, db->str, &create_info);
break;
}
@@ -3418,6 +3748,7 @@ end_with_restore_list:
if (res)
break;
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
switch (lex->sql_command) {
case SQLCOM_CREATE_EVENT:
{
@@ -3452,6 +3783,7 @@ end_with_restore_list:
lex->spname->m_name);
break;
case SQLCOM_DROP_EVENT:
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!(res= Events::drop_event(thd,
lex->spname->m_db, lex->spname->m_name,
lex->drop_if_exists)))
@@ -3466,6 +3798,7 @@ end_with_restore_list:
if (check_access(thd, INSERT_ACL, "mysql", NULL, NULL, 1, 0))
break;
#ifdef HAVE_DLOPEN
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!(res = mysql_create_function(thd, &lex->udf)))
my_ok(thd);
#else
@@ -3480,6 +3813,7 @@ end_with_restore_list:
if (check_access(thd, INSERT_ACL, "mysql", NULL, NULL, 1, 1) &&
check_global_access(thd,CREATE_USER_ACL))
break;
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
/* Conditionally writes to binlog */
if (!(res= mysql_create_user(thd, lex->users_list)))
my_ok(thd);
@@ -3491,6 +3825,7 @@ end_with_restore_list:
check_global_access(thd,CREATE_USER_ACL))
break;
/* Conditionally writes to binlog */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!(res= mysql_drop_user(thd, lex->users_list)))
my_ok(thd);
break;
@@ -3501,6 +3836,7 @@ end_with_restore_list:
check_global_access(thd,CREATE_USER_ACL))
break;
/* Conditionally writes to binlog */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!(res= mysql_rename_user(thd, lex->users_list)))
my_ok(thd);
break;
@@ -3515,6 +3851,7 @@ end_with_restore_list:
thd->binlog_invoker();
/* Conditionally writes to binlog */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!(res = mysql_revoke_all(thd, lex->users_list)))
my_ok(thd);
break;
@@ -3581,6 +3918,7 @@ end_with_restore_list:
lex->type == TYPE_ENUM_PROCEDURE, 0))
goto error;
/* Conditionally writes to binlog */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_routine_grant(thd, all_tables,
lex->type == TYPE_ENUM_PROCEDURE,
lex->users_list, grants,
@@ -3594,6 +3932,7 @@ end_with_restore_list:
all_tables, FALSE, UINT_MAX, FALSE))
goto error;
/* Conditionally writes to binlog */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_table_grant(thd, all_tables, lex->users_list,
lex->columns, lex->grant,
lex->sql_command == SQLCOM_REVOKE);
@@ -3609,6 +3948,7 @@ end_with_restore_list:
}
else
{
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
/* Conditionally writes to binlog */
res = mysql_grant(thd, select_lex->db, lex->users_list, lex->grant,
lex->sql_command == SQLCOM_REVOKE,
@@ -3747,9 +4087,17 @@ end_with_restore_list:
able to open it (with SQLCOM_HA_OPEN) in the first place.
*/
unit->set_limit(select_lex);
+#ifdef WITH_WSREP
+ { char* tmp_info= NULL;
+ if (WSREP(thd)) tmp_info = (char *)thd_proc_info(thd, "mysql_ha_read()");
+#endif /* WITH_WSREP */
res= mysql_ha_read(thd, first_table, lex->ha_read_mode, lex->ident.str,
lex->insert_list, lex->ha_rkey_mode, select_lex->where,
unit->select_limit_cnt, unit->offset_limit_cnt);
+#ifdef WITH_WSREP
+ if (WSREP(thd)) thd_proc_info(thd, tmp_info);
+ }
+#endif /* WITH_WSREP */
break;
case SQLCOM_BEGIN:
@@ -3817,8 +4165,20 @@ end_with_restore_list:
/* Disconnect the current client connection. */
if (tx_release)
thd->killed= KILL_CONNECTION;
- my_ok(thd);
- break;
+ #ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ if (thd->wsrep_conflict_state == NO_CONFLICT ||
+ thd->wsrep_conflict_state == REPLAYING)
+ {
+ my_ok(thd);
+ }
+ } else {
+#endif /* WITH_WSREP */
+ my_ok(thd);
+ #ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
+ break;
}
case SQLCOM_RELEASE_SAVEPOINT:
if (trans_release_savepoint(thd, lex->ident))
@@ -3886,6 +4246,7 @@ end_with_restore_list:
if (sp_process_definer(thd))
goto create_sp_error;
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= (sp_result= sp_create_routine(thd, lex->sphead->m_type, lex->sphead));
switch (sp_result) {
case SP_OK: {
@@ -4097,6 +4458,7 @@ create_sp_error:
already puts on CREATE FUNCTION.
*/
/* Conditionally writes to binlog */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
sp_result= sp_update_routine(thd, type, lex->spname, &lex->sp_chistics);
switch (sp_result)
{
@@ -4168,6 +4530,7 @@ create_sp_error:
if (check_routine_access(thd, ALTER_PROC_ACL, db, name,
lex->sql_command == SQLCOM_DROP_PROCEDURE, 0))
goto error;
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
/* Conditionally writes to binlog */
sp_result= sp_drop_routine(thd, type, lex->spname);
@@ -4285,6 +4648,7 @@ create_sp_error:
Note: SQLCOM_CREATE_VIEW also handles 'ALTER VIEW' commands
as specified through the thd->lex->create_view_mode flag.
*/
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_create_view(thd, first_table, thd->lex->create_view_mode);
break;
}
@@ -4293,12 +4657,14 @@ create_sp_error:
if (check_table_access(thd, DROP_ACL, all_tables, FALSE, UINT_MAX, FALSE))
goto error;
/* Conditionally writes to binlog. */
+ WSREP_TO_ISOLATION_BEGIN(NULL, NULL, NULL)
res= mysql_drop_view(thd, first_table, thd->lex->drop_mode);
break;
}
case SQLCOM_CREATE_TRIGGER:
{
/* Conditionally writes to binlog. */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_create_or_drop_trigger(thd, all_tables, 1);
break;
@@ -4306,6 +4672,7 @@ create_sp_error:
case SQLCOM_DROP_TRIGGER:
{
/* Conditionally writes to binlog. */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_create_or_drop_trigger(thd, all_tables, 0);
break;
}
@@ -4356,11 +4723,13 @@ create_sp_error:
my_ok(thd);
break;
case SQLCOM_INSTALL_PLUGIN:
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (! (res= mysql_install_plugin(thd, &thd->lex->comment,
&thd->lex->ident)))
my_ok(thd);
break;
case SQLCOM_UNINSTALL_PLUGIN:
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (! (res= mysql_uninstall_plugin(thd, &thd->lex->comment,
&thd->lex->ident)))
my_ok(thd);
@@ -4517,6 +4886,9 @@ finish:
/* Free tables */
thd_proc_info(thd, "closing tables");
close_thread_tables(thd);
+#ifdef WITH_WSREP
+ thd->wsrep_consistency_check= NO_CONSISTENCY_CHECK;
+#endif /* WITH_WSREP */
thd_proc_info(thd, 0);
#ifndef DBUG_OFF
@@ -4553,6 +4925,7 @@ finish:
{
thd->mdl_context.release_statement_locks();
}
+ WSREP_TO_ISOLATION_END
DBUG_RETURN(res || thd->is_error());
}
@@ -5427,6 +5800,21 @@ void THD::reset_for_next_command(bool calculate_userstat)
thd->auto_inc_intervals_in_cur_stmt_for_binlog.empty();
thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0;
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ if (wsrep_auto_increment_control)
+ {
+ if (thd->variables.auto_increment_offset !=
+ global_system_variables.auto_increment_offset)
+ thd->variables.auto_increment_offset=
+ global_system_variables.auto_increment_offset;
+ if (thd->variables.auto_increment_increment !=
+ global_system_variables.auto_increment_increment)
+ thd->variables.auto_increment_increment=
+ global_system_variables.auto_increment_increment;
+ }
+ }
+#endif /* WITH_WSREP */
thd->query_start_used= 0;
thd->query_start_sec_part_used= 0;
thd->is_fatal_error= thd->time_zone_used= 0;
@@ -5636,6 +6024,165 @@ void mysql_init_multi_delete(LEX *lex)
lex->query_tables_last= &lex->query_tables;
}
+#ifdef WITH_WSREP
+static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,
+ Parser_state *parser_state)
+{
+ bool is_autocommit=
+ !thd->in_multi_stmt_transaction_mode() &&
+ thd->wsrep_conflict_state == NO_CONFLICT &&
+ !thd->wsrep_applier &&
+ wsrep_read_only_option(thd, thd->lex->query_tables);
+
+ do
+ {
+ if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT)
+ {
+ thd->wsrep_conflict_state= NO_CONFLICT;
+ }
+ mysql_parse(thd, rawbuf, length, parser_state);
+
+ if (WSREP(thd)) {
+ /* wsrep BF abort in query exec phase */
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if (thd->wsrep_conflict_state == MUST_ABORT) {
+ wsrep_client_rollback(thd);
+
+ WSREP_DEBUG("abort in exec query state, avoiding autocommit");
+ }
+
+ /* checking if BF trx must be replayed */
+ if (thd->wsrep_conflict_state== MUST_REPLAY) {
+ if (thd->wsrep_exec_mode!= REPL_RECV) {
+ if (thd->stmt_da->is_sent) {
+ WSREP_ERROR("replay issue, thd has reported status already");
+ }
+ thd->stmt_da->reset_diagnostics_area();
+
+ thd->wsrep_conflict_state= REPLAYING;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ mysql_reset_thd_for_next_command(thd, opt_userstat_running);
+ thd->killed= NOT_KILLED;
+ close_thread_tables(thd);
+ if (thd->locked_tables_mode && thd->lock)
+ {
+ WSREP_DEBUG("releasing table lock for replaying (%ld)",
+ thd->thread_id);
+ thd->locked_tables_list.unlock_locked_tables(thd);
+ thd->variables.option_bits&= ~(OPTION_TABLE_LOCK);
+ }
+ thd->mdl_context.release_transactional_locks();
+
+ thd_proc_info(thd, "wsrep replaying trx");
+ WSREP_DEBUG("replay trx: %s %lld",
+ thd->query() ? thd->query() : "void",
+ (long long)thd->wsrep_trx_seqno);
+ struct wsrep_thd_shadow shadow;
+ wsrep_prepare_bf_thd(thd, &shadow);
+ int rcode = wsrep->replay_trx(wsrep,
+ &thd->wsrep_trx_handle,
+ (void *)thd);
+
+ wsrep_return_from_bf_mode(thd, &shadow);
+ if (thd->wsrep_conflict_state!= REPLAYING)
+ WSREP_WARN("lost replaying mode: %d", thd->wsrep_conflict_state );
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+
+ switch (rcode) {
+ case WSREP_OK:
+ thd->wsrep_conflict_state= NO_CONFLICT;
+ wsrep->post_commit(wsrep, &thd->wsrep_trx_handle);
+ WSREP_DEBUG("trx_replay successful for: %ld %llu",
+ thd->thread_id, (long long)thd->real_id);
+ break;
+ case WSREP_TRX_FAIL:
+ if (thd->stmt_da->is_sent) {
+ WSREP_ERROR("replay failed, thd has reported status");
+ }
+ else
+ {
+ WSREP_DEBUG("replay failed, rolling back");
+ my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction");
+ }
+ thd->wsrep_conflict_state= ABORTED;
+ wsrep->post_rollback(wsrep, &thd->wsrep_trx_handle);
+ break;
+ default:
+ WSREP_ERROR("trx_replay failed for: %d, query: %s",
+ rcode, thd->query() ? thd->query() : "void");
+ /* we're now in inconsistent state, must abort */
+ unireg_abort(1);
+ break;
+ }
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+ wsrep_replaying--;
+ WSREP_DEBUG("replaying decreased: %d, thd: %lu",
+ wsrep_replaying, thd->thread_id);
+ mysql_cond_broadcast(&COND_wsrep_replaying);
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+ }
+ }
+
+ /* setting error code for BF aborted trxs */
+ if (thd->wsrep_conflict_state == ABORTED ||
+ thd->wsrep_conflict_state == CERT_FAILURE)
+ {
+ mysql_reset_thd_for_next_command(thd, opt_userstat_running);
+ thd->killed= NOT_KILLED;
+ if (is_autocommit &&
+ thd->lex->sql_command != SQLCOM_SELECT &&
+ (thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit))
+ {
+ WSREP_DEBUG("wsrep retrying AC query: %s",
+ (thd->query()) ? thd->query() : "void");
+
+ close_thread_tables(thd);
+
+ thd->wsrep_conflict_state= RETRY_AUTOCOMMIT;
+ thd->wsrep_retry_counter++; // grow
+ wsrep_copy_query(thd);
+ thd->set_time();
+ parser_state->reset(rawbuf, length);
+ }
+ else
+ {
+ WSREP_DEBUG("%s, thd: %lu is_AC: %d, retry: %lu - %lu SQL: %s",
+ (thd->wsrep_conflict_state == ABORTED) ?
+ "BF Aborted" : "cert failure",
+ thd->thread_id, is_autocommit, thd->wsrep_retry_counter,
+ thd->variables.wsrep_retry_autocommit, thd->query());
+ my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction");
+ thd->killed= NOT_KILLED;
+ thd->wsrep_conflict_state= NO_CONFLICT;
+ if (thd->wsrep_conflict_state != REPLAYING)
+ thd->wsrep_retry_counter= 0; // reset
+ }
+ }
+ else
+ {
+ set_if_smaller(thd->wsrep_retry_counter, 0); // reset; eventually ok
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+ } while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT);
+
+ if (thd->wsrep_retry_query)
+ {
+ WSREP_DEBUG("releasing retry_query: conf %d sent %d kill %d errno %d SQL %s",
+ thd->wsrep_conflict_state,
+ thd->stmt_da->is_sent,
+ thd->killed,
+ thd->stmt_da->is_error() ? thd->stmt_da->sql_errno() : 0,
+ thd->wsrep_retry_query);
+ my_free(thd->wsrep_retry_query);
+ thd->wsrep_retry_query = NULL;
+ thd->wsrep_retry_query_len = 0;
+ thd->wsrep_retry_command = COM_CONNECT;
+ }
+}
+#endif /* WITH_WSREP */
/*
When you modify mysql_parse(), you may need to mofify
@@ -7382,6 +7929,580 @@ LEX_USER *create_definer(THD *thd, LEX_STRING *user_name, LEX_STRING *host_name)
return definer;
}
+#ifdef WITH_WSREP
+/* must have (&thd->LOCK_wsrep_thd) */
+static void wsrep_client_rollback(THD *thd)
+{
+ WSREP_DEBUG("client rollback due to BF abort for (%ld), query: %s",
+ thd->thread_id, thd->query());
+
+ thd->wsrep_conflict_state= ABORTING;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ trans_rollback(thd);
+
+ if (thd->locked_tables_mode && thd->lock)
+ {
+ WSREP_DEBUG("unlocking tables for BF abort (%ld)", thd->thread_id);
+ thd->locked_tables_list.unlock_locked_tables(thd);
+ thd->variables.option_bits&= ~(OPTION_TABLE_LOCK);
+ }
+
+ if (thd->global_read_lock.is_acquired())
+ {
+ WSREP_DEBUG("unlocking GRL for BF abort (%ld)", thd->thread_id);
+ thd->global_read_lock.unlock_global_read_lock(thd);
+ }
+
+ /* Release transactional metadata locks. */
+ thd->mdl_context.release_transactional_locks();
+
+ if (thd->get_binlog_table_maps())
+ {
+ WSREP_DEBUG("clearing binlog table map for BF abort (%ld)", thd->thread_id);
+ thd->clear_binlog_table_maps();
+ }
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_conflict_state= ABORTED;
+}
+
+static enum wsrep_status wsrep_apply_sql(
+ THD *thd, const char *sql, size_t sql_len, time_t timeval, uint32 randseed)
+{
+ int error;
+ enum wsrep_status ret_code= WSREP_OK;
+
+ DBUG_ENTER("wsrep_bf_execute_cb");
+ thd->wsrep_exec_mode= REPL_RECV;
+ thd->net.vio= 0;
+ thd->start_time= timeval;
+ thd->wsrep_rand= randseed;
+
+ thd->variables.option_bits |= OPTION_NOT_AUTOCOMMIT;
+
+ DBUG_PRINT("wsrep", ("SQL: %s", sql));
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_query_state= QUERY_EXEC;
+ /* preserve replaying mode */
+ if (thd->wsrep_conflict_state!= REPLAYING)
+ thd->wsrep_conflict_state= NO_CONFLICT;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ if ((error= dispatch_command(COM_QUERY, thd, (char*)sql, sql_len))) {
+ WSREP_WARN("BF SQL apply failed: %d, %lld",
+ thd->wsrep_conflict_state, (long long)thd->wsrep_trx_seqno);
+ DBUG_RETURN(WSREP_FATAL);
+ }
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if (thd->wsrep_conflict_state!= NO_CONFLICT &&
+ thd->wsrep_conflict_state!= REPLAYING) {
+ ret_code= WSREP_FATAL;
+ WSREP_DEBUG("BF thd ending, with: %d, %lld",
+ thd->wsrep_conflict_state, (long long)thd->wsrep_trx_seqno);
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ assert(thd->wsrep_exec_mode== REPL_RECV);
+ DBUG_RETURN(ret_code);
+}
+
+void wsrep_write_rbr_buf(
+ THD *thd, const void* rbr_buf, size_t buf_len)
+{
+ char filename[PATH_MAX]= {0};
+ int len= snprintf(filename, PATH_MAX, "%s/GRA_%ld_%lld.log",
+ wsrep_data_home_dir, thd->thread_id,
+ (long long)thd->wsrep_trx_seqno);
+ if (len >= PATH_MAX)
+ {
+ WSREP_ERROR("RBR dump path too long: %d, skipping dump.", len);
+ return;
+ }
+
+ FILE *of= fopen(filename, "wb");
+ if (of)
+ {
+ fwrite (rbr_buf, buf_len, 1, of);
+ fclose(of);
+ }
+ else
+ {
+ WSREP_ERROR("Failed to open file '%s': %d (%s)",
+ filename, errno, strerror(errno));
+ }
+}
+
+static inline wsrep_status_t wsrep_apply_rbr(
+ THD *thd, const uchar *rbr_buf, size_t buf_len)
+{
+ char *buf= (char *)rbr_buf;
+ int rcode= 0;
+ int event= 1;
+
+ DBUG_ENTER("wsrep_apply_rbr");
+
+ if (thd->killed == KILL_CONNECTION)
+ {
+ WSREP_INFO("applier has been aborted, skipping apply_rbr: %lld",
+ (long long) thd->wsrep_trx_seqno);
+ DBUG_RETURN(WSREP_FATAL);
+ }
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_query_state= QUERY_EXEC;
+ if (thd->wsrep_conflict_state!= REPLAYING)
+ thd->wsrep_conflict_state= NO_CONFLICT;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ if (!buf_len) WSREP_DEBUG("empty rbr buffer to apply: %lld",
+ (long long) thd->wsrep_trx_seqno);
+
+ if ((rcode= trans_begin(thd)))
+ WSREP_WARN("begin for rbr apply failed: %lld, code: %d",
+ (long long) thd->wsrep_trx_seqno, rcode);
+
+ while(buf_len)
+ {
+ int exec_res;
+ int error = 0;
+ Log_event* ev= wsrep_read_log_event(&buf, &buf_len, wsrep_format_desc);
+
+ if (!ev)
+ {
+ WSREP_ERROR("applier could not read binlog event, seqno: %lld, len: %ld",
+ (long long)thd->wsrep_trx_seqno, buf_len);
+ rcode= 1;
+ goto error;
+ }
+ switch (ev->get_type_code()) {
+ case WRITE_ROWS_EVENT:
+ case UPDATE_ROWS_EVENT:
+ case DELETE_ROWS_EVENT:
+ DBUG_ASSERT(buf_len != 0 ||
+ ((Rows_log_event *) ev)->get_flags(Rows_log_event::STMT_END_F));
+ break;
+ default:
+ break;
+ }
+
+ thd->server_id = ev->server_id; // use the original server id for logging
+ thd->set_time(); // time the query
+ wsrep_xid_init(&thd->transaction.xid_state.xid,
+ wsrep_cluster_uuid(),
+ thd->wsrep_trx_seqno);
+ thd->lex->current_select= 0;
+ if (!ev->when)
+ ev->when = time(NULL);
+ ev->thd = thd;
+ exec_res = ev->apply_event(thd->wsrep_rli);
+ DBUG_PRINT("info", ("exec_event result: %d", exec_res));
+
+ if (exec_res)
+ {
+ WSREP_WARN("RBR event %d %s apply warning: %d, %lld",
+ event, ev->get_type_str(), exec_res, (long long) thd->wsrep_trx_seqno);
+ rcode= exec_res;
+ /* stop processing for the first error */
+ delete ev;
+ goto error;
+ }
+ event++;
+
+ if (thd->wsrep_conflict_state!= NO_CONFLICT &&
+ thd->wsrep_conflict_state!= REPLAYING)
+ WSREP_WARN("conflict state after RBR event applying: %d, %lld",
+ thd->wsrep_query_state, (long long)thd->wsrep_trx_seqno);
+
+ if (thd->wsrep_conflict_state == MUST_ABORT) {
+ WSREP_WARN("RBR event apply failed, rolling back: %lld",
+ (long long) thd->wsrep_trx_seqno);
+ trans_rollback(thd);
+ thd->locked_tables_list.unlock_locked_tables(thd);
+ /* Release transactional metadata locks. */
+ thd->mdl_context.release_transactional_locks();
+ thd->wsrep_conflict_state= NO_CONFLICT;
+ DBUG_RETURN(WSREP_FATAL);
+ }
+
+ if (ev->get_type_code() != TABLE_MAP_EVENT &&
+ ((Rows_log_event *) ev)->get_flags(Rows_log_event::STMT_END_F))
+ {
+ // TODO: combine with commit on higher level common for the query ws
+
+ thd->wsrep_rli->cleanup_context(thd, 0);
+
+ if (error == 0)
+ {
+ thd->clear_error();
+ }
+ else
+ WSREP_ERROR("Error in %s event: commit of row events failed: %lld",
+ ev->get_type_str(), (long long)thd->wsrep_trx_seqno);
+ }
+ delete ev;
+ }
+
+ error:
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_query_state= QUERY_IDLE;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ assert(thd->wsrep_exec_mode== REPL_RECV);
+
+ if (thd->killed == KILL_CONNECTION)
+ WSREP_INFO("applier aborted: %lld", (long long)thd->wsrep_trx_seqno);
+
+ if (rcode) DBUG_RETURN(WSREP_FATAL);
+ DBUG_RETURN(WSREP_OK);
+}
+
+wsrep_status_t wsrep_apply_cb(void* const ctx,
+ const void* const buf, size_t const buf_len,
+ wsrep_seqno_t const global_seqno)
+{
+ THD* const thd((THD*)ctx);
+
+ thd->wsrep_trx_seqno= global_seqno;
+
+#ifdef WSREP_PROC_INFO
+ snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+ "applying write set %lld: %p, %zu",
+ (long long)thd->wsrep_trx_seqno, buf, buf_len);
+ thd_proc_info(thd, thd->wsrep_info);
+#else
+ thd_proc_info(thd, "applying write set");
+#endif /* WSREP_PROC_INFO */
+
+ wsrep_status_t const rcode(wsrep_apply_rbr(thd, (const uchar*)buf, buf_len));
+
+#ifdef WSREP_PROC_INFO
+ snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+ "applied write set %lld", (long long)thd->wsrep_trx_seqno);
+ thd_proc_info(thd, thd->wsrep_info);
+#else
+ thd_proc_info(thd, "applied write set");
+#endif /* WSREP_PROC_INFO */
+
+ if (WSREP_OK != rcode) wsrep_write_rbr_buf(thd, buf, buf_len);
+
+ return rcode;
+}
+
+#if DELETE // this does not work in 5.5
+/* a common wrapper for end_trans() function - to put all necessary stuff */
+static inline wsrep_status_t
+wsrep_end_trans (THD* const thd, enum enum_mysql_completiontype const end)
+{
+ if (0 == end_trans(thd, end))
+ {
+ return WSREP_OK;
+ }
+ else
+ {
+ return WSREP_FATAL;
+ }
+}
+#endif
+
+wsrep_status_t wsrep_commit(THD* const thd, wsrep_seqno_t const global_seqno)
+{
+#ifdef WSREP_PROC_INFO
+ snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+ "committing %lld", (long long)thd->wsrep_trx_seqno);
+ thd_proc_info(thd, thd->wsrep_info);
+#else
+ thd_proc_info(thd, "committing");
+#endif /* WSREP_PROC_INFO */
+
+ wsrep_status_t const rcode(wsrep_apply_sql(thd, "COMMIT", 6, 0, 0));
+// wsrep_status_t const rcode(wsrep_end_trans (thd, COMMIT));
+
+#ifdef WSREP_PROC_INFO
+ snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+ "committed %lld", (long long)thd->wsrep_trx_seqno);
+ thd_proc_info(thd, thd->wsrep_info);
+#else
+ thd_proc_info(thd, "committed");
+#endif /* WSREP_PROC_INFO */
+
+ if (WSREP_OK == rcode)
+ {
+ // TODO: mark snapshot with global_seqno.
+ }
+
+ return rcode;
+}
+
+wsrep_status_t wsrep_rollback(THD* const thd, wsrep_seqno_t const global_seqno)
+{
+#ifdef WSREP_PROC_INFO
+ snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+ "rolling back %lld", (long long)thd->wsrep_trx_seqno);
+ thd_proc_info(thd, thd->wsrep_info);
+#else
+ thd_proc_info(thd, "rolling back");
+#endif /* WSREP_PROC_INFO */
+
+ wsrep_status_t const rcode(wsrep_apply_sql(thd, "ROLLBACK", 8, 0, 0));
+// wsrep_status_t const rcode(wsrep_end_trans (thd, ROLLBACK));
+
+#ifdef WSREP_PROC_INFO
+ snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+ "rolled back %lld", (long long)thd->wsrep_trx_seqno);
+ thd_proc_info(thd, thd->wsrep_info);
+#else
+ thd_proc_info(thd, "rolled back");
+#endif /* WSREP_PROC_INFO */
+
+ return rcode;
+}
+
+wsrep_status_t wsrep_commit_cb(void* const ctx,
+ wsrep_seqno_t const global_seqno,
+ bool const commit)
+{
+ THD* const thd((THD*)ctx);
+
+ assert(global_seqno == thd->wsrep_trx_seqno);
+
+ if (commit)
+ return wsrep_commit(thd, global_seqno);
+ else
+ return wsrep_rollback(thd, global_seqno);
+}
+
+Relay_log_info* wsrep_relay_log_init(const char* log_fname)
+{
+ Relay_log_info* rli= new Relay_log_info(false);
+
+ rli->no_storage= true;
+ if (!rli->relay_log.description_event_for_exec)
+ {
+ rli->relay_log.description_event_for_exec=
+ new Format_description_log_event(4);
+ }
+
+ rli->sql_thd= current_thd;
+ return rli;
+}
+
+void wsrep_prepare_bf_thd(THD *thd, struct wsrep_thd_shadow* shadow)
+{
+ shadow->options = thd->variables.option_bits;
+ shadow->wsrep_exec_mode = thd->wsrep_exec_mode;
+ shadow->vio = thd->net.vio;
+
+ if (opt_log_slave_updates)
+ thd->variables.option_bits|= OPTION_BIN_LOG;
+ else
+ thd->variables.option_bits&= ~(OPTION_BIN_LOG);
+
+ if (!thd->wsrep_rli) thd->wsrep_rli= wsrep_relay_log_init("wsrep_relay");
+
+ thd->wsrep_exec_mode= REPL_RECV;
+ thd->net.vio= 0;
+ thd->clear_error();
+
+ thd->variables.option_bits|= OPTION_NOT_AUTOCOMMIT;
+
+ shadow->tx_isolation = thd->variables.tx_isolation;
+ thd->variables.tx_isolation = ISO_READ_COMMITTED;
+ thd->tx_isolation = ISO_READ_COMMITTED;
+}
+
+void wsrep_return_from_bf_mode(THD *thd, struct wsrep_thd_shadow* shadow)
+{
+ thd->variables.option_bits = shadow->options;
+ thd->wsrep_exec_mode = shadow->wsrep_exec_mode;
+ thd->net.vio = shadow->vio;
+ thd->variables.tx_isolation = shadow->tx_isolation;
+}
+
+void wsrep_replication_process(THD *thd)
+{
+ int rcode;
+ DBUG_ENTER("wsrep_replication_process");
+
+ struct wsrep_thd_shadow shadow;
+ wsrep_prepare_bf_thd(thd, &shadow);
+
+ rcode = wsrep->recv(wsrep, (void *)thd);
+ DBUG_PRINT("wsrep",("wsrep_repl returned: %d", rcode));
+
+ WSREP_INFO("applier thread exiting (code:%d)", rcode);
+
+ switch (rcode) {
+ case WSREP_OK:
+ case WSREP_NOT_IMPLEMENTED:
+ case WSREP_CONN_FAIL:
+ /* provider does not support slave operations / disconnected from group,
+ * just close applier thread */
+ break;
+ case WSREP_NODE_FAIL:
+ /* data inconsistency => SST is needed */
+ /* Note: we cannot just blindly restart replication here,
+ * SST might require server restart if storage engines must be
+ * initialized after SST */
+ WSREP_ERROR("node consistency compromised, aborting");
+ wsrep_kill_mysql(thd);
+ break;
+ case WSREP_WARNING:
+ case WSREP_TRX_FAIL:
+ case WSREP_TRX_MISSING:
+ /* these suggests a bug in provider code */
+ WSREP_WARN("bad return from recv() call: %d", rcode);
+ /* fall through to node shutdown */
+ case WSREP_FATAL:
+ /* Cluster connectivity is lost.
+ *
+ * If applier was killed on purpose (KILL_CONNECTION), we
+ * avoid mysql shutdown. This is because the killer will then handle
+ * shutdown processing (or replication restarting)
+ */
+ if (thd->killed != KILL_CONNECTION)
+ {
+ wsrep_kill_mysql(thd);
+ }
+ break;
+ }
+
+ if (thd->killed != KILL_CONNECTION)
+ {
+ mysql_mutex_lock(&LOCK_thread_count);
+ wsrep_close_applier(thd);
+ mysql_cond_broadcast(&COND_thread_count);
+ mysql_mutex_unlock(&LOCK_thread_count);
+ }
+ wsrep_return_from_bf_mode(thd, &shadow);
+ DBUG_VOID_RETURN;
+}
+
+void wsrep_rollback_process(THD *thd)
+{
+ DBUG_ENTER("wsrep_rollback_process");
+
+ mysql_mutex_lock(&LOCK_wsrep_rollback);
+ wsrep_aborting_thd= NULL;
+
+ while (thd->killed == NOT_KILLED) {
+ thd_proc_info(thd, "wsrep aborter idle");
+ thd->mysys_var->current_mutex= &LOCK_wsrep_rollback;
+ thd->mysys_var->current_cond= &COND_wsrep_rollback;
+
+ mysql_cond_wait(&COND_wsrep_rollback,&LOCK_wsrep_rollback);
+
+ WSREP_DEBUG("WSREP rollback thread wakes for signal");
+
+ mysql_mutex_lock(&thd->mysys_var->mutex);
+ thd_proc_info(thd, "wsrep aborter active");
+ thd->mysys_var->current_mutex= 0;
+ thd->mysys_var->current_cond= 0;
+ mysql_mutex_unlock(&thd->mysys_var->mutex);
+
+ /* check for false alarms */
+ if (!wsrep_aborting_thd)
+ {
+ WSREP_DEBUG("WSREP rollback thread has empty abort queue");
+ }
+ /* process all entries in the queue */
+ while (wsrep_aborting_thd) {
+ THD *aborting;
+ wsrep_aborting_thd_t next = wsrep_aborting_thd->next;
+ aborting = wsrep_aborting_thd->aborting_thd;
+ my_free(wsrep_aborting_thd);
+ wsrep_aborting_thd= next;
+ /*
+ * must release mutex, appliers my want to add more
+ * aborting thds in our work queue, while we rollback
+ */
+ mysql_mutex_unlock(&LOCK_wsrep_rollback);
+
+ mysql_mutex_lock(&aborting->LOCK_wsrep_thd);
+ if (aborting->wsrep_conflict_state== ABORTED)
+ {
+ WSREP_DEBUG("WSREP, thd already aborted: %llu state: %d",
+ (long long)aborting->real_id,
+ aborting->wsrep_conflict_state);
+
+ mysql_mutex_unlock(&aborting->LOCK_wsrep_thd);
+ mysql_mutex_lock(&LOCK_wsrep_rollback);
+ continue;
+ }
+ aborting->wsrep_conflict_state= ABORTING;
+
+ mysql_mutex_unlock(&aborting->LOCK_wsrep_thd);
+
+ aborting->store_globals();
+
+ mysql_mutex_lock(&aborting->LOCK_wsrep_thd);
+ wsrep_client_rollback(aborting);
+ WSREP_DEBUG("WSREP rollbacker aborted thd: (%lu %llu)",
+ aborting->thread_id, (long long)aborting->real_id);
+ mysql_mutex_unlock(&aborting->LOCK_wsrep_thd);
+
+ mysql_mutex_lock(&LOCK_wsrep_rollback);
+ }
+ }
+
+ mysql_mutex_unlock(&LOCK_wsrep_rollback);
+ sql_print_information("WSREP: rollbacker thread exiting");
+
+ DBUG_PRINT("wsrep",("wsrep rollbacker thread exiting"));
+ DBUG_VOID_RETURN;
+}
+extern "C"
+int wsrep_thd_is_brute_force(void *thd_ptr)
+{
+ if (thd_ptr) {
+ switch (((THD *)thd_ptr)->wsrep_exec_mode) {
+ case LOCAL_STATE:
+ {
+ if (((THD *)thd_ptr)->wsrep_conflict_state== REPLAYING)
+ {
+ return 1;
+ }
+ return 0;
+ }
+ case REPL_RECV: return 1;
+ case TOTAL_ORDER: return 2;
+ case LOCAL_COMMIT: return 3;
+ }
+ }
+ return 0;
+}
+extern "C"
+int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal)
+{
+ THD *victim_thd = (THD *) victim_thd_ptr;
+ THD *bf_thd = (THD *) bf_thd_ptr;
+ DBUG_ENTER("wsrep_abort_thd");
+
+ if ( (WSREP(bf_thd) ||
+ ( (WSREP_ON || wsrep_OSU_method_options == WSREP_OSU_RSU) &&
+ bf_thd->wsrep_exec_mode == TOTAL_ORDER) ) &&
+ victim_thd)
+ {
+ WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ?
+ (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id);
+ ha_wsrep_abort_transaction(bf_thd, victim_thd, signal);
+ }
+ else
+ {
+ WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd);
+ }
+
+ DBUG_RETURN(1);
+}
+extern "C"
+int wsrep_thd_in_locking_session(void *thd_ptr)
+{
+ if (thd_ptr && ((THD *)thd_ptr)->in_lock_tables) {
+ return 1;
+ }
+ return 0;
+}
+#endif
/**
Retuns information about user or current user.
diff --git a/sql/sql_parse.h b/sql/sql_parse.h
index 4510ebe94e2..1303eba84b7 100644
--- a/sql/sql_parse.h
+++ b/sql/sql_parse.h
@@ -202,6 +202,22 @@ inline bool is_supported_parser_charset(CHARSET_INFO *cs)
{
return test(cs->mbminlen == 1);
}
+#ifdef WITH_WSREP
+
+#define WSREP_MYSQL_DB (char *)"mysql"
+#define WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_) \
+ if (WSREP(thd) && wsrep_to_isolation_begin(thd, db_, table_, table_list_)) goto error;
+
+#define WSREP_TO_ISOLATION_END \
+ if (WSREP(thd) || (thd && thd->wsrep_exec_mode==TOTAL_ORDER)) \
+ wsrep_to_isolation_end(thd);
+
+#else
+
+#define WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_)
+#define WSREP_TO_ISOLATION_END
+
+#endif /* WITH_WSREP */
#endif /* SQL_PARSE_INCLUDED */
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index dee48a28037..844ca9ea0e7 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -2970,11 +2970,17 @@ void plugin_thdvar_init(THD *thd)
thd->variables.dynamic_variables_size= 0;
thd->variables.dynamic_variables_ptr= 0;
+#ifdef WITH_WSREP
+ if (!WSREP(thd) || !thd->wsrep_applier) {
+#endif
mysql_mutex_lock(&LOCK_plugin);
thd->variables.table_plugin=
my_intern_plugin_lock(NULL, global_system_variables.table_plugin);
intern_plugin_unlock(NULL, old_table_plugin);
mysql_mutex_unlock(&LOCK_plugin);
+#ifdef WITH_WSREP
+ }
+#endif
DBUG_VOID_RETURN;
}
diff --git a/sql/sql_reload.cc b/sql/sql_reload.cc
index 914b9026014..ba9ef757de6 100644
--- a/sql/sql_reload.cc
+++ b/sql/sql_reload.cc
@@ -225,7 +225,18 @@ bool reload_acl_and_cache(THD *thd, unsigned long options,
}
if (options & REFRESH_CHECKPOINT)
disable_checkpoints(thd);
- }
+#ifdef WITH_WSREP
+ /*
+ We need to do it second time after wsrep appliers were blocked in
+ make_global_read_lock_block_commit(thd) above since they could have
+ modified the tables too.
+ */
+ if (WSREP(thd) &&
+ close_cached_tables(thd, tables, (options & REFRESH_FAST) ?
+ FALSE : TRUE, TRUE))
+ result= 1;
+#endif /* WITH_WSREP */
+ }
else
{
if (thd && thd->locked_tables_mode)
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 53ac103dda1..c24f758b11a 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -1400,7 +1400,14 @@ int stop_slave(THD* thd, Master_info* mi, bool net_report )
ER(ER_SLAVE_WAS_NOT_RUNNING));
}
unlock_slave_threads(mi);
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ thd_proc_info(thd, "exit stop_slave()");
+ else
+ thd_proc_info(thd, 0);
+#else /* WITH_WSREP */
thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
if (slave_errno)
{
@@ -1833,7 +1840,14 @@ bool change_master(THD* thd, Master_info* mi)
err:
unlock_slave_threads(mi);
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ thd_proc_info(thd, "exit change_master()");
+ else
+ thd_proc_info(thd, 0);
+#else /* WITH_WSREP */
thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
if (ret == FALSE)
my_ok(thd);
DBUG_RETURN(ret);
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 08a08aa1eb6..d585521fc99 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -59,6 +59,9 @@
#include "datadict.h" // dd_frm_type()
#include "keycaches.h"
+#if !defined(MYSQL_MAX_VARIABLE_VALUE_LEN)
+#define MYSQL_MAX_VARIABLE_VALUE_LEN 1024
+#endif // !defined(MYSQL_MAX_VARIABLE_VALUE_LEN)
#define STR_OR_NIL(S) ((S) ? (S) : "<nil>")
#ifdef WITH_PARTITION_STORAGE_ENGINE
@@ -8384,7 +8387,8 @@ ST_FIELD_INFO variables_fields_info[]=
{
{"VARIABLE_NAME", 64, MYSQL_TYPE_STRING, 0, 0, "Variable_name",
SKIP_OPEN_TABLE},
- {"VARIABLE_VALUE", 1024, MYSQL_TYPE_STRING, 0, 1, "Value", SKIP_OPEN_TABLE},
+ {"VARIABLE_VALUE", MYSQL_MAX_VARIABLE_VALUE_LEN, MYSQL_TYPE_STRING, 0, 1,
+ "Value", SKIP_OPEN_TABLE},
{0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
};
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 868e1215595..5eec93f3254 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -6247,12 +6247,18 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
error= 0;
break;
}
- if (error == HA_ERR_WRONG_COMMAND)
+#ifdef WITH_WSREP
+ bool do_log_write(true);
+#endif /* WITH_WSREP */
+ if (error == HA_ERR_WRONG_COMMAND)
{
error= 0;
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
table->alias.c_ptr());
+#ifdef WITH_WSREP
+ WSREP_DEBUG("ignoring DDL failure: %d %s", error, thd->query());
+#endif /* WITH_WSREP */
}
if (!error && (new_name != table_name || new_db != db))
@@ -6304,6 +6310,9 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
table->alias.c_ptr());
+#ifdef WITH_WSREP
+ WSREP_DEBUG("ignoring DDL failure: %d %s", error, thd->query());
+#endif /* WITH_WSREP */
}
if (!error)
diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc
index aff00f9fcf4..b0dd2183b62 100644
--- a/sql/sql_trigger.cc
+++ b/sql/sql_trigger.cc
@@ -2455,3 +2455,55 @@ bool load_table_name_for_trigger(THD *thd,
DBUG_RETURN(FALSE);
}
+#ifdef WITH_WSREP
+int wsrep_create_trigger_query(THD *thd, uchar** buf, uint* buf_len)
+{
+ LEX *lex= thd->lex;
+ String stmt_query;
+
+ LEX_STRING definer_user;
+ LEX_STRING definer_host;
+
+ if (!lex->definer)
+ {
+ if (!thd->slave_thread)
+ {
+ if (!(lex->definer= create_default_definer(thd)))
+ return 1;
+ }
+ }
+
+ if (lex->definer)
+ {
+ /* SUID trigger. */
+
+ definer_user= lex->definer->user;
+ definer_host= lex->definer->host;
+ }
+ else
+ {
+ /* non-SUID trigger. */
+
+ definer_user.str= 0;
+ definer_user.length= 0;
+
+ definer_host.str= 0;
+ definer_host.length= 0;
+ }
+
+ stmt_query.append(STRING_WITH_LEN("CREATE "));
+
+ append_definer(thd, &stmt_query, &definer_user, &definer_host);
+
+ LEX_STRING stmt_definition;
+ stmt_definition.str= (char*) thd->lex->stmt_definition_begin;
+ stmt_definition.length= thd->lex->stmt_definition_end
+ - thd->lex->stmt_definition_begin;
+ trim_whitespace(thd->charset(), & stmt_definition);
+
+ stmt_query.append(stmt_definition.str, stmt_definition.length);
+
+ return wsrep_to_buf_helper(thd, stmt_query.c_ptr(), stmt_query.length(),
+ buf, buf_len);
+}
+#endif /* WITH_WSREP */
diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc
index 9d4ca5e1373..12ebe7e636a 100644
--- a/sql/sql_truncate.cc
+++ b/sql/sql_truncate.cc
@@ -24,6 +24,9 @@
#include "sql_acl.h" // DROP_ACL
#include "sql_parse.h" // check_one_table_access()
#include "sql_truncate.h"
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif /* WITH_WSREP */
#include "sql_show.h"
@@ -520,9 +523,14 @@ bool Truncate_statement::execute(THD *thd)
if (check_one_table_access(thd, DROP_ACL, first_table))
DBUG_RETURN(res);
+#ifdef WITH_WSREP
+ if (WSREP(thd) && wsrep_to_isolation_begin(thd,
+ first_table->db,
+ first_table->table_name, NULL))
+ DBUG_RETURN(TRUE);
+#endif /* WITH_WSREP */
if (! (res= truncate_table(thd, first_table)))
my_ok(thd);
-
DBUG_RETURN(res);
}
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 78b693c5237..241e332736a 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -908,7 +908,11 @@ int mysql_update(THD *thd,
*/
if ((error < 0) || thd->transaction.stmt.modified_non_trans_table)
{
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= 0;
if (error < 0)
@@ -2047,7 +2051,11 @@ void multi_update::abort_result_set()
The query has to binlog because there's a modified non-transactional table
either from the query's list or via a stored routine: bug#13270,23333
*/
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
/*
THD::killed status might not have been set ON at time of an error
@@ -2306,7 +2314,11 @@ bool multi_update::send_eof()
if (local_error == 0 || thd->transaction.stmt.modified_non_trans_table)
{
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= 0;
if (local_error == 0)
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index c1c6e142706..1efe724e6f9 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -3615,6 +3615,228 @@ static Sys_var_tz Sys_time_zone(
"time_zone", "time_zone",
SESSION_VAR(time_zone), NO_CMD_LINE,
DEFAULT(&default_tz), NO_MUTEX_GUARD, IN_BINLOG);
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+
+static Sys_var_charptr Sys_wsrep_provider(
+ "wsrep_provider", "Path to replication provider library",
+ PREALLOCATED GLOBAL_VAR(wsrep_provider), CMD_LINE(REQUIRED_ARG, OPT_WSREP_PROVIDER),
+ IN_FS_CHARSET, DEFAULT(wsrep_provider),
+ // IN_FS_CHARSET, DEFAULT(wsrep_provider_default),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_provider_check), ON_UPDATE(wsrep_provider_update));
+
+static Sys_var_charptr Sys_wsrep_provider_options(
+ "wsrep_provider_options", "provider specific options",
+ PREALLOCATED GLOBAL_VAR(wsrep_provider_options),
+ CMD_LINE(REQUIRED_ARG, OPT_WSREP_PROVIDER_OPTIONS),
+ IN_FS_CHARSET, DEFAULT(wsrep_provider_options),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_provider_options_check),
+ ON_UPDATE(wsrep_provider_options_update));
+
+static Sys_var_charptr Sys_wsrep_data_home_dir(
+ "wsrep_data_home_dir", "home directory for wsrep provider",
+ READ_ONLY GLOBAL_VAR(wsrep_data_home_dir), CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(""),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG);
+
+static Sys_var_charptr Sys_wsrep_cluster_name(
+ "wsrep_cluster_name", "Name for the cluster",
+ GLOBAL_VAR(wsrep_cluster_name), CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(wsrep_cluster_name),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_cluster_name_check),
+ ON_UPDATE(wsrep_cluster_name_update));
+
+static PolyLock_mutex PLock_wsrep_slave_threads(&LOCK_wsrep_slave_threads);
+static Sys_var_charptr Sys_wsrep_cluster_address (
+ "wsrep_cluster_address", "Address to initially connect to cluster",
+ PREALLOCATED GLOBAL_VAR(wsrep_cluster_address),
+ CMD_LINE(REQUIRED_ARG, OPT_WSREP_CLUSTER_ADDRESS),
+ IN_FS_CHARSET, DEFAULT(wsrep_cluster_address),
+ &PLock_wsrep_slave_threads, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_cluster_address_check),
+ ON_UPDATE(wsrep_cluster_address_update));
+
+static Sys_var_charptr Sys_wsrep_node_name (
+ "wsrep_node_name", "Node name",
+ GLOBAL_VAR(wsrep_node_name), CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(glob_hostname),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG);
+
+static Sys_var_charptr Sys_wsrep_node_address (
+ "wsrep_node_address", "Node address",
+ GLOBAL_VAR(wsrep_node_address), CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(wsrep_node_address),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_node_address_check),
+ ON_UPDATE(wsrep_node_address_update));
+
+static Sys_var_charptr Sys_wsrep_node_incoming_address(
+ "wsrep_node_incoming_address", "Client connection address",
+ GLOBAL_VAR(wsrep_node_incoming_address),CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(wsrep_node_incoming_address),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_node_name_check),
+ ON_UPDATE(wsrep_node_name_update));
+
+static Sys_var_ulong Sys_wsrep_slave_threads(
+ "wsrep_slave_threads", "Number of slave appliers to launch",
+ GLOBAL_VAR(wsrep_slave_threads), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(1, 512), DEFAULT(1), BLOCK_SIZE(1),
+ &PLock_wsrep_slave_threads, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_slave_threads_check),
+ ON_UPDATE(wsrep_slave_threads_update));
+
+static Sys_var_charptr Sys_wsrep_dbug_option(
+ "wsrep_dbug_option", "DBUG options to provider library",
+ GLOBAL_VAR(wsrep_dbug_option),CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(""),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG);
+
+static Sys_var_mybool Sys_wsrep_debug(
+ "wsrep_debug", "To enable debug level logging",
+ GLOBAL_VAR(wsrep_debug), CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_mybool Sys_wsrep_convert_LOCK_to_trx(
+ "wsrep_convert_LOCK_to_trx", "To convert locking sessions "
+ "into transactions",
+ GLOBAL_VAR(wsrep_convert_LOCK_to_trx),
+ CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_ulong Sys_wsrep_retry_autocommit(
+ "wsrep_retry_autocommit", "Max number of times to retry "
+ "a failed autocommit statement",
+ SESSION_VAR(wsrep_retry_autocommit), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0, 10000), DEFAULT(1), BLOCK_SIZE(1));
+
+static Sys_var_mybool Sys_wsrep_auto_increment_control(
+ "wsrep_auto_increment_control", "To automatically control the "
+ "assignment of autoincrement variables",
+ GLOBAL_VAR(wsrep_auto_increment_control),
+ CMD_LINE(OPT_ARG), DEFAULT(TRUE));
+
+static Sys_var_mybool Sys_wsrep_drupal_282555_workaround(
+ "wsrep_drupal_282555_workaround", "To use a workaround for"
+ "bad autoincrement value",
+ GLOBAL_VAR(wsrep_drupal_282555_workaround),
+ CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_charptr sys_wsrep_sst_method(
+ "wsrep_sst_method", "State snapshot transfer method",
+ GLOBAL_VAR(wsrep_sst_method),CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(wsrep_sst_method), NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_sst_method_check),
+ ON_UPDATE(wsrep_sst_method_update));
+
+static Sys_var_charptr Sys_wsrep_sst_receive_address(
+ "wsrep_sst_receive_address", "Address where node is waiting for "
+ "SST contact",
+ GLOBAL_VAR(wsrep_sst_receive_address),CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(wsrep_sst_receive_address), NO_MUTEX_GUARD,
+ NOT_IN_BINLOG,
+ ON_CHECK(wsrep_sst_receive_address_check),
+ ON_UPDATE(wsrep_sst_receive_address_update));
+
+static Sys_var_charptr Sys_wsrep_sst_auth(
+ "wsrep_sst_auth", "Authentication for SST connection",
+ PREALLOCATED GLOBAL_VAR(wsrep_sst_auth), CMD_LINE(REQUIRED_ARG, OPT_WSREP_SST_AUTH),
+ IN_FS_CHARSET, DEFAULT(wsrep_sst_auth), NO_MUTEX_GUARD,
+ NOT_IN_BINLOG,
+ ON_CHECK(wsrep_sst_auth_check),
+ ON_UPDATE(wsrep_sst_auth_update));
+
+static Sys_var_charptr Sys_wsrep_sst_donor(
+ "wsrep_sst_donor", "preferred donor node for the SST",
+ GLOBAL_VAR(wsrep_sst_donor),CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_sst_donor_check),
+ ON_UPDATE(wsrep_sst_donor_update));
+
+static Sys_var_mybool Sys_wsrep_sst_donor_rejects_queries(
+ "wsrep_sst_donor_rejects_queries", "Reject client queries "
+ "when donating state snapshot transfer",
+ GLOBAL_VAR(wsrep_sst_donor_rejects_queries),
+ CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_mybool Sys_wsrep_on (
+ "wsrep_on", "To enable wsrep replication ",
+ SESSION_VAR(wsrep_on),
+ CMD_LINE(OPT_ARG), DEFAULT(TRUE),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+ ON_UPDATE(wsrep_on_update));
+
+static Sys_var_charptr Sys_wsrep_start_position (
+ "wsrep_start_position", "global transaction position to start from ",
+ GLOBAL_VAR(wsrep_start_position),
+ CMD_LINE(REQUIRED_ARG, OPT_WSREP_START_POSITION),
+ IN_FS_CHARSET, DEFAULT(wsrep_start_position),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_start_position_check),
+ ON_UPDATE(wsrep_start_position_update));
+
+static Sys_var_ulonglong Sys_wsrep_max_ws_size (
+ "wsrep_max_ws_size", "Max write set size (bytes)",
+ GLOBAL_VAR(wsrep_max_ws_size), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(1024, 4294967296ULL), DEFAULT(1073741824ULL), BLOCK_SIZE(1));
+
+static Sys_var_ulong Sys_wsrep_max_ws_rows (
+ "wsrep_max_ws_rows", "Max number of rows in write set",
+ GLOBAL_VAR(wsrep_max_ws_rows), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(1, 1048576), DEFAULT(131072), BLOCK_SIZE(1));
+
+static Sys_var_charptr Sys_wsrep_notify_cmd(
+ "wsrep_notify_cmd", "",
+ GLOBAL_VAR(wsrep_notify_cmd),CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG);
+
+static Sys_var_mybool Sys_wsrep_certify_nonPK(
+ "wsrep_certify_nonPK", "Certify tables with no primary key",
+ GLOBAL_VAR(wsrep_certify_nonPK),
+ CMD_LINE(OPT_ARG), DEFAULT(TRUE));
+
+static Sys_var_mybool Sys_wsrep_causal_reads(
+ "wsrep_causal_reads", "Enable \"strictly synchronous\" semantics for read operations",
+ SESSION_VAR(wsrep_causal_reads),
+ CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+ // ON_UPDATE(wsrep_causal_reads_update));
+
+static const char *wsrep_OSU_method_names[]= { "TOI", "RSU", NullS };
+static Sys_var_enum Sys_wsrep_OSU_method(
+ "wsrep_OSU_method", "Method for Online Schema Upgrade",
+ GLOBAL_VAR(wsrep_OSU_method_options), CMD_LINE(OPT_ARG),
+ wsrep_OSU_method_names, DEFAULT(WSREP_OSU_TOI),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+ ON_UPDATE(0));
+
+static Sys_var_enum Sys_wsrep_forced_binlog_format(
+ "wsrep_forced_binlog_format", "binlog format to take effect over user's choice",
+ GLOBAL_VAR(wsrep_forced_binlog_format),
+ CMD_LINE(REQUIRED_ARG, OPT_BINLOG_FORMAT),
+ wsrep_binlog_format_names, DEFAULT(BINLOG_FORMAT_UNSPEC),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+ ON_UPDATE(0));
+
+static Sys_var_mybool Sys_wsrep_recover_datadir(
+ "wsrep_recover", "Recover database state after crash and exit",
+ READ_ONLY GLOBAL_VAR(wsrep_recovery),
+ CMD_LINE(OPT_ARG, OPT_WSREP_RECOVER), DEFAULT(FALSE));
+
+static Sys_var_mybool Sys_wsrep_replicate_myisam(
+ "wsrep_replicate_myisam", "To enable myisam replication",
+ GLOBAL_VAR(wsrep_replicate_myisam), CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_mybool Sys_wsrep_log_conflicts(
+ "wsrep_log_conflicts", "To log multi-master conflicts",
+ GLOBAL_VAR(wsrep_log_conflicts), CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_ulong Sys_wsrep_mysql_replication_bundle(
+ "wsrep_mysql_replication_bundle", "mysql replication group commit ",
+ GLOBAL_VAR(wsrep_mysql_replication_bundle), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0, 1000), DEFAULT(0), BLOCK_SIZE(1));
+
+#endif /* WITH_WSREP */
static Sys_var_charptr Sys_ignore_db_dirs(
"ignore_db_dirs",
diff --git a/sql/transaction.cc b/sql/transaction.cc
index 3359decbcd5..ef4383df0ea 100644
--- a/sql/transaction.cc
+++ b/sql/transaction.cc
@@ -96,6 +96,9 @@ static bool xa_trans_force_rollback(THD *thd)
by ha_rollback()/THD::transaction::cleanup().
*/
thd->transaction.xid_state.rm_error= 0;
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
if (ha_rollback_trans(thd, true))
{
my_error(ER_XAER_RMERR, MYF(0));
@@ -134,6 +137,9 @@ bool trans_begin(THD *thd, uint flags)
(thd->variables.option_bits & OPTION_TABLE_LOCK))
{
thd->variables.option_bits&= ~OPTION_TABLE_LOCK;
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
thd->server_status&= ~SERVER_STATUS_IN_TRANS;
res= test(ha_commit_trans(thd, TRUE));
}
@@ -150,6 +156,12 @@ bool trans_begin(THD *thd, uint flags)
*/
thd->mdl_context.release_transactional_locks();
+#ifdef WITH_WSREP
+ thd->wsrep_PA_safe= true;
+ if (WSREP_CLIENT(thd) && wsrep_causal_wait(thd))
+ DBUG_RETURN(TRUE);
+#endif /* WITH_WSREP */
+
thd->variables.option_bits|= OPTION_BEGIN;
thd->server_status|= SERVER_STATUS_IN_TRANS;
@@ -177,6 +189,9 @@ bool trans_commit(THD *thd)
if (trans_check(thd))
DBUG_RETURN(TRUE);
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
thd->server_status&= ~SERVER_STATUS_IN_TRANS;
res= ha_commit_trans(thd, TRUE);
if (res)
@@ -220,6 +235,9 @@ bool trans_commit_implicit(THD *thd)
/* Safety if one did "drop table" on locked tables */
if (!thd->locked_tables_mode)
thd->variables.option_bits&= ~OPTION_TABLE_LOCK;
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
thd->server_status&= ~SERVER_STATUS_IN_TRANS;
res= test(ha_commit_trans(thd, TRUE));
}
@@ -251,11 +269,16 @@ bool trans_commit_implicit(THD *thd)
bool trans_rollback(THD *thd)
{
int res;
- DBUG_ENTER("trans_rollback");
-
- if (trans_check(thd))
+ DBUG_ENTER("trans_rollback");
+#ifdef WITH_WSREP
+ thd->wsrep_PA_safe= true;
+#endif /* WITH_WSREP */
+ if (trans_check(thd))
DBUG_RETURN(TRUE);
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
thd->server_status&= ~SERVER_STATUS_IN_TRANS;
res= ha_rollback_trans(thd, TRUE);
RUN_HOOK(transaction, after_rollback, (thd, FALSE));
@@ -296,6 +319,9 @@ bool trans_commit_stmt(THD *thd)
if (thd->transaction.stmt.ha_list)
{
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, FALSE);
+#endif /* WITH_WSREP */
res= ha_commit_trans(thd, FALSE);
if (! thd->in_active_multi_stmt_transaction())
thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation;
@@ -338,9 +364,19 @@ bool trans_rollback_stmt(THD *thd)
if (thd->transaction.stmt.ha_list)
{
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, FALSE);
+#endif /* WITH_WSREP */
ha_rollback_trans(thd, FALSE);
if (thd->transaction_rollback_request && !thd->in_sub_stmt)
+#ifdef WITH_WSREP
+ {
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
ha_rollback_trans(thd, TRUE);
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
if (! thd->in_active_multi_stmt_transaction())
thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation;
}
@@ -681,6 +717,9 @@ bool trans_xa_commit(THD *thd)
}
else if (xa_state == XA_IDLE && thd->lex->xa_opt == XA_ONE_PHASE)
{
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
int r= ha_commit_trans(thd, TRUE);
if ((res= test(r)))
my_error(r == 1 ? ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0));
@@ -702,6 +741,9 @@ bool trans_xa_commit(THD *thd)
if (thd->mdl_context.acquire_lock(&mdl_request,
thd->variables.lock_wait_timeout))
{
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
ha_rollback_trans(thd, TRUE);
my_error(ER_XAER_RMERR, MYF(0));
}
diff --git a/sql/wsrep_check_opts.cc b/sql/wsrep_check_opts.cc
new file mode 100644
index 00000000000..5764be39093
--- /dev/null
+++ b/sql/wsrep_check_opts.cc
@@ -0,0 +1,392 @@
+/* Copyright 2011 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+//#include <mysqld.h>
+#include <sql_class.h>
+//#include <sql_plugin.h>
+//#include <set_var.h>
+
+#include "wsrep_mysqld.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+
+/* This file is about checking for correctness of mysql configuration options */
+
+struct opt
+{
+ const char* const name;
+ const char* value;
+};
+
+/* A list of options to check.
+ * At first we assume default values and then see if they are changed on CLI or
+ * in my.cnf */
+static struct opt opts[] =
+{
+ { "wsrep_slave_threads", "1" }, // mysqld.cc
+ { "bind_address", "0.0.0.0" }, // mysqld.cc
+ { "wsrep_sst_method","mysqldump" }, // mysqld.cc
+ { "wsrep_sst_receive_address","AUTO"}, // mysqld.cc
+ { "binlog_format", "ROW" }, // mysqld.cc
+ { "wsrep_provider", "none" }, // mysqld.cc
+ { "query_cache_type", "0" }, // mysqld.cc
+ { "query_cache_size", "0" }, // mysqld.cc
+ { "locked_in_memory", "0" }, // mysqld.cc
+ { "wsrep_cluster_address", "0" }, // mysqld.cc
+ { "locks_unsafe_for_binlog", "0" }, // ha_innodb.cc
+ { "autoinc_lock_mode", "1" }, // ha_innodb.cc
+ { 0, 0 }
+};
+
+enum
+{
+ WSREP_SLAVE_THREADS,
+ BIND_ADDRESS,
+ WSREP_SST_METHOD,
+ WSREP_SST_RECEIVE_ADDRESS,
+ BINLOG_FORMAT,
+ WSREP_PROVIDER,
+ QUERY_CACHE_TYPE,
+ QUERY_CACHE_SIZE,
+ LOCKED_IN_MEMORY,
+ WSREP_CLUSTER_ADDRESS,
+ LOCKS_UNSAFE_FOR_BINLOG,
+ AUTOINC_LOCK_MODE
+};
+
+
+/* A class to make a copy of argv[] vector */
+struct argv_copy
+{
+ int const argc_;
+ char** argv_;
+
+ argv_copy (int const argc, const char* const argv[]) :
+ argc_ (argc),
+ argv_ (reinterpret_cast<char**>(calloc(argc_, sizeof(char*))))
+ {
+ if (argv_)
+ {
+ for (int i = 0; i < argc_; ++i)
+ {
+ argv_[i] = strdup(argv[i]);
+
+ if (!argv_[i])
+ {
+ argv_free (); // free whatever bee allocated
+ return;
+ }
+ }
+ }
+ }
+
+ ~argv_copy () { argv_free (); }
+
+private:
+ argv_copy (const argv_copy&);
+ argv_copy& operator= (const argv_copy&);
+
+ void argv_free()
+ {
+ if (argv_)
+ {
+ for (int i = 0; (i < argc_) && argv_[i] ; ++i) free (argv_[i]);
+ free (argv_);
+ argv_ = 0;
+ }
+ }
+};
+
+/* a short corresponding to '--' byte sequence */
+static short const long_opt_prefix ('-' + ('-' << 8));
+
+/* Normalizes long options to have '_' instead of '-' */
+static int
+normalize_opts (argv_copy& a)
+{
+ if (a.argv_)
+ {
+ for (int i = 0; i < a.argc_; ++i)
+ {
+ char* ptr = a.argv_[i];
+ if (long_opt_prefix == *(short*)ptr) // long option
+ {
+ ptr += 2;
+ const char* end = strchr(ptr, '=');
+
+ if (!end) end = ptr + strlen(ptr);
+
+ for (; ptr != end; ++ptr) if ('-' == *ptr) *ptr = '_';
+ }
+ }
+
+ return 0;
+ }
+
+ return EINVAL;
+}
+
+/* Find required options in the argument list and change their values */
+static int
+find_opts (argv_copy& a, struct opt* const opts)
+{
+ for (int i = 0; i < a.argc_; ++i)
+ {
+ char* ptr = a.argv_[i] + 2; // we're interested only in long options
+
+ struct opt* opt = opts;
+ for (; 0 != opt->name; ++opt)
+ {
+ if (!strstr(ptr, opt->name)) continue; // try next option
+
+ /* 1. try to find value after the '=' */
+ opt->value = strchr(ptr, '=') + 1;
+
+ /* 2. if no '=', try next element in the argument vector */
+ if (reinterpret_cast<void*>(1) == opt->value)
+ {
+ /* also check that the next element is not an option itself */
+ if (i + 1 < a.argc_ && *(a.argv_[i + 1]) != '-')
+ {
+ ++i;
+ opt->value = a.argv_[i];
+ }
+ else opt->value = ""; // no value supplied (like boolean opt)
+ }
+
+ break; // option found, break inner loop
+ }
+ }
+
+ return 0;
+}
+
+/* Parses string for an integer. Returns 0 on success. */
+int get_long_long (const struct opt& opt, long long* const val, int const base)
+{
+ const char* const str = opt.value;
+
+ if ('\0' != *str)
+ {
+ char* endptr;
+
+ *val = strtoll (str, &endptr, base);
+
+ if ('k' == *endptr || 'K' == *endptr)
+ {
+ *val *= 1024L;
+ endptr++;
+ }
+ else if ('m' == *endptr || 'M' == *endptr)
+ {
+ *val *= 1024L * 1024L;
+ endptr++;
+ }
+ else if ('g' == *endptr || 'G' == *endptr)
+ {
+ *val *= 1024L * 1024L * 1024L;
+ endptr++;
+ }
+
+ if ('\0' == *endptr) return 0; // the whole string was a valid integer
+ }
+
+ WSREP_ERROR ("Bad value for *%s: '%s'. Should be integer.",
+ opt.name, opt.value);
+
+ return EINVAL;
+}
+
+/* This is flimzy coz hell knows how mysql interprets boolean strings...
+ * and, no, I'm not going to become versed in how mysql handles options -
+ * I'd rather sing.
+
+ Aha, http://dev.mysql.com/doc/refman/5.1/en/dynamic-system-variables.html:
+ Variables that have a type of “boolean” can be set to 0, 1, ON or OFF. (If you
+ set them on the command line or in an option file, use the numeric values.)
+
+ So it is '0' for FALSE, '1' or empty string for TRUE
+
+ */
+int get_bool (const struct opt& opt, bool* const val)
+{
+ const char* str = opt.value;
+
+ while (isspace(*str)) ++str; // skip initial whitespaces
+
+ ssize_t str_len = strlen(str);
+ switch (str_len)
+ {
+ case 0:
+ *val = true;
+ return 0;
+ case 1:
+ if ('0' == *str || '1' == *str)
+ {
+ *val = ('1' == *str);
+ return 0;
+ }
+ }
+
+ WSREP_ERROR ("Bad value for *%s: '%s'. Should be '0', '1' or empty string.",
+ opt.name, opt.value);
+
+ return EINVAL;
+}
+
+static int
+check_opts (int const argc, const char* const argv[], struct opt opts[])
+{
+ /* First, make a copy of argv to be able to manipulate it */
+ argv_copy a(argc, argv);
+
+ if (!a.argv_)
+ {
+ WSREP_ERROR ("Could not copy argv vector: not enough memory.");
+ return ENOMEM;
+ }
+
+ int err = normalize_opts (a);
+ if (err)
+ {
+ WSREP_ERROR ("Failed to normalize options.");
+ return err;
+ }
+
+ err = find_opts (a, opts);
+ if (err)
+ {
+ WSREP_ERROR ("Failed to parse options.");
+ return err;
+ }
+
+ /* At this point we have updated default values in our option list to
+ what has been specified on the command line / my.cnf */
+
+ long long slave_threads;
+ err = get_long_long (opts[WSREP_SLAVE_THREADS], &slave_threads, 10);
+ if (err) return err;
+
+ int rcode = 0;
+
+ if (slave_threads > 1)
+ /* Need to check AUTOINC_LOCK_MODE and LOCKS_UNSAFE_FOR_BINLOG */
+ {
+ long long autoinc_lock_mode;
+ err = get_long_long (opts[AUTOINC_LOCK_MODE], &autoinc_lock_mode, 10);
+ if (err) return err;
+
+ bool locks_unsafe_for_binlog;
+ err = get_bool (opts[LOCKS_UNSAFE_FOR_BINLOG],&locks_unsafe_for_binlog);
+ if (err) return err;
+
+ if (autoinc_lock_mode != 2)
+ {
+ WSREP_ERROR ("Parallel applying (wsrep_slave_threads > 1) requires"
+ " innodb_autoinc_lock_mode = 2.");
+ rcode = EINVAL;
+ }
+ }
+
+ long long query_cache_size, query_cache_type;
+ if ((err = get_long_long (opts[QUERY_CACHE_SIZE], &query_cache_size, 10)))
+ return err;
+ if ((err = get_long_long (opts[QUERY_CACHE_TYPE], &query_cache_type, 10)))
+ return err;
+
+ if (0 != query_cache_size && 0 != query_cache_type)
+ {
+ WSREP_ERROR ("Query cache is not supported (size=%lld type=%lld)",
+ query_cache_size, query_cache_type);
+ rcode = EINVAL;
+ }
+
+ bool locked_in_memory;
+ err = get_bool (opts[LOCKED_IN_MEMORY], &locked_in_memory);
+ if (err) { WSREP_ERROR("get_bool error: %s", strerror(err)); return err; }
+ if (locked_in_memory)
+ {
+ WSREP_ERROR ("Memory locking is not supported (locked_in_memory=%s)",
+ locked_in_memory ? "ON" : "OFF");
+ rcode = EINVAL;
+ }
+
+ if (!strcasecmp(opts[WSREP_SST_METHOD].value,"mysqldump"))
+ {
+ if (!strcasecmp(opts[BIND_ADDRESS].value, "127.0.0.1") ||
+ !strcasecmp(opts[BIND_ADDRESS].value, "localhost"))
+ {
+ WSREP_ERROR ("wsrep_sst_method is set to 'mysqldump' yet "
+ "mysqld bind_address is set to '%s', which makes it "
+ "impossible to receive state transfer from another "
+ "node, since mysqld won't accept such connections. "
+ "If you wish to use mysqldump state transfer method, "
+ "set bind_address to allow mysql client connections "
+ "from other cluster members (e.g. 0.0.0.0).",
+ opts[BIND_ADDRESS].value);
+ rcode = EINVAL;
+ }
+ }
+ else
+ {
+ // non-mysqldump SST requires wsrep_cluster_address on startup
+ if (strlen(opts[WSREP_CLUSTER_ADDRESS].value) == 0)
+ {
+ WSREP_ERROR ("%s SST method requires wsrep_cluster_address to be "
+ "configured on startup.",opts[WSREP_SST_METHOD].value);
+ rcode = EINVAL;
+ }
+ }
+
+ if (strcasecmp(opts[WSREP_SST_RECEIVE_ADDRESS].value, "AUTO"))
+ {
+ if (!strncasecmp(opts[WSREP_SST_RECEIVE_ADDRESS].value,
+ "127.0.0.1", strlen("127.0.0.1")) ||
+ !strncasecmp(opts[WSREP_SST_RECEIVE_ADDRESS].value,
+ "localhost", strlen("localhost")))
+ {
+ WSREP_WARN ("wsrep_sst_receive_address is set to '%s' which "
+ "makes it impossible for another host to reach this "
+ "one. Please set it to the address which this node "
+ "can be connected at by other cluster members.",
+ opts[WSREP_SST_RECEIVE_ADDRESS].value);
+// rcode = EINVAL;
+ }
+ }
+
+ if (strcasecmp(opts[WSREP_PROVIDER].value, "none"))
+ {
+ if (strcasecmp(opts[BINLOG_FORMAT].value, "ROW"))
+ {
+ WSREP_ERROR ("Only binlog_format = 'ROW' is currently supported. "
+ "Configured value: '%s'. Please adjust your "
+ "configuration.", opts[BINLOG_FORMAT].value);
+
+ rcode = EINVAL;
+ }
+ }
+
+ return rcode;
+}
+
+int
+wsrep_check_opts (int const argc, char* const* const argv)
+{
+ return check_opts (argc, argv, opts);
+}
+
diff --git a/sql/wsrep_hton.cc b/sql/wsrep_hton.cc
new file mode 100644
index 00000000000..1fc2372a57c
--- /dev/null
+++ b/sql/wsrep_hton.cc
@@ -0,0 +1,449 @@
+/* Copyright 2008 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <mysqld.h>
+#include "sql_base.h"
+#include "rpl_filter.h"
+#include <sql_class.h>
+#include "wsrep_mysqld.h"
+#include "wsrep_priv.h"
+#include <cstdio>
+#include <cstdlib>
+
+extern handlerton *binlog_hton;
+extern int binlog_close_connection(handlerton *hton, THD *thd);
+extern ulonglong thd_to_trx_id(THD *thd);
+
+extern "C" int thd_binlog_format(const MYSQL_THD thd);
+// todo: share interface with ha_innodb.c
+
+enum wsrep_trx_status wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all);
+
+/*
+ a post-commit cleanup on behalf of wsrep. Can't be a part of hton struct.
+ Is called by THD::transactions.cleanup()
+*/
+void wsrep_cleanup_transaction(THD *thd)
+{
+ if (thd->thread_id == 0) return;
+ if (thd->wsrep_exec_mode == LOCAL_COMMIT)
+ {
+ if (thd->variables.wsrep_on &&
+ thd->wsrep_conflict_state != MUST_REPLAY)
+ {
+ if (thd->wsrep_seqno_changed)
+ {
+ if (wsrep->post_commit(wsrep, &thd->wsrep_trx_handle))
+ {
+ DBUG_PRINT("wsrep", ("set committed fail"));
+ WSREP_WARN("set committed fail: %llu %d",
+ (long long)thd->real_id, thd->stmt_da->status());
+ }
+ }
+ //else
+ //WSREP_DEBUG("no trx handle for %s", thd->query());
+ thd_binlog_trx_reset(thd);
+ thd->wsrep_seqno_changed = false;
+ }
+ thd->wsrep_exec_mode= LOCAL_STATE;
+ }
+}
+
+/*
+ wsrep hton
+*/
+handlerton *wsrep_hton;
+
+void wsrep_register_hton(THD* thd, bool all)
+{
+ THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
+ for (Ha_trx_info *i= trans->ha_list; WSREP(thd) && i; i = i->next())
+ {
+ if (i->ht()->db_type == DB_TYPE_INNODB)
+ {
+ trans_register_ha(thd, all, wsrep_hton);
+ thd->ha_data[wsrep_hton->slot].ha_info[all].set_trx_read_write();
+ break;
+ }
+ }
+}
+
+/*
+ wsrep exploits binlog's caches even if binlogging itself is not
+ activated. In such case connection close needs calling
+ actual binlog's method.
+ Todo: split binlog hton from its caches to use ones by wsrep
+ without referring to binlog's stuff.
+*/
+static int
+wsrep_close_connection(handlerton* hton, THD* thd)
+{
+ DBUG_ENTER("wsrep_close_connection");
+ if (thd_get_ha_data(thd, binlog_hton) != NULL)
+ binlog_hton->close_connection (binlog_hton, thd);
+ DBUG_RETURN(0);
+}
+
+/*
+ prepare/wsrep_run_wsrep_commit can fail in two ways
+ - certification test or an equivalent. As a result,
+ the current transaction just rolls back
+ Error codes:
+ WSREP_TRX_ROLLBACK, WSREP_TRX_ERROR
+ - a post-certification failure makes this server unable to
+ commit its own WS and therefore the server must abort
+*/
+static int wsrep_prepare(handlerton *hton, THD *thd, bool all)
+{
+#ifndef DBUG_OFF
+ //wsrep_seqno_t old = thd->wsrep_trx_seqno;
+#endif
+ DBUG_ENTER("wsrep_prepare");
+ if ((all ||
+ !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
+ (thd->variables.wsrep_on && !wsrep_trans_cache_is_empty(thd)))
+ {
+ switch (wsrep_run_wsrep_commit(thd, hton, all))
+ {
+ case WSREP_TRX_OK:
+ // DBUG_ASSERT(thd->wsrep_trx_seqno > old ||
+ // thd->wsrep_exec_mode == REPL_RECV ||
+ // thd->wsrep_exec_mode == TOTAL_ORDER);
+ break;
+ case WSREP_TRX_ROLLBACK:
+ case WSREP_TRX_ERROR:
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+static int wsrep_savepoint_set(handlerton *hton, THD *thd, void *sv)
+{
+ if (!wsrep_emulate_bin_log) return 0;
+ int rcode = binlog_hton->savepoint_set(binlog_hton, thd, sv);
+ return rcode;
+}
+static int wsrep_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
+{
+ if (!wsrep_emulate_bin_log) return 0;
+ int rcode = binlog_hton->savepoint_rollback(binlog_hton, thd, sv);
+ return rcode;
+}
+
+static int wsrep_rollback(handlerton *hton, THD *thd, bool all)
+{
+ DBUG_ENTER("wsrep_rollback");
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if ((all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
+ (thd->variables.wsrep_on && thd->wsrep_conflict_state != MUST_REPLAY))
+ {
+ if (wsrep->post_rollback(wsrep, &thd->wsrep_trx_handle))
+ {
+ DBUG_PRINT("wsrep", ("setting rollback fail"));
+ WSREP_ERROR("settting rollback fail: thd: %llu SQL: %s",
+ (long long)thd->real_id, thd->query());
+ }
+ }
+
+ int rcode = 0;
+ if (!wsrep_emulate_bin_log)
+ {
+ if (all) thd_binlog_trx_reset(thd);
+ }
+
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ DBUG_RETURN(rcode);
+}
+
+int wsrep_commit(handlerton *hton, THD *thd, bool all)
+{
+ DBUG_ENTER("wsrep_commit");
+
+ DBUG_RETURN(0);
+}
+
+extern Rpl_filter* binlog_filter;
+extern my_bool opt_log_slave_updates;
+enum wsrep_trx_status
+wsrep_run_wsrep_commit(
+ THD *thd, handlerton *hton, bool all)
+{
+ int rcode = -1;
+ uint data_len = 0;
+ uchar *rbr_data = NULL;
+ IO_CACHE *cache;
+ int replay_round= 0;
+
+ if (thd->stmt_da->is_error()) {
+ WSREP_ERROR("commit issue, error: %d %s",
+ thd->stmt_da->sql_errno(), thd->stmt_da->message());
+ }
+
+ DBUG_ENTER("wsrep_run_wsrep_commit");
+ if (thd->slave_thread && !opt_log_slave_updates) {
+ DBUG_RETURN(WSREP_TRX_OK);
+ }
+ if (thd->wsrep_exec_mode == REPL_RECV) {
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if (thd->wsrep_conflict_state == MUST_ABORT) {
+ if (wsrep_debug)
+ WSREP_INFO("WSREP: must abort for BF");
+ DBUG_PRINT("wsrep", ("BF apply commit fail"));
+ thd->wsrep_conflict_state = NO_CONFLICT;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ //
+ // TODO: test all calls of the rollback.
+ // rollback must happen automagically innobase_rollback(hton, thd, 1);
+ //
+ DBUG_RETURN(WSREP_TRX_ERROR);
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+ if (thd->wsrep_exec_mode != LOCAL_STATE) {
+ DBUG_RETURN(WSREP_TRX_OK);
+ }
+ if (thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING) {
+ WSREP_DEBUG("commit for consistency check: %s", thd->query());
+ DBUG_RETURN(WSREP_TRX_OK);
+ }
+
+ DBUG_PRINT("wsrep", ("replicating commit"));
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if (thd->wsrep_conflict_state == MUST_ABORT) {
+ DBUG_PRINT("wsrep", ("replicate commit fail"));
+ thd->wsrep_conflict_state = ABORTED;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ if (wsrep_debug) {
+ WSREP_INFO("innobase_commit, abort %s",
+ (thd->query()) ? thd->query() : "void");
+ }
+ DBUG_RETURN(WSREP_TRX_ROLLBACK);
+ }
+
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+
+ while (wsrep_replaying > 0 &&
+ thd->wsrep_conflict_state == NO_CONFLICT &&
+ thd->killed == NOT_KILLED &&
+ !shutdown_in_progress)
+ {
+
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ mysql_mutex_lock(&thd->mysys_var->mutex);
+ thd_proc_info(thd, "wsrep waiting on replaying");
+ thd->mysys_var->current_mutex= &LOCK_wsrep_replaying;
+ thd->mysys_var->current_cond= &COND_wsrep_replaying;
+ mysql_mutex_unlock(&thd->mysys_var->mutex);
+
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+ // Using timedwait is a hack to avoid deadlock in case if BF victim
+ // misses the signal.
+ struct timespec wtime = {0, 1000000};
+ mysql_cond_timedwait(&COND_wsrep_replaying, &LOCK_wsrep_replaying,
+ &wtime);
+ if (replay_round++ % 100000 == 0)
+ WSREP_DEBUG("commit waiting for replaying: replayers %d, thd: (%lu) conflict: %d (round: %d)",
+ wsrep_replaying, thd->thread_id, thd->wsrep_conflict_state, replay_round);
+
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+
+ mysql_mutex_lock(&thd->mysys_var->mutex);
+ thd->mysys_var->current_mutex= 0;
+ thd->mysys_var->current_cond= 0;
+ mysql_mutex_unlock(&thd->mysys_var->mutex);
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+ }
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+
+ if (thd->wsrep_conflict_state == MUST_ABORT) {
+ DBUG_PRINT("wsrep", ("replicate commit fail"));
+ thd->wsrep_conflict_state = ABORTED;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ WSREP_DEBUG("innobase_commit abort after replaying wait %s",
+ (thd->query()) ? thd->query() : "void");
+ DBUG_RETURN(WSREP_TRX_ROLLBACK);
+ }
+ thd->wsrep_query_state = QUERY_COMMITTING;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ cache = get_trans_log(thd);
+ rcode = 0;
+ if (cache) {
+ thd->binlog_flush_pending_rows_event(true);
+ rcode = wsrep_write_cache(cache, &rbr_data, &data_len);
+ if (rcode) {
+ WSREP_ERROR("rbr write fail, data_len: %d, %d", data_len, rcode);
+ if (data_len) my_free(rbr_data);
+ DBUG_RETURN(WSREP_TRX_ROLLBACK);
+ }
+ }
+ if (data_len == 0)
+ {
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_exec_mode = LOCAL_COMMIT;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ if (thd->stmt_da->is_ok() &&
+ thd->stmt_da->affected_rows() > 0 &&
+ !binlog_filter->is_on())
+ {
+ WSREP_DEBUG("empty rbr buffer, query: %s, "
+ "affected rows: %llu, "
+ "changed tables: %d, "
+ "sql_log_bin: %d, "
+ "wsrep status (%d %d %d)",
+ thd->query(), thd->stmt_da->affected_rows(),
+ stmt_has_updated_trans_table(thd), thd->variables.sql_log_bin,
+ thd->wsrep_exec_mode, thd->wsrep_query_state,
+ thd->wsrep_conflict_state);
+ }
+ else
+ {
+ WSREP_DEBUG("empty rbr buffer, query: %s", thd->query());
+ }
+ DBUG_RETURN(WSREP_TRX_OK);
+ }
+ if (!rcode) {
+ rcode = wsrep->pre_commit(
+ wsrep,
+ (wsrep_conn_id_t)thd->thread_id,
+ &thd->wsrep_trx_handle,
+ rbr_data,
+ data_len,
+ (thd->wsrep_PA_safe) ? WSREP_FLAG_PA_SAFE : 0ULL,
+ &thd->wsrep_trx_seqno);
+ if (rcode == WSREP_TRX_MISSING) {
+ rcode = WSREP_OK;
+ } else if (rcode == WSREP_BF_ABORT) {
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_conflict_state = MUST_REPLAY;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+ wsrep_replaying++;
+ WSREP_DEBUG("replaying increased: %d, thd: %lu",
+ wsrep_replaying, thd->thread_id);
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+ }
+ thd->wsrep_seqno_changed = true;
+ } else {
+ WSREP_ERROR("I/O error reading from thd's binlog iocache: "
+ "errno=%d, io cache code=%d", my_errno, cache->error);
+ if (data_len) my_free(rbr_data);
+ DBUG_ASSERT(0); // failure like this can not normally happen
+ DBUG_RETURN(WSREP_TRX_ERROR);
+ }
+
+ if (data_len) {
+ my_free(rbr_data);
+ }
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ switch(rcode) {
+ case 0:
+ thd->wsrep_exec_mode = LOCAL_COMMIT;
+ /* Override XID iff it was generated by mysql */
+ if (thd->transaction.xid_state.xid.get_my_xid())
+ {
+ wsrep_xid_init(&thd->transaction.xid_state.xid,
+ wsrep_cluster_uuid(),
+ thd->wsrep_trx_seqno);
+ }
+ DBUG_PRINT("wsrep", ("replicating commit success"));
+
+ break;
+ case WSREP_TRX_FAIL:
+ case WSREP_BF_ABORT:
+ WSREP_DEBUG("commit failed for reason: %d", rcode);
+ DBUG_PRINT("wsrep", ("replicating commit fail"));
+
+ thd->wsrep_query_state= QUERY_EXEC;
+
+ if (thd->wsrep_conflict_state == MUST_ABORT) {
+ thd->wsrep_conflict_state= ABORTED;
+ }
+ else
+ {
+ WSREP_DEBUG("conflict state: %d", thd->wsrep_conflict_state);
+ if (thd->wsrep_conflict_state == NO_CONFLICT)
+ {
+ thd->wsrep_conflict_state = CERT_FAILURE;
+ WSREP_LOG_CONFLICT(NULL, thd, FALSE);
+ }
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ DBUG_RETURN(WSREP_TRX_ROLLBACK);
+
+ case WSREP_CONN_FAIL:
+ WSREP_ERROR("connection failure");
+ DBUG_RETURN(WSREP_TRX_ERROR);
+ default:
+ WSREP_ERROR("unknown connection failure");
+ DBUG_RETURN(WSREP_TRX_ERROR);
+ }
+
+ thd->wsrep_query_state= QUERY_EXEC;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ DBUG_RETURN(WSREP_TRX_OK);
+}
+
+
+static int wsrep_hton_init(void *p)
+{
+ wsrep_hton= (handlerton *)p;
+ //wsrep_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
+ wsrep_hton->state= SHOW_OPTION_YES;
+ wsrep_hton->db_type=DB_TYPE_WSREP;
+ wsrep_hton->savepoint_offset= sizeof(my_off_t);
+ wsrep_hton->close_connection= wsrep_close_connection;
+ wsrep_hton->savepoint_set= wsrep_savepoint_set;
+ wsrep_hton->savepoint_rollback= wsrep_savepoint_rollback;
+ wsrep_hton->commit= wsrep_commit;
+ wsrep_hton->rollback= wsrep_rollback;
+ wsrep_hton->prepare= wsrep_prepare;
+ wsrep_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN; // todo: fix flags
+ wsrep_hton->slot= 0;
+ return 0;
+}
+
+
+struct st_mysql_storage_engine wsrep_storage_engine=
+{ MYSQL_HANDLERTON_INTERFACE_VERSION };
+
+
+mysql_declare_plugin(wsrep)
+{
+ MYSQL_STORAGE_ENGINE_PLUGIN,
+ &wsrep_storage_engine,
+ "wsrep",
+ "Codership Oy",
+ "A pseudo storage engine to represent transactions in multi-master synchornous replication",
+ PLUGIN_LICENSE_GPL,
+ wsrep_hton_init, /* Plugin Init */
+ NULL, /* Plugin Deinit */
+ 0x0100 /* 1.0 */,
+ NULL, /* status variables */
+ NULL, /* system variables */
+ NULL, /* config options */
+ 0, /* flags */
+}
+mysql_declare_plugin_end;
diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc
new file mode 100644
index 00000000000..62397c58a6e
--- /dev/null
+++ b/sql/wsrep_mysqld.cc
@@ -0,0 +1,1311 @@
+/* Copyright 2008 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <mysqld.h>
+#include <sql_class.h>
+#include <sql_parse.h>
+#include "wsrep_priv.h"
+#include <cstdio>
+#include <cstdlib>
+#include "log_event.h"
+
+extern Format_description_log_event *wsrep_format_desc;
+wsrep_t *wsrep = NULL;
+my_bool wsrep_emulate_bin_log = FALSE; // activating parts of binlog interface
+
+/*
+ * Begin configuration options and their default values
+ */
+
+const char* wsrep_data_home_dir = NULL;
+
+#define WSREP_NODE_INCOMING_AUTO "AUTO"
+const char* wsrep_node_incoming_address = WSREP_NODE_INCOMING_AUTO;
+const char* wsrep_dbug_option = "";
+
+long wsrep_slave_threads = 1; // # of slave action appliers wanted
+my_bool wsrep_debug = 0; // enable debug level logging
+my_bool wsrep_convert_LOCK_to_trx = 1; // convert locking sessions to trx
+ulong wsrep_retry_autocommit = 5; // retry aborted autocommit trx
+my_bool wsrep_auto_increment_control = 1; // control auto increment variables
+my_bool wsrep_drupal_282555_workaround = 1; // retry autoinc insert after dupkey
+my_bool wsrep_incremental_data_collection = 0; // incremental data collection
+long long wsrep_max_ws_size = 1073741824LL; //max ws (RBR buffer) size
+long wsrep_max_ws_rows = 65536; // max number of rows in ws
+int wsrep_to_isolation = 0; // # of active TO isolation threads
+my_bool wsrep_certify_nonPK = 1; // certify, even when no primary key
+long wsrep_max_protocol_version = 2; // maximum protocol version to use
+ulong wsrep_forced_binlog_format = BINLOG_FORMAT_UNSPEC;
+my_bool wsrep_recovery = 0; // recovery
+my_bool wsrep_replicate_myisam = 0; // enable myisam replication
+my_bool wsrep_log_conflicts = 0; //
+ulong wsrep_mysql_replication_bundle = 0;
+
+/*
+ * End configuration options
+ */
+
+static const wsrep_uuid_t cluster_uuid = WSREP_UUID_UNDEFINED;
+const wsrep_uuid_t* wsrep_cluster_uuid()
+{
+ return &cluster_uuid;
+}
+static char cluster_uuid_str[40]= { 0, };
+static const char* cluster_status_str[WSREP_VIEW_MAX] =
+{
+ "Primary",
+ "non-Primary",
+ "Disconnected"
+};
+
+static char provider_name[256]= { 0, };
+static char provider_version[256]= { 0, };
+static char provider_vendor[256]= { 0, };
+
+/*
+ * wsrep status variables
+ */
+my_bool wsrep_connected = FALSE;
+my_bool wsrep_ready = FALSE; // node can accept queries
+const char* wsrep_cluster_state_uuid = cluster_uuid_str;
+long long wsrep_cluster_conf_id = WSREP_SEQNO_UNDEFINED;
+const char* wsrep_cluster_status = cluster_status_str[WSREP_VIEW_DISCONNECTED];
+long wsrep_cluster_size = 0;
+long wsrep_local_index = -1;
+const char* wsrep_provider_name = provider_name;
+const char* wsrep_provider_version = provider_version;
+const char* wsrep_provider_vendor = provider_vendor;
+/* End wsrep status variables */
+
+
+wsrep_uuid_t local_uuid = WSREP_UUID_UNDEFINED;
+wsrep_seqno_t local_seqno = WSREP_SEQNO_UNDEFINED;
+wsp::node_status local_status;
+long wsrep_protocol_version = 2;
+
+// Boolean denoting if server is in initial startup phase. This is needed
+// to make sure that main thread waiting in wsrep_sst_wait() is signaled
+// if there was no state gap on receiving first view event.
+static my_bool wsrep_startup = TRUE;
+
+// action execute callback
+extern wsrep_status_t wsrep_apply_cb(void *ctx,
+ const void* buf, size_t buf_len,
+ wsrep_seqno_t global_seqno);
+
+extern wsrep_status_t wsrep_commit_cb (void *ctx,
+ wsrep_seqno_t global_seqno,
+ bool commit);
+
+static void wsrep_log_cb(wsrep_log_level_t level, const char *msg) {
+ switch (level) {
+ case WSREP_LOG_INFO:
+ sql_print_information("WSREP: %s", msg);
+ break;
+ case WSREP_LOG_WARN:
+ sql_print_warning("WSREP: %s", msg);
+ break;
+ case WSREP_LOG_ERROR:
+ case WSREP_LOG_FATAL:
+ sql_print_error("WSREP: %s", msg);
+ break;
+ case WSREP_LOG_DEBUG:
+ if (wsrep_debug) sql_print_information ("[Debug] WSREP: %s", msg);
+ default:
+ break;
+ }
+}
+
+static void wsrep_log_states (wsrep_log_level_t const level,
+ const wsrep_uuid_t* const group_uuid,
+ wsrep_seqno_t const group_seqno,
+ const wsrep_uuid_t* const node_uuid,
+ wsrep_seqno_t const node_seqno)
+{
+ char uuid_str[37];
+ char msg[256];
+
+ wsrep_uuid_print (group_uuid, uuid_str, sizeof(uuid_str));
+ snprintf (msg, 255, "WSREP: Group state: %s:%lld",
+ uuid_str, (long long)group_seqno);
+ wsrep_log_cb (level, msg);
+
+ wsrep_uuid_print (node_uuid, uuid_str, sizeof(uuid_str));
+ snprintf (msg, 255, "WSREP: Local state: %s:%lld",
+ uuid_str, (long long)node_seqno);
+ wsrep_log_cb (level, msg);
+}
+
+static my_bool set_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg)
+{
+ XID* xid= reinterpret_cast<XID*>(arg);
+ handlerton* hton= plugin_data(plugin, handlerton *);
+ if (hton->db_type == DB_TYPE_INNODB)
+ {
+ const wsrep_uuid_t* uuid(wsrep_xid_uuid(xid));
+ char uuid_str[40] = {0, };
+ wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str));
+ WSREP_DEBUG("Set WSREPXid for InnoDB: %s:%lld",
+ uuid_str, (long long)wsrep_xid_seqno(xid));
+ hton->wsrep_set_checkpoint(hton, xid);
+ }
+ return FALSE;
+}
+
+void wsrep_set_SE_checkpoint(XID* xid)
+{
+ plugin_foreach(NULL, set_SE_checkpoint, MYSQL_STORAGE_ENGINE_PLUGIN, xid);
+}
+
+static my_bool get_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg)
+{
+ XID* xid= reinterpret_cast<XID*>(arg);
+ handlerton* hton= plugin_data(plugin, handlerton *);
+ if (hton->db_type == DB_TYPE_INNODB)
+ {
+ hton->wsrep_get_checkpoint(hton, xid);
+ const wsrep_uuid_t* uuid(wsrep_xid_uuid(xid));
+ char uuid_str[40] = {0, };
+ wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str));
+ WSREP_DEBUG("Read WSREPXid from InnoDB: %s:%lld",
+ uuid_str, (long long)wsrep_xid_seqno(xid));
+
+ }
+ return FALSE;
+}
+
+void wsrep_get_SE_checkpoint(XID* xid)
+{
+ plugin_foreach(NULL, get_SE_checkpoint, MYSQL_STORAGE_ENGINE_PLUGIN, xid);
+}
+
+static void wsrep_view_handler_cb (void* app_ctx,
+ void* recv_ctx,
+ const wsrep_view_info_t* view,
+ const char* state,
+ size_t state_len,
+ void** sst_req,
+ ssize_t* sst_req_len)
+{
+ wsrep_member_status_t new_status= local_status.get();
+
+ if (memcmp(&cluster_uuid, &view->uuid, sizeof(wsrep_uuid_t)))
+ {
+ memcpy((wsrep_uuid_t*)&cluster_uuid, &view->uuid, sizeof(cluster_uuid));
+ wsrep_uuid_print (&cluster_uuid, cluster_uuid_str,
+ sizeof(cluster_uuid_str));
+ }
+
+ wsrep_cluster_conf_id= view->view;
+ wsrep_cluster_status= cluster_status_str[view->status];
+ wsrep_cluster_size= view->memb_num;
+ wsrep_local_index= view->my_idx;
+
+ WSREP_INFO("New cluster view: global state: %s:%lld, view# %lld: %s, "
+ "number of nodes: %ld, my index: %ld, protocol version %d",
+ wsrep_cluster_state_uuid, (long long)view->seqno,
+ (long long)wsrep_cluster_conf_id, wsrep_cluster_status,
+ wsrep_cluster_size, wsrep_local_index, view->proto_ver);
+
+ /* Proceed further only if view is PRIMARY */
+ if (WSREP_VIEW_PRIMARY != view->status) {
+ wsrep_ready_set(FALSE);
+ new_status= WSREP_MEMBER_UNDEFINED;
+ /* Always record local_uuid and local_seqno in non-prim since this
+ * may lead to re-initializing provider and start position is
+ * determined according to these variables */
+ // WRONG! local_uuid should be the last primary configuration uuid we were
+ // a member of. local_seqno should be updated in commit calls.
+ // local_uuid= cluster_uuid;
+ // local_seqno= view->first - 1;
+ goto out;
+ }
+
+ switch (view->proto_ver)
+ {
+ case 0:
+ case 1:
+ case 2:
+ // version change
+ if (view->proto_ver != wsrep_protocol_version)
+ {
+ my_bool wsrep_ready_saved= wsrep_ready;
+ wsrep_ready_set(FALSE);
+ WSREP_INFO("closing client connections for "
+ "protocol change %ld -> %d",
+ wsrep_protocol_version, view->proto_ver);
+ wsrep_close_client_connections(TRUE);
+ wsrep_protocol_version= view->proto_ver;
+ wsrep_ready_set(wsrep_ready_saved);
+ }
+ break;
+ default:
+ WSREP_ERROR("Unsupported application protocol version: %d",
+ view->proto_ver);
+ unireg_abort(1);
+ }
+
+ if (view->state_gap)
+ {
+ WSREP_WARN("Gap in state sequence. Need state transfer.");
+
+ /* After that wsrep will call wsrep_sst_prepare. */
+ /* keep ready flag 0 until we receive the snapshot */
+ wsrep_ready_set(FALSE);
+
+ /* Close client connections to ensure that they don't interfere
+ * with SST */
+ WSREP_DEBUG("[debug]: closing client connections for PRIM");
+ wsrep_close_client_connections(TRUE);
+
+ *sst_req_len= wsrep_sst_prepare (sst_req);
+
+ if (*sst_req_len < 0)
+ {
+ int err = *sst_req_len;
+ WSREP_ERROR("SST preparation failed: %d (%s)", -err, strerror(-err));
+ new_status= WSREP_MEMBER_UNDEFINED;
+ }
+ else
+ {
+ new_status= WSREP_MEMBER_JOINER;
+ }
+ }
+ else
+ {
+ /*
+ * NOTE: Initialize wsrep_group_uuid here only if it wasn't initialized
+ * before - OR - it was reinitilized on startup (lp:992840)
+ */
+ if (wsrep_startup)
+ {
+ if (wsrep_before_SE())
+ {
+ wsrep_SE_init_grab();
+ // Signal mysqld init thread to continue
+ wsrep_sst_complete (&cluster_uuid, view->seqno, false);
+ // and wait for SE initialization
+ wsrep_SE_init_wait();
+ }
+ else
+ {
+ local_uuid= cluster_uuid;
+ local_seqno= view->seqno;
+ }
+ /* Init storage engine XIDs from first view */
+ XID xid;
+ wsrep_xid_init(&xid, &local_uuid, local_seqno);
+ wsrep_set_SE_checkpoint(&xid);
+ new_status= WSREP_MEMBER_JOINED;
+ }
+
+ // just some sanity check
+ if (memcmp (&local_uuid, &cluster_uuid, sizeof (wsrep_uuid_t)))
+ {
+ WSREP_ERROR("Undetected state gap. Can't continue.");
+ wsrep_log_states(WSREP_LOG_FATAL, &cluster_uuid, view->seqno,
+ &local_uuid, -1);
+ unireg_abort(1);
+ }
+ }
+
+ if (wsrep_auto_increment_control)
+ {
+ global_system_variables.auto_increment_offset= view->my_idx + 1;
+ global_system_variables.auto_increment_increment= view->memb_num;
+ }
+
+out:
+ wsrep_startup= FALSE;
+ local_status.set(new_status, view);
+}
+
+void wsrep_ready_set (my_bool x)
+{
+ WSREP_DEBUG("Setting wsrep_ready to %d", x);
+ if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort();
+ if (wsrep_ready != x)
+ {
+ wsrep_ready= x;
+ mysql_cond_signal (&COND_wsrep_ready);
+ }
+ mysql_mutex_unlock (&LOCK_wsrep_ready);
+}
+
+// Wait until wsrep has reached ready state
+void wsrep_ready_wait ()
+{
+ if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort();
+ while (!wsrep_ready)
+ {
+ WSREP_INFO("Waiting to reach ready state");
+ mysql_cond_wait (&COND_wsrep_ready, &LOCK_wsrep_ready);
+ }
+ WSREP_INFO("ready state reached");
+ mysql_mutex_unlock (&LOCK_wsrep_ready);
+}
+
+static void wsrep_synced_cb(void* app_ctx)
+{
+ WSREP_INFO("Synchronized with group, ready for connections");
+ if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort();
+ if (!wsrep_ready)
+ {
+ wsrep_ready= TRUE;
+ mysql_cond_signal (&COND_wsrep_ready);
+ }
+ local_status.set(WSREP_MEMBER_SYNCED);
+ mysql_mutex_unlock (&LOCK_wsrep_ready);
+}
+
+static void wsrep_init_position()
+{
+ /* read XIDs from storage engines */
+ XID xid;
+ memset(&xid, 0, sizeof(xid));
+ xid.formatID= -1;
+ wsrep_get_SE_checkpoint(&xid);
+
+ if (xid.formatID == -1)
+ {
+ WSREP_INFO("Read nil XID from storage engines, skipping position init");
+ return;
+ }
+ else if (!wsrep_is_wsrep_xid(&xid))
+ {
+ WSREP_WARN("Read non-wsrep XID from storage engines, skipping position init");
+ return;
+ }
+
+ const wsrep_uuid_t* uuid= wsrep_xid_uuid(&xid);
+ const wsrep_seqno_t seqno= wsrep_xid_seqno(&xid);
+
+ char uuid_str[40] = {0, };
+ wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str));
+ WSREP_INFO("Initial position: %s:%lld", uuid_str, (long long)seqno);
+
+
+ if (!memcmp(&local_uuid, &WSREP_UUID_UNDEFINED, sizeof(local_uuid)) &&
+ local_seqno == WSREP_SEQNO_UNDEFINED)
+ {
+ // Initial state
+ local_uuid= *uuid;
+ local_seqno= seqno;
+ }
+ else if (memcmp(&local_uuid, uuid, sizeof(local_uuid)) ||
+ local_seqno != seqno)
+ {
+ WSREP_WARN("Initial position was provided by configuration or SST, "
+ "avoiding override");
+ }
+}
+
+
+int wsrep_init()
+{
+ int rcode= -1;
+
+ wsrep_ready_set(FALSE);
+ assert(wsrep_provider);
+ wsrep_format_desc= new Format_description_log_event(4);
+ wsrep_init_position();
+
+ if ((rcode= wsrep_load(wsrep_provider, &wsrep, wsrep_log_cb)) != WSREP_OK)
+ {
+ if (strcasecmp(wsrep_provider, WSREP_NONE))
+ {
+ WSREP_ERROR("wsrep_load(%s) failed: %s (%d). Reverting to no provider.",
+ wsrep_provider, strerror(rcode), rcode);
+ strcpy((char*)wsrep_provider, WSREP_NONE); // damn it's a dirty hack
+ (void) wsrep_init();
+ return rcode;
+ }
+ else /* this is for recursive call above */
+ {
+ WSREP_ERROR("Could not revert to no provider: %s (%d). Need to abort.",
+ strerror(rcode), rcode);
+ unireg_abort(1);
+ }
+ }
+
+ if (strlen(wsrep_provider)== 0 ||
+ !strcmp(wsrep_provider, WSREP_NONE))
+ {
+ // enable normal operation in case no provider is specified
+ wsrep_ready_set(TRUE);
+ global_system_variables.wsrep_on = 0;
+ return 0;
+ }
+ else
+ {
+ global_system_variables.wsrep_on = 1;
+ strncpy(provider_name,
+ wsrep->provider_name, sizeof(provider_name) - 1);
+ strncpy(provider_version,
+ wsrep->provider_version, sizeof(provider_version) - 1);
+ strncpy(provider_vendor,
+ wsrep->provider_vendor, sizeof(provider_vendor) - 1);
+ }
+
+ if (!wsrep_data_home_dir || strlen(wsrep_data_home_dir) == 0)
+ wsrep_data_home_dir = mysql_real_data_home;
+
+ char node_addr[512]= { 0, };
+ if (!wsrep_node_address || !strcmp(wsrep_node_address, ""))
+ {
+ size_t const node_addr_max= sizeof(node_addr);
+ size_t const ret= guess_ip(node_addr, node_addr_max);
+ if (!(ret > 0 && ret < node_addr_max))
+ {
+ WSREP_WARN("Failed to guess base node address. Set it explicitly via "
+ "wsrep_node_address.");
+ node_addr[0]= '\0';
+ }
+ }
+ else
+ {
+ strncpy(node_addr, wsrep_node_address, sizeof(node_addr) - 1);
+ }
+
+ static char inc_addr[512]= { 0, };
+
+ if ((!wsrep_node_incoming_address ||
+ !strcmp (wsrep_node_incoming_address, WSREP_NODE_INCOMING_AUTO)))
+ {
+ size_t const node_addr_len= strlen(node_addr);
+ if (node_addr_len > 0)
+ {
+ const char* const colon= strrchr(node_addr, ':');
+ if (strchr(node_addr, ':') == colon) // 1 or 0 ':'
+ {
+ size_t const inc_addr_max= sizeof (inc_addr);
+ size_t const ip_len= colon ? colon - node_addr : node_addr_len;
+ if (ip_len + 7 /* :55555\0 */ < inc_addr_max)
+ {
+ memcpy (inc_addr, node_addr, ip_len);
+ snprintf(inc_addr + ip_len, inc_addr_max - ip_len, ":%u",mysqld_port);
+ }
+ else
+ {
+ WSREP_WARN("Guessing address for incoming client connections: "
+ "address too long.");
+ inc_addr[0]= '\0';
+ }
+ }
+ else
+ {
+ WSREP_WARN("Guessing address for incoming client connections: "
+ "too many colons :) .");
+ inc_addr[0]= '\0';
+ }
+ }
+
+ // this is to display detected address on SHOW VARIABLES...
+ wsrep_node_incoming_address = inc_addr;
+
+ if (!strlen(wsrep_node_incoming_address))
+ {
+ WSREP_WARN("Guessing address for incoming client connections failed. "
+ "Try setting wsrep_node_incoming_address explicitly.");
+ }
+ }
+
+ struct wsrep_init_args wsrep_args;
+
+ wsrep_args.data_dir = wsrep_data_home_dir;
+ wsrep_args.node_name = (wsrep_node_name) ? wsrep_node_name : "";
+ wsrep_args.node_address = node_addr;
+ wsrep_args.node_incoming = wsrep_node_incoming_address;
+ wsrep_args.options = (wsrep_provider_options) ?
+ wsrep_provider_options : "";
+ wsrep_args.proto_ver = wsrep_max_protocol_version;
+
+ wsrep_args.state_uuid = &local_uuid;
+ wsrep_args.state_seqno = local_seqno;
+
+ wsrep_args.logger_cb = wsrep_log_cb;
+ wsrep_args.view_handler_cb = wsrep_view_handler_cb;
+ wsrep_args.apply_cb = wsrep_apply_cb;
+ wsrep_args.commit_cb = wsrep_commit_cb;
+ wsrep_args.sst_donate_cb = wsrep_sst_donate_cb;
+ wsrep_args.synced_cb = wsrep_synced_cb;
+
+ rcode = wsrep->init(wsrep, &wsrep_args);
+
+ if (rcode)
+ {
+ DBUG_PRINT("wsrep",("wsrep::init() failed: %d", rcode));
+ WSREP_ERROR("wsrep::init() failed: %d, must shutdown", rcode);
+ free(wsrep);
+ wsrep = NULL;
+ }
+
+ return rcode;
+}
+
+extern "C" int wsrep_on(void *);
+
+void wsrep_init_startup (bool first)
+{
+ if (wsrep_init()) unireg_abort(1);
+
+ wsrep_thr_lock_init(wsrep_thd_is_brute_force, wsrep_abort_thd,
+ wsrep_debug, wsrep_convert_LOCK_to_trx, wsrep_on);
+
+ /* Skip replication start if no cluster address */
+ if (!wsrep_cluster_address || strlen(wsrep_cluster_address) == 0) return;
+
+ if (first) wsrep_sst_grab(); // do it so we can wait for SST below
+
+ if (!wsrep_start_replication()) unireg_abort(1);
+
+ wsrep_create_rollbacker();
+ wsrep_create_appliers(1);
+
+ if (first && !wsrep_sst_wait()) unireg_abort(1);// wait until SST is completed
+}
+
+
+void wsrep_deinit()
+{
+ wsrep_unload(wsrep);
+ wsrep= 0;
+ provider_name[0]= '\0';
+ provider_version[0]= '\0';
+ provider_vendor[0]= '\0';
+
+ delete wsrep_format_desc;
+ wsrep_format_desc= NULL;
+}
+
+void wsrep_recover()
+{
+ if (!memcmp(&local_uuid, &WSREP_UUID_UNDEFINED, sizeof(wsrep_uuid_t)) &&
+ local_seqno == -2)
+ {
+ char uuid_str[40];
+ wsrep_uuid_print(&local_uuid, uuid_str, sizeof(uuid_str));
+ WSREP_INFO("Position %s:%lld given at startup, skipping position recovery",
+ uuid_str, (long long)local_seqno);
+ return;
+ }
+ XID xid;
+ memset(&xid, 0, sizeof(xid));
+ xid.formatID= -1;
+ wsrep_get_SE_checkpoint(&xid);
+ char uuid_str[40];
+ wsrep_uuid_print(wsrep_xid_uuid(&xid), uuid_str, sizeof(uuid_str));
+ WSREP_INFO("Recovered position: %s:%lld", uuid_str,
+ (long long)wsrep_xid_seqno(&xid));
+}
+
+
+void wsrep_stop_replication(THD *thd)
+{
+ WSREP_INFO("Stop replication");
+ if (!wsrep)
+ {
+ WSREP_INFO("Provider was not loaded, in stop replication");
+ return;
+ }
+
+ /* disconnect from group first to get wsrep_ready == FALSE */
+ WSREP_DEBUG("Provider disconnect");
+ wsrep->disconnect(wsrep);
+
+ wsrep_connected= FALSE;
+
+ wsrep_close_client_connections(TRUE);
+
+ /* wait until appliers have stopped */
+ wsrep_wait_appliers_close(thd);
+
+ return;
+}
+
+
+bool wsrep_start_replication()
+{
+ wsrep_status_t rcode;
+
+ /*
+ if provider is trivial, don't even try to connect,
+ but resume local node operation
+ */
+ if (strlen(wsrep_provider)== 0 ||
+ !strcmp(wsrep_provider, WSREP_NONE))
+ {
+ // enable normal operation in case no provider is specified
+ wsrep_ready_set(TRUE);
+ return true;
+ }
+
+ if (!wsrep_cluster_address || strlen(wsrep_cluster_address)== 0)
+ {
+ // if provider is non-trivial, but no address is specified, wait for address
+ wsrep_ready_set(FALSE);
+ return true;
+ }
+
+ WSREP_INFO("Start replication");
+
+ if ((rcode = wsrep->connect(wsrep,
+ wsrep_cluster_name,
+ wsrep_cluster_address,
+ wsrep_sst_donor)))
+ {
+ if (-ESOCKTNOSUPPORT == rcode)
+ {
+ DBUG_PRINT("wsrep",("unrecognized cluster address: '%s', rcode: %d",
+ wsrep_cluster_address, rcode));
+ WSREP_ERROR("unrecognized cluster address: '%s', rcode: %d",
+ wsrep_cluster_address, rcode);
+ }
+ else
+ {
+ DBUG_PRINT("wsrep",("wsrep->connect() failed: %d", rcode));
+ WSREP_ERROR("wsrep::connect() failed: %d", rcode);
+ }
+
+ return false;
+ }
+ else
+ {
+ wsrep_connected= TRUE;
+
+ uint64_t caps = wsrep->capabilities (wsrep);
+
+ wsrep_incremental_data_collection =
+ (caps & WSREP_CAP_WRITE_SET_INCREMENTS);
+
+ char* opts= wsrep->options_get(wsrep);
+ if (opts)
+ {
+ wsrep_provider_options_init(opts);
+ free(opts);
+ }
+ else
+ {
+ WSREP_WARN("Failed to get wsrep options");
+ }
+ }
+
+ return true;
+}
+
+bool
+wsrep_causal_wait (THD* thd)
+{
+ if (thd->variables.wsrep_causal_reads && thd->variables.wsrep_on &&
+ !thd->in_active_multi_stmt_transaction() &&
+ thd->wsrep_conflict_state != REPLAYING)
+ {
+ // This allows autocommit SELECTs and a first SELECT after SET AUTOCOMMIT=0
+ // TODO: modify to check if thd has locked any rows.
+ wsrep_seqno_t seqno;
+ wsrep_status_t ret= wsrep->causal_read (wsrep, &seqno);
+
+ if (unlikely(WSREP_OK != ret))
+ {
+ const char* msg;
+ int err;
+
+ // Possibly relevant error codes:
+ // ER_CHECKREAD, ER_ERROR_ON_READ, ER_INVALID_DEFAULT, ER_EMPTY_QUERY,
+ // ER_FUNCTION_NOT_DEFINED, ER_NOT_ALLOWED_COMMAND, ER_NOT_SUPPORTED_YET,
+ // ER_FEATURE_DISABLED, ER_QUERY_INTERRUPTED
+
+ switch (ret)
+ {
+ case WSREP_NOT_IMPLEMENTED:
+ msg= "consistent reads by wsrep backend. "
+ "Please unset wsrep_causal_reads variable.";
+ err= ER_NOT_SUPPORTED_YET;
+ break;
+ default:
+ msg= "Causal wait failed.";
+ err= ER_LOCK_WAIT_TIMEOUT; // NOTE: the above msg won't be displayed
+ // with ER_LOCK_WAIT_TIMEOUT
+ }
+
+ my_error(err, MYF(0), msg);
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Helpers to deal with TOI key arrays
+ */
+typedef struct wsrep_key_arr
+{
+ wsrep_key_t* keys;
+ size_t keys_len;
+} wsrep_key_arr_t;
+
+
+static void wsrep_keys_free(wsrep_key_arr_t* key_arr)
+{
+ for (size_t i= 0; i < key_arr->keys_len; ++i)
+ {
+ my_free((wsrep_key_part_t*)key_arr->keys[i].key_parts);
+ }
+ my_free(key_arr->keys);
+ key_arr->keys= 0;
+ key_arr->keys_len= 0;
+}
+
+
+/*!
+ * @param db Database string
+ * @param table Table string
+ * @param key Array of wsrep_key_t
+ * @param key_len In: number of elements in key array, Out: number of
+ * elements populated
+ *
+ * @return true if preparation was successful, otherwise false.
+ */
+
+static bool wsrep_prepare_key_for_isolation(const char* db,
+ const char* table,
+ wsrep_key_part_t* key,
+ size_t* key_len)
+{
+ if (*key_len < 2) return false;
+
+ switch (wsrep_protocol_version)
+ {
+ case 0:
+ *key_len= 0;
+ break;
+ case 1:
+ case 2:
+ {
+ *key_len= 0;
+ if (db)
+ {
+ // sql_print_information("%s.%s", db, table);
+ if (db)
+ {
+ key[*key_len].buf= db;
+ key[*key_len].buf_len= strlen(db);
+ ++(*key_len);
+ if (table)
+ {
+ key[*key_len].buf= table;
+ key[*key_len].buf_len= strlen(table);
+ ++(*key_len);
+ }
+ }
+ }
+ break;
+ }
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/* Prepare key list from db/table and table_list */
+static bool wsrep_prepare_keys_for_isolation(THD* thd,
+ const char* db,
+ const char* table,
+ const TABLE_LIST* table_list,
+ wsrep_key_arr_t* ka)
+{
+ ka->keys= 0;
+ ka->keys_len= 0;
+
+ extern TABLE* find_temporary_table(THD*, const TABLE_LIST*);
+
+ if (db || table)
+ {
+ TABLE_LIST tmp_table;
+ bzero((char*) &tmp_table,sizeof(tmp_table));
+ tmp_table.table_name= (char*)db;
+ tmp_table.db= (char*)table;
+ if (!table || !find_temporary_table(thd, &tmp_table))
+ {
+ if (!(ka->keys= (wsrep_key_t*)my_malloc(sizeof(wsrep_key_t), MYF(0))))
+ {
+ sql_print_error("Can't allocate memory for key_array");
+ goto err;
+ }
+ ka->keys_len= 1;
+ if (!(ka->keys[0].key_parts= (wsrep_key_part_t*)
+ my_malloc(sizeof(wsrep_key_part_t)*2, MYF(0))))
+ {
+ sql_print_error("Can't allocate memory for key_parts");
+ goto err;
+ }
+ ka->keys[0].key_parts_len= 2;
+ if (!wsrep_prepare_key_for_isolation(
+ db, table,
+ (wsrep_key_part_t*)ka->keys[0].key_parts,
+ &ka->keys[0].key_parts_len))
+ {
+ sql_print_error("Preparing keys for isolation failed");
+ goto err;
+ }
+ }
+ }
+
+ for (const TABLE_LIST* table= table_list; table; table= table->next_global)
+ {
+ if (!find_temporary_table(thd, table))
+ {
+ wsrep_key_t* tmp;
+ tmp= (wsrep_key_t*)my_realloc(
+ ka->keys, (ka->keys_len + 1) * sizeof(wsrep_key_t), MYF(0));
+ if (!tmp)
+ {
+ sql_print_error("Can't allocate memory for key_array");
+ goto err;
+ }
+ ka->keys= tmp;
+ if (!(ka->keys[ka->keys_len].key_parts= (wsrep_key_part_t*)
+ my_malloc(sizeof(wsrep_key_part_t)*2, MYF(0))))
+ {
+ sql_print_error("Can't allocate memory for key_parts");
+ goto err;
+ }
+ ka->keys[ka->keys_len].key_parts_len= 2;
+ ++ka->keys_len;
+ if (!wsrep_prepare_key_for_isolation(
+ table->db, table->table_name,
+ (wsrep_key_part_t*)ka->keys[ka->keys_len - 1].key_parts,
+ &ka->keys[ka->keys_len - 1].key_parts_len))
+ {
+ sql_print_error("Preparing keys for isolation failed");
+ goto err;
+ }
+ }
+ }
+ return true;
+err:
+ wsrep_keys_free(ka);
+ return false;
+}
+
+
+
+bool wsrep_prepare_key_for_innodb(const uchar* cache_key,
+ size_t cache_key_len,
+ const uchar* row_id,
+ size_t row_id_len,
+ wsrep_key_part_t* key,
+ size_t* key_len)
+{
+ if (*key_len < 3) return false;
+
+ *key_len= 0;
+ switch (wsrep_protocol_version)
+ {
+ case 0:
+ {
+ key[*key_len].buf = cache_key;
+ key[*key_len].buf_len = cache_key_len;
+ ++(*key_len);
+ break;
+ }
+ case 1:
+ case 2:
+ {
+ key[*key_len].buf = cache_key;
+ key[*key_len].buf_len = strlen( (char*)cache_key );
+ ++(*key_len);
+ key[*key_len].buf = cache_key + strlen( (char*)cache_key ) + 1;
+ key[*key_len].buf_len = strlen( (char*)(key[*key_len].buf) );
+ ++(*key_len);
+ break;
+ }
+ default:
+ return false;
+ }
+
+ key[*key_len].buf = row_id;
+ key[*key_len].buf_len = row_id_len;
+ ++(*key_len);
+
+ return true;
+}
+
+/*
+ * Construct Query_log_Event from thd query and serialize it
+ * into buffer.
+ *
+ * Return 0 in case of success, 1 in case of error.
+ */
+int wsrep_to_buf_helper(
+ THD* thd, const char *query, uint query_len, uchar** buf, uint* buf_len)
+{
+ IO_CACHE tmp_io_cache;
+ if (open_cached_file(&tmp_io_cache, mysql_tmpdir, TEMP_PREFIX,
+ 65536, MYF(MY_WME)))
+ return 1;
+ Query_log_event ev(thd, query, query_len, FALSE, FALSE, FALSE, 0);
+ int ret(0);
+ if (ev.write(&tmp_io_cache)) ret= 1;
+ if (!ret && wsrep_write_cache(&tmp_io_cache, buf, buf_len)) ret= 1;
+ close_cached_file(&tmp_io_cache);
+ return ret;
+}
+
+#include "sql_show.h"
+static int
+create_view_query(THD *thd, uchar** buf, uint* buf_len)
+{
+ LEX *lex= thd->lex;
+ SELECT_LEX *select_lex= &lex->select_lex;
+ TABLE_LIST *first_table= select_lex->table_list.first;
+ TABLE_LIST *views = first_table;
+
+ String buff;
+ const LEX_STRING command[3]=
+ {{ C_STRING_WITH_LEN("CREATE ") },
+ { C_STRING_WITH_LEN("ALTER ") },
+ { C_STRING_WITH_LEN("CREATE OR REPLACE ") }};
+
+ buff.append(command[thd->lex->create_view_mode].str,
+ command[thd->lex->create_view_mode].length);
+
+ if (!lex->definer)
+ {
+ /*
+ DEFINER-clause is missing; we have to create default definer in
+ persistent arena to be PS/SP friendly.
+ If this is an ALTER VIEW then the current user should be set as
+ the definer.
+ */
+
+ if (!(lex->definer= create_default_definer(thd)))
+ {
+ WSREP_WARN("view default definer issue");
+ }
+ }
+
+ views->algorithm = lex->create_view_algorithm;
+ views->definer.user = lex->definer->user;
+ views->definer.host = lex->definer->host;
+ views->view_suid = lex->create_view_suid;
+ views->with_check = lex->create_view_check;
+
+ view_store_options(thd, views, &buff);
+ buff.append(STRING_WITH_LEN("VIEW "));
+ /* Test if user supplied a db (ie: we did not use thd->db) */
+ if (views->db && views->db[0] &&
+ (thd->db == NULL || strcmp(views->db, thd->db)))
+ {
+ append_identifier(thd, &buff, views->db,
+ views->db_length);
+ buff.append('.');
+ }
+ append_identifier(thd, &buff, views->table_name,
+ views->table_name_length);
+ if (lex->view_list.elements)
+ {
+ List_iterator_fast<LEX_STRING> names(lex->view_list);
+ LEX_STRING *name;
+ int i;
+
+ for (i= 0; (name= names++); i++)
+ {
+ buff.append(i ? ", " : "(");
+ append_identifier(thd, &buff, name->str, name->length);
+ }
+ buff.append(')');
+ }
+ buff.append(STRING_WITH_LEN(" AS "));
+ //buff.append(views->source.str, views->source.length);
+ buff.append(thd->lex->create_view_select.str,
+ thd->lex->create_view_select.length);
+ //int errcode= query_error_code(thd, TRUE);
+ //if (thd->binlog_query(THD::STMT_QUERY_TYPE,
+ // buff.ptr(), buff.length(), FALSE, FALSE, FALSE, errcod
+ return wsrep_to_buf_helper(thd, buff.ptr(), buff.length(), buf, buf_len);
+}
+
+static int wsrep_TOI_begin(THD *thd, char *db_, char *table_,
+ const TABLE_LIST* table_list)
+{
+ wsrep_status_t ret(WSREP_WARNING);
+ uchar* buf(0);
+ uint buf_len(0);
+ int buf_err;
+
+ WSREP_DEBUG("TO BEGIN: %lld, %d : %s", (long long)thd->wsrep_trx_seqno,
+ thd->wsrep_exec_mode, thd->query() );
+ switch (thd->lex->sql_command)
+ {
+ case SQLCOM_CREATE_VIEW:
+ buf_err= create_view_query(thd, &buf, &buf_len);
+ break;
+ case SQLCOM_CREATE_PROCEDURE:
+ case SQLCOM_CREATE_SPFUNCTION:
+ buf_err= wsrep_create_sp(thd, &buf, &buf_len);
+ break;
+ case SQLCOM_CREATE_TRIGGER:
+ buf_err= wsrep_create_trigger_query(thd, &buf, &buf_len);
+ break;
+ case SQLCOM_CREATE_EVENT:
+ buf_err= wsrep_create_event_query(thd, &buf, &buf_len);
+ break;
+ default:
+ buf_err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), &buf,
+ &buf_len);
+ break;
+ }
+
+ wsrep_key_arr_t key_arr= {0, 0};
+ if (!buf_err &&
+ wsrep_prepare_keys_for_isolation(thd, db_, table_, table_list, &key_arr)&&
+ WSREP_OK == (ret = wsrep->to_execute_start(wsrep, thd->thread_id,
+ key_arr.keys, key_arr.keys_len,
+ buf, buf_len,
+ &thd->wsrep_trx_seqno)))
+ {
+ thd->wsrep_exec_mode= TOTAL_ORDER;
+ wsrep_to_isolation++;
+ if (buf) my_free(buf);
+ wsrep_keys_free(&key_arr);
+ WSREP_DEBUG("TO BEGIN: %lld, %d",(long long)thd->wsrep_trx_seqno,
+ thd->wsrep_exec_mode);
+ }
+ else {
+ /* jump to error handler in mysql_execute_command() */
+ WSREP_WARN("TO isolation failed for: %d, sql: %s. Check wsrep "
+ "connection state and retry the query.",
+ ret, (thd->query()) ? thd->query() : "void");
+ my_error(ER_LOCK_DEADLOCK, MYF(0), "WSREP replication failed. Check "
+ "your wsrep connection state and retry the query.");
+ if (buf) my_free(buf);
+ wsrep_keys_free(&key_arr);
+ return -1;
+ }
+ return 0;
+}
+
+static void wsrep_TOI_end(THD *thd) {
+ wsrep_status_t ret;
+ wsrep_to_isolation--;
+ WSREP_DEBUG("TO END: %lld, %d : %s", (long long)thd->wsrep_trx_seqno,
+ thd->wsrep_exec_mode, (thd->query()) ? thd->query() : "void")
+ if (WSREP_OK == (ret = wsrep->to_execute_end(wsrep, thd->thread_id))) {
+ WSREP_DEBUG("TO END: %lld", (long long)thd->wsrep_trx_seqno);
+ }
+ else {
+ WSREP_WARN("TO isolation end failed for: %d, sql: %s",
+ ret, (thd->query()) ? thd->query() : "void");
+ }
+}
+
+static int wsrep_RSU_begin(THD *thd, char *db_, char *table_)
+{
+ wsrep_status_t ret(WSREP_WARNING);
+ WSREP_DEBUG("RSU BEGIN: %lld, %d : %s", (long long)thd->wsrep_trx_seqno,
+ thd->wsrep_exec_mode, thd->query() );
+
+ ret = wsrep->desync(wsrep);
+ if (ret != WSREP_OK)
+ {
+ WSREP_WARN("RSU desync failed %d for %s", ret, thd->query());
+ my_error(ER_LOCK_DEADLOCK, MYF(0));
+ return(ret);
+ }
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+ wsrep_replaying++;
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+
+ if (wsrep_wait_committing_connections_close(5000))
+ {
+ /* no can do, bail out from DDL */
+ WSREP_WARN("RSU failed due to pending transactions, %s", thd->query());
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+ wsrep_replaying--;
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+
+ ret = wsrep->resync(wsrep);
+ if (ret != WSREP_OK)
+ {
+ WSREP_WARN("resync failed %d for %s", ret, thd->query());
+ }
+ my_error(ER_LOCK_DEADLOCK, MYF(0));
+ return(1);
+ }
+
+ wsrep_seqno_t seqno = wsrep->pause(wsrep);
+ if (seqno == WSREP_SEQNO_UNDEFINED)
+ {
+ WSREP_WARN("pause failed %lld for %s", (long long)seqno, thd->query());
+ return(1);
+ }
+ WSREP_DEBUG("paused at %lld", (long long)seqno);
+ thd->variables.wsrep_on = 0;
+ return 0;
+}
+
+static void wsrep_RSU_end(THD *thd)
+{
+ wsrep_status_t ret(WSREP_WARNING);
+ WSREP_DEBUG("RSU END: %lld, %d : %s", (long long)thd->wsrep_trx_seqno,
+ thd->wsrep_exec_mode, thd->query() );
+
+
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+ wsrep_replaying--;
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+
+ ret = wsrep->resume(wsrep);
+ if (ret != WSREP_OK)
+ {
+ WSREP_WARN("resume failed %d for %s", ret, thd->query());
+ }
+ ret = wsrep->resync(wsrep);
+ if (ret != WSREP_OK)
+ {
+ WSREP_WARN("resync failed %d for %s", ret, thd->query());
+ return;
+ }
+ thd->variables.wsrep_on = 1;
+ return;
+}
+
+int wsrep_to_isolation_begin(THD *thd, char *db_, char *table_,
+ const TABLE_LIST* table_list)
+{
+ int ret= 0;
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if (thd->wsrep_conflict_state == MUST_ABORT)
+ {
+ WSREP_INFO("thread: %lu, %s has been aborted due to multi-master conflict",
+ thd->thread_id, thd->query());
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ return WSREP_TRX_FAIL;
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ if (wsrep_debug && thd->mdl_context.has_locks())
+ {
+ WSREP_DEBUG("thread holds MDL locks at TI begin: %s %lu",
+ thd->query(), thd->thread_id);
+ }
+ if (thd->variables.wsrep_on && thd->wsrep_exec_mode==LOCAL_STATE)
+ {
+ switch (wsrep_OSU_method_options) {
+ case WSREP_OSU_TOI: ret = wsrep_TOI_begin(thd, db_, table_,
+ table_list); break;
+ case WSREP_OSU_RSU: ret = wsrep_RSU_begin(thd, db_, table_); break;
+ }
+ if (!ret)
+ {
+ thd->wsrep_exec_mode= TOTAL_ORDER;
+ }
+ }
+ return ret;
+}
+
+void wsrep_to_isolation_end(THD *thd) {
+ if (thd->wsrep_exec_mode==TOTAL_ORDER)
+ {
+ switch(wsrep_OSU_method_options)
+ {
+ case WSREP_OSU_TOI: return wsrep_TOI_end(thd);
+ case WSREP_OSU_RSU: return wsrep_RSU_end(thd);
+ }
+ }
+}
+
+#define WSREP_MDL_LOG(severity, msg, req, gra) \
+ WSREP_##severity( \
+ "%s\n" \
+ "request: (%lu \tseqno %lld \twsrep (%d, %d, %d) cmd %d %d \t%s)\n" \
+ "granted: (%lu \tseqno %lld \twsrep (%d, %d, %d) cmd %d %d \t%s)", \
+ msg, \
+ req->thread_id, (long long)req->wsrep_trx_seqno, \
+ req->wsrep_exec_mode, req->wsrep_query_state, req->wsrep_conflict_state, \
+ req->command, req->lex->sql_command, req->query(), \
+ gra->thread_id, (long long)gra->wsrep_trx_seqno, \
+ gra->wsrep_exec_mode, gra->wsrep_query_state, gra->wsrep_conflict_state, \
+ gra->command, gra->lex->sql_command, gra->query());
+
+bool
+wsrep_grant_mdl_exception(MDL_context *requestor_ctx,
+ MDL_ticket *ticket
+) {
+ if (!WSREP_ON) return FALSE;
+
+ THD *request_thd = requestor_ctx->get_thd();
+ THD *granted_thd = ticket->get_ctx()->get_thd();
+ bool ret = FALSE;
+
+ mysql_mutex_lock(&request_thd->LOCK_wsrep_thd);
+ if (request_thd->wsrep_exec_mode == TOTAL_ORDER ||
+ request_thd->wsrep_exec_mode == REPL_RECV)
+ {
+ mysql_mutex_unlock(&request_thd->LOCK_wsrep_thd);
+ WSREP_MDL_LOG(DEBUG, "MDL conflict ", request_thd, granted_thd);
+ ticket->wsrep_report(wsrep_debug);
+
+ mysql_mutex_lock(&granted_thd->LOCK_wsrep_thd);
+ if (granted_thd->wsrep_exec_mode == TOTAL_ORDER ||
+ granted_thd->wsrep_exec_mode == REPL_RECV)
+ {
+ WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", request_thd, granted_thd);
+ ticket->wsrep_report(true);
+ mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd);
+ ret = TRUE;
+ }
+ else if (granted_thd->lex->sql_command == SQLCOM_FLUSH)
+ {
+ WSREP_DEBUG("mdl granted over FLUSH BF");
+ ticket->wsrep_report(wsrep_debug);
+ mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd);
+ ret = TRUE;
+ }
+ else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE)
+ {
+ WSREP_DEBUG("DROP caused BF abort");
+ ticket->wsrep_report(wsrep_debug);
+ mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd);
+ wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1);
+ ret = FALSE;
+ }
+ else if (granted_thd->wsrep_query_state == QUERY_COMMITTING)
+ {
+ WSREP_DEBUG("mdl granted, but commiting thd abort scheduled");
+ ticket->wsrep_report(wsrep_debug);
+ mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd);
+ wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1);
+ ret = FALSE;
+ }
+ else
+ {
+ WSREP_MDL_LOG(DEBUG, "MDL conflict-> BF abort", request_thd, granted_thd);
+ ticket->wsrep_report(wsrep_debug);
+ mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd);
+ wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1);
+ ret = FALSE;
+ }
+ }
+ else
+ {
+ mysql_mutex_unlock(&request_thd->LOCK_wsrep_thd);
+ }
+ return ret;
+}
diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h
new file mode 100644
index 00000000000..6b9640cea70
--- /dev/null
+++ b/sql/wsrep_mysqld.h
@@ -0,0 +1,383 @@
+/* Copyright 2008-2012 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef WSREP_MYSQLD_H
+#define WSREP_MYSQLD_H
+
+#include "mysqld.h"
+typedef struct st_mysql_show_var SHOW_VAR;
+#include <sql_priv.h>
+#include "../wsrep/wsrep_api.h"
+
+class set_var;
+class THD;
+
+#ifdef WITH_WSREP
+#include "../wsrep/wsrep_api.h"
+//#include "wsrep_mysqld.h"
+ enum wsrep_exec_mode {
+ LOCAL_STATE,
+ REPL_RECV,
+ TOTAL_ORDER,
+ LOCAL_COMMIT,
+ };
+ enum wsrep_query_state {
+ QUERY_IDLE,
+ QUERY_EXEC,
+ QUERY_COMMITTING,
+ QUERY_EXITING,
+ QUERY_ROLLINGBACK,
+ };
+ enum wsrep_conflict_state {
+ NO_CONFLICT,
+ MUST_ABORT,
+ ABORTING,
+ ABORTED,
+ MUST_REPLAY,
+ REPLAYING,
+ RETRY_AUTOCOMMIT,
+ CERT_FAILURE,
+ };
+ enum wsrep_consistency_check_mode {
+ NO_CONSISTENCY_CHECK,
+ CONSISTENCY_CHECK_DECLARED,
+ CONSISTENCY_CHECK_RUNNING,
+ };
+#endif
+
+// Global wsrep parameters
+extern wsrep_t* wsrep;
+
+// MySQL wsrep options
+extern const char* wsrep_provider;
+extern const char* wsrep_provider_options;
+extern const char* wsrep_cluster_name;
+extern const char* wsrep_cluster_address;
+extern const char* wsrep_node_name;
+extern const char* wsrep_node_address;
+extern const char* wsrep_node_incoming_address;
+extern const char* wsrep_data_home_dir;
+extern const char* wsrep_dbug_option;
+extern long wsrep_slave_threads;
+extern my_bool wsrep_debug;
+extern my_bool wsrep_convert_LOCK_to_trx;
+extern ulong wsrep_retry_autocommit;
+extern my_bool wsrep_auto_increment_control;
+extern my_bool wsrep_drupal_282555_workaround;
+extern my_bool wsrep_incremental_data_collection;
+extern const char* wsrep_sst_method;
+extern const char* wsrep_sst_receive_address;
+extern char* wsrep_sst_auth;
+extern const char* wsrep_sst_donor;
+extern my_bool wsrep_sst_donor_rejects_queries;
+extern const char* wsrep_start_position;
+extern long long wsrep_max_ws_size;
+extern long wsrep_max_ws_rows;
+extern const char* wsrep_notify_cmd;
+extern my_bool wsrep_certify_nonPK;
+extern long wsrep_max_protocol_version;
+extern long wsrep_protocol_version;
+extern ulong wsrep_forced_binlog_format;
+extern ulong wsrep_OSU_method_options;
+extern my_bool wsrep_recovery;
+extern my_bool wsrep_replicate_myisam;
+extern my_bool wsrep_log_conflicts;
+extern ulong wsrep_mysql_replication_bundle;
+
+enum enum_wsrep_OSU_method { WSREP_OSU_TOI, WSREP_OSU_RSU };
+
+// MySQL status variables
+extern my_bool wsrep_connected;
+extern my_bool wsrep_ready;
+extern const char* wsrep_cluster_state_uuid;
+extern long long wsrep_cluster_conf_id;
+extern const char* wsrep_cluster_status;
+extern long wsrep_cluster_size;
+extern long wsrep_local_index;
+extern const char* wsrep_provider_name;
+extern const char* wsrep_provider_version;
+extern const char* wsrep_provider_vendor;
+extern int wsrep_show_status(THD *thd, SHOW_VAR *var, char *buff);
+extern void wsrep_free_status(THD *thd);
+
+#define WSREP_SST_ADDRESS_AUTO "AUTO"
+// MySQL variables funcs
+
+#define CHECK_ARGS (sys_var *self, THD* thd, set_var *var)
+#define UPDATE_ARGS (sys_var *self, THD* thd, enum_var_type type)
+#define DEFAULT_ARGS (THD* thd, enum_var_type var_type)
+#define INIT_ARGS (const char* opt)
+
+extern int wsrep_init_vars();
+
+extern bool wsrep_on_update UPDATE_ARGS;
+extern void wsrep_causal_reads_update UPDATE_ARGS;
+extern bool wsrep_start_position_check CHECK_ARGS;
+extern bool wsrep_start_position_update UPDATE_ARGS;
+extern void wsrep_start_position_init INIT_ARGS;
+
+extern bool wsrep_provider_check CHECK_ARGS;
+extern bool wsrep_provider_update UPDATE_ARGS;
+extern void wsrep_provider_init INIT_ARGS;
+
+extern bool wsrep_provider_options_check CHECK_ARGS;
+extern bool wsrep_provider_options_update UPDATE_ARGS;
+extern void wsrep_provider_options_init INIT_ARGS;
+
+extern bool wsrep_cluster_address_check CHECK_ARGS;
+extern bool wsrep_cluster_address_update UPDATE_ARGS;
+extern void wsrep_cluster_address_init INIT_ARGS;
+
+extern bool wsrep_cluster_name_check CHECK_ARGS;
+extern bool wsrep_cluster_name_update UPDATE_ARGS;
+
+extern bool wsrep_node_name_check CHECK_ARGS;
+extern bool wsrep_node_name_update UPDATE_ARGS;
+
+extern bool wsrep_node_address_check CHECK_ARGS;
+extern bool wsrep_node_address_update UPDATE_ARGS;
+extern void wsrep_node_address_init INIT_ARGS;
+
+extern bool wsrep_sst_method_check CHECK_ARGS;
+extern bool wsrep_sst_method_update UPDATE_ARGS;
+extern void wsrep_sst_method_init INIT_ARGS;
+
+extern bool wsrep_sst_receive_address_check CHECK_ARGS;
+extern bool wsrep_sst_receive_address_update UPDATE_ARGS;
+
+extern bool wsrep_sst_auth_check CHECK_ARGS;
+extern bool wsrep_sst_auth_update UPDATE_ARGS;
+extern void wsrep_sst_auth_init INIT_ARGS;
+
+extern bool wsrep_sst_donor_check CHECK_ARGS;
+extern bool wsrep_sst_donor_update UPDATE_ARGS;
+
+extern bool wsrep_slave_threads_check CHECK_ARGS;
+extern bool wsrep_slave_threads_update UPDATE_ARGS;
+
+extern bool wsrep_before_SE(); // initialize wsrep before storage
+ // engines (true) or after (false)
+extern int wsrep_init();
+extern void wsrep_deinit();
+extern void wsrep_recover();
+
+
+
+extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd);
+extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd);
+extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd);
+extern "C" const char * wsrep_thd_exec_mode_str(THD *thd);
+extern "C" const char * wsrep_thd_conflict_state_str(THD *thd);
+extern "C" const char * wsrep_thd_query_state_str(THD *thd);
+extern "C" wsrep_trx_handle_t* wsrep_thd_trx_handle(THD *thd);
+
+extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode);
+extern "C" void wsrep_thd_set_query_state(
+ THD *thd, enum wsrep_query_state state);
+extern "C" void wsrep_thd_set_conflict_state(
+ THD *thd, enum wsrep_conflict_state state);
+
+extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id);
+
+extern "C"void wsrep_thd_LOCK(THD *thd);
+extern "C"void wsrep_thd_UNLOCK(THD *thd);
+extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd);
+extern "C" time_t wsrep_thd_query_start(THD *thd);
+extern "C" my_thread_id wsrep_thd_thread_id(THD *thd);
+extern "C" int64_t wsrep_thd_trx_seqno(THD *thd);
+extern "C" query_id_t wsrep_thd_query_id(THD *thd);
+extern "C" char * wsrep_thd_query(THD *thd);
+extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd);
+extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id);
+extern "C" void wsrep_thd_awake(THD *thd, my_bool signal);
+
+
+
+/* wsrep initialization sequence at startup
+ * @param first wsrep_before_SE() value */
+extern void wsrep_init_startup(bool before);
+
+extern void wsrep_close_client_connections(my_bool wait_to_end);
+extern int wsrep_wait_committing_connections_close(int wait_time);
+extern void wsrep_close_applier(THD *thd);
+extern void wsrep_wait_appliers_close(THD *thd);
+extern void wsrep_close_applier_threads(int count);
+extern void wsrep_create_appliers(long threads = wsrep_slave_threads);
+extern void wsrep_create_rollbacker();
+extern void wsrep_kill_mysql(THD *thd);
+
+/* new defines */
+extern void wsrep_stop_replication(THD *thd);
+extern bool wsrep_start_replication();
+extern bool wsrep_causal_wait(THD* thd);
+extern int wsrep_check_opts (int argc, char* const* argv);
+extern void wsrep_prepend_PATH (const char* path);
+
+/* Other global variables */
+extern wsrep_seqno_t wsrep_locked_seqno;
+
+#define WSREP_ON \
+ (global_system_variables.wsrep_on)
+
+#define WSREP(thd) \
+ (WSREP_ON && (thd && thd->variables.wsrep_on))
+
+#define WSREP_CLIENT(thd) \
+ (WSREP(thd) && thd->wsrep_client_thread)
+
+#define WSREP_EMULATE_BINLOG(thd) \
+ (WSREP(thd) && wsrep_emulate_bin_log)
+
+// MySQL logging functions don't seem to understand long long length modifer.
+// This is a workaround. It also prefixes all messages with "WSREP"
+#define WSREP_LOG(fun, ...) \
+ { \
+ char msg[1024] = {'\0'}; \
+ snprintf(msg, sizeof(msg) - 1, ## __VA_ARGS__); \
+ fun("WSREP: %s", msg); \
+ }
+
+#define WSREP_DEBUG(...) \
+ if (wsrep_debug) WSREP_LOG(sql_print_information, ##__VA_ARGS__)
+#define WSREP_INFO(...) WSREP_LOG(sql_print_information, ##__VA_ARGS__)
+#define WSREP_WARN(...) WSREP_LOG(sql_print_warning, ##__VA_ARGS__)
+#define WSREP_ERROR(...) WSREP_LOG(sql_print_error, ##__VA_ARGS__)
+
+#define WSREP_LOG_CONFLICT_THD(thd, role) \
+ WSREP_LOG(sql_print_information, \
+ "%s: \n " \
+ " THD: %lu, mode: %s, state: %s, conflict: %s, seqno: %lld\n " \
+ " SQL: %s", \
+ role, wsrep_thd_thread_id(thd), wsrep_thd_exec_mode_str(thd), \
+ wsrep_thd_query_state_str(thd), \
+ wsrep_thd_conflict_state_str(thd), (long long)wsrep_thd_trx_seqno(thd), \
+ wsrep_thd_query(thd) \
+ );
+
+#define WSREP_LOG_CONFLICT(bf_thd, victim_thd, bf_abort) \
+ if (wsrep_debug || wsrep_log_conflicts) \
+ { \
+ WSREP_LOG(sql_print_information, "cluster conflict due to %s for threads:",\
+ (bf_abort) ? "high priority abort" : "certification failure" \
+ ); \
+ if (bf_thd) WSREP_LOG_CONFLICT_THD(bf_thd, "Winning thread"); \
+ if (victim_thd) WSREP_LOG_CONFLICT_THD(victim_thd, "Victim thread"); \
+ }
+
+/*! Synchronizes applier thread start with init thread */
+extern void wsrep_sst_grab();
+/*! Init thread waits for SST completion */
+extern bool wsrep_sst_wait();
+/*! Signals wsrep that initialization is complete, writesets can be applied */
+extern void wsrep_sst_continue();
+
+extern void wsrep_SE_init_grab(); /*! grab init critical section */
+extern void wsrep_SE_init_wait(); /*! wait for SE init to complete */
+extern void wsrep_SE_init_done(); /*! signal that SE init is complte */
+extern void wsrep_SE_initialized(); /*! mark SE initialization complete */
+
+extern void wsrep_ready_wait();
+
+enum wsrep_trx_status {
+ WSREP_TRX_OK,
+ WSREP_TRX_ROLLBACK,
+ WSREP_TRX_ERROR,
+ };
+
+extern enum wsrep_trx_status
+wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all);
+class Ha_trx_info;
+struct THD_TRANS;
+void wsrep_register_hton(THD* thd, bool all);
+
+void wsrep_replication_process(THD *thd);
+void wsrep_rollback_process(THD *thd);
+void wsrep_brute_force_killer(THD *thd);
+int wsrep_hire_brute_force_killer(THD *thd, uint64_t trx_id);
+extern "C" bool wsrep_consistency_check(void *thd_ptr);
+extern "C" int wsrep_thd_is_brute_force(void *thd_ptr);
+extern "C" int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr,
+ my_bool signal);
+extern "C" int wsrep_thd_in_locking_session(void *thd_ptr);
+void *wsrep_prepare_bf_thd(THD *thd);
+void wsrep_return_from_bf_mode(void *shadow, THD *thd);
+
+/* this is visible for client build so that innodb plugin gets this */
+typedef struct wsrep_aborting_thd {
+ struct wsrep_aborting_thd *next;
+ THD *aborting_thd;
+} *wsrep_aborting_thd_t;
+
+extern mysql_mutex_t LOCK_wsrep_ready;
+extern mysql_cond_t COND_wsrep_ready;
+extern mysql_mutex_t LOCK_wsrep_sst;
+extern mysql_cond_t COND_wsrep_sst;
+extern mysql_mutex_t LOCK_wsrep_sst_init;
+extern mysql_cond_t COND_wsrep_sst_init;
+extern mysql_mutex_t LOCK_wsrep_rollback;
+extern mysql_cond_t COND_wsrep_rollback;
+extern int wsrep_replaying;
+extern mysql_mutex_t LOCK_wsrep_replaying;
+extern mysql_cond_t COND_wsrep_replaying;
+extern wsrep_aborting_thd_t wsrep_aborting_thd;
+extern MYSQL_PLUGIN_IMPORT my_bool wsrep_debug;
+extern my_bool wsrep_convert_LOCK_to_trx;
+extern ulong wsrep_retry_autocommit;
+extern my_bool wsrep_emulate_bin_log;
+extern my_bool wsrep_auto_increment_control;
+extern my_bool wsrep_drupal_282555_workaround;
+extern long long wsrep_max_ws_size;
+extern long wsrep_max_ws_rows;
+extern int wsrep_to_isolation;
+extern my_bool wsrep_certify_nonPK;
+extern mysql_mutex_t LOCK_wsrep_slave_threads;
+
+extern PSI_mutex_key key_LOCK_wsrep_ready;
+extern PSI_mutex_key key_COND_wsrep_ready;
+extern PSI_mutex_key key_LOCK_wsrep_sst;
+extern PSI_cond_key key_COND_wsrep_sst;
+extern PSI_mutex_key key_LOCK_wsrep_sst_init;
+extern PSI_cond_key key_COND_wsrep_sst_init;
+extern PSI_mutex_key key_LOCK_wsrep_sst_thread;
+extern PSI_cond_key key_COND_wsrep_sst_thread;
+extern PSI_mutex_key key_LOCK_wsrep_rollback;
+extern PSI_cond_key key_COND_wsrep_rollback;
+extern PSI_mutex_key key_LOCK_wsrep_replaying;
+extern PSI_cond_key key_COND_wsrep_replaying;
+extern PSI_mutex_key key_LOCK_wsrep_slave_threads;
+
+struct TABLE_LIST;
+int wsrep_to_isolation_begin(THD *thd, char *db_, char *table_,
+ const TABLE_LIST* table_list);
+void wsrep_to_isolation_end(THD *thd);
+
+void wsrep_prepare_bf_thd(THD *thd, struct wsrep_thd_shadow*);
+void wsrep_return_from_bf_mode(THD *thd, struct wsrep_thd_shadow*);
+int wsrep_to_buf_helper(
+ THD* thd, const char *query, uint query_len, uchar** buf, uint* buf_len);
+int wsrep_create_sp(THD *thd, uchar** buf, uint* buf_len);
+int wsrep_create_trigger_query(THD *thd, uchar** buf, uint* buf_len);
+int wsrep_create_event_query(THD *thd, uchar** buf, uint* buf_len);
+
+const wsrep_uuid_t* wsrep_cluster_uuid();
+struct xid_t;
+void wsrep_set_SE_checkpoint(xid_t*);
+
+void wsrep_xid_init(xid_t*, const wsrep_uuid_t*, wsrep_seqno_t);
+const wsrep_uuid_t* wsrep_xid_uuid(const xid_t*);
+wsrep_seqno_t wsrep_xid_seqno(const xid_t*);
+extern "C" int wsrep_is_wsrep_xid(const void* xid);
+
+#endif /* WSREP_MYSQLD_H */
diff --git a/sql/wsrep_notify.cc b/sql/wsrep_notify.cc
new file mode 100644
index 00000000000..ff997d01183
--- /dev/null
+++ b/sql/wsrep_notify.cc
@@ -0,0 +1,107 @@
+/* Copyright 2010 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <mysqld.h>
+#include "wsrep_priv.h"
+
+const char* wsrep_notify_cmd="";
+
+static const char* _status_str(wsrep_member_status_t status)
+{
+ switch (status)
+ {
+ case WSREP_MEMBER_UNDEFINED: return "Undefined";
+ case WSREP_MEMBER_JOINER: return "Joiner";
+ case WSREP_MEMBER_DONOR: return "Donor";
+ case WSREP_MEMBER_JOINED: return "Joined";
+ case WSREP_MEMBER_SYNCED: return "Synced";
+ default: return "Error(?)";
+ }
+}
+
+void wsrep_notify_status (wsrep_member_status_t status,
+ const wsrep_view_info_t* view)
+{
+ if (!wsrep_notify_cmd || 0 == strlen(wsrep_notify_cmd))
+ {
+ WSREP_INFO("wsrep_notify_cmd is not defined, skipping notification.");
+ return;
+ }
+
+ char cmd_buf[1 << 16]; // this can be long
+ long cmd_len = sizeof(cmd_buf) - 1;
+ char* cmd_ptr = cmd_buf;
+ long cmd_off = 0;
+
+ cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, "%s",
+ wsrep_notify_cmd);
+
+ if (status >= WSREP_MEMBER_UNDEFINED && status < WSREP_MEMBER_ERROR)
+ {
+ cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --status %s",
+ _status_str(status));
+ }
+ else
+ {
+ /* here we preserve provider error codes */
+ cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off,
+ " --status 'Error(%d)'", status);
+ }
+
+ if (0 != view)
+ {
+ char uuid_str[40];
+
+ wsrep_uuid_print (&view->uuid, uuid_str, sizeof(uuid_str));
+ cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off,
+ " --uuid %s", uuid_str);
+
+ cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off,
+ " --primary %s", view->view >= 0 ? "yes" : "no");
+
+ cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off,
+ " --index %d", view->my_idx);
+
+ cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --members");
+
+ for (int i = 0; i < view->memb_num; i++)
+ {
+ wsrep_uuid_print (&view->members[i].id, uuid_str, sizeof(uuid_str));
+ cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off,
+ "%c%s/%s/%s", i > 0 ? ',' : ' ',
+ uuid_str, view->members[i].name,
+ view->members[i].incoming);
+ }
+ }
+
+ if (cmd_off == cmd_len)
+ {
+ WSREP_ERROR("Notification buffer too short (%ld). Aborting notification.",
+ cmd_len);
+ return;
+ }
+
+ wsp::process p(cmd_ptr, "r");
+
+ p.wait();
+ int err = p.error();
+
+ if (err)
+ {
+ WSREP_ERROR("Notification command failed: %d (%s): \"%s\"",
+ err, strerror(err), cmd_ptr);
+ }
+}
+
diff --git a/sql/wsrep_priv.h b/sql/wsrep_priv.h
new file mode 100644
index 00000000000..700639ebcb1
--- /dev/null
+++ b/sql/wsrep_priv.h
@@ -0,0 +1,233 @@
+/* Copyright 2010 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+//! @file declares symbols private to wsrep integration layer
+
+#ifndef WSREP_PRIV_H
+#define WSREP_PRIV_H
+
+#include "wsrep_mysqld.h"
+#include "../wsrep/wsrep_api.h"
+
+#include <log.h>
+#include <pthread.h>
+#include <cstdio>
+
+extern void wsrep_ready_set (my_bool x);
+
+extern ssize_t wsrep_sst_prepare (void** msg);
+extern int wsrep_sst_donate_cb (void* app_ctx,
+ void* recv_ctx,
+ const void* msg, size_t msg_len,
+ const wsrep_uuid_t* current_uuid,
+ wsrep_seqno_t current_seqno,
+ const char* state, size_t state_len,
+ bool bypass);
+
+extern size_t guess_ip (char* buf, size_t buf_len);
+extern size_t guess_address(char* buf, size_t buf_len);
+
+extern wsrep_uuid_t local_uuid;
+extern wsrep_seqno_t local_seqno;
+
+/*! SST thread signals init thread about sst completion */
+extern void wsrep_sst_complete(const wsrep_uuid_t* uuid, wsrep_seqno_t, bool);
+
+extern void wsrep_notify_status (wsrep_member_status_t new_status,
+ const wsrep_view_info_t* view = 0);
+
+namespace wsp {
+class node_status
+{
+public:
+ node_status() : status(WSREP_MEMBER_UNDEFINED) {}
+ void set(wsrep_member_status_t new_status,
+ const wsrep_view_info_t* view = 0)
+ {
+ if (status != new_status || 0 != view)
+ {
+ wsrep_notify_status(new_status, view);
+ status = new_status;
+ }
+ }
+ wsrep_member_status_t get() const { return status; }
+private:
+ wsrep_member_status_t status;
+};
+} /* namespace wsp */
+
+extern wsp::node_status local_status;
+
+namespace wsp {
+/* A small class to run external programs. */
+class process
+{
+private:
+ const char* const str_;
+ FILE* io_;
+ int err_;
+ pid_t pid_;
+
+public:
+/*! @arg type is a pointer to a null-terminated string which must contain
+ either the letter 'r' for reading or the letter 'w' for writing.
+ */
+ process (const char* cmd, const char* type);
+ ~process ();
+
+ FILE* pipe () { return io_; }
+ int error() { return err_; }
+ int wait ();
+ const char* cmd() { return str_; }
+};
+#ifdef REMOVED
+class lock
+{
+ pthread_mutex_t* const mtx_;
+
+public:
+
+ lock (pthread_mutex_t* mtx) : mtx_(mtx)
+ {
+ int err = pthread_mutex_lock (mtx_);
+
+ if (err)
+ {
+ WSREP_ERROR("Mutex lock failed: %s", strerror(err));
+ abort();
+ }
+ }
+
+ virtual ~lock ()
+ {
+ int err = pthread_mutex_unlock (mtx_);
+
+ if (err)
+ {
+ WSREP_ERROR("Mutex unlock failed: %s", strerror(err));
+ abort();
+ }
+ }
+
+ inline void wait (pthread_cond_t* cond)
+ {
+ pthread_cond_wait (cond, mtx_);
+ }
+
+private:
+
+ lock (const lock&);
+ lock& operator=(const lock&);
+
+};
+
+class monitor
+{
+ int mutable refcnt;
+ pthread_mutex_t mutable mtx;
+ pthread_cond_t mutable cond;
+
+public:
+
+ monitor() : refcnt(0)
+ {
+ pthread_mutex_init (&mtx, NULL);
+ pthread_cond_init (&cond, NULL);
+ }
+
+ ~monitor()
+ {
+ pthread_mutex_destroy (&mtx);
+ pthread_cond_destroy (&cond);
+ }
+
+ void enter() const
+ {
+ lock l(&mtx);
+
+ while (refcnt)
+ {
+ l.wait(&cond);
+ }
+ refcnt++;
+ }
+
+ void leave() const
+ {
+ lock l(&mtx);
+
+ refcnt--;
+ if (refcnt == 0)
+ {
+ pthread_cond_signal (&cond);
+ }
+ }
+
+private:
+
+ monitor (const monitor&);
+ monitor& operator= (const monitor&);
+};
+
+class critical
+{
+ const monitor& mon;
+
+public:
+
+ critical(const monitor& m) : mon(m) { mon.enter(); }
+
+ ~critical() { mon.leave(); }
+
+private:
+
+ critical (const critical&);
+ critical& operator= (const critical&);
+};
+#endif
+
+class thd
+{
+ class thd_init
+ {
+ public:
+ thd_init() { my_thread_init(); }
+ ~thd_init() { my_thread_end(); }
+ }
+ init;
+
+ thd (const thd&);
+ thd& operator= (const thd&);
+
+public:
+
+ thd(my_bool wsrep_on);
+ ~thd();
+ THD* const ptr;
+};
+
+class string
+{
+public:
+ string() : string_(0) {}
+ void set(char* str) { if (string_) free (string_); string_ = str; }
+ ~string() { set (0); }
+private:
+ char* string_;
+};
+
+} // namespace wsrep
+#endif /* WSREP_PRIV_H */
diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc
new file mode 100644
index 00000000000..597d0ea087d
--- /dev/null
+++ b/sql/wsrep_sst.cc
@@ -0,0 +1,1001 @@
+/* Copyright 2008-2012 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <mysqld.h>
+#include <sql_class.h>
+#include <set_var.h>
+#include <sql_acl.h>
+#include <sql_reload.h>
+#include <sql_parse.h>
+#include "wsrep_priv.h"
+#include <cstdio>
+#include <cstdlib>
+
+extern const char wsrep_defaults_file[];
+
+#define WSREP_SST_OPT_ROLE "--role"
+#define WSREP_SST_OPT_ADDR "--address"
+#define WSREP_SST_OPT_AUTH "--auth"
+#define WSREP_SST_OPT_DATA "--datadir"
+#define WSREP_SST_OPT_CONF "--defaults-file"
+#define WSREP_SST_OPT_PARENT "--parent"
+
+// mysqldump-specific options
+#define WSREP_SST_OPT_USER "--user"
+#define WSREP_SST_OPT_PSWD "--password"
+#define WSREP_SST_OPT_HOST "--host"
+#define WSREP_SST_OPT_PORT "--port"
+#define WSREP_SST_OPT_LPORT "--local-port"
+
+// donor-specific
+#define WSREP_SST_OPT_SOCKET "--socket"
+#define WSREP_SST_OPT_GTID "--gtid"
+#define WSREP_SST_OPT_BYPASS "--bypass"
+
+#define WSREP_SST_MYSQLDUMP "mysqldump"
+#define WSREP_SST_SKIP "skip"
+#define WSREP_SST_DEFAULT WSREP_SST_MYSQLDUMP
+#define WSREP_SST_ADDRESS_AUTO "AUTO"
+#define WSREP_SST_AUTH_MASK "********"
+
+const char* wsrep_sst_method = WSREP_SST_DEFAULT;
+const char* wsrep_sst_receive_address = WSREP_SST_ADDRESS_AUTO;
+const char* wsrep_sst_donor = "";
+ char* wsrep_sst_auth = NULL;
+
+// container for real auth string
+static const char* sst_auth_real = NULL;
+
+my_bool wsrep_sst_donor_rejects_queries = FALSE;
+
+bool wsrep_sst_method_check (sys_var *self, THD* thd, set_var* var)
+{
+ char buff[FN_REFLEN];
+ String str(buff, sizeof(buff), system_charset_info), *res;
+ const char* c_str = NULL;
+
+ if ((res = var->value->val_str(&str)) &&
+ (c_str = res->c_ptr()) &&
+ strlen(c_str) > 0)
+ return 0;
+
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "wsrep_sst_method", c_str ? c_str : "NULL");
+ return 1;
+}
+
+bool wsrep_sst_method_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ return 0;
+}
+
+static bool sst_receive_address_check (const char* str)
+{
+ if (!strncasecmp(str, "127.0.0.1", strlen("127.0.0.1")) ||
+ !strncasecmp(str, "localhost", strlen("localhost")))
+ {
+ return 1;
+ }
+
+ return 0;
+}
+
+bool wsrep_sst_receive_address_check (sys_var *self, THD* thd, set_var* var)
+{
+ const char* c_str = var->value->str_value.c_ptr();
+
+ if (sst_receive_address_check (c_str))
+ {
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "wsrep_sst_receive_address", c_str ? c_str : "NULL");
+ return 1;
+ }
+
+ return 0;
+}
+
+bool wsrep_sst_receive_address_update (sys_var *self, THD* thd,
+ enum_var_type type)
+{
+ return 0;
+}
+
+bool wsrep_sst_auth_check (sys_var *self, THD* thd, set_var* var)
+{
+ return 0;
+}
+static bool sst_auth_real_set (const char* value)
+{
+ const char* v = strdup (value);
+
+ if (v)
+ {
+ if (sst_auth_real) free (const_cast<char*>(sst_auth_real));
+ sst_auth_real = v;
+
+ if (strlen(sst_auth_real))
+ {
+ if (wsrep_sst_auth)
+ {
+ my_free ((void*)wsrep_sst_auth);
+ wsrep_sst_auth = my_strdup(WSREP_SST_AUTH_MASK, MYF(0));
+ //strncpy (wsrep_sst_auth, WSREP_SST_AUTH_MASK,
+ // sizeof(wsrep_sst_auth) - 1);
+ }
+ else
+ wsrep_sst_auth = my_strdup (WSREP_SST_AUTH_MASK, MYF(0));
+ }
+ return 0;
+ }
+
+ return 1;
+}
+
+bool wsrep_sst_auth_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ return sst_auth_real_set (wsrep_sst_auth);
+}
+
+void wsrep_sst_auth_init (const char* value)
+{
+ if (wsrep_sst_auth == value) wsrep_sst_auth = NULL;
+ if (value) sst_auth_real_set (value);
+}
+
+bool wsrep_sst_donor_check (sys_var *self, THD* thd, set_var* var)
+{
+ return 0;
+}
+
+bool wsrep_sst_donor_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ return 0;
+}
+
+static wsrep_uuid_t cluster_uuid = WSREP_UUID_UNDEFINED;
+
+bool wsrep_before_SE()
+{
+ return (wsrep_provider != NULL
+ && strcmp (wsrep_provider, WSREP_NONE)
+ && strcmp (wsrep_sst_method, WSREP_SST_SKIP)
+ && strcmp (wsrep_sst_method, WSREP_SST_MYSQLDUMP));
+}
+
+static bool sst_complete = false;
+static bool sst_needed = false;
+
+void wsrep_sst_grab ()
+{
+ WSREP_INFO("wsrep_sst_grab()");
+ if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort();
+ sst_complete = false;
+ mysql_mutex_unlock (&LOCK_wsrep_sst);
+}
+
+// Wait for end of SST
+bool wsrep_sst_wait ()
+{
+ if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort();
+ while (!sst_complete)
+ {
+ WSREP_INFO("Waiting for SST to complete.");
+ mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst);
+ }
+
+ if (local_seqno >= 0)
+ {
+ WSREP_INFO("SST complete, seqno: %lld", (long long) local_seqno);
+ }
+ else
+ {
+ WSREP_ERROR("SST failed: %d (%s)",
+ int(-local_seqno), strerror(-local_seqno));
+ }
+
+ mysql_mutex_unlock (&LOCK_wsrep_sst);
+
+ return (local_seqno >= 0);
+}
+
+// Signal end of SST
+void wsrep_sst_complete (const wsrep_uuid_t* sst_uuid,
+ wsrep_seqno_t sst_seqno,
+ bool needed)
+{
+ if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort();
+ if (!sst_complete)
+ {
+ sst_complete = true;
+ sst_needed = needed;
+ local_uuid = *sst_uuid;
+ local_seqno = sst_seqno;
+ mysql_cond_signal (&COND_wsrep_sst);
+ }
+ else
+ {
+ WSREP_WARN("Nobody is waiting for SST.");
+ }
+ mysql_mutex_unlock (&LOCK_wsrep_sst);
+}
+
+// Let applier threads to continue
+void wsrep_sst_continue ()
+{
+ if (sst_needed)
+ {
+ WSREP_INFO("Signalling provider to continue.");
+ wsrep->sst_received (wsrep, &local_uuid, local_seqno, NULL, 0);
+ }
+}
+
+struct sst_thread_arg
+{
+ const char* cmd;
+ int err;
+ char* ret_str;
+ mysql_mutex_t lock;
+ mysql_cond_t cond;
+
+ sst_thread_arg (const char* c) : cmd(c), err(-1), ret_str(0)
+ {
+ mysql_mutex_init(key_LOCK_wsrep_sst_thread, &lock, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_wsrep_sst_thread, &cond, NULL);
+ }
+
+ ~sst_thread_arg()
+ {
+ mysql_cond_destroy (&cond);
+ mysql_mutex_unlock (&lock);
+ mysql_mutex_destroy (&lock);
+ }
+};
+
+static int sst_scan_uuid_seqno (const char* str,
+ wsrep_uuid_t* uuid, wsrep_seqno_t* seqno)
+{
+ int offt = wsrep_uuid_scan (str, strlen(str), uuid);
+ if (offt > 0 && strlen(str) > (unsigned int)offt && ':' == str[offt])
+ {
+ *seqno = strtoll (str + offt + 1, NULL, 10);
+ if (*seqno != LLONG_MAX || errno != ERANGE)
+ {
+ return 0;
+ }
+ }
+
+ WSREP_ERROR("Failed to parse uuid:seqno pair: '%s'", str);
+ return EINVAL;
+}
+
+// get rid of trailing \n
+static char* my_fgets (char* buf, size_t buf_len, FILE* stream)
+{
+ char* ret= fgets (buf, buf_len, stream);
+
+ if (ret)
+ {
+ size_t len = strlen(ret);
+ if (len > 0 && ret[len - 1] == '\n') ret[len - 1] = '\0';
+ }
+
+ return ret;
+}
+
+static void* sst_joiner_thread (void* a)
+{
+ sst_thread_arg* arg= (sst_thread_arg*) a;
+ int err= 1;
+
+ {
+ const char magic[] = "ready";
+ const size_t magic_len = sizeof(magic) - 1;
+ const size_t out_len = 512;
+ char out[out_len];
+
+ WSREP_INFO("Running: '%s'", arg->cmd);
+
+ wsp::process proc (arg->cmd, "r");
+
+ if (proc.pipe() && !proc.error())
+ {
+ const char* tmp= my_fgets (out, out_len, proc.pipe());
+
+ if (!tmp || strlen(tmp) < (magic_len + 2) ||
+ strncasecmp (tmp, magic, magic_len))
+ {
+ WSREP_ERROR("Failed to read '%s <addr>' from: %s\n\tRead: '%s'",
+ magic, arg->cmd, tmp);
+ proc.wait();
+ if (proc.error()) err = proc.error();
+ }
+ else
+ {
+ err = 0;
+ }
+ }
+ else
+ {
+ err = proc.error();
+ WSREP_ERROR("Failed to execute: %s : %d (%s)",
+ arg->cmd, err, strerror(err));
+ }
+
+ // signal sst_prepare thread with ret code,
+ // it will go on sending SST request
+ mysql_mutex_lock (&arg->lock);
+ if (!err)
+ {
+ arg->ret_str = strdup (out + magic_len + 1);
+ if (!arg->ret_str) err = ENOMEM;
+ }
+ arg->err = -err;
+ mysql_cond_signal (&arg->cond);
+ mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that.
+
+ if (err) return NULL; /* lp:808417 - return immediately, don't signal
+ * initializer thread to ensure single thread of
+ * shutdown. */
+
+ wsrep_uuid_t ret_uuid = WSREP_UUID_UNDEFINED;
+ wsrep_seqno_t ret_seqno = WSREP_SEQNO_UNDEFINED;
+
+ // in case of successfull receiver start, wait for SST completion/end
+ char* tmp = my_fgets (out, out_len, proc.pipe());
+
+ proc.wait();
+ err= EINVAL;
+
+ if (!tmp)
+ {
+ WSREP_ERROR("Failed to read uuid:seqno from joiner script.");
+ if (proc.error()) err = proc.error();
+ }
+ else
+ {
+ err= sst_scan_uuid_seqno (out, &ret_uuid, &ret_seqno);
+ }
+
+ if (err)
+ {
+ ret_uuid= WSREP_UUID_UNDEFINED;
+ ret_seqno= -err;
+ }
+
+ // Tell initializer thread that SST is complete
+ wsrep_sst_complete (&ret_uuid, ret_seqno, true);
+ }
+
+ return NULL;
+}
+
+static ssize_t sst_prepare_other (const char* method,
+ const char* addr_in,
+ const char** addr_out)
+{
+ ssize_t cmd_len= 1024;
+ char cmd_str[cmd_len];
+ const char* sst_dir= mysql_real_data_home;
+
+ int ret= snprintf (cmd_str, cmd_len,
+ "wsrep_sst_%s "
+ WSREP_SST_OPT_ROLE" 'joiner' "
+ WSREP_SST_OPT_ADDR" '%s' "
+ WSREP_SST_OPT_AUTH" '%s' "
+ WSREP_SST_OPT_DATA" '%s' "
+ WSREP_SST_OPT_CONF" '%s' "
+ WSREP_SST_OPT_PARENT" '%d'",
+ method, addr_in, (sst_auth_real) ? sst_auth_real : "",
+ sst_dir, wsrep_defaults_file, (int)getpid());
+
+ if (ret < 0 || ret >= cmd_len)
+ {
+ WSREP_ERROR("sst_prepare_other(): snprintf() failed: %d", ret);
+ return (ret < 0 ? ret : -EMSGSIZE);
+ }
+
+ pthread_t tmp;
+ sst_thread_arg arg(cmd_str);
+ mysql_mutex_lock (&arg.lock);
+ ret = pthread_create (&tmp, NULL, sst_joiner_thread, &arg);
+ if (ret)
+ {
+ WSREP_ERROR("sst_prepare_other(): pthread_create() failed: %d (%s)",
+ ret, strerror(ret));
+ return ret;
+ }
+ mysql_cond_wait (&arg.cond, &arg.lock);
+
+ *addr_out= arg.ret_str;
+
+ if (!arg.err)
+ ret = strlen(*addr_out);
+ else
+ {
+ assert (arg.err < 0);
+ ret = arg.err;
+ }
+
+ pthread_detach (tmp);
+
+ return ret;
+}
+
+//extern ulong my_bind_addr;
+extern uint mysqld_port;
+
+/*! Just tells donor where to send mysqldump */
+static ssize_t sst_prepare_mysqldump (const char* addr_in,
+ const char** addr_out)
+{
+ ssize_t ret = strlen (addr_in);
+
+ if (!strrchr(addr_in, ':'))
+ {
+ ssize_t s = ret + 7;
+ char* tmp = (char*) malloc (s);
+
+ if (tmp)
+ {
+ ret= snprintf (tmp, s, "%s:%u", addr_in, mysqld_port);
+
+ if (ret > 0 && ret < s)
+ {
+ *addr_out= tmp;
+ return ret;
+ }
+ if (ret > 0) /* buffer too short */ ret = -EMSGSIZE;
+ free (tmp);
+ }
+ else {
+ ret= -ENOMEM;
+ }
+
+ WSREP_ERROR ("Could not prepare state transfer request: "
+ "adding default port failed: %zd.", ret);
+ }
+ else {
+ *addr_out= addr_in;
+ }
+
+ return ret;
+}
+
+static bool SE_initialized = false;
+
+ssize_t wsrep_sst_prepare (void** msg)
+{
+ const ssize_t ip_max= 256;
+ char ip_buf[ip_max];
+ const char* addr_in= NULL;
+ const char* addr_out= NULL;
+
+ if (!strcmp(wsrep_sst_method, WSREP_SST_SKIP))
+ {
+ ssize_t ret = strlen(WSREP_STATE_TRANSFER_TRIVIAL) + 1;
+ *msg = strdup(WSREP_STATE_TRANSFER_TRIVIAL);
+ if (!msg)
+ {
+ WSREP_ERROR("Could not allocate %zd bytes for state request", ret);
+ unireg_abort(1);
+ }
+ return ret;
+ }
+
+ // Figure out SST address. Common for all SST methods
+ if (wsrep_sst_receive_address &&
+ strcmp (wsrep_sst_receive_address, WSREP_SST_ADDRESS_AUTO))
+ {
+ addr_in= wsrep_sst_receive_address;
+ }
+ else if (wsrep_node_address && strlen(wsrep_node_address))
+ {
+ const char* const colon= strchr (wsrep_node_address, ':');
+ if (colon)
+ {
+ ptrdiff_t const len= colon - wsrep_node_address;
+ strncpy (ip_buf, wsrep_node_address, len);
+ ip_buf[len]= '\0';
+ addr_in= ip_buf;
+ }
+ else
+ {
+ addr_in= wsrep_node_address;
+ }
+ }
+ else
+ {
+ ssize_t ret= guess_ip (ip_buf, ip_max);
+
+ if (ret && ret < ip_max)
+ {
+ addr_in= ip_buf;
+ }
+ else
+ {
+ WSREP_ERROR("Could not prepare state transfer request: "
+ "failed to guess address to accept state transfer at. "
+ "wsrep_sst_receive_address must be set manually.");
+ unireg_abort(1);
+ }
+ }
+
+ ssize_t addr_len= -ENOSYS;
+ if (!strcmp(wsrep_sst_method, WSREP_SST_MYSQLDUMP))
+ {
+ addr_len= sst_prepare_mysqldump (addr_in, &addr_out);
+ if (addr_len < 0) unireg_abort(1);
+ }
+ else
+ {
+ /*! A heuristic workaround until we learn how to stop and start engines */
+ if (SE_initialized)
+ {
+ // we already did SST at initializaiton, now engines are running
+ // sql_print_information() is here because the message is too long
+ // for WSREP_INFO.
+ sql_print_information ("WSREP: "
+ "You have configured '%s' state snapshot transfer method "
+ "which cannot be performed on a running server. "
+ "Wsrep provider won't be able to fall back to it "
+ "if other means of state transfer are unavailable. "
+ "In that case you will need to restart the server.",
+ wsrep_sst_method);
+ *msg = 0;
+ return 0;
+ }
+
+ addr_len = sst_prepare_other (wsrep_sst_method, addr_in, &addr_out);
+ if (addr_len < 0)
+ {
+ WSREP_ERROR("Failed to prepare for '%s' SST. Unrecoverable.",
+ wsrep_sst_method);
+ unireg_abort(1);
+ }
+ }
+
+ size_t const method_len(strlen(wsrep_sst_method));
+ size_t const msg_len (method_len + addr_len + 2 /* + auth_len + 1*/);
+
+ *msg = malloc (msg_len);
+ if (NULL != *msg) {
+ char* const method_ptr(reinterpret_cast<char*>(*msg));
+ strcpy (method_ptr, wsrep_sst_method);
+ char* const addr_ptr(method_ptr + method_len + 1);
+ strcpy (addr_ptr, addr_out);
+
+ WSREP_INFO ("Prepared SST request: %s|%s", method_ptr, addr_ptr);
+ }
+ else {
+ WSREP_ERROR("Failed to allocate SST request of size %zu. Can't continue.",
+ msg_len);
+ unireg_abort(1);
+ }
+
+ if (addr_out != addr_in) /* malloc'ed */ free ((char*)addr_out);
+
+ return msg_len;
+}
+
+// helper method for donors
+static int sst_run_shell (const char* cmd_str, int max_tries)
+{
+ int ret = 0;
+
+ for (int tries=1; tries <= max_tries; tries++)
+ {
+ wsp::process proc (cmd_str, "r");
+
+ if (NULL != proc.pipe())
+ {
+ proc.wait();
+ }
+
+ if ((ret = proc.error()))
+ {
+ WSREP_ERROR("Try %d/%d: '%s' failed: %d (%s)",
+ tries, max_tries, proc.cmd(), ret, strerror(ret));
+ sleep (1);
+ }
+ else
+ {
+ WSREP_DEBUG("SST script successfully completed.");
+ break;
+ }
+ }
+
+ return -ret;
+}
+
+static void sst_reject_queries(my_bool close_conn)
+{
+ wsrep_ready_set (FALSE); // this will be resotred when donor becomes synced
+ WSREP_INFO("Rejecting client queries for the duration of SST.");
+ if (TRUE == close_conn) wsrep_close_client_connections(FALSE);
+}
+
+static int sst_mysqldump_check_addr (const char* user, const char* pswd,
+ const char* host, const char* port)
+{
+ return 0;
+}
+
+static int sst_donate_mysqldump (const char* addr,
+ const wsrep_uuid_t* uuid,
+ const char* uuid_str,
+ wsrep_seqno_t seqno,
+ bool bypass)
+{
+ size_t host_len;
+ const char* port = strchr (addr, ':');
+
+ if (port)
+ {
+ port += 1;
+ host_len = port - addr;
+ }
+ else
+ {
+ port = "";
+ host_len = strlen (addr) + 1;
+ }
+
+ char host[host_len];
+
+ strncpy (host, addr, host_len - 1);
+ host[host_len - 1] = '\0';
+
+ const char* auth = sst_auth_real;
+ const char* pswd = (auth) ? strchr (auth, ':') : NULL;
+ size_t user_len;
+
+ if (pswd)
+ {
+ pswd += 1;
+ user_len = pswd - auth;
+ }
+ else
+ {
+ pswd = "";
+ user_len = (auth) ? strlen (auth) + 1 : 1;
+ }
+
+ char user[user_len];
+
+ strncpy (user, (auth) ? auth : "", user_len - 1);
+ user[user_len - 1] = '\0';
+
+ int ret = sst_mysqldump_check_addr (user, pswd, host, port);
+ if (!ret)
+ {
+ size_t cmd_len= 1024;
+ char cmd_str[cmd_len];
+
+ if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(TRUE);
+
+ snprintf (cmd_str, cmd_len,
+ "wsrep_sst_mysqldump "
+ WSREP_SST_OPT_USER" '%s' "
+ WSREP_SST_OPT_PSWD" '%s' "
+ WSREP_SST_OPT_HOST" '%s' "
+ WSREP_SST_OPT_PORT" '%s' "
+ WSREP_SST_OPT_LPORT" '%u' "
+ WSREP_SST_OPT_SOCKET" '%s' "
+ WSREP_SST_OPT_GTID" '%s:%lld'"
+ "%s",
+ user, pswd, host, port, mysqld_port, mysqld_unix_port, uuid_str,
+ (long long)seqno, bypass ? " "WSREP_SST_OPT_BYPASS : "");
+
+ WSREP_DEBUG("Running: '%s'", cmd_str);
+
+ ret= sst_run_shell (cmd_str, 3);
+ }
+
+ wsrep->sst_sent (wsrep, uuid, ret ? ret : seqno);
+
+ return ret;
+}
+
+wsrep_seqno_t wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED;
+
+static int run_sql_command(THD *thd, const char *query)
+{
+ thd->set_query((char *)query, strlen(query));
+
+ Parser_state ps;
+ if (ps.init(thd, thd->query(), thd->query_length()))
+ {
+ WSREP_ERROR("SST query: %s failed", query);
+ return -1;
+ }
+
+ mysql_parse(thd, thd->query(), thd->query_length(), &ps);
+ if (thd->is_error())
+ {
+ int const err= thd->stmt_da->sql_errno();
+ WSREP_WARN ("error executing '%s': %d (%s)%s",
+ query, err, thd->stmt_da->message(),
+ err == ER_UNKNOWN_SYSTEM_VARIABLE ?
+ ". Was mysqld built with --with-innodb-disallow-writes ?" : "");
+ thd->clear_error();
+ return -1;
+ }
+ return 0;
+}
+
+static int sst_flush_tables(THD* thd)
+{
+ WSREP_INFO("Flushing tables for SST...");
+
+ int err;
+ int not_used;
+ if (run_sql_command(thd, "FLUSH TABLES WITH READ LOCK"))
+ {
+ WSREP_ERROR("Failed to flush and lock tables");
+ err = -1;
+ }
+ else
+ {
+ /* make sure logs are flushed after global read lock acquired */
+ err= reload_acl_and_cache(thd, REFRESH_ENGINE_LOG,
+ (TABLE_LIST*) 0, &not_used);
+ }
+
+ if (err)
+ {
+ WSREP_ERROR("Failed to flush tables: %d (%s)", err, strerror(err));
+ }
+ else
+ {
+ WSREP_INFO("Tables flushed.");
+ const char base_name[]= "tables_flushed";
+ ssize_t const full_len= strlen(mysql_real_data_home) + strlen(base_name)+2;
+ char real_name[full_len];
+ sprintf(real_name, "%s/%s", mysql_real_data_home, base_name);
+ char tmp_name[full_len + 4];
+ sprintf(tmp_name, "%s.tmp", real_name);
+
+ FILE* file= fopen(tmp_name, "w+");
+ if (0 == file)
+ {
+ err= errno;
+ WSREP_ERROR("Failed to open '%s': %d (%s)", tmp_name, err,strerror(err));
+ }
+ else
+ {
+ fprintf(file, "%s:%lld\n",
+ wsrep_cluster_state_uuid, (long long)wsrep_locked_seqno);
+ fsync(fileno(file));
+ fclose(file);
+ if (rename(tmp_name, real_name) == -1)
+ {
+ err= errno;
+ WSREP_ERROR("Failed to rename '%s' to '%s': %d (%s)",
+ tmp_name, real_name, err,strerror(err));
+ }
+ }
+ }
+
+ return err;
+}
+
+static void sst_disallow_writes (THD* thd, bool yes)
+{
+ char query_str[64] = { 0, };
+ ssize_t const query_max = sizeof(query_str) - 1;
+ snprintf (query_str, query_max, "SET GLOBAL innodb_disallow_writes=%d",
+ yes ? 1 : 0);
+
+ if (run_sql_command(thd, query_str))
+ {
+ WSREP_ERROR("Failed to disallow InnoDB writes");
+ }
+}
+
+static void* sst_donor_thread (void* a)
+{
+ sst_thread_arg* arg= (sst_thread_arg*)a;
+
+ WSREP_INFO("Running: '%s'", arg->cmd);
+
+ int err= 1;
+ bool locked= false;
+
+ const char* out= NULL;
+ const size_t out_len= 128;
+ char out_buf[out_len];
+
+ wsrep_uuid_t ret_uuid= WSREP_UUID_UNDEFINED;
+ wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED; // seqno of complete SST
+
+ wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can
+ // operate with wsrep_ready == OFF
+ wsp::process proc(arg->cmd, "r");
+
+ err= proc.error();
+
+/* Inform server about SST script startup and release TO isolation */
+ mysql_mutex_lock (&arg->lock);
+ arg->err = -err;
+ mysql_cond_signal (&arg->cond);
+ mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that.
+
+ if (proc.pipe() && !err)
+ {
+wait_signal:
+ out= my_fgets (out_buf, out_len, proc.pipe());
+
+ if (out)
+ {
+ const char magic_flush[]= "flush tables";
+ const char magic_cont[]= "continue";
+ const char magic_done[]= "done";
+
+ if (!strcasecmp (out, magic_flush))
+ {
+ err= sst_flush_tables (thd.ptr);
+ if (!err)
+ {
+ sst_disallow_writes (thd.ptr, true);
+ locked= true;
+ goto wait_signal;
+ }
+ }
+ else if (!strcasecmp (out, magic_cont))
+ {
+ if (locked)
+ {
+ sst_disallow_writes (thd.ptr, false);
+ thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr);
+ locked= false;
+ }
+ err= 0;
+ goto wait_signal;
+ }
+ else if (!strncasecmp (out, magic_done, strlen(magic_done)))
+ {
+ err= sst_scan_uuid_seqno (out + strlen(magic_done) + 1,
+ &ret_uuid, &ret_seqno);
+ }
+ else
+ {
+ WSREP_WARN("Received unknown signal: '%s'", out);
+ }
+ }
+ else
+ {
+ WSREP_ERROR("Failed to read from: %s", proc.cmd());
+ }
+ if (err && proc.error()) err= proc.error();
+ }
+ else
+ {
+ WSREP_ERROR("Failed to execute: %s : %d (%s)",
+ proc.cmd(), err, strerror(err));
+ }
+
+ if (locked) // don't forget to unlock server before return
+ {
+ sst_disallow_writes (thd.ptr, false);
+ thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr);
+ }
+
+ // signal to donor that SST is over
+ wsrep->sst_sent (wsrep, &ret_uuid, err ? -err : ret_seqno);
+ proc.wait();
+
+ return NULL;
+}
+
+static int sst_donate_other (const char* method,
+ const char* addr,
+ const char* uuid,
+ wsrep_seqno_t seqno,
+ bool bypass)
+{
+ ssize_t cmd_len = 4096;
+ char cmd_str[cmd_len];
+
+ int ret= snprintf (cmd_str, cmd_len,
+ "wsrep_sst_%s "
+ WSREP_SST_OPT_ROLE" 'donor' "
+ WSREP_SST_OPT_ADDR" '%s' "
+ WSREP_SST_OPT_AUTH" '%s' "
+ WSREP_SST_OPT_SOCKET" '%s' "
+ WSREP_SST_OPT_DATA" '%s' "
+ WSREP_SST_OPT_CONF" '%s' "
+ WSREP_SST_OPT_GTID" '%s:%lld'"
+ "%s",
+ method, addr, sst_auth_real, mysqld_unix_port,
+ mysql_real_data_home, wsrep_defaults_file,
+ uuid, (long long) seqno,
+ bypass ? " "WSREP_SST_OPT_BYPASS : "");
+
+ if (ret < 0 || ret >= cmd_len)
+ {
+ WSREP_ERROR("sst_donate_other(): snprintf() failed: %d", ret);
+ return (ret < 0 ? ret : -EMSGSIZE);
+ }
+
+ if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(FALSE);
+
+ pthread_t tmp;
+ sst_thread_arg arg(cmd_str);
+ mysql_mutex_lock (&arg.lock);
+ ret = pthread_create (&tmp, NULL, sst_donor_thread, &arg);
+ if (ret)
+ {
+ WSREP_ERROR("sst_donate_other(): pthread_create() failed: %d (%s)",
+ ret, strerror(ret));
+ return ret;
+ }
+ mysql_cond_wait (&arg.cond, &arg.lock);
+
+ WSREP_INFO("sst_donor_thread signaled with %d", arg.err);
+ return arg.err;
+}
+
+int wsrep_sst_donate_cb (void* app_ctx, void* recv_ctx,
+ const void* msg, size_t msg_len,
+ const wsrep_uuid_t* current_uuid,
+ wsrep_seqno_t current_seqno,
+ const char* state, size_t state_len,
+ bool bypass)
+{
+ /* This will be reset when sync callback is called.
+ * Should we set wsrep_ready to FALSE here too? */
+// wsrep_notify_status(WSREP_MEMBER_DONOR);
+ local_status.set(WSREP_MEMBER_DONOR);
+
+ const char* method = (char*)msg;
+ size_t method_len = strlen (method);
+ const char* data = method + method_len + 1;
+
+ char uuid_str[37];
+ wsrep_uuid_print (current_uuid, uuid_str, sizeof(uuid_str));
+
+ int ret;
+ if (!strcmp (WSREP_SST_MYSQLDUMP, method))
+ {
+ ret = sst_donate_mysqldump (data, current_uuid, uuid_str, current_seqno,
+ bypass);
+ }
+ else
+ {
+ ret = sst_donate_other (method, data, uuid_str, current_seqno, bypass);
+ }
+
+ return (ret > 0 ? 0 : ret);
+}
+
+void wsrep_SE_init_grab()
+{
+ if (mysql_mutex_lock (&LOCK_wsrep_sst_init)) abort();
+}
+
+void wsrep_SE_init_wait()
+{
+ mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init);
+ mysql_mutex_unlock (&LOCK_wsrep_sst_init);
+}
+
+void wsrep_SE_init_done()
+{
+ mysql_cond_signal (&COND_wsrep_sst_init);
+ mysql_mutex_unlock (&LOCK_wsrep_sst_init);
+}
+
+void wsrep_SE_initialized()
+{
+ SE_initialized = true;
+}
diff --git a/sql/wsrep_utils.cc b/sql/wsrep_utils.cc
new file mode 100644
index 00000000000..daba0e4cab2
--- /dev/null
+++ b/sql/wsrep_utils.cc
@@ -0,0 +1,468 @@
+/* Copyright 2010 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+//! @file declares symbols private to wsrep integration layer
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE // POSIX_SPAWN_USEVFORK flag
+#endif
+
+#include <spawn.h> // posix_spawn()
+#include <unistd.h> // pipe()
+#include <errno.h> // errno
+#include <string.h> // strerror()
+#include <sys/wait.h> // waitpid()
+
+#include <sql_class.h>
+#include "wsrep_priv.h"
+
+extern char** environ; // environment variables
+
+static wsp::string wsrep_PATH;
+
+void
+wsrep_prepend_PATH (const char* path)
+{
+ int count = 0;
+
+ while (environ[count])
+ {
+ if (strncmp (environ[count], "PATH=", 5))
+ {
+ count++;
+ continue;
+ }
+
+ char* const old_path (environ[count]);
+
+ if (strstr (old_path, path)) return; // path already there
+
+ size_t const new_path_len(strlen(old_path) + strlen(":") +
+ strlen(path) + 1);
+
+ char* const new_path (reinterpret_cast<char*>(malloc(new_path_len)));
+
+ if (new_path)
+ {
+ snprintf (new_path, new_path_len, "PATH=%s:%s", path,
+ old_path + strlen("PATH="));
+
+ wsrep_PATH.set (new_path);
+ environ[count] = new_path;
+ }
+ else
+ {
+ WSREP_ERROR ("Failed to allocate 'PATH' environment variable "
+ "buffer of size %zu.", new_path_len);
+ }
+
+ return;
+ }
+
+ WSREP_ERROR ("Failed to find 'PATH' environment variable. "
+ "State snapshot transfer may not be working.");
+}
+
+namespace wsp
+{
+
+#define PIPE_READ 0
+#define PIPE_WRITE 1
+#define STDIN_FD 0
+#define STDOUT_FD 1
+
+#ifndef POSIX_SPAWN_USEVFORK
+# define POSIX_SPAWN_USEVFORK 0
+#endif
+
+process::process (const char* cmd, const char* type)
+ : str_(cmd ? strdup(cmd) : strdup("")), io_(NULL), err_(EINVAL), pid_(0)
+{
+ if (0 == str_)
+ {
+ WSREP_ERROR ("Can't allocate command line of size: %zu", strlen(cmd));
+ err_ = ENOMEM;
+ return;
+ }
+
+ if (0 == strlen(str_))
+ {
+ WSREP_ERROR ("Can't start a process: null or empty command line.");
+ return;
+ }
+
+ if (NULL == type || (strcmp (type, "w") && strcmp(type, "r")))
+ {
+ WSREP_ERROR ("type argument should be either \"r\" or \"w\".");
+ return;
+ }
+
+ int pipe_fds[2] = { -1, };
+ if (::pipe(pipe_fds))
+ {
+ err_ = errno;
+ WSREP_ERROR ("pipe() failed: %d (%s)", err_, strerror(err_));
+ return;
+ }
+
+ // which end of pipe will be returned to parent
+ int const parent_end (strcmp(type,"w") ? PIPE_READ : PIPE_WRITE);
+ int const child_end (parent_end == PIPE_READ ? PIPE_WRITE : PIPE_READ);
+ int const close_fd (parent_end == PIPE_READ ? STDOUT_FD : STDIN_FD);
+
+ char* const pargv[4] = { strdup("sh"), strdup("-c"), strdup(str_), NULL };
+ if (!(pargv[0] && pargv[1] && pargv[2]))
+ {
+ err_ = ENOMEM;
+ WSREP_ERROR ("Failed to allocate pargv[] array.");
+ goto cleanup_pipe;
+ }
+
+ posix_spawnattr_t attr;
+ err_ = posix_spawnattr_init (&attr);
+ if (err_)
+ {
+ WSREP_ERROR ("posix_spawnattr_init() failed: %d (%s)",
+ err_, strerror(err_));
+ goto cleanup_pipe;
+ }
+
+ err_ = posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGDEF |
+ POSIX_SPAWN_USEVFORK);
+ if (err_)
+ {
+ WSREP_ERROR ("posix_spawnattr_setflags() failed: %d (%s)",
+ err_, strerror(err_));
+ goto cleanup_attr;
+ }
+
+ posix_spawn_file_actions_t fact;
+ err_ = posix_spawn_file_actions_init (&fact);
+ if (err_)
+ {
+ WSREP_ERROR ("posix_spawn_file_actions_init() failed: %d (%s)",
+ err_, strerror(err_));
+ goto cleanup_attr;
+ }
+
+ // close child's stdout|stdin depending on what we returning
+ err_ = posix_spawn_file_actions_addclose (&fact, close_fd);
+ if (err_)
+ {
+ WSREP_ERROR ("posix_spawn_file_actions_addclose() failed: %d (%s)",
+ err_, strerror(err_));
+ goto cleanup_fact;
+ }
+
+ // substitute our pipe descriptor in place of the closed one
+ err_ = posix_spawn_file_actions_adddup2 (&fact,
+ pipe_fds[child_end], close_fd);
+ if (err_)
+ {
+ WSREP_ERROR ("posix_spawn_file_actions_addup2() failed: %d (%s)",
+ err_, strerror(err_));
+ goto cleanup_fact;
+ }
+
+ err_ = posix_spawnp (&pid_, pargv[0], &fact, &attr, pargv, environ);
+ if (err_)
+ {
+ WSREP_ERROR ("posix_spawnp(%s) failed: %d (%s)",
+ pargv[2], err_, strerror(err_));
+ pid_ = 0; // just to make sure it was not messed up in the call
+ goto cleanup_fact;
+ }
+
+ io_ = fdopen (pipe_fds[parent_end], type);
+
+ if (io_)
+ {
+ pipe_fds[parent_end] = -1; // skip close on cleanup
+ }
+ else
+ {
+ err_ = errno;
+ WSREP_ERROR ("fdopen() failed: %d (%s)", err_, strerror(err_));
+ }
+
+cleanup_fact:
+ int err; // to preserve err_ code
+ err = posix_spawn_file_actions_destroy (&fact);
+ if (err)
+ {
+ WSREP_ERROR ("posix_spawn_file_actions_destroy() failed: %d (%s)\n",
+ err, strerror(err));
+ }
+
+cleanup_attr:
+ err = posix_spawnattr_destroy (&attr);
+ if (err)
+ {
+ WSREP_ERROR ("posix_spawnattr_destroy() failed: %d (%s)",
+ err, strerror(err));
+ }
+
+cleanup_pipe:
+ if (pipe_fds[0] >= 0) close (pipe_fds[0]);
+ if (pipe_fds[1] >= 0) close (pipe_fds[1]);
+
+ free (pargv[0]);
+ free (pargv[1]);
+ free (pargv[2]);
+}
+
+process::~process ()
+{
+ if (io_)
+ {
+ assert (pid_);
+ assert (str_);
+
+ WSREP_WARN("Closing pipe to child process: %s, PID(%ld) "
+ "which might still be running.", str_, (long)pid_);
+
+ if (fclose (io_) == -1)
+ {
+ err_ = errno;
+ WSREP_ERROR("fclose() failed: %d (%s)", err_, strerror(err_));
+ }
+ }
+
+ if (str_) free (const_cast<char*>(str_));
+}
+
+int
+process::wait ()
+{
+ if (pid_)
+ {
+ int status;
+ if (-1 == waitpid(pid_, &status, 0))
+ {
+ err_ = errno; assert (err_);
+ WSREP_ERROR("Waiting for process failed: %s, PID(%ld): %d (%s)",
+ str_, (long)pid_, err_, strerror (err_));
+ }
+ else
+ { // command completed, check exit status
+ if (WIFEXITED (status)) {
+ err_ = WEXITSTATUS (status);
+ }
+ else { // command didn't complete with exit()
+ WSREP_ERROR("Process was aborted.");
+ err_ = errno ? errno : ECHILD;
+ }
+
+ if (err_) {
+ switch (err_) /* Translate error codes to more meaningful */
+ {
+ case 126: err_ = EACCES; break; /* Permission denied */
+ case 127: err_ = ENOENT; break; /* No such file or directory */
+ }
+ WSREP_ERROR("Process completed with error: %s: %d (%s)",
+ str_, err_, strerror(err_));
+ }
+
+ pid_ = 0;
+ if (io_) fclose (io_);
+ io_ = NULL;
+ }
+ }
+ else {
+ assert (NULL == io_);
+ WSREP_ERROR("Command did not run: %s", str_);
+ }
+
+ return err_;
+}
+
+thd::thd (my_bool won) : init(), ptr(new THD)
+{
+ if (ptr)
+ {
+ ptr->thread_stack= (char*) &ptr;
+ ptr->store_globals();
+ ptr->variables.option_bits&= ~OPTION_BIN_LOG; // disable binlog
+ ptr->variables.wsrep_on = won;
+ ptr->security_ctx->master_access= ~(ulong)0;
+ lex_start(ptr);
+ }
+}
+
+thd::~thd ()
+{
+ if (ptr)
+ {
+ delete ptr;
+ my_pthread_setspecific_ptr (THR_THD, 0);
+ }
+}
+
+} // namespace wsp
+
+extern ulong my_bind_addr;
+extern uint mysqld_port;
+
+size_t guess_ip (char* buf, size_t buf_len)
+{
+ size_t ip_len = 0;
+
+ if (htonl(INADDR_NONE) == my_bind_addr) {
+ WSREP_ERROR("Networking not configured, cannot receive state transfer.");
+ return 0;
+ }
+
+ if (htonl(INADDR_ANY) != my_bind_addr) {
+ uint8_t* b = (uint8_t*)&my_bind_addr;
+ ip_len = snprintf (buf, buf_len,
+ "%hhu.%hhu.%hhu.%hhu", b[0],b[1],b[2],b[3]);
+ return ip_len;
+ }
+
+ // mysqld binds to all interfaces - try IP from wsrep_node_address
+ if (wsrep_node_address && wsrep_node_address[0] != '\0') {
+ const char* const colon_ptr = strchr(wsrep_node_address, ':');
+
+ if (colon_ptr)
+ ip_len = colon_ptr - wsrep_node_address;
+ else
+ ip_len = strlen(wsrep_node_address);
+
+ if (ip_len >= buf_len) {
+ WSREP_WARN("default_ip(): buffer too short: %zu <= %zd", buf_len, ip_len);
+ return 0;
+ }
+
+ memcpy (buf, wsrep_node_address, ip_len);
+ buf[ip_len] = '\0';
+ return ip_len;
+ }
+
+ // try to find the address of the first one
+#if (TARGET_OS_LINUX == 1)
+ const char cmd[] = "/sbin/ifconfig | "
+// "grep -m1 -1 -E '^[a-z]?eth[0-9]' | tail -n 1 | "
+ "grep -E '^[[:space:]]+inet addr:' | grep -m1 -v 'inet addr:127' | "
+ "sed 's/:/ /' | awk '{ print $3 }'";
+#elif defined(__sun__)
+ const char cmd[] = "/sbin/ifconfig -a | "
+ "/usr/gnu/bin/grep -m1 -1 -E 'net[0-9]:' | tail -n 1 | awk '{ print $2 }'";
+#else
+ char *cmd;
+#error "OS not supported"
+#endif
+ wsp::process proc (cmd, "r");
+
+ if (NULL != proc.pipe()) {
+ char* ret;
+
+ ret = fgets (buf, buf_len, proc.pipe());
+
+ if (proc.wait()) return 0;
+
+ if (NULL == ret) {
+ WSREP_ERROR("Failed to read output of: '%s'", cmd);
+ return 0;
+ }
+ }
+ else {
+ WSREP_ERROR("Failed to execute: '%s'", cmd);
+ return 0;
+ }
+
+ // clear possible \n at the end of ip string left by fgets()
+ ip_len = strlen (buf);
+ if (ip_len > 0 && '\n' == buf[ip_len - 1]) {
+ ip_len--;
+ buf[ip_len] = '\0';
+ }
+
+ if (INADDR_NONE == inet_addr(buf)) {
+ if (strlen(buf) != 0) {
+ WSREP_WARN("Shell command returned invalid address: '%s'", buf);
+ }
+ return 0;
+ }
+
+ return ip_len;
+}
+
+size_t guess_address(char* buf, size_t buf_len)
+{
+ size_t addr_len = guess_ip (buf, buf_len);
+
+ if (addr_len && addr_len < buf_len) {
+ addr_len += snprintf (buf + addr_len, buf_len - addr_len,
+ ":%u", mysqld_port);
+ }
+
+ return addr_len;
+}
+
+/*
+ * WSREPXid
+ */
+
+#define WSREP_XID_PREFIX "WSREPXid"
+#define WSREP_XID_PREFIX_LEN MYSQL_XID_PREFIX_LEN
+#define WSREP_XID_UUID_OFFSET 8
+#define WSREP_XID_SEQNO_OFFSET (WSREP_XID_UUID_OFFSET + sizeof(wsrep_uuid_t))
+#define WSREP_XID_GTRID_LEN (WSREP_XID_SEQNO_OFFSET + sizeof(wsrep_seqno_t))
+
+void wsrep_xid_init(XID* xid, const wsrep_uuid_t* uuid, wsrep_seqno_t seqno)
+{
+ xid->formatID= 1;
+ xid->gtrid_length= WSREP_XID_GTRID_LEN;
+ xid->bqual_length= 0;
+ memset(xid->data, 0, sizeof(xid->data));
+ memcpy(xid->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN);
+ memcpy(xid->data + WSREP_XID_UUID_OFFSET, uuid, sizeof(wsrep_uuid_t));
+ memcpy(xid->data + WSREP_XID_SEQNO_OFFSET, &seqno, sizeof(wsrep_seqno_t));
+}
+
+const wsrep_uuid_t* wsrep_xid_uuid(const XID* xid)
+{
+ if (wsrep_is_wsrep_xid(xid))
+ return reinterpret_cast<const wsrep_uuid_t*>(xid->data
+ + WSREP_XID_UUID_OFFSET);
+ else
+ return &WSREP_UUID_UNDEFINED;
+}
+
+wsrep_seqno_t wsrep_xid_seqno(const XID* xid)
+{
+
+ if (wsrep_is_wsrep_xid(xid))
+ {
+ wsrep_seqno_t seqno;
+ memcpy(&seqno, xid->data + WSREP_XID_SEQNO_OFFSET, sizeof(wsrep_seqno_t));
+ return seqno;
+ }
+ else
+ {
+ return WSREP_SEQNO_UNDEFINED;
+ }
+}
+
+extern "C"
+int wsrep_is_wsrep_xid(const void* xid_ptr)
+{
+ const XID* xid= reinterpret_cast<const XID*>(xid_ptr);
+ return (xid->formatID == 1 &&
+ xid->gtrid_length == WSREP_XID_GTRID_LEN &&
+ xid->bqual_length == 0 &&
+ !memcmp(xid->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN));
+}
diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc
new file mode 100644
index 00000000000..01f91b0a5ae
--- /dev/null
+++ b/sql/wsrep_var.cc
@@ -0,0 +1,548 @@
+/* Copyright 2008 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <mysqld.h>
+#include <sql_class.h>
+#include <sql_plugin.h>
+#include <set_var.h>
+#include <sql_acl.h>
+#include "wsrep_priv.h"
+#include <my_dir.h>
+#include <cstdio>
+#include <cstdlib>
+
+#define WSREP_START_POSITION_ZERO "00000000-0000-0000-0000-000000000000:-1"
+#define WSREP_CLUSTER_NAME "my_wsrep_cluster"
+
+const char* wsrep_provider = 0;
+const char* wsrep_provider_options = 0;
+const char* wsrep_cluster_address = 0;
+const char* wsrep_cluster_name = 0;
+const char* wsrep_node_name = 0;
+static char node_address[256] = { 0, };
+const char* wsrep_node_address = node_address; // ???
+const char* wsrep_start_position = 0;
+ulong wsrep_OSU_method_options;
+static int wsrep_thread_change = 0;
+
+int wsrep_init_vars()
+{
+ wsrep_provider = my_strdup(WSREP_NONE, MYF(MY_WME));
+ wsrep_provider_options= my_strdup("", MYF(MY_WME));
+ wsrep_cluster_address = my_strdup("", MYF(MY_WME));
+ wsrep_cluster_name = my_strdup(WSREP_CLUSTER_NAME, MYF(MY_WME));
+ wsrep_node_name = my_strdup("", MYF(MY_WME));
+ wsrep_start_position = my_strdup(WSREP_START_POSITION_ZERO, MYF(MY_WME));
+
+ global_system_variables.binlog_format=BINLOG_FORMAT_ROW;
+ return 0;
+}
+
+bool wsrep_on_update (sys_var *self, THD* thd, enum_var_type var_type)
+{
+ if (var_type == OPT_GLOBAL) {
+ // FIXME: this variable probably should be changed only per session
+ thd->variables.wsrep_on = global_system_variables.wsrep_on;
+ }
+ else {
+ }
+
+#ifdef REMOVED
+ if (thd->variables.wsrep_on)
+ thd->variables.option_bits |= (OPTION_BIN_LOG);
+ else
+ thd->variables.option_bits &= ~(OPTION_BIN_LOG);
+#endif
+ return false;
+}
+
+void wsrep_causal_reads_update (sys_var *self, THD* thd, enum_var_type var_type)
+{
+ if (var_type == OPT_GLOBAL) {
+ thd->variables.wsrep_causal_reads = global_system_variables.wsrep_causal_reads;
+ }
+ else {
+ }
+}
+
+static int wsrep_start_position_verify (const char* start_str)
+{
+ size_t start_len;
+ wsrep_uuid_t uuid;
+ ssize_t uuid_len;
+
+ start_len = strlen (start_str);
+ if (start_len < 34)
+ return 1;
+
+ uuid_len = wsrep_uuid_scan (start_str, start_len, &uuid);
+ if (uuid_len < 0 || (start_len - uuid_len) < 2)
+ return 1;
+
+ if (start_str[uuid_len] != ':') // separator should follow UUID
+ return 1;
+
+ char* endptr;
+ wsrep_seqno_t const seqno __attribute__((unused)) // to avoid GCC warnings
+ (strtoll(&start_str[uuid_len + 1], &endptr, 10));
+
+ if (*endptr == '\0') return 0; // remaining string was seqno
+
+ return 1;
+}
+
+bool wsrep_start_position_check (sys_var *self, THD* thd, set_var* var)
+{
+ char buff[FN_REFLEN];
+ String str(buff, sizeof(buff), system_charset_info), *res;
+ const char* start_str = NULL;
+
+ if (!(res = var->value->val_str(&str))) goto err;
+
+ start_str = res->c_ptr();
+
+ if (!start_str) goto err;
+
+ if (!wsrep_start_position_verify(start_str)) return 0;
+
+err:
+
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+ start_str ? start_str : "NULL");
+ return 1;
+}
+
+void wsrep_set_local_position (const char* value)
+{
+ size_t value_len = strlen (value);
+ size_t uuid_len = wsrep_uuid_scan (value, value_len, &local_uuid);
+
+ local_seqno = strtoll (value + uuid_len + 1, NULL, 10);
+
+ XID xid;
+ wsrep_xid_init(&xid, &local_uuid, local_seqno);
+ wsrep_set_SE_checkpoint(&xid);
+ WSREP_INFO ("wsrep_start_position var submitted: '%s'", wsrep_start_position);
+}
+
+bool wsrep_start_position_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ // since this value passed wsrep_start_position_check, don't check anything
+ // here
+ wsrep_set_local_position (wsrep_start_position);
+
+ if (wsrep) {
+ wsrep->sst_received (wsrep, &local_uuid, local_seqno, NULL, 0);
+ }
+
+ return 0;
+}
+
+void wsrep_start_position_init (const char* val)
+{
+ if (NULL == val || wsrep_start_position_verify (val))
+ {
+ WSREP_ERROR("Bad initial value for wsrep_start_position: %s",
+ (val ? val : ""));
+ return;
+ }
+
+ wsrep_set_local_position (val);
+}
+
+static bool refresh_provider_options()
+{
+ WSREP_DEBUG("refresh_provider_options: %s",
+ (wsrep_provider_options) ? wsrep_provider_options : "null");
+ char* opts= wsrep->options_get(wsrep);
+ if (opts)
+ {
+ if (wsrep_provider_options) my_free((void *)wsrep_provider_options);
+ wsrep_provider_options = (char*)my_memdup(opts, strlen(opts) + 1,
+ MYF(MY_WME));
+ }
+ else
+ {
+ WSREP_ERROR("Failed to get provider options");
+ return true;
+ }
+ return false;
+}
+
+static int wsrep_provider_verify (const char* provider_str)
+{
+ MY_STAT f_stat;
+ char path[FN_REFLEN];
+
+ if (!provider_str || strlen(provider_str)== 0)
+ return 1;
+
+ if (!strcmp(provider_str, WSREP_NONE))
+ return 0;
+
+ if (!unpack_filename(path, provider_str))
+ return 1;
+
+ /* check that provider file exists */
+ bzero(&f_stat, sizeof(MY_STAT));
+ if (!my_stat(path, &f_stat, MYF(0)))
+ {
+ return 1;
+ }
+ return 0;
+}
+
+bool wsrep_provider_check (sys_var *self, THD* thd, set_var* var)
+{
+ char buff[FN_REFLEN];
+ String str(buff, sizeof(buff), system_charset_info), *res;
+ const char* provider_str = NULL;
+
+ if (!(res = var->value->val_str(&str))) goto err;
+
+ provider_str = res->c_ptr();
+
+ if (!provider_str) goto err;
+
+ if (!wsrep_provider_verify(provider_str)) return 0;
+
+err:
+
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+ provider_str ? provider_str : "NULL");
+ return 1;
+}
+
+bool wsrep_provider_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ bool rcode= false;
+
+ bool wsrep_on_saved= thd->variables.wsrep_on;
+ thd->variables.wsrep_on= false;
+
+ WSREP_DEBUG("wsrep_provider_update: %s", wsrep_provider);
+
+ wsrep_stop_replication(thd);
+ wsrep_deinit();
+
+ char* tmp= strdup(wsrep_provider); // wsrep_init() rewrites provider
+ //when fails
+ if (wsrep_init())
+ {
+ my_error(ER_CANT_OPEN_LIBRARY, MYF(0), tmp);
+ rcode = true;
+ }
+ free(tmp);
+
+ // we sure don't want to use old address with new provider
+ wsrep_cluster_address_init(NULL);
+ wsrep_provider_options_init(NULL);
+
+ thd->variables.wsrep_on= wsrep_on_saved;
+
+ refresh_provider_options();
+
+ return rcode;
+}
+
+void wsrep_provider_init (const char* value)
+{
+ WSREP_DEBUG("wsrep_provider_init: %s -> %s",
+ (wsrep_provider) ? wsrep_provider : "null",
+ (value) ? value : "null");
+ if (NULL == value || wsrep_provider_verify (value))
+ {
+ WSREP_ERROR("Bad initial value for wsrep_provider: %s",
+ (value ? value : ""));
+ return;
+ }
+
+ if (wsrep_provider) my_free((void *)wsrep_provider);
+ wsrep_provider = my_strdup(value, MYF(0));
+}
+
+bool wsrep_provider_options_check(sys_var *self, THD* thd, set_var* var)
+{
+ return 0;
+}
+
+bool wsrep_provider_options_update(sys_var *self, THD* thd, enum_var_type type)
+{
+ wsrep_status_t ret= wsrep->options_set(wsrep, wsrep_provider_options);
+ if (ret != WSREP_OK)
+ {
+ WSREP_ERROR("Set options returned %d", ret);
+ return true;
+ }
+ return refresh_provider_options();
+}
+
+void wsrep_provider_options_init(const char* value)
+{
+ if (wsrep_provider_options && wsrep_provider_options != value)
+ my_free((void *)wsrep_provider_options);
+ wsrep_provider_options = (value) ? my_strdup(value, MYF(0)) : NULL;
+}
+
+static int wsrep_cluster_address_verify (const char* cluster_address_str)
+{
+ /* There is no predefined address format, it depends on provider. */
+ return 0;
+}
+
+bool wsrep_cluster_address_check (sys_var *self, THD* thd, set_var* var)
+{
+ char buff[FN_REFLEN];
+ String str(buff, sizeof(buff), system_charset_info), *res;
+ const char* cluster_address_str = NULL;
+
+ if (!(res = var->value->val_str(&str))) goto err;
+
+ cluster_address_str = res->c_ptr();
+
+ if (!wsrep_cluster_address_verify(cluster_address_str)) return 0;
+
+ err:
+
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+ cluster_address_str ? cluster_address_str : "NULL");
+ return 1 ;
+}
+
+bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ bool wsrep_on_saved= thd->variables.wsrep_on;
+ thd->variables.wsrep_on= false;
+
+ wsrep_stop_replication(thd);
+
+ if (wsrep_start_replication())
+ {
+ wsrep_create_rollbacker();
+ wsrep_create_appliers(wsrep_slave_threads);
+ }
+
+ thd->variables.wsrep_on= wsrep_on_saved;
+
+ return false;
+}
+
+void wsrep_cluster_address_init (const char* value)
+{
+ WSREP_DEBUG("wsrep_cluster_address_init: %s -> %s",
+ (wsrep_cluster_address) ? wsrep_cluster_address : "null",
+ (value) ? value : "null");
+
+ if (wsrep_cluster_address) my_free ((void*)wsrep_cluster_address);
+ wsrep_cluster_address = (value) ? my_strdup(value, MYF(0)) : NULL;
+}
+
+bool wsrep_cluster_name_check (sys_var *self, THD* thd, set_var* var)
+{
+ char buff[FN_REFLEN];
+ String str(buff, sizeof(buff), system_charset_info), *res;
+ const char* cluster_name_str = NULL;
+
+ if (!(res = var->value->val_str(&str))) goto err;
+
+ cluster_name_str = res->c_ptr();
+
+ if (!cluster_name_str || strlen(cluster_name_str) == 0) goto err;
+
+ return 0;
+
+ err:
+
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+ cluster_name_str ? cluster_name_str : "NULL");
+ return 1;
+}
+
+bool wsrep_cluster_name_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ return 0;
+}
+
+bool wsrep_node_name_check (sys_var *self, THD* thd, set_var* var)
+{
+ char buff[FN_REFLEN];
+ String str(buff, sizeof(buff), system_charset_info), *res;
+ const char* node_name_str = NULL;
+
+ if (!(res = var->value->val_str(&str))) goto err;
+
+ node_name_str = res->c_ptr();
+
+ if (!node_name_str || strlen(node_name_str) == 0) goto err;
+
+ return 0;
+
+ err:
+
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+ node_name_str ? node_name_str : "NULL");
+ return 1;
+}
+
+bool wsrep_node_name_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ return 0;
+}
+
+// TODO: do something more elaborate, like checking connectivity
+bool wsrep_node_address_check (sys_var *self, THD* thd, set_var* var)
+{
+ char buff[FN_REFLEN];
+ String str(buff, sizeof(buff), system_charset_info), *res;
+ const char* node_address_str = NULL;
+
+ if (!(res = var->value->val_str(&str))) goto err;
+
+ node_address_str = res->c_ptr();
+
+ if (!node_address_str || strlen(node_address_str) == 0) goto err;
+
+ return 0;
+
+ err:
+
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+ node_address_str ? node_address_str : "NULL");
+ return 1;
+}
+
+bool wsrep_node_address_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ return 0;
+}
+
+void wsrep_node_address_init (const char* value)
+{
+ if (wsrep_node_address && strcmp(wsrep_node_address, value))
+ my_free ((void*)wsrep_node_address);
+
+ wsrep_node_address = (value) ? my_strdup(value, MYF(0)) : NULL;
+}
+
+bool wsrep_slave_threads_check (sys_var *self, THD* thd, set_var* var)
+{
+ mysql_mutex_lock(&LOCK_wsrep_slave_threads);
+ wsrep_thread_change = var->value->val_int() - wsrep_slave_threads;
+ mysql_mutex_unlock(&LOCK_wsrep_slave_threads);
+
+ return 0;
+}
+
+bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ if (wsrep_thread_change > 0)
+ {
+ wsrep_create_appliers(wsrep_thread_change);
+ }
+ else if (wsrep_thread_change < 0)
+ {
+ wsrep_close_applier_threads(-wsrep_thread_change);
+ }
+ return false;
+}
+/*
+ * Status variables stuff below
+ */
+static inline void
+wsrep_assign_to_mysql (SHOW_VAR* mysql, wsrep_stats_var* wsrep)
+{
+ mysql->name = wsrep->name;
+ switch (wsrep->type) {
+ case WSREP_VAR_INT64:
+ mysql->value = (char*) &wsrep->value._int64;
+ mysql->type = SHOW_LONGLONG;
+ break;
+ case WSREP_VAR_STRING:
+ mysql->value = (char*) &wsrep->value._string;
+ mysql->type = SHOW_CHAR_PTR;
+ break;
+ case WSREP_VAR_DOUBLE:
+ mysql->value = (char*) &wsrep->value._double;
+ mysql->type = SHOW_DOUBLE;
+ break;
+ }
+}
+
+#if DYNAMIC
+// somehow this mysql status thing works only with statically allocated arrays.
+static SHOW_VAR* mysql_status_vars = NULL;
+static int mysql_status_len = -1;
+#else
+static SHOW_VAR mysql_status_vars[512 + 1];
+static const int mysql_status_len = 512;
+#endif
+
+static void export_wsrep_status_to_mysql(THD* thd)
+{
+ int wsrep_status_len, i;
+
+ thd->wsrep_status_vars = wsrep->stats_get(wsrep);
+
+ if (!thd->wsrep_status_vars) {
+ return;
+ }
+
+ for (wsrep_status_len = 0;
+ thd->wsrep_status_vars[wsrep_status_len].name != NULL;
+ wsrep_status_len++);
+
+#if DYNAMIC
+ if (wsrep_status_len != mysql_status_len) {
+ void* tmp = realloc (mysql_status_vars,
+ (wsrep_status_len + 1) * sizeof(SHOW_VAR));
+ if (!tmp) {
+
+ sql_print_error ("Out of memory for wsrep status variables."
+ "Number of variables: %d", wsrep_status_len);
+ return;
+ }
+
+ mysql_status_len = wsrep_status_len;
+ mysql_status_vars = (SHOW_VAR*)tmp;
+ }
+ /* @TODO: fix this: */
+#else
+ if (mysql_status_len < wsrep_status_len) wsrep_status_len= mysql_status_len;
+#endif
+
+ for (i = 0; i < wsrep_status_len; i++)
+ wsrep_assign_to_mysql (mysql_status_vars + i, thd->wsrep_status_vars + i);
+
+ mysql_status_vars[wsrep_status_len].name = NullS;
+ mysql_status_vars[wsrep_status_len].value = NullS;
+ mysql_status_vars[wsrep_status_len].type = SHOW_LONG;
+}
+
+int wsrep_show_status (THD *thd, SHOW_VAR *var, char *buff)
+{
+ export_wsrep_status_to_mysql(thd);
+ var->type= SHOW_ARRAY;
+ var->value= (char *) &mysql_status_vars;
+ return 0;
+}
+
+void wsrep_free_status (THD* thd)
+{
+ if (thd->wsrep_status_vars)
+ {
+ wsrep->stats_free (wsrep, thd->wsrep_status_vars);
+ thd->wsrep_status_vars = 0;
+ }
+}
diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
index 6f2c2caffaf..d338330585a 100644
--- a/storage/innobase/dict/dict0dict.c
+++ b/storage/innobase/dict/dict0dict.c
@@ -2489,7 +2489,26 @@ next_rec:
return(NULL);
}
-
+#ifdef WITH_WSREP
+dict_index_t*
+wsrep_dict_foreign_find_index(
+/*====================*/
+ dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ dict_index_t* types_idx, /*!< in: NULL or an index to whose types the
+ column types must match */
+ ibool check_charsets,
+ /*!< in: whether to check charsets.
+ only has an effect if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of the columns must
+ be declared NOT NULL */
+{
+ return dict_foreign_find_index(
+ table, columns, n_cols, types_idx, check_charsets, check_null);
+}
+#endif /* WITH_WSREP */
/**********************************************************************//**
Find an index that is equivalent to the one passed in and is not marked
for deletion.
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 4ae98723c69..ebcb5fdff6e 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -90,6 +90,12 @@ extern "C" {
#include "ha_prototypes.h"
#include "ut0mem.h"
#include "ibuf0ibuf.h"
+#ifdef WITH_WSREP
+#include "../storage/innobase/include/ut0byte.h"
+#ifndef EXTRA_DEBUG
+ //#include "../storage/innobase/include/ut0byte.ic"
+#endif /* EXTRA_DEBUG */
+#endif /* WITH_WSREP */
}
#include "ha_innodb.h"
@@ -99,6 +105,32 @@ extern "C" {
# define MYSQL_PLUGIN_IMPORT /* nothing */
# endif /* MYSQL_PLUGIN_IMPORT */
+#ifdef WITH_WSREP
+#include <wsrep_mysqld.h>
+#include <my_md5.h>
+extern my_bool wsrep_certify_nonPK;
+class binlog_trx_data;
+extern handlerton *binlog_hton;
+
+extern MYSQL_PLUGIN_IMPORT MYSQL_BIN_LOG mysql_bin_log;
+extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_wsrep_rollback;
+extern MYSQL_PLUGIN_IMPORT mysql_cond_t COND_wsrep_rollback;
+extern MYSQL_PLUGIN_IMPORT wsrep_aborting_thd_t wsrep_aborting_thd;
+
+static inline wsrep_trx_handle_t*
+wsrep_trx_handle(THD* thd, const trx_t* trx) {
+ return wsrep_trx_handle_for_id(wsrep_thd_trx_handle(thd),
+ (wsrep_trx_id_t)trx->id);
+}
+
+extern bool wsrep_prepare_key_for_innodb(const uchar *cache_key,
+ size_t cache_key_len,
+ const uchar* row_id,
+ size_t row_id_len,
+ wsrep_key_part_t* key,
+ size_t* key_len);
+
+#endif /* WITH_WSREP */
/** to protect innobase_open_files */
static mysql_mutex_t innobase_share_mutex;
/** to force correct commit order in binlog */
@@ -852,6 +884,15 @@ thd_to_trx(
{
return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr));
}
+#ifdef WITH_WSREP
+ulonglong
+thd_to_trx_id(
+/*=======*/
+ THD* thd) /*!< in: MySQL thread */
+{
+ return(thd_to_trx(thd)->id);
+}
+#endif
/********************************************************************//**
Call this function when mysqld passes control to the client. That is to
@@ -882,6 +923,15 @@ innobase_release_temporary_latches(
return(0);
}
+#ifdef WITH_WSREP
+static int
+wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd,
+ my_bool signal);
+static void
+wsrep_fake_trx_id(handlerton* hton, THD *thd);
+static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid);
+static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid);
+#endif
/********************************************************************//**
Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
time calls srv_active_wake_master_thread. This function should be used
@@ -1331,6 +1381,9 @@ int
innobase_mysql_tmpfile(void)
/*========================*/
{
+#ifdef WITH_INNODB_DISALLOW_WRITES
+ os_event_wait(srv_allow_writes_event);
+#endif /* WITH_INNODB_DISALLOW_WRITES */
int fd2 = -1;
File fd = mysql_tmpfile("ib");
if (fd >= 0) {
@@ -2271,6 +2324,12 @@ innobase_init(
innobase_hton->flags=HTON_NO_FLAGS;
innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
innobase_hton->alter_table_flags = innobase_alter_table_flags;
+#ifdef WITH_WSREP
+ innobase_hton->wsrep_abort_transaction=wsrep_abort_transaction;
+ innobase_hton->wsrep_set_checkpoint=innobase_wsrep_set_checkpoint;
+ innobase_hton->wsrep_get_checkpoint=innobase_wsrep_get_checkpoint;
+ innobase_hton->wsrep_fake_trx_id=wsrep_fake_trx_id;
+#endif /* WITH_WSREP */
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
@@ -2700,6 +2759,27 @@ innobase_commit_low(
trx_commit_for_mysql(trx);
}
+#ifdef WITH_WSREP
+ THD* thd = (THD*)trx->mysql_thd;
+ const char* tmp = 0;
+ if (wsrep_on((void*)thd)) {
+#ifdef WSREP_PROC_INFO
+ char info[64];
+ info[sizeof(info) - 1] = '\0';
+ snprintf(info, sizeof(info) - 1,
+ "innobase_commit_low():trx_commit_for_mysql(%lld)",
+ (long long) wsrep_thd_trx_seqno(thd));
+ tmp = thd_proc_info(thd, info);
+
+#else
+ tmp = thd_proc_info(thd, "innobase_commit_low()");
+#endif /* WSREP_PROC_INFO */
+ }
+#endif /* WITH_WSREP */
+ trx_commit_for_mysql(trx);
+#ifdef WITH_WSREP
+ if (wsrep_on((void*)thd)) { thd_proc_info(thd, tmp); }
+#endif /* WITH_WSREP */
}
/*****************************************************************//**
@@ -3316,7 +3396,11 @@ ha_innobase::max_supported_key_length() const
therefore set to slightly less than 1 / 4 of page size which
is 16 kB; but currently MySQL does not work with keys whose
size is > MAX_KEY_LENGTH */
+#ifdef WITH_WSREP
+ return(3500);
+#else
return(3500);
+#endif
}
/****************************************************************//**
@@ -4404,7 +4488,96 @@ innobase_mysql_cmp(
return(0);
}
+#ifdef WITH_WSREP
+extern "C" UNIV_INTERN
+void
+wsrep_innobase_mysql_sort(
+/*===============*/
+ /* out: str contains sort string */
+ int mysql_type, /* in: MySQL type */
+ uint charset_number, /* in: number of the charset */
+ unsigned char* str, /* in: data field */
+ unsigned int str_length) /* in: data field length,
+ not UNIV_SQL_NULL */
+{
+ CHARSET_INFO* charset;
+ enum_field_types mysql_tp;
+
+ DBUG_ASSERT(str_length != UNIV_SQL_NULL);
+
+ mysql_tp = (enum_field_types) mysql_type;
+ switch (mysql_tp) {
+
+ case MYSQL_TYPE_BIT:
+ case MYSQL_TYPE_STRING:
+ case MYSQL_TYPE_VAR_STRING:
+ case MYSQL_TYPE_TINY_BLOB:
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ case MYSQL_TYPE_BLOB:
+ case MYSQL_TYPE_LONG_BLOB:
+ case MYSQL_TYPE_VARCHAR:
+ {
+ uchar tmp_str[REC_VERSION_56_MAX_INDEX_COL_LEN];
+ uint tmp_length = REC_VERSION_56_MAX_INDEX_COL_LEN;
+
+ /* Use the charset number to pick the right charset struct for
+ the comparison. Since the MySQL function get_charset may be
+ slow before Bar removes the mutex operation there, we first
+ look at 2 common charsets directly. */
+
+ if (charset_number == default_charset_info->number) {
+ charset = default_charset_info;
+ } else if (charset_number == my_charset_latin1.number) {
+ charset = &my_charset_latin1;
+ } else {
+ charset = get_charset(charset_number, MYF(MY_WME));
+
+ if (charset == NULL) {
+ sql_print_error("InnoDB needs charset %lu for doing "
+ "a comparison, but MySQL cannot "
+ "find that charset.",
+ (ulong) charset_number);
+ ut_a(0);
+ }
+ }
+
+ ut_a(str_length <= tmp_length);
+ memcpy(tmp_str, str, str_length);
+
+ tmp_length = charset->coll->strnxfrm(charset, str, str_length,
+ tmp_str, tmp_length);
+ DBUG_ASSERT(tmp_length == str_length);
+
+ break;
+ }
+ case MYSQL_TYPE_DECIMAL :
+ case MYSQL_TYPE_TINY :
+ case MYSQL_TYPE_SHORT :
+ case MYSQL_TYPE_LONG :
+ case MYSQL_TYPE_FLOAT :
+ case MYSQL_TYPE_DOUBLE :
+ case MYSQL_TYPE_NULL :
+ case MYSQL_TYPE_TIMESTAMP :
+ case MYSQL_TYPE_LONGLONG :
+ case MYSQL_TYPE_INT24 :
+ case MYSQL_TYPE_DATE :
+ case MYSQL_TYPE_TIME :
+ case MYSQL_TYPE_DATETIME :
+ case MYSQL_TYPE_YEAR :
+ case MYSQL_TYPE_NEWDATE :
+ case MYSQL_TYPE_NEWDECIMAL :
+ case MYSQL_TYPE_ENUM :
+ case MYSQL_TYPE_SET :
+ case MYSQL_TYPE_GEOMETRY :
+ break;
+ default:
+ break;
+ }
+
+ return;
+}
+#endif // WITH_WSREP
/**************************************************************//**
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
@@ -4523,6 +4696,262 @@ innobase_read_from_2_little_endian(
/*******************************************************************//**
Stores a key value for a row to a buffer.
@return key value length as stored in buff */
+#ifdef WITH_WSREP
+UNIV_INTERN
+uint
+wsrep_store_key_val_for_row(
+/*===============================*/
+ TABLE* table,
+ uint keynr, /*!< in: key number */
+ char* buff, /*!< in/out: buffer for the key value (in MySQL
+ format) */
+ uint buff_len,/*!< in: buffer length */
+ const uchar* record,
+ ibool* key_is_null)/*!< out: full key was null */
+{
+ KEY* key_info = table->key_info + keynr;
+ KEY_PART_INFO* key_part = key_info->key_part;
+ KEY_PART_INFO* end = key_part + key_info->key_parts;
+ char* buff_start = buff;
+ enum_field_types mysql_type;
+ Field* field;
+
+ DBUG_ENTER("store_key_val_for_row");
+
+ bzero(buff, buff_len);
+ *key_is_null = TRUE;
+
+ for (; key_part != end; key_part++) {
+ uchar sorted[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
+ ibool part_is_null = FALSE;
+
+ if (key_part->null_bit) {
+ if (record[key_part->null_offset] &
+ key_part->null_bit) {
+ *buff = 1;
+ part_is_null = TRUE;
+ } else {
+ *buff = 0;
+ }
+ buff++;
+ }
+ if (!part_is_null) *key_is_null = FALSE;
+
+ field = key_part->field;
+ mysql_type = field->type();
+
+ if (mysql_type == MYSQL_TYPE_VARCHAR) {
+ /* >= 5.0.3 true VARCHAR */
+ ulint lenlen;
+ ulint len;
+ const byte* data;
+ ulint key_len;
+ ulint true_len;
+ CHARSET_INFO* cs;
+ int error=0;
+
+ key_len = key_part->length;
+
+ if (part_is_null) {
+ buff += key_len + 2;
+
+ continue;
+ }
+ cs = field->charset();
+
+ lenlen = (ulint)
+ (((Field_varstring*)field)->length_bytes);
+
+ data = row_mysql_read_true_varchar(&len,
+ (byte*) (record
+ + (ulint)get_field_offset(table, field)),
+ lenlen);
+
+ true_len = len;
+
+ /* For multi byte character sets we need to calculate
+ the true length of the key */
+
+ if (len > 0 && cs->mbmaxlen > 1) {
+ true_len = (ulint) cs->cset->well_formed_len(cs,
+ (const char *) data,
+ (const char *) data + len,
+ (uint) (key_len /
+ cs->mbmaxlen),
+ &error);
+ }
+
+ /* In a column prefix index, we may need to truncate
+ the stored value: */
+
+ if (true_len > key_len) {
+ true_len = key_len;
+ }
+
+ memcpy(sorted, data, true_len);
+ wsrep_innobase_mysql_sort(
+ mysql_type, cs->number, sorted, true_len);
+
+ if (wsrep_protocol_version > 1) {
+ memcpy(buff, sorted, true_len);
+ /* Note that we always reserve the maximum possible
+ length of the true VARCHAR in the key value, though
+ only len first bytes after the 2 length bytes contain
+ actual data. The rest of the space was reset to zero
+ in the bzero() call above. */
+ buff += true_len;
+ } else {
+ buff += key_len;
+ }
+ } else if (mysql_type == MYSQL_TYPE_TINY_BLOB
+ || mysql_type == MYSQL_TYPE_MEDIUM_BLOB
+ || mysql_type == MYSQL_TYPE_BLOB
+ || mysql_type == MYSQL_TYPE_LONG_BLOB
+ /* MYSQL_TYPE_GEOMETRY data is treated
+ as BLOB data in innodb. */
+ || mysql_type == MYSQL_TYPE_GEOMETRY) {
+
+ CHARSET_INFO* cs;
+ ulint key_len;
+ ulint true_len;
+ int error=0;
+ ulint blob_len;
+ const byte* blob_data;
+
+ ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
+
+ key_len = key_part->length;
+
+ if (part_is_null) {
+ buff += key_len + 2;
+
+ continue;
+ }
+
+ cs = field->charset();
+
+ blob_data = row_mysql_read_blob_ref(&blob_len,
+ (byte*) (record
+ + (ulint)get_field_offset(table, field)),
+ (ulint) field->pack_length());
+
+ true_len = blob_len;
+
+ ut_a(get_field_offset(table, field)
+ == key_part->offset);
+
+ /* For multi byte character sets we need to calculate
+ the true length of the key */
+
+ if (blob_len > 0 && cs->mbmaxlen > 1) {
+ true_len = (ulint) cs->cset->well_formed_len(cs,
+ (const char *) blob_data,
+ (const char *) blob_data
+ + blob_len,
+ (uint) (key_len /
+ cs->mbmaxlen),
+ &error);
+ }
+
+ /* All indexes on BLOB and TEXT are column prefix
+ indexes, and we may need to truncate the data to be
+ stored in the key value: */
+
+ if (true_len > key_len) {
+ true_len = key_len;
+ }
+
+ memcpy(sorted, blob_data, true_len);
+ wsrep_innobase_mysql_sort(
+ mysql_type, cs->number, sorted, true_len);
+
+ memcpy(buff, sorted, true_len);
+
+ /* Note that we always reserve the maximum possible
+ length of the BLOB prefix in the key value. */
+ if (wsrep_protocol_version > 1) {
+ buff += true_len;
+ } else {
+ buff += key_len;
+ }
+ } else {
+ /* Here we handle all other data types except the
+ true VARCHAR, BLOB and TEXT. Note that the column
+ value we store may be also in a column prefix
+ index. */
+
+ CHARSET_INFO* cs;
+ ulint true_len;
+ ulint key_len;
+ const uchar* src_start;
+ int error=0;
+ enum_field_types real_type;
+
+ key_len = key_part->length;
+
+ if (part_is_null) {
+ buff += key_len;
+
+ continue;
+ }
+
+ src_start = record + key_part->offset;
+ real_type = field->real_type();
+ true_len = key_len;
+
+ /* Character set for the field is defined only
+ to fields whose type is string and real field
+ type is not enum or set. For these fields check
+ if character set is multi byte. */
+
+ if (real_type != MYSQL_TYPE_ENUM
+ && real_type != MYSQL_TYPE_SET
+ && ( mysql_type == MYSQL_TYPE_VAR_STRING
+ || mysql_type == MYSQL_TYPE_STRING)) {
+
+ cs = field->charset();
+
+ /* For multi byte character sets we need to
+ calculate the true length of the key */
+
+ if (key_len > 0 && cs->mbmaxlen > 1) {
+
+ true_len = (ulint)
+ cs->cset->well_formed_len(cs,
+ (const char *)src_start,
+ (const char *)src_start
+ + key_len,
+ (uint) (key_len /
+ cs->mbmaxlen),
+ &error);
+ }
+ memcpy(sorted, src_start, true_len);
+ wsrep_innobase_mysql_sort(
+ mysql_type, cs->number, sorted, true_len);
+ memcpy(buff, sorted, true_len);
+ } else {
+ memcpy(buff, src_start, true_len);
+ }
+ buff += true_len;
+
+ /* Pad the unused space with spaces. */
+
+ if (true_len < key_len) {
+ ulint pad_len = key_len - true_len;
+ ut_a(!(pad_len % cs->mbminlen));
+
+ cs->cset->fill(cs, buff, pad_len,
+ 0x20 /* space */);
+ buff += pad_len;
+ }
+ }
+ }
+
+ ut_a(buff <= buff_start + buff_len);
+
+ DBUG_RETURN((uint)(buff - buff_start));
+}
+#endif /* WITH_WSREP */
UNIV_INTERN
uint
ha_innobase::store_key_val_for_row(
@@ -5126,6 +5555,9 @@ ha_innobase::write_row(
ulint error = 0;
int error_result= 0;
ibool auto_inc_used= FALSE;
+#ifdef WITH_WSREP
+ ibool auto_inc_inserted= FALSE; /* if NULL was inserted */
+#endif
ulint sql_command;
trx_t* trx = thd_to_trx(user_thd);
@@ -5156,8 +5588,17 @@ ha_innobase::write_row(
if ((sql_command == SQLCOM_ALTER_TABLE
|| sql_command == SQLCOM_OPTIMIZE
|| sql_command == SQLCOM_CREATE_INDEX
+#ifdef WITH_WSREP
+ || (wsrep_on(user_thd) && sql_command == SQLCOM_LOAD)
+#endif /* WITH_WSREP */
|| sql_command == SQLCOM_DROP_INDEX)
&& num_write_row >= 10000) {
+#ifdef WITH_WSREP
+ if (wsrep_on(user_thd) && sql_command == SQLCOM_LOAD) {
+ WSREP_DEBUG("forced trx split for LOAD: %s",
+ wsrep_thd_query(user_thd));
+ }
+#endif /* WITH_WSREP */
/* ALTER TABLE is COMMITted at every 10000 copied rows.
The IX table lock for the original table has to be re-issued.
As this method will be called on a temporary table where the
@@ -5229,7 +5670,9 @@ no_commit:
/* Reset the error code before calling
innobase_get_auto_increment(). */
prebuilt->autoinc_error = DB_SUCCESS;
-
+#ifdef WITH_WSREP
+ auto_inc_inserted= (table->next_number_field->val_int() == 0);
+#endif
if ((error = update_auto_increment())) {
/* We don't want to mask autoinc overflow errors. */
@@ -5312,6 +5755,30 @@ no_commit:
case SQLCOM_REPLACE_SELECT:
goto set_max_autoinc;
+#ifdef WITH_WSREP
+ /* workaround for LP bug #355000, retrying the insert */
+ case SQLCOM_INSERT:
+ if (wsrep_on(current_thd) &&
+ auto_inc_inserted &&
+ wsrep_drupal_282555_workaround &&
+ !thd_test_options(current_thd,
+ OPTION_NOT_AUTOCOMMIT |
+ OPTION_BEGIN)) {
+ WSREP_DEBUG(
+ "retrying insert: %s",
+ (*wsrep_thd_query(current_thd)) ?
+ wsrep_thd_query(current_thd) :
+ (char *)"void");
+ error= DB_SUCCESS;
+ wsrep_thd_set_conflict_state(
+ current_thd, MUST_ABORT);
+ innodb_srv_conc_exit_innodb(prebuilt->trx);
+ /* jump straight to func exit over
+ * later wsrep hooks */
+ goto func_exit;
+ }
+ break;
+#endif
default:
break;
}
@@ -5360,6 +5827,20 @@ report_error:
error_result = convert_error_code_to_mysql((int) error,
prebuilt->table->flags,
user_thd);
+#ifdef WITH_WSREP
+ if (!error_result && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE &&
+ wsrep_on(user_thd) && !wsrep_consistency_check(user_thd) &&
+ (sql_command != SQLCOM_LOAD ||
+ thd_binlog_format(user_thd) == BINLOG_FORMAT_ROW)) {
+
+ if (wsrep_append_keys(user_thd, false, record, NULL)) {
+ DBUG_PRINT("wsrep", ("row key failed"));
+ error_result = HA_ERR_INTERNAL_ERROR;
+ goto wsrep_error;
+ }
+ }
+wsrep_error:
+#endif
func_exit:
innobase_active_small();
@@ -5641,6 +6122,20 @@ ha_innobase::update_row(
innobase_active_small();
+#ifdef WITH_WSREP
+ if (!error && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE &&
+ wsrep_on(user_thd)) {
+
+ DBUG_PRINT("wsrep", ("update row key"));
+
+ if (wsrep_append_keys(user_thd, false, old_row, new_row)) {
+ DBUG_PRINT("wsrep", ("row key failed"));
+ error = HA_ERR_INTERNAL_ERROR;
+ goto wsrep_error;
+ }
+ }
+wsrep_error:
+#endif
DBUG_RETURN(error);
}
@@ -5684,6 +6179,18 @@ ha_innobase::delete_row(
innobase_active_small();
+#ifdef WITH_WSREP
+ if (!error && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE &&
+ wsrep_on(user_thd)) {
+
+ if (wsrep_append_keys(user_thd, false, record, NULL)) {
+ DBUG_PRINT("wsrep", ("delete fail"));
+ error = HA_ERR_INTERNAL_ERROR;
+ goto wsrep_error;
+ }
+ }
+wsrep_error:
+#endif
DBUG_RETURN(error);
}
@@ -6471,7 +6978,354 @@ ha_innobase::rnd_pos(
DBUG_RETURN(error);
}
+#ifdef WITH_WSREP
+extern "C" {
+dict_index_t*
+wsrep_dict_foreign_find_index(
+ dict_table_t* table,
+ const char** columns,
+ ulint n_cols,
+ dict_index_t* types_idx,
+ ibool check_charsets,
+ ulint check_null);
+ulint
+wsrep_append_foreign_key(
+/*===========================*/
+ trx_t* trx, /*!< in: trx */
+ dict_foreign_t* foreign, /*!< in: foreign key constraint */
+ const rec_t* rec, /*!<in: clustered index record */
+ dict_index_t* index, /*!<in: clustered index */
+ ibool referenced, /*!<in: is check for referenced table */
+ ibool shared) /*!<in: is shared access */
+{
+ ut_a(trx);
+ THD* thd = (THD*)trx->mysql_thd;
+ ulint rcode = DB_SUCCESS;
+ char cache_key[513] = {'\0'};
+ int cache_key_len;
+
+ if (!wsrep_on(trx->mysql_thd) ||
+ wsrep_thd_exec_mode(thd) != LOCAL_STATE)
+ return DB_SUCCESS;
+
+ if (!thd || !foreign ||
+ (!foreign->referenced_table && !foreign->foreign_table))
+ {
+ WSREP_INFO("FK: %s missing in: %s",
+ (!thd) ? "thread" :
+ ((!foreign) ? "constraint" :
+ ((!foreign->referenced_table) ?
+ "referenced table" : "foreign table")),
+ (thd && wsrep_thd_query(thd)) ?
+ wsrep_thd_query(thd) : "void");
+ return DB_ERROR;
+ }
+
+ if ( !((referenced) ?
+ foreign->referenced_table : foreign->foreign_table))
+ {
+ WSREP_DEBUG("pulling %s table into cache",
+ (referenced) ? "referenced" : "foreign");
+ mutex_enter(&(dict_sys->mutex));
+ if (referenced)
+ {
+ foreign->referenced_table =
+ dict_table_check_if_in_cache_low(
+ foreign->referenced_table_name_lookup);
+ foreign->referenced_index =
+ wsrep_dict_foreign_find_index(
+ foreign->referenced_table,
+ foreign->referenced_col_names,
+ foreign->n_fields,
+ foreign->foreign_index,
+ TRUE, FALSE);
+ }
+ else
+ {
+ foreign->foreign_table =
+ dict_table_check_if_in_cache_low(
+ foreign->foreign_table_name_lookup);
+ foreign->foreign_index =
+ wsrep_dict_foreign_find_index(
+ foreign->foreign_table,
+ foreign->foreign_col_names,
+ foreign->n_fields,
+ foreign->referenced_index,
+ TRUE, FALSE);
+
+ }
+ mutex_exit(&(dict_sys->mutex));
+ }
+
+ if ( !((referenced) ?
+ foreign->referenced_table : foreign->foreign_table))
+ {
+ WSREP_WARN("FK: %s missing in query: %s",
+ (!foreign->referenced_table) ?
+ "referenced table" : "foreign table",
+ (wsrep_thd_query(thd)) ?
+ wsrep_thd_query(thd) : "void");
+ return DB_ERROR;
+ }
+ byte key[WSREP_MAX_SUPPORTED_KEY_LENGTH+1];
+ ulint len = WSREP_MAX_SUPPORTED_KEY_LENGTH;
+
+ dict_index_t *idx_target = (referenced) ?
+ foreign->referenced_index : foreign->foreign_index;
+ dict_index_t *idx = (referenced) ?
+ UT_LIST_GET_FIRST(foreign->referenced_table->indexes) :
+ UT_LIST_GET_FIRST(foreign->foreign_table->indexes);
+ int i = 0;
+ while (idx != NULL && idx != idx_target) {
+ idx = UT_LIST_GET_NEXT(indexes, idx);
+ i++;
+ }
+ ut_a(idx);
+ key[0] = (char)i;
+
+ rcode = wsrep_rec_get_primary_key(
+ &key[1], &len, rec, index,
+ wsrep_protocol_version > 1);
+ if (rcode != DB_SUCCESS) {
+ WSREP_ERROR(
+ "FK key set failed: %lu (%lu %lu), index: %s %s, %s",
+ rcode, referenced, shared,
+ (index && index->name) ? index->name :
+ "void index",
+ (index && index->table_name) ? index->table_name :
+ "void table",
+ wsrep_thd_query(thd));
+ return rcode;
+ }
+ strncpy(cache_key,
+ (wsrep_protocol_version > 1) ?
+ ((referenced) ?
+ foreign->referenced_table->name :
+ foreign->foreign_table->name) :
+ foreign->foreign_table->name, sizeof(cache_key) - 1);
+ cache_key_len = strlen(cache_key);
+#ifdef WSREP_DEBUG_PRINT
+ ulint j;
+ fprintf(stderr, "FK parent key, table: %s %s len: %lu ",
+ cache_key, (shared) ? "shared" : "exclusive", len+1);
+ for (j=0; j<len+1; j++) {
+ fprintf(stderr, " %hhX, ", key[j]);
+ }
+ fprintf(stderr, "\n");
+#endif
+ char *p = strchr(cache_key, '/');
+ if (p) {
+ *p = '\0';
+ } else {
+ WSREP_WARN("unexpected foreign key table %s %s",
+ foreign->referenced_table->name,
+ foreign->foreign_table->name);
+ }
+
+ wsrep_key_part_t wkey_part[3];
+ wsrep_key_t wkey = {wkey_part, 3};
+ if (!wsrep_prepare_key_for_innodb(
+ (const uchar*)cache_key,
+ cache_key_len + 1,
+ (const uchar*)key, len+1,
+ wkey_part,
+ &wkey.key_parts_len)) {
+ WSREP_WARN("key prepare failed for cascaded FK: %s",
+ (wsrep_thd_query(thd)) ?
+ wsrep_thd_query(thd) : "void");
+ return DB_ERROR;
+ }
+ rcode = (int)wsrep->append_key(
+ wsrep,
+ wsrep_trx_handle(thd, trx),
+ &wkey,
+ 1,
+ shared);
+ if (rcode) {
+ DBUG_PRINT("wsrep", ("row key failed: %lu", rcode));
+ WSREP_ERROR("Appending cascaded fk row key failed: %s, %lu",
+ (wsrep_thd_query(thd)) ?
+ wsrep_thd_query(thd) : "void", rcode);
+ return DB_ERROR;
+ }
+
+ return DB_SUCCESS;
+}
+}
+
+static int
+wsrep_append_key(
+/*==================*/
+ THD *thd,
+ trx_t *trx,
+ TABLE_SHARE *table_share,
+ TABLE *table,
+ const char* key,
+ uint16_t key_len,
+ bool shared
+)
+{
+ DBUG_ENTER("wsrep_append_key");
+#ifdef WSREP_DEBUG_PRINT
+ fprintf(stderr, "%s conn %ld, trx %llu, keylen %d, table %s ",
+ (shared) ? "Shared" : "Exclusive",
+ wsrep_thd_thread_id(thd), trx->id, key_len,
+ table_share->table_name.str);
+ for (int i=0; i<key_len; i++) {
+ fprintf(stderr, "%hhX, ", key[i]);
+ }
+ fprintf(stderr, "\n");
+#endif
+ wsrep_key_part_t wkey_part[3];
+ wsrep_key_t wkey = {wkey_part, 3};
+ if (!wsrep_prepare_key_for_innodb(
+ (const uchar*)table_share->table_cache_key.str,
+ table_share->table_cache_key.length,
+ (const uchar*)key, key_len,
+ wkey_part,
+ &wkey.key_parts_len)) {
+ WSREP_WARN("key prepare failed for: %s",
+ (wsrep_thd_query(thd)) ?
+ wsrep_thd_query(thd) : "void");
+ DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ }
+
+ int rcode = (int)wsrep->append_key(
+ wsrep,
+ wsrep_trx_handle(thd, trx),
+ &wkey,
+ 1,
+ shared);
+ if (rcode) {
+ DBUG_PRINT("wsrep", ("row key failed: %d", rcode));
+ WSREP_WARN("Appending row key failed: %s, %d",
+ (wsrep_thd_query(thd)) ?
+ wsrep_thd_query(thd) : "void", rcode);
+ DBUG_RETURN(rcode);
+ }
+ DBUG_RETURN(0);
+}
+
+ibool
+wsrep_is_cascding_foreign_key_parent(
+ dict_table_t* table, /*!< in: InnoDB table */
+ dict_index_t* index /*!< in: InnoDB index */
+) {
+ // return referenced_by_foreign_key();
+ dict_foreign_t* fk = dict_table_get_referenced_constraint(table, index);
+ if (fk &&
+ (fk->type & DICT_FOREIGN_ON_UPDATE_CASCADE ||
+ fk->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)
+ ) {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+int
+ha_innobase::wsrep_append_keys(
+/*==================*/
+ THD *thd,
+ bool shared,
+ const uchar* record0, /* in: row in MySQL format */
+ const uchar* record1) /* in: row in MySQL format */
+{
+ DBUG_ENTER("wsrep_append_keys");
+ trx_t *trx = thd_to_trx(thd);
+
+ /* if no PK, calculate hash of full row, to be the key value */
+ if (prebuilt->clust_index_was_generated && wsrep_certify_nonPK) {
+ uchar digest[16];
+ int rcode;
+
+ MY_MD5_HASH(digest, (uchar *)record0, table->s->reclength);
+ if ((rcode = wsrep_append_key(thd, trx, table_share, table,
+ (const char*) digest, 16,
+ shared))) {
+ DBUG_RETURN(rcode);
+ }
+ if (record1) {
+ MY_MD5_HASH(digest, (uchar *)record1, table->s->reclength);
+ if ((rcode = wsrep_append_key(thd, trx, table_share,
+ table,
+ (const char*) digest,
+ 16, shared))) {
+ DBUG_RETURN(rcode);
+ }
+ }
+ }
+ if (wsrep_protocol_version == 0) {
+ uint len;
+ char keyval[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
+ char *key = &keyval[0];
+ KEY *key_info = table->key_info;
+ ibool is_null;
+
+ len = wsrep_store_key_val_for_row(
+ table, 0, key, key_info->key_length, record0, &is_null);
+
+ if (!is_null) {
+ int rcode = wsrep_append_key(
+ thd, trx, table_share, table, keyval,
+ len, shared);
+ if (rcode) DBUG_RETURN(rcode);
+ }
+ else
+ {
+ WSREP_DEBUG("NULL key skipped (proto 0): %s",
+ wsrep_thd_query(thd));
+ }
+ } else {
+ ut_a(table->s->keys <= 256);
+ uint i;
+ for (i=0; i<table->s->keys; ++i) {
+ uint len;
+ char keyval0[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
+ char keyval1[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
+ char *key0 = &keyval0[1];
+ char *key1 = &keyval1[1];
+ KEY *key_info = table->key_info + i;
+ ibool is_null;
+
+ keyval0[0] = (char)i;
+ keyval1[0] = (char)i;
+
+ if (key_info->flags & HA_NOSAME ||
+ referenced_by_foreign_key()) {
+
+ len = wsrep_store_key_val_for_row(
+ table, i, key0, key_info->key_length,
+ record0, &is_null);
+ if (!is_null) {
+ int rcode = wsrep_append_key(
+ thd, trx, table_share, table,
+ keyval0, len+1, shared);
+ if (rcode) DBUG_RETURN(rcode);
+ }
+ else
+ {
+ WSREP_DEBUG("NULL key skipped: %s",
+ wsrep_thd_query(thd));
+ }
+ if (record1) {
+ len = wsrep_store_key_val_for_row(
+ table, i, key1, key_info->key_length,
+ record1, &is_null);
+ if (!is_null && memcmp(key0, key1, len)) {
+ int rcode = wsrep_append_key(
+ thd, trx, table_share,
+ table,
+ keyval1, len+1, shared);
+ if (rcode) DBUG_RETURN(rcode);
+ }
+ }
+ }
+ }
+ }
+ DBUG_RETURN(0);
+}
+#endif
/*********************************************************************//**
Stores a reference to the current row to 'ref' field of the handle. Note
that in the case where we have generated the clustered index for the
@@ -9347,11 +10201,18 @@ ha_innobase::external_lock(
/* used by test case */
DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = 1;);
if (!skip) {
+#ifdef WITH_WSREP
+ if (!wsrep_on(thd) || wsrep_thd_exec_mode(thd) == LOCAL_STATE)
+ {
+#endif /* WITH_WSREP */
my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0),
" InnoDB is limited to row-logging when "
"transaction isolation level is "
"READ COMMITTED or READ UNCOMMITTED.");
DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE);
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
}
}
@@ -11389,6 +12250,281 @@ static SHOW_VAR innodb_status_variables_export[]= {
static struct st_mysql_storage_engine innobase_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
+#ifdef WITH_WSREP
+void
+wsrep_abort_slave_trx(wsrep_seqno_t bf_seqno, wsrep_seqno_t victim_seqno)
+{
+ WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be "
+ "caused by:\n\t"
+ "1) unsupported configuration options combination, please check documentation.\n\t"
+ "2) a bug in the code.\n\t"
+ "3) a database corruption.\n Node consistency compromized, "
+ "need to abort. Restart the node to resync with cluster.",
+ (long long)bf_seqno, (long long)victim_seqno);
+ abort();
+}
+int
+wsrep_innobase_kill_one_trx(void *bf_thd_ptr, trx_t *bf_trx, trx_t *victim_trx, ibool signal)
+{
+ DBUG_ENTER("wsrep_innobase_kill_one_trx");
+ THD *bf_thd = (THD *)bf_thd_ptr;
+ THD *thd = (THD *) victim_trx->mysql_thd;
+ int64_t bf_seqno = (bf_thd) ? wsrep_thd_trx_seqno(bf_thd) : 0;
+
+ if (!bf_thd) bf_thd = (bf_trx) ? (THD *)bf_trx->mysql_thd : NULL;
+
+ if (!thd) {
+ DBUG_PRINT("wsrep", ("no thd for conflicting lock"));
+ WSREP_WARN("no THD for trx: %llu", victim_trx->id);
+ DBUG_RETURN(1);
+ }
+ if (!bf_thd) {
+ DBUG_PRINT("wsrep", ("no BF thd for conflicting lock"));
+ WSREP_WARN("no BF THD for trx: %llu", (bf_trx) ? bf_trx->id : 0);
+ DBUG_RETURN(1);
+ }
+
+ WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
+
+ WSREP_DEBUG("BF kill (%lu, seqno: %lld), victim: (%lu) trx: %llu",
+ signal, (long long)bf_seqno,
+ wsrep_thd_thread_id(thd),
+ victim_trx->id);
+
+ WSREP_DEBUG("Aborting query: %s",
+ (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void");
+
+ wsrep_thd_LOCK(thd);
+
+ if (wsrep_thd_query_state(thd) == QUERY_EXITING) {
+ WSREP_DEBUG("kill trx EXITING for %llu", victim_trx->id);
+ wsrep_thd_UNLOCK(thd);
+ DBUG_RETURN(0);
+ }
+ if(wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
+ WSREP_DEBUG("withdraw for BF trx: %llu, state: %d",
+ victim_trx->id,
+ wsrep_thd_conflict_state(thd));
+ }
+
+ switch (wsrep_thd_conflict_state(thd)) {
+ case NO_CONFLICT:
+ wsrep_thd_set_conflict_state(thd, MUST_ABORT);
+ break;
+ case MUST_ABORT:
+ WSREP_DEBUG("victim %llu in MUST ABORT state",
+ victim_trx->id);
+ wsrep_thd_UNLOCK(thd);
+ wsrep_thd_awake(thd, signal);
+ DBUG_RETURN(0);
+ break;
+ case ABORTED:
+ case ABORTING: // fall through
+ default:
+ WSREP_DEBUG("victim %llu in state %d",
+ victim_trx->id, wsrep_thd_conflict_state(thd));
+ wsrep_thd_UNLOCK(thd);
+ DBUG_RETURN(0);
+ break;
+ }
+
+ switch (wsrep_thd_query_state(thd)) {
+ case QUERY_COMMITTING:
+ enum wsrep_status rcode;
+
+ WSREP_DEBUG("kill query for: %ld",
+ wsrep_thd_thread_id(thd));
+ wsrep_thd_awake(thd, signal);
+ WSREP_DEBUG("kill trx QUERY_COMMITTING for %llu",
+ victim_trx->id);
+
+ if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
+ wsrep_abort_slave_trx(bf_seqno,
+ wsrep_thd_trx_seqno(thd));
+ } else {
+ rcode = wsrep->abort_pre_commit(
+ wsrep, bf_seqno,
+ (wsrep_trx_id_t)victim_trx->id
+ );
+
+ switch (rcode) {
+ case WSREP_WARNING:
+ WSREP_DEBUG("cancel commit warning: %llu",
+ victim_trx->id);
+ wsrep_thd_UNLOCK(thd);
+ DBUG_RETURN(1);
+ break;
+ case WSREP_OK:
+ break;
+ default:
+ WSREP_ERROR(
+ "cancel commit bad exit: %d %llu",
+ rcode,
+ victim_trx->id);
+ /* unable to interrupt, must abort */
+ /* note: kill_mysql() will block, if we cannot.
+ * kill the lock holder first.
+ */
+ abort();
+ break;
+ }
+ }
+ break;
+ case QUERY_EXEC:
+ /* it is possible that victim trx is itself waiting for some
+ * other lock. We need to cancel this waiting
+ */
+ WSREP_DEBUG("kill trx QUERY_EXEC for %llu", victim_trx->id);
+
+ victim_trx->was_chosen_as_deadlock_victim= TRUE;
+ if (victim_trx->wait_lock) {
+ WSREP_DEBUG("victim has wait flag: %ld",
+ wsrep_thd_thread_id(thd));
+ lock_t* wait_lock = victim_trx->wait_lock;
+ if (wait_lock) {
+ WSREP_DEBUG("canceling wait lock");
+ victim_trx->was_chosen_as_deadlock_victim= TRUE;
+ lock_cancel_waiting_and_release(wait_lock);
+ }
+
+ wsrep_thd_awake(thd, signal);
+ } else {
+ /* abort currently executing query */
+ DBUG_PRINT("wsrep",("sending KILL_QUERY to: %ld",
+ wsrep_thd_thread_id(thd)));
+ WSREP_DEBUG("kill query for: %ld",
+ wsrep_thd_thread_id(thd));
+ wsrep_thd_awake(thd, signal);
+
+ /* for BF thd, we need to prevent him from committing */
+ if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
+ wsrep_abort_slave_trx(bf_seqno,
+ wsrep_thd_trx_seqno(thd));
+ }
+ }
+ break;
+ case QUERY_IDLE:
+ {
+ bool skip_abort= false;
+ wsrep_aborting_thd_t abortees;
+
+ WSREP_DEBUG("kill IDLE for %llu", victim_trx->id);
+
+ if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
+ WSREP_DEBUG("kill BF IDLE, seqno: %lld",
+ (long long)wsrep_thd_trx_seqno(thd));
+ wsrep_thd_UNLOCK(thd);
+ wsrep_abort_slave_trx(bf_seqno,
+ wsrep_thd_trx_seqno(thd));
+ DBUG_RETURN(0);
+ }
+ /* This will lock thd from proceeding after net_read() */
+ wsrep_thd_set_conflict_state(thd, ABORTING);
+
+ mysql_mutex_lock(&LOCK_wsrep_rollback);
+
+ abortees = wsrep_aborting_thd;
+ while (abortees && !skip_abort) {
+ /* check if we have a kill message for this already */
+ if (abortees->aborting_thd == thd) {
+ skip_abort = true;
+ WSREP_WARN("duplicate thd aborter %lu",
+ wsrep_thd_thread_id(thd));
+ }
+ abortees = abortees->next;
+ }
+ if (!skip_abort) {
+ wsrep_aborting_thd_t aborting = (wsrep_aborting_thd_t)
+ my_malloc(sizeof(struct wsrep_aborting_thd),
+ MYF(0));
+ aborting->aborting_thd = thd;
+ aborting->next = wsrep_aborting_thd;
+ wsrep_aborting_thd = aborting;
+ DBUG_PRINT("wsrep",("enqueuing trx abort for %lu",
+ wsrep_thd_thread_id(thd)));
+ WSREP_DEBUG("enqueuing trx abort for (%lu)",
+ wsrep_thd_thread_id(thd));
+ }
+
+ DBUG_PRINT("wsrep",("signalling wsrep rollbacker"));
+ WSREP_DEBUG("signaling aborter");
+ mysql_cond_signal(&COND_wsrep_rollback);
+ mysql_mutex_unlock(&LOCK_wsrep_rollback);
+
+ break;
+ }
+ default:
+ WSREP_WARN("bad wsrep query state: %d",
+ wsrep_thd_query_state(thd));
+ break;
+ }
+ wsrep_thd_UNLOCK(thd);
+
+ DBUG_RETURN(0);
+}
+static int
+wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd,
+ my_bool signal)
+{
+ DBUG_ENTER("wsrep_innobase_abort_thd");
+ trx_t* victim_trx = thd_to_trx(victim_thd);
+ trx_t* bf_trx = (bf_thd) ? thd_to_trx(bf_thd) : NULL;
+ WSREP_DEBUG("abort transaction: BF: %s victim: %s",
+ wsrep_thd_query(bf_thd),
+ wsrep_thd_query(victim_thd));
+
+ if (victim_trx)
+ {
+ mutex_enter(&kernel_mutex);
+ int rcode = wsrep_innobase_kill_one_trx(
+ bf_thd, bf_trx, victim_trx, signal);
+ mutex_exit(&kernel_mutex);
+ DBUG_RETURN(rcode);
+ } else {
+ WSREP_DEBUG("victim does not have transaction");
+ wsrep_thd_LOCK(victim_thd);
+ wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT);
+ wsrep_thd_UNLOCK(victim_thd);
+ wsrep_thd_awake(victim_thd, signal);
+ }
+ DBUG_RETURN(-1);
+}
+
+static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid)
+{
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+ if (wsrep_is_wsrep_xid(xid)) {
+ mtr_t mtr;
+ mtr_start(&mtr);
+ trx_sys_update_wsrep_checkpoint(xid, &mtr);
+ mtr_commit(&mtr);
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid)
+{
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+ trx_sys_read_wsrep_checkpoint(xid);
+ return 0;
+}
+
+static void
+wsrep_fake_trx_id(
+/*==================*/
+ handlerton *hton,
+ THD *thd) /*!< in: user thread handle */
+{
+ mutex_enter(&kernel_mutex);
+ trx_id_t trx_id = trx_sys_get_new_trx_id();
+ mutex_exit(&kernel_mutex);
+
+ (void *)wsrep_trx_handle_for_id(wsrep_thd_trx_handle(thd), trx_id);
+}
+
+#endif /* WITH_WSREP */
/* plugin options */
static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
@@ -11720,6 +12856,40 @@ static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
NULL, NULL, 0, 0, 1, 0);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+#ifdef WITH_INNODB_DISALLOW_WRITES
+/*******************************************************
+ * innobase_disallow_writes variable definition *
+ *******************************************************/
+
+/* Must always init to FALSE. */
+static my_bool innobase_disallow_writes = FALSE;
+
+/**************************************************************************
+An "update" method for innobase_disallow_writes variable. */
+static
+void
+innobase_disallow_writes_update(
+/*============================*/
+ THD* thd, /* in: thread handle */
+ st_mysql_sys_var* var, /* in: pointer to system
+ variable */
+ void* var_ptr, /* out: pointer to dynamic
+ variable */
+ const void* save) /* in: temporary storage */
+{
+ *(my_bool*)var_ptr = *(my_bool*)save;
+ ut_a(srv_allow_writes_event);
+ if (*(my_bool*)var_ptr)
+ os_event_reset(srv_allow_writes_event);
+ else
+ os_event_set(srv_allow_writes_event);
+}
+
+static MYSQL_SYSVAR_BOOL(disallow_writes, innobase_disallow_writes,
+ PLUGIN_VAR_NOCMDOPT,
+ "Tell InnoDB to stop any writes to disk",
+ NULL, innobase_disallow_writes_update, FALSE);
+#endif /* WITH_INNODB_DISALLOW_WRITES */
static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
PLUGIN_VAR_NOCMDARG,
"Whether to use read ahead for random access within an extent.",
@@ -11801,6 +12971,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
MYSQL_SYSVAR(change_buffering_debug),
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+#ifdef WITH_INNODB_DISALLOW_WRITES
+ MYSQL_SYSVAR(disallow_writes),
+#endif /* WITH_INNODB_DISALLOW_WRITES */
MYSQL_SYSVAR(random_read_ahead),
MYSQL_SYSVAR(read_ahead_threshold),
MYSQL_SYSVAR(io_capacity),
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index 7ef3e954636..aa653adcabf 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -112,6 +112,10 @@ class ha_innobase: public handler
dict_index_t* innobase_get_index(uint keynr);
int info_low(uint flag, bool called_from_analyze);
+#ifdef WITH_WSREP
+ int wsrep_append_keys(THD *thd, bool shared,
+ const uchar* record0, const uchar* record1);
+#endif
/* Init values for the class: */
public:
ha_innobase(handlerton *hton, TABLE_SHARE *table_arg);
@@ -291,6 +295,40 @@ bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd);
*/
extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
+#ifdef WITH_WSREP
+#include <wsrep_mysqld.h>
+//extern "C" int wsrep_trx_order_before(void *thd1, void *thd2);
+
+extern "C" bool wsrep_thd_is_wsrep_on(THD *thd);
+
+extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd);
+extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd);
+extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd);
+extern "C" const char * wsrep_thd_exec_mode_str(THD *thd);
+extern "C" const char * wsrep_thd_conflict_state_str(THD *thd);
+extern "C" const char * wsrep_thd_query_state_str(THD *thd);
+extern "C" wsrep_trx_handle_t* wsrep_thd_trx_handle(THD *thd);
+
+extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode);
+extern "C" void wsrep_thd_set_query_state(
+ THD *thd, enum wsrep_query_state state);
+extern "C" void wsrep_thd_set_conflict_state(
+ THD *thd, enum wsrep_conflict_state state);
+
+extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id);
+
+extern "C"void wsrep_thd_LOCK(THD *thd);
+extern "C"void wsrep_thd_UNLOCK(THD *thd);
+extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd);
+extern "C" time_t wsrep_thd_query_start(THD *thd);
+extern "C" my_thread_id wsrep_thd_thread_id(THD *thd);
+extern "C" int64_t wsrep_thd_trx_seqno(THD *thd);
+extern "C" query_id_t wsrep_thd_query_id(THD *thd);
+extern "C" char * wsrep_thd_query(THD *thd);
+extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd);
+extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id);
+extern "C" void wsrep_thd_awake(THD *thd, my_bool signal);
+#endif
typedef struct trx_struct trx_t;
/********************************************************************//**
@file handler/ha_innodb.h
@@ -331,3 +369,6 @@ innobase_index_name_is_reserved(
ulint num_of_keys); /*!< in: Number of indexes to
be created. */
+#ifdef WITH_WSREP
+extern "C" int wsrep_trx_is_aborting(void *thd_ptr);
+#endif
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index dc65fb3ff1a..c7bc4904575 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -36,6 +36,10 @@ extern "C" {
#include "handler0alter.h"
}
+#ifdef WITH_WSREP
+//#include "wsrep_api.h"
+#include <sql_acl.h> // PROCESS_ACL
+#endif
#include "ha_innodb.h"
/*************************************************************//**
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 4c371c8d5cf..25a3a5a82e0 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -343,6 +343,9 @@ barracuda format, the length could be REC_VERSION_56_MAX_INDEX_COL_LEN
/** Defines the maximum fixed length column size */
#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN
+#ifdef WITH_WSREP
+#define WSREP_MAX_SUPPORTED_KEY_LENGTH 3500
+#endif /* WITH_WSREP */
/** Data structure for a field in an index */
struct dict_field_struct{
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index edf7a1a28c1..664e18c52a8 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -285,6 +285,18 @@ thd_set_lock_wait_time(
void* thd, /*!< in: thread handle (THD*) */
ulint value); /*!< in: time waited for the lock */
+#ifdef WITH_WSREP
+UNIV_INTERN
+int
+wsrep_innobase_kill_one_trx(void *bf_thd, trx_t *bf_trx, trx_t *victim_trx, ibool signal);
+int wsrep_thd_is_brute_force(void *thd_ptr);
+int wsrep_trx_order_before(void *thd1, void *thd2);
+void wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
+ unsigned char* str, unsigned int str_length);
+int
+wsrep_on(void *thd_ptr);
+int wsrep_is_wsrep_xid(const void*);
+#endif /* WITH_WSREP */
/**********************************************************************//**
Get the current setting of the lower_case_table_names global parameter from
mysqld.cc. We do a dirty read because for one there is no synchronization
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index 29fdc3bbe97..e5a61e599bc 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -797,6 +797,7 @@ lock_rec_get_page_no(
remains set when the waiting lock is granted,
or if the lock is inherited to a neighboring
record */
+#define WSREP_BF 4096
#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
# error
#endif
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index 10b74d18c13..30ef3048ff1 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -832,6 +832,14 @@ are given in one byte (resp. two byte) format. */
two upmost bits in a two byte offset for special purposes */
#define REC_MAX_DATA_SIZE (16 * 1024)
+#ifdef WITH_WSREP
+int wsrep_rec_get_primary_key(
+ byte *buf, /* out: extracted key */
+ ulint *buf_len, /* in/out: length of buf */
+ const rec_t* rec, /* in: physical record */
+ dict_index_t* index, /* in: record descriptor */
+ ibool new_protocol); /* in: protocol > 1 */
+#endif /* WITH_WSREP */
#ifndef UNIV_NONINL
#include "rem0rec.ic"
#endif
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 5290ef5fe11..093be2a6dcb 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -141,6 +141,10 @@ extern ulint srv_log_buffer_size;
extern ulong srv_flush_log_at_trx_commit;
extern char srv_adaptive_flushing;
+#ifdef WITH_INNODB_DISALLOW_WRITES
+/* When this event is reset we do not allow any file writes to take place. */
+extern os_event_t srv_allow_writes_event;
+#endif /* WITH_INNODB_DISALLOW_WRITES */
/* If this flag is TRUE, then we will load the indexes' (and tables') metadata
even if they are marked as "corrupted". Mostly it is for DBA to process
corrupted index and table */
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index 635e7ec30b6..c297174df31 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -41,6 +41,9 @@ Created 3/26/1996 Heikki Tuuri
#include "ut0bh.h"
#include "read0types.h"
#include "page0types.h"
+#ifdef WITH_WSREP
+#include "trx0xa.h"
+#endif /* WITH_WSREP */
/** In a MySQL replication slave, in crash recovery we store the master log
file name and position here. */
@@ -314,6 +317,17 @@ UNIV_INTERN
void
trx_sys_print_mysql_binlog_offset(void);
/*===================================*/
+#ifdef WITH_WSREP
+/** Update WSREP checkpoint XID in sys header. */
+void
+trx_sys_update_wsrep_checkpoint(
+ const XID* xid, /*!< in: WSREP XID */
+ mtr_t* mtr); /*!< in: mtr */
+void
+/** Read WSREP checkpoint XID from sys header. */
+trx_sys_read_wsrep_checkpoint(
+ XID* xid); /*!< out: WSREP XID */
+#endif /* WITH_WSREP */
/*****************************************************************//**
Prints to stderr the MySQL master log offset info in the trx system header if
the magic number shows it valid. */
@@ -519,6 +533,22 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
within that file */
#define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */
+#ifdef WITH_WSREP
+/* We hijack TRX_SYS_MYSQL_MASTER_LOG_INFO, it seems to be completely unused
+ otherwise (see comments for MySQL bug #34058). */
+/** */
+#define TRX_SYS_WSREP_XID_INFO TRX_SYS_MYSQL_MASTER_LOG_INFO
+#define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0
+#define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265
+
+/* XID field: formatID, gtrid_len, bqual_len, xid_data */
+#define TRX_SYS_WSREP_XID_LEN (4 + 4 + 4 + XIDDATASIZE)
+#define TRX_SYS_WSREP_XID_FORMAT 4
+#define TRX_SYS_WSREP_XID_GTRID_LEN 8
+#define TRX_SYS_WSREP_XID_BQUAL_LEN 12
+#define TRX_SYS_WSREP_XID_DATA 16
+#endif /* WITH_WSREP*/
+
/** Doublewrite buffer */
/* @{ */
/** The offset of the doublewrite buffer header on the trx system header page */
diff --git a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
index 66ec18aeee7..bb864937751 100644
--- a/storage/innobase/lock/lock0lock.c
+++ b/storage/innobase/lock/lock0lock.c
@@ -40,6 +40,10 @@ Created 5/7/1996 Heikki Tuuri
#include "trx0sys.h"
#include "btr0btr.h"
+#ifdef WITH_WSREP
+extern my_bool wsrep_debug;
+extern my_bool wsrep_log_conflicts;
+#endif
/* Restricts the length of search we will do in the waits-for
graph of transactions */
#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000
@@ -921,6 +925,11 @@ lock_rec_has_to_wait(
if (trx != lock2->trx
&& !lock_mode_compatible(LOCK_MODE_MASK & type_mode,
lock_get_mode(lock2))) {
+#ifdef WITH_WSREP
+ if ((type_mode & WSREP_BF) && (lock2->type_mode & WSREP_BF)) {
+ return FALSE;
+ }
+#endif /* WITH_WSREP */
/* We have somewhat complex rules when gap type record locks
cause waits */
@@ -1450,6 +1459,11 @@ lock_rec_has_expl(
return(NULL);
}
+#ifdef WITH_WSREP
+static
+void
+lock_rec_discard(lock_t* in_lock);
+#endif
#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks if some other transaction has a lock request in the queue.
@@ -1499,6 +1513,54 @@ lock_rec_other_has_expl_req(
}
#endif /* UNIV_DEBUG */
+#ifdef WITH_WSREP
+static void
+wsrep_kill_victim(trx_t *trx, lock_t *lock) {
+ int bf_this = wsrep_thd_is_brute_force(trx->mysql_thd);
+ int bf_other =
+ wsrep_thd_is_brute_force(lock->trx->mysql_thd);
+ if ((bf_this && !bf_other) ||
+ (bf_this && bf_other && wsrep_trx_order_before(
+ trx->mysql_thd, lock->trx->mysql_thd))) {
+
+ if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
+ if (wsrep_debug)
+ fprintf(stderr, "WSREP: BF victim waiting\n");
+ /* cannot release lock, until our lock
+ is in the queue*/
+ } else if (lock->trx != trx) {
+ if (wsrep_log_conflicts) {
+ if (bf_this)
+ fputs("\n*** Priority TRANSACTION:\n",
+ stderr);
+ else
+ fputs("\n*** Victim TRANSACTION:\n",
+ stderr);
+ trx_print(stderr, trx, 3000);
+
+ if (bf_other)
+ fputs("\n*** Priority TRANSACTION:\n",
+ stderr);
+ else
+ fputs("\n*** Victim TRANSACTION:\n",
+ stderr);
+ trx_print(stderr, lock->trx, 3000);
+
+ fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n",
+ stderr);
+
+ if (lock_get_type(lock) == LOCK_REC) {
+ lock_rec_print(stderr, lock);
+ } else {
+ lock_table_print(stderr, lock);
+ }
+ }
+ wsrep_innobase_kill_one_trx(
+ trx->mysql_thd, trx, lock->trx, TRUE);
+ }
+ }
+}
+#endif
/*********************************************************************//**
Checks if some other transaction has a conflicting explicit lock request
in the queue, so that we have to wait.
@@ -1528,6 +1590,9 @@ lock_rec_other_has_conflicting(
do {
if (lock_rec_has_to_wait(trx, mode, lock,
TRUE)) {
+#ifdef WITH_WSREP
+ wsrep_kill_victim(trx, lock);
+#endif
return(lock);
}
@@ -1538,6 +1603,9 @@ lock_rec_other_has_conflicting(
do {
if (lock_rec_has_to_wait(trx, mode, lock,
FALSE)) {
+#ifdef WITH_WSREP
+ wsrep_kill_victim(trx, lock);
+#endif
return(lock);
}
@@ -1669,6 +1737,9 @@ static
lock_t*
lock_rec_create(
/*============*/
+#ifdef WITH_WSREP
+ lock_t* c_lock, /* conflicting lock */
+#endif
ulint type_mode,/*!< in: lock mode and wait
flag, type is ignored and
replaced by LOCK_REC */
@@ -1714,6 +1785,11 @@ lock_rec_create(
lock->trx = trx;
lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;
+#ifdef WITH_WSREP
+ if (wsrep_thd_is_brute_force(trx->mysql_thd)) {
+ lock->type_mode |= WSREP_BF;
+ }
+#endif /* WITH_WSREP */
lock->index = index;
lock->un_member.rec_lock.space = space;
@@ -1728,8 +1804,56 @@ lock_rec_create(
/* Set the bit corresponding to rec */
lock_rec_set_nth_bit(lock, heap_no);
+#ifdef WITH_WSREP
+ if (c_lock && wsrep_thd_is_brute_force(trx->mysql_thd)) {
+ lock_t *hash = c_lock->hash;
+ lock_t *prev = NULL;
+
+ while (hash &&
+ wsrep_thd_is_brute_force(hash->trx->mysql_thd) &&
+ wsrep_trx_order_before(hash->trx->mysql_thd, trx->mysql_thd)){
+ prev = hash;
+ hash = hash->hash;
+ }
+ lock->hash = hash;
+ if (prev) {
+ prev->hash = lock;
+ } else {
+ c_lock->hash = lock;
+ }
+ /*
+ * delayed conflict resolution '...kill_one_trx' was not called,
+ * if victim was waiting for some other lock
+ */
+ if (c_lock && c_lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
+ c_lock->trx->was_chosen_as_deadlock_victim = TRUE;
+ trx->que_state = TRX_QUE_LOCK_WAIT;
+ lock_set_lock_and_trx_wait(lock, trx);
+
+ lock_cancel_waiting_and_release(c_lock->trx->wait_lock);
+
+ /* trx might not wait for c_lock, but some other lock */
+ if (wsrep_debug && c_lock->trx->wait_lock != c_lock) {
+ fprintf(stderr, "WSREP: c_lock != wait lock\n");
+ }
+ if (c_lock->trx->wait_lock == c_lock) {
+ lock_reset_lock_and_trx_wait(lock);
+ }
+
+ if (wsrep_debug)
+ fprintf(stderr, "WSREP: c_lock canceled %llu\n",
+ (ulonglong) c_lock->trx->id);
+ /* have to bail out here to avoid lock_set_lock... */
+ return(lock);
+ }
+ } else {
+ HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
+ lock_rec_fold(space, page_no), lock);
+ }
+#else
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
+#endif
if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
lock_set_lock_and_trx_wait(lock, trx);
@@ -1749,6 +1873,9 @@ static
enum db_err
lock_rec_enqueue_waiting(
/*=====================*/
+#ifdef WITH_WSREP
+ lock_t* c_lock, /* conflicting lock */
+#endif
ulint type_mode,/*!< in: lock mode this
transaction is requesting:
LOCK_S or LOCK_X, possibly
@@ -1800,8 +1927,16 @@ lock_rec_enqueue_waiting(
}
/* Enqueue the lock request that will wait to be granted */
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) && trx->was_chosen_as_deadlock_victim) {
+ return(DB_DEADLOCK);
+ }
+ lock = lock_rec_create(c_lock, type_mode | LOCK_WAIT,
+ block, heap_no, index, trx);
+#else
lock = lock_rec_create(type_mode | LOCK_WAIT,
block, heap_no, index, trx);
+#endif
/* Check if a deadlock occurs: if yes, remove the lock request and
return an error code */
@@ -1879,7 +2014,19 @@ lock_rec_add_to_queue(
lock_t* other_lock
= lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT,
block, heap_no, trx);
+#ifdef WITH_WSREP
+ /* this can potentionally assert with wsrep */
+ if (wsrep_on(trx->mysql_thd)) {
+ if (wsrep_debug && other_lock) {
+ fprintf(stderr,
+ "WSREP: InnoDB assert ignored\n");
+ }
+ } else {
+ ut_a(!other_lock);
+ }
+#else
ut_a(!other_lock);
+#endif /* WITH_WSREP */
}
#endif /* UNIV_DEBUG */
@@ -1932,7 +2079,11 @@ lock_rec_add_to_queue(
}
somebody_waits:
+#ifdef WITH_WSREP
+ return(lock_rec_create(NULL, type_mode, block, heap_no, index, trx));
+#else
return(lock_rec_create(type_mode, block, heap_no, index, trx));
+#endif
}
/** Record locking request status */
@@ -1982,6 +2133,11 @@ lock_rec_lock_fast(
|| (LOCK_MODE_MASK & mode) == LOCK_X);
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0
+#ifdef WITH_WSREP
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == 0
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_GAP
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_REC_NOT_GAP
+#endif /* WITH_WSREP */
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
lock = lock_rec_get_first_on_page(block);
@@ -1990,7 +2146,11 @@ lock_rec_lock_fast(
if (lock == NULL) {
if (!impl) {
+#ifdef WITH_WSREP
+ lock_rec_create(NULL, mode, block, heap_no, index, trx);
+#else
lock_rec_create(mode, block, heap_no, index, trx);
+#endif
}
return(LOCK_REC_SUCCESS_CREATED);
@@ -2046,6 +2206,9 @@ lock_rec_lock_slow(
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
+#ifdef WITH_WSREP
+ lock_t *c_lock;
+#endif
ut_ad(mutex_own(&kernel_mutex));
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
@@ -2056,6 +2219,11 @@ lock_rec_lock_slow(
|| (LOCK_MODE_MASK & mode) == LOCK_X);
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0
+#ifdef WITH_WSREP
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == 0
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_GAP
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_REC_NOT_GAP
+#endif /* WITH_WSREP */
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
trx = thr_get_trx(thr);
@@ -2064,14 +2232,24 @@ lock_rec_lock_slow(
/* The trx already has a strong enough lock on rec: do
nothing */
+#ifdef WITH_WSREP
+ } else if ((c_lock = lock_rec_other_has_conflicting(
+ mode, block, heap_no, trx))) {
+#else
} else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) {
+#endif
/* If another transaction has a non-gap conflicting request in
the queue, as this transaction does not have a lock strong
enough already granted on the record, we have to wait. */
+#ifdef WITH_WSREP
+ return(lock_rec_enqueue_waiting(c_lock,mode, block, heap_no,
+ index, thr));
+#else
return(lock_rec_enqueue_waiting(mode, block, heap_no,
index, thr));
+#endif
} else if (!impl) {
/* Set the requested lock on the record */
@@ -2117,8 +2295,17 @@ lock_rec_lock(
|| (LOCK_MODE_MASK & mode) == LOCK_X);
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
+#ifdef WITH_WSREP
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == 0
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_GAP
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_REC_NOT_GAP
+#endif /* WITH_WSREP */
|| mode - (LOCK_MODE_MASK & mode) == 0);
-
+#ifdef WITH_WSREP
+ if (wsrep_thd_is_brute_force(thr_get_trx(thr)->mysql_thd)) {
+ mode |= WSREP_BF;
+ }
+#endif
/* We try a simplified and faster subroutine for the most
common cases */
switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
@@ -3487,6 +3674,34 @@ lock_deadlock_recursive(
stderr);
}
#endif /* UNIV_DEBUG */
+#ifdef WITH_WSREP
+ if (wsrep_debug)
+ fputs("WSREP: Deadlock detected\n", ef);
+ if (wsrep_thd_is_brute_force(start->mysql_thd) &&
+ wsrep_thd_is_brute_force(
+ wait_lock->trx->mysql_thd) &&
+ (start != wait_lock->trx)) {
+
+ if (wsrep_trx_order_before(
+ start->mysql_thd,
+ wait_lock->trx->mysql_thd)) {
+
+ wait_lock->trx->was_chosen_as_deadlock_victim = TRUE;
+ lock_cancel_waiting_and_release(wait_lock);
+ return(LOCK_VICTIM_IS_OTHER);
+ } else {
+ return(LOCK_VICTIM_IS_START);
+ }
+ }
+#endif
+ if (too_far) {
+
+ fputs("TOO DEEP OR LONG SEARCH"
+ " IN THE LOCK TABLE"
+ " WAITS-FOR GRAPH\n", ef);
+
+ return(LOCK_VICTIM_IS_START);
+ }
if (trx_weight_ge(wait_lock->trx, start)) {
/* Our recursion starting point
@@ -3494,8 +3709,21 @@ lock_deadlock_recursive(
choose 'start' as the victim and roll
back it */
+#ifdef WITH_WSREP
+ if (!wsrep_thd_is_brute_force(
+ start->mysql_thd)) {
+ return(LOCK_VICTIM_IS_START);
+ }
+#else
+ return(LOCK_VICTIM_IS_START);
+#endif
+ }
+#ifdef WITH_WSREP
+ if (wsrep_thd_is_brute_force(
+ wait_lock->trx->mysql_thd)) {
return(LOCK_VICTIM_IS_START);
}
+#endif
lock_deadlock_found = TRUE;
@@ -3580,6 +3808,9 @@ UNIV_INLINE
lock_t*
lock_table_create(
/*==============*/
+#ifdef WITH_WSREP
+ lock_t* c_lock, /* conflicting lock */
+#endif
dict_table_t* table, /*!< in: database table in dictionary cache */
ulint type_mode,/*!< in: lock mode possibly ORed with
LOCK_WAIT */
@@ -3615,7 +3846,25 @@ lock_table_create(
lock->un_member.tab_lock.table = table;
+#ifdef WITH_WSREP
+ if (c_lock && wsrep_thd_is_brute_force(trx->mysql_thd)) {
+ UT_LIST_INSERT_AFTER(
+ un_member.tab_lock.locks, table->locks, c_lock, lock);
+ } else {
+ UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
+ }
+
+ if (c_lock && c_lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
+ if (wsrep_debug)
+ fprintf(stderr, "WSREP: table c_lock in wait: %llu\n",
+ (ulonglong) lock->trx->id);
+ c_lock->trx->was_chosen_as_deadlock_victim = TRUE;
+ lock_cancel_waiting_and_release(c_lock);
+ }
+
+#else
UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
+#endif
if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
@@ -3761,6 +4010,9 @@ static
ulint
lock_table_enqueue_waiting(
/*=======================*/
+#ifdef WITH_WSREP
+ lock_t* c_lock, /* conflicting lock */
+#endif
ulint mode, /*!< in: lock mode this transaction is
requesting */
dict_table_t* table, /*!< in: table */
@@ -3802,7 +4054,14 @@ lock_table_enqueue_waiting(
/* Enqueue the lock request that will wait to be granted */
+#ifdef WITH_WSREP
+ if (trx->was_chosen_as_deadlock_victim) {
+ return(DB_DEADLOCK);
+ }
+ lock = lock_table_create(c_lock, table, mode | LOCK_WAIT, trx);
+#else
lock = lock_table_create(table, mode | LOCK_WAIT, trx);
+#endif
/* Check if a deadlock occurs: if yes, remove the lock request and
return an error code */
@@ -3860,7 +4119,11 @@ lock_table_other_has_incompatible(
if ((lock->trx != trx)
&& (!lock_mode_compatible(lock_get_mode(lock), mode))
&& (wait || !(lock_get_wait(lock)))) {
-
+#ifdef WITH_WSREP
+ if (wsrep_debug)
+ fprintf(stderr, "WSREP: table lock abort");
+ wsrep_kill_victim((trx_t *)trx, (lock_t *)lock);
+#endif
return(lock);
}
@@ -3884,6 +4147,9 @@ lock_table(
enum lock_mode mode, /*!< in: lock mode */
que_thr_t* thr) /*!< in: query thread */
{
+#ifdef WITH_WSREP
+ lock_t *c_lock;
+#endif
trx_t* trx;
ulint err;
@@ -3912,19 +4178,32 @@ lock_table(
/* We have to check if the new lock is compatible with any locks
other transactions have in the table lock queue. */
+#ifdef WITH_WSREP
+ if ((c_lock = (lock_t *)lock_table_other_has_incompatible(
+ trx, LOCK_WAIT, table, mode))) {
+#else
if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) {
+#endif
/* Another trx has a request on the table in an incompatible
mode: this trx may have to wait */
+#ifdef WITH_WSREP
+ err = lock_table_enqueue_waiting(c_lock, mode | flags, table, thr);
+#else
err = lock_table_enqueue_waiting(mode | flags, table, thr);
+#endif
lock_mutex_exit_kernel();
return(err);
}
+#ifdef WITH_WSREP
+ lock_table_create(c_lock, table, mode | flags, trx);
+#else
lock_table_create(table, mode | flags, trx);
+#endif
ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
@@ -4840,6 +5119,7 @@ lock_rec_queue_validate(
if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
+#ifndef WITH_WSREP
enum lock_mode mode;
if (lock_get_mode(lock) == LOCK_S) {
@@ -4849,6 +5129,7 @@ lock_rec_queue_validate(
}
ut_a(!lock_rec_other_has_expl_req(
mode, 0, 0, block, heap_no, lock->trx));
+#endif /* WITH_WSREP */
} else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
@@ -5123,6 +5404,9 @@ lock_rec_insert_check_and_lock(
lock_t* lock;
ulint err;
ulint next_rec_heap_no;
+#ifdef WITH_WSREP
+ lock_t *c_lock;
+#endif
ut_ad(block->frame == page_align(rec));
@@ -5175,15 +5459,28 @@ lock_rec_insert_check_and_lock(
had to wait for their insert. Both had waiting gap type lock requests
on the successor, which produced an unnecessary deadlock. */
+#ifdef WITH_WSREP
+ if ((c_lock = lock_rec_other_has_conflicting(
+ LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION | WSREP_BF,
+ block, next_rec_heap_no, trx))) {
+#else
if (lock_rec_other_has_conflicting(
LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
block, next_rec_heap_no, trx)) {
+#endif
/* Note that we may get DB_SUCCESS also here! */
+#ifdef WITH_WSREP
+ err = lock_rec_enqueue_waiting(c_lock, LOCK_X | LOCK_GAP
+ | LOCK_INSERT_INTENTION,
+ block, next_rec_heap_no,
+ index, thr);
+#else
err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
| LOCK_INSERT_INTENTION,
block, next_rec_heap_no,
index, thr);
+#endif
} else {
err = DB_SUCCESS;
}
diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
index 1068c033871..5058f906473 100644
--- a/storage/innobase/os/os0file.c
+++ b/storage/innobase/os/os0file.c
@@ -89,6 +89,12 @@ UNIV_INTERN os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
/* In simulated aio, merge at most this many consecutive i/os */
#define OS_AIO_MERGE_N_CONSECUTIVE 64
+#ifdef WITH_INNODB_DISALLOW_WRITES
+#define WAIT_ALLOW_WRITES() os_event_wait(srv_allow_writes_event)
+#else
+#define WAIT_ALLOW_WRITES() do { } while (0)
+#endif /* WITH_INNODB_DISALLOW_WRITES */
+
/**********************************************************************
InnoDB AIO Implementation:
@@ -726,7 +732,9 @@ os_file_create_tmpfile(void)
/*========================*/
{
FILE* file = NULL;
- int fd = innobase_mysql_tmpfile();
+ int fd;
+ WAIT_ALLOW_WRITES();
+ fd = innobase_mysql_tmpfile();
if (fd >= 0) {
file = fdopen(fd, "w+b");
@@ -1045,6 +1053,7 @@ os_file_create_directory(
return (TRUE);
#else
int rcode;
+ WAIT_ALLOW_WRITES();
rcode = mkdir(pathname, 0770);
@@ -1146,6 +1155,8 @@ try_again:
os_file_t file;
int create_flag;
ibool retry;
+ if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW)
+ WAIT_ALLOW_WRITES();
try_again:
ut_a(name);
@@ -1278,6 +1289,8 @@ os_file_create_simple_no_error_handling_func(
int create_flag;
ut_a(name);
+ if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW)
+ WAIT_ALLOW_WRITES();
if (create_mode == OS_FILE_OPEN) {
if (access_type == OS_FILE_READ_ONLY) {
@@ -1511,6 +1524,8 @@ try_again:
int create_flag;
ibool retry;
const char* mode_str = NULL;
+ if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW)
+ WAIT_ALLOW_WRITES();
try_again:
ut_a(name);
@@ -1667,6 +1682,7 @@ loop:
goto loop;
#else
int ret;
+ WAIT_ALLOW_WRITES();
ret = unlink(name);
@@ -1730,6 +1746,7 @@ loop:
goto loop;
#else
int ret;
+ WAIT_ALLOW_WRITES();
ret = unlink(name);
@@ -1770,6 +1787,7 @@ os_file_rename_func(
return(FALSE);
#else
int ret;
+ WAIT_ALLOW_WRITES();
ret = rename(oldpath, newpath);
@@ -2033,6 +2051,7 @@ os_file_set_eof(
HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
return(SetEndOfFile(h));
#else /* __WIN__ */
+ WAIT_ALLOW_WRITES();
return(!ftruncate(fileno(file), ftell(file)));
#endif /* __WIN__ */
}
@@ -2127,6 +2146,7 @@ os_file_flush_func(
return(FALSE);
#else
int ret;
+ WAIT_ALLOW_WRITES();
#if defined(HAVE_DARWIN_THREADS)
# ifndef F_FULLFSYNC
@@ -2819,6 +2839,7 @@ retry:
return(FALSE);
#else
ssize_t ret;
+ WAIT_ALLOW_WRITES();
ret = os_file_pwrite(file, buf, n, offset, offset_high);
diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c
index 30fc28561fa..44249c3a87e 100644
--- a/storage/innobase/rem/rem0rec.c
+++ b/storage/innobase/rem/rem0rec.c
@@ -31,6 +31,9 @@ Created 5/30/1994 Heikki Tuuri
#include "mtr0mtr.h"
#include "mtr0log.h"
+#ifdef WITH_WSREP
+#include <ha_prototypes.h>
+#endif /* WITH_WSREP */
/* PHYSICAL RECORD (OLD STYLE)
===========================
@@ -1772,3 +1775,123 @@ rec_print(
}
}
#endif /* !UNIV_HOTBACKUP */
+#ifdef WITH_WSREP
+int
+wsrep_rec_get_primary_key(
+ byte *buf, /* out: extracted key */
+ ulint *buf_len, /* in/out: length of buf */
+ const rec_t* rec, /* in: physical record */
+ dict_index_t* index, /* in: record descriptor */
+ ibool new_protocol) /* in: protocol > 1 */
+{
+ const byte* data;
+ ulint len;
+ ulint key_len = 0;
+ ulint i;
+ uint key_parts;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ const ulint* offsets;
+
+ ut_ad(index);
+
+ rec_offs_init(offsets_);
+ offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap);
+
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+ ut_ad(rec);
+
+ key_parts = dict_index_get_n_unique_in_tree(index);
+ for (i = 0;
+ i < key_parts && (index->type & DICT_CLUSTERED || i < key_parts - 1);
+ i++) {
+
+ dict_field_t* field = dict_index_get_nth_field(index, i);
+ const dict_col_t* col = dict_field_get_col(field);
+
+ data = rec_get_nth_field(rec, offsets, i, &len);
+ if (key_len + ((len != UNIV_SQL_NULL) ? len + 1 : 1) >
+ *buf_len) {
+ fprintf (stderr,
+ "WSREP: FK key len exceeded %lu %lu %lu\n",
+ key_len, len, *buf_len);
+ goto err_out;
+ }
+
+ if (len == UNIV_SQL_NULL) {
+ ut_a(!(col->prtype & DATA_NOT_NULL));
+ *buf++ = 1;
+ key_len++;
+ } else if (!new_protocol) {
+ if (!(col->prtype & DATA_NOT_NULL)) {
+ *buf++ = 0;
+ key_len++;
+ }
+ memcpy(buf, data, len);
+ wsrep_innobase_mysql_sort(
+ (int)(col->prtype & DATA_MYSQL_TYPE_MASK),
+ (uint)dtype_get_charset_coll(col->prtype),
+ buf, len);
+ } else { /* new protocol */
+ if (!(col->prtype & DATA_NOT_NULL)) {
+ *buf++ = 0;
+ key_len++;
+ }
+ switch (col->mtype) {
+ case DATA_INT: {
+ byte* ptr = buf+len;
+ for (;;) {
+ ptr--;
+ *ptr = *data;
+ if (ptr == buf) {
+ break;
+ }
+ data++;
+ }
+
+ if (!(col->prtype & DATA_UNSIGNED)) {
+ buf[len-1] = (byte) (buf[len-1] ^ 128);
+ }
+
+ break;
+ }
+ case DATA_VARCHAR:
+ case DATA_VARMYSQL:
+ case DATA_BINARY:
+ /* Copy the actual data */
+ ut_memcpy(buf, data, len);
+ wsrep_innobase_mysql_sort(
+ (int)(col->prtype & DATA_MYSQL_TYPE_MASK),
+ (uint)dtype_get_charset_coll(col->prtype),
+ buf, len);
+ break;
+ case DATA_BLOB:
+ case DATA_MYSQL:
+ memcpy(buf, data, len);
+ break;
+ default:
+ break;
+ }
+
+ key_len += len;
+ buf += len;
+ }
+ }
+
+ rec_validate(rec, offsets);
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ *buf_len = key_len;
+ return DB_SUCCESS;
+
+ err_out:
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ return DB_ERROR;
+}
+#endif // WITH_WSREP
diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c
index 18486514267..0b9993e2969 100644
--- a/storage/innobase/row/row0ins.c
+++ b/storage/innobase/row/row0ins.c
@@ -756,6 +756,14 @@ row_ins_invalidate_query_cache(
innobase_invalidate_query_cache(thr_get_trx(thr), buf, len);
mem_free(buf);
}
+#ifdef WITH_WSREP
+ulint wsrep_append_foreign_key(trx_t *trx,
+ dict_foreign_t* foreign,
+ const rec_t* clust_rec,
+ dict_index_t* clust_index,
+ ibool referenced,
+ ibool shared);
+#endif /* WITH_WSREP */
/*********************************************************************//**
Perform referential actions or checks when a parent row is deleted or updated
@@ -1072,6 +1080,16 @@ row_ins_foreign_check_on_constraint(
err = row_update_cascade_for_mysql(thr, cascade,
foreign->foreign_table);
+#ifdef WITH_WSREP
+ if (err == DB_SUCCESS) {
+ err = wsrep_append_foreign_key(
+ thr_get_trx(thr),
+ foreign,
+ clust_rec,
+ clust_index,
+ FALSE, FALSE);
+ }
+#endif /* WITH_WSREP */
if (foreign->foreign_table->n_foreign_key_checks_running == 0) {
fprintf(stderr,
"InnoDB: error: table %s has the counter 0"
@@ -1402,7 +1420,14 @@ run_again:
if (check_ref) {
err = DB_SUCCESS;
-
+#ifdef WITH_WSREP
+ err = wsrep_append_foreign_key(
+ thr_get_trx(thr),
+ foreign,
+ rec,
+ check_index,
+ check_ref, TRUE);
+#endif /* WITH_WSREP */
goto end_scan;
} else if (foreign->type != 0) {
/* There is an ON UPDATE or ON DELETE
@@ -1654,6 +1679,9 @@ row_ins_scan_sec_index_for_duplicate(
dtuple_t* entry, /*!< in: index entry */
que_thr_t* thr) /*!< in: query thread */
{
+#ifdef WITH_WSREP
+ trx_t* trx = thr_get_trx(thr);
+#endif
ulint n_unique;
ulint i;
int cmp;
@@ -1707,7 +1735,14 @@ row_ins_scan_sec_index_for_duplicate(
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
+#ifdef WITH_WSREP
+ /* slave applier must not get duplicate error */
+ if (allow_duplicates ||
+ (wsrep_on(trx->mysql_thd) &&
+ wsrep_thd_is_brute_force(trx->mysql_thd))) {
+#else
if (allow_duplicates) {
+#endif
/* If the SQL-query will update or replace
duplicate key we will take X-lock for
@@ -1825,7 +1860,13 @@ row_ins_duplicate_error_in_clust(
sure that in roll-forward we get the same duplicate
errors as in original execution */
+#ifdef WITH_WSREP
+ if (trx->duplicates ||
+ (wsrep_on(trx->mysql_thd) &&
+ wsrep_thd_is_brute_force(trx->mysql_thd))) {
+#else
if (trx->duplicates) {
+#endif
/* If the SQL-query will update or replace
duplicate key we will take X-lock for
@@ -1869,7 +1910,13 @@ row_ins_duplicate_error_in_clust(
offsets = rec_get_offsets(rec, cursor->index, offsets,
ULINT_UNDEFINED, &heap);
+#ifdef WITH_WSREP
+ if (trx->duplicates ||
+ (wsrep_on(trx->mysql_thd) &&
+ wsrep_thd_is_brute_force(trx->mysql_thd))) {
+#else
if (trx->duplicates) {
+#endif
/* If the SQL-query will update or replace
duplicate key we will take X-lock for
diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c
index 4f5096a162b..8469d688c5c 100644
--- a/storage/innobase/row/row0upd.c
+++ b/storage/innobase/row/row0upd.c
@@ -170,6 +170,46 @@ func_exit:
return(is_referenced);
}
+#ifdef WITH_WSREP
+ulint wsrep_append_foreign_key(trx_t *trx,
+ dict_foreign_t* foreign,
+ const rec_t* clust_rec,
+ dict_index_t* clust_index,
+ ibool referenced,
+ ibool shared);
+
+static
+void
+wsrep_append_fk_reference(
+/*=================================*/
+ upd_node_t* node, /*!< in: row update node */
+ dict_table_t* table, /*!< in: table in question */
+ dict_index_t* index, /*!< in: index of the cursor */
+ que_thr_t* thr, /*!< in: query thread */
+ const rec_t* rec
+) {
+ dict_foreign_t *foreign = UT_LIST_GET_FIRST(table->foreign_list);
+
+ while (foreign) {
+ if (foreign->foreign_index == index
+ && node->is_delete)
+ {
+ if (DB_SUCCESS != wsrep_append_foreign_key(
+ thr_get_trx(thr),
+ foreign,
+ rec,
+ index,
+ TRUE, TRUE)
+ ) {
+ fprintf(stderr,
+ "WSREP: FK key append failed\n");
+ }
+ }
+ foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
+ }
+}
+#endif /* WITH_WSREP */
+
/*********************************************************************//**
Checks if possible foreign key constraints hold after a delete of the record
under pcur.
@@ -283,7 +323,6 @@ row_upd_check_references_constraints(
}
err = DB_SUCCESS;
-
func_exit:
if (got_s_lock) {
row_mysql_unfreeze_data_dictionary(trx);
@@ -1646,6 +1685,12 @@ row_upd_sec_index_entry(
node, &pcur, index->table,
index, offsets, thr, &mtr);
}
+#ifdef WITH_WSREP
+ if (err == DB_SUCCESS && !referenced) {
+ wsrep_append_fk_reference(node, index->table,
+ index, thr, rec);
+ }
+#endif /* WITH_WSREP */
}
break;
}
@@ -1887,6 +1932,12 @@ err_exit:
goto err_exit;
}
}
+#ifdef WITH_WSREP
+ if (!referenced) {
+ wsrep_append_fk_reference(node, index->table,
+ index, thr, rec);
+ }
+#endif /* WITH_WSREP */
}
mtr_commit(mtr);
@@ -2089,6 +2140,9 @@ row_upd_del_mark_clust_rec(
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
ulint err;
+#ifdef WITH_WSREP
+ rec_t* rec;
+#endif /* WITH_WSREP */
ut_ad(node);
ut_ad(dict_index_is_clust(index));
@@ -2105,15 +2159,29 @@ row_upd_del_mark_clust_rec(
/* Mark the clustered index record deleted; we do not have to check
locks, because we assume that we have an x-lock on the record */
+#ifdef WITH_WSREP
+ rec = btr_cur_get_rec(btr_cur);
+#endif /* WITH_WSREP */
+
err = btr_cur_del_mark_set_clust_rec(
BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur),
+#ifdef WITH_WSREP
+ rec, index, offsets, TRUE, thr, mtr);
+#else
btr_cur_get_rec(btr_cur), index, offsets, TRUE, thr, mtr);
+#endif /* WITH_WSREP */
if (err == DB_SUCCESS && referenced) {
/* NOTE that the following call loses the position of pcur ! */
err = row_upd_check_references_constraints(
node, pcur, index->table, index, offsets, thr, mtr);
}
+#ifdef WITH_WSREP
+ if (err == DB_SUCCESS && !referenced) {
+ wsrep_append_fk_reference(node, index->table,
+ index, thr, rec);
+ }
+#endif /* WITH_WSREP */
mtr_commit(mtr);
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
index df89156baae..57046ff644c 100644
--- a/storage/innobase/srv/srv0srv.c
+++ b/storage/innobase/srv/srv0srv.c
@@ -86,6 +86,10 @@ Created 10/8/1995 Heikki Tuuri
#include "mysql/plugin.h"
#include "mysql/service_thd_wait.h"
+#ifdef WITH_WSREP
+extern int wsrep_debug;
+extern int wsrep_trx_is_aborting(void *thd_ptr);
+#endif
/* The following counter is incremented whenever there is some user activity
in the server */
UNIV_INTERN ulint srv_activity_count = 0;
@@ -198,6 +202,10 @@ srv_printf_innodb_monitor() will request mutex acquisition
with mutex_enter(), which will wait until it gets the mutex. */
#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
+#ifdef WITH_INNODB_DISALLOW_WRITES
+UNIV_INTERN os_event_t srv_allow_writes_event;
+#endif /* WITH_INNODB_DISALLOW_WRITES */
+
/** The sort order table of the MySQL latin1_swedish_ci character set
collation */
UNIV_INTERN const byte* srv_latin1_ordering;
@@ -368,6 +376,9 @@ struct srv_conc_slot_struct{
free to proceed; but
reserved may still be
TRUE at that point */
+#ifdef WITH_WSREP
+ void *thd; /*!< to see priority */
+#endif
UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /*!< queue node */
};
@@ -1075,8 +1086,20 @@ srv_init(void)
conc_slot->reserved = FALSE;
conc_slot->event = os_event_create(NULL);
ut_a(conc_slot->event);
+#ifdef WITH_WSREP
+ conc_slot->thd = NULL;
+#endif /* WITH_WSREP */
}
+#ifdef WITH_INNODB_DISALLOW_WRITES
+ /* Writes have to be enabled on init or else we hang. Thus, we
+ always set the event here regardless of innobase_disallow_writes.
+ That flag will always be 0 at this point because it isn't settable
+ via my.cnf or command line arg. */
+ srv_allow_writes_event = os_event_create(NULL);
+ os_event_set(srv_allow_writes_event);
+#endif /* WITH_INNODB_DISALLOW_WRITES */
+
/* Initialize some INFORMATION SCHEMA internal structures */
trx_i_s_cache_init(trx_i_s_cache);
}
@@ -1162,6 +1185,18 @@ srv_conc_enter_innodb(
return;
}
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ wsrep_thd_is_brute_force(trx->mysql_thd)) {
+ srv_conc_force_enter_innodb(trx);
+ return;
+ }
+ if (wsrep_on(trx->mysql_thd) &&
+ wsrep_trx_is_aborting(trx->mysql_thd)) {
+ srv_conc_force_enter_innodb(trx);
+ return;
+ }
+#endif
os_fast_mutex_lock(&srv_conc_mutex);
retry:
if (trx->declared_to_be_inside_innodb) {
@@ -1254,6 +1289,9 @@ retry:
/* Add to the queue */
slot->reserved = TRUE;
slot->wait_ended = FALSE;
+#ifdef WITH_WSREP
+ slot->thd = trx->mysql_thd;
+#endif
UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
@@ -1286,6 +1324,9 @@ retry:
incremented the thread counter on behalf of this thread */
slot->reserved = FALSE;
+#ifdef WITH_WSREP
+ slot->thd = NULL;
+#endif
UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
@@ -1356,6 +1397,9 @@ srv_conc_force_exit_innodb(
trx->n_tickets_to_enter_innodb = 0;
if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
+#ifdef WITH_WSREP
+ srv_conc_slot_t* wsrep_slot;
+#endif
/* Look for a slot where a thread is waiting and no other
thread has yet released the thread */
@@ -1365,6 +1409,19 @@ srv_conc_force_exit_innodb(
slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
}
+#ifdef WITH_WSREP
+ /* look for aborting trx, they must be released asap */
+ wsrep_slot= slot;
+ while (wsrep_slot && (wsrep_slot->wait_ended == TRUE ||
+ !wsrep_trx_is_aborting(wsrep_slot->thd))) {
+ wsrep_slot = UT_LIST_GET_NEXT(srv_conc_queue, wsrep_slot);
+ }
+ if (wsrep_slot) {
+ slot = wsrep_slot;
+ if (wsrep_debug)
+ fprintf(stderr, "WSREP: releasing aborting thd\n");
+ }
+#endif
if (slot != NULL) {
slot->wait_ended = TRUE;
@@ -1737,7 +1794,20 @@ srv_suspend_mysql_thread(
if (lock_wait_timeout < 100000000
&& wait_time > (double) lock_wait_timeout) {
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ wsrep_thd_is_brute_force(trx->mysql_thd)) {
+ fprintf(stderr,
+ "WSREP: BF long lock wait ended after %.f sec\n",
+ wait_time);
+ srv_print_innodb_monitor = FALSE;
+ srv_print_innodb_lock_monitor = FALSE;
+ } else {
+#endif
trx->error_state = DB_LOCK_WAIT_TIMEOUT;
+#ifdef WITH_WSREP
+ }
+#endif
}
if (trx_is_interrupted(trx)) {
@@ -2256,6 +2326,27 @@ exit_func:
OS_THREAD_DUMMY_RETURN;
}
+#ifdef WITH_WSREP
+/*********************************************************************//**
+check if lock timeout was for priority thread,
+as a side effect trigger lock monitor
+@return false for regular lock timeout */
+static ibool
+wsrep_is_BF_lock_timeout(
+/*====================*/
+ srv_slot_t* slot) /* in: lock slot to check for lock priority */
+{
+ if (wsrep_on(thr_get_trx(slot->thr)->mysql_thd) &&
+ wsrep_thd_is_brute_force((thr_get_trx(slot->thr))->mysql_thd)) {
+ fprintf(stderr, "WSREP: BF lock wait long\n");
+ srv_print_innodb_monitor = TRUE;
+ srv_print_innodb_lock_monitor = TRUE;
+ os_event_set(srv_lock_timeout_thread_event);
+ return TRUE;
+ }
+ return FALSE;
+ }
+#endif /* WITH_WSREP */
/*********************************************************************//**
A thread which wakes up threads whose lock wait may have lasted too long.
@return a dummy parameter */
@@ -2324,8 +2415,14 @@ loop:
granted: in that case do nothing */
if (trx->wait_lock) {
+#ifdef WITH_WSREP
+ if (!wsrep_is_BF_lock_timeout(slot)) {
+#endif
lock_cancel_waiting_and_release(
trx->wait_lock);
+#ifdef WITH_WSREP
+ }
+#endif
}
}
}
@@ -2442,7 +2539,20 @@ loop:
if (sync_array_print_long_waits(&waiter, &sema)
&& sema == old_sema && os_thread_eq(waiter, old_waiter)) {
+#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
+ if (srv_allow_writes_event->is_set) {
+#endif /* WITH_WSREP */
fatal_cnt++;
+#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
+ } else {
+ fprintf(stderr,
+ "WSREP: avoiding InnoDB self crash due to long "
+ "semaphore wait of > %lu seconds\n"
+ "Server is processing SST donor operation, "
+ "fatal_cnt now: %lu",
+ (ulong) srv_fatal_semaphore_wait_threshold, fatal_cnt);
+ }
+#endif /* WITH_WSREP */
if (fatal_cnt > 10) {
fprintf(stderr,
diff --git a/storage/innobase/trx/trx0roll.c b/storage/innobase/trx/trx0roll.c
index b55471959ce..3db63469cbe 100644
--- a/storage/innobase/trx/trx0roll.c
+++ b/storage/innobase/trx/trx0roll.c
@@ -42,6 +42,9 @@ Created 3/26/1996 Heikki Tuuri
#include "row0mysql.h"
#include "lock0lock.h"
#include "pars0pars.h"
+#ifdef WITH_WSREP
+#include "ha_prototypes.h"
+#endif /* WITH_WSREP */
/** This many pages must be undone before a truncate is tried within
rollback */
@@ -147,6 +150,12 @@ trx_rollback_for_mysql(
trx->op_info = "";
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ trx->was_chosen_as_deadlock_victim) {
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
return(err);
}
@@ -174,6 +183,12 @@ trx_rollback_last_sql_stat_for_mysql(
trx->op_info = "";
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ trx->was_chosen_as_deadlock_victim) {
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
return(err);
}
@@ -1123,6 +1138,12 @@ trx_rollback(
srv_que_task_enqueue_low(thr);
/* srv_que_task_enqueue_low(thr2); */
}
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ trx->was_chosen_as_deadlock_victim) {
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
}
/****************************************************************//**
@@ -1281,6 +1302,12 @@ trx_finish_rollback_off_kernel(
sig = next_sig;
}
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ trx->was_chosen_as_deadlock_victim) {
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
}
/*********************************************************************//**
diff --git a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
index 0c5e367ac30..d62b95adc2b 100644
--- a/storage/innobase/trx/trx0sys.c
+++ b/storage/innobase/trx/trx0sys.c
@@ -44,6 +44,10 @@ Created 3/26/1996 Heikki Tuuri
#include "os0file.h"
#include "read0read.h"
+#ifdef WITH_WSREP
+#include "ha_prototypes.h" /* wsrep_is_wsrep_xid() */
+#endif /* */
+
/** The file format tag structure with id and name. */
struct file_format_struct {
ulint id; /*!< id of the file format */
@@ -787,6 +791,89 @@ trx_sys_print_mysql_binlog_offset(void)
mtr_commit(&mtr);
}
+#ifdef WITH_WSREP
+
+void
+trx_sys_update_wsrep_checkpoint(
+ const XID* xid, /*!< in: transaction XID */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ trx_sysf_t* sys_header;
+
+ ut_ad(xid && mtr);
+ ut_a(xid->formatID == -1 || wsrep_is_wsrep_xid(xid));
+
+ sys_header = trx_sysf_get(mtr);
+ if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD)
+ != TRX_SYS_WSREP_XID_MAGIC_N) {
+ mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD,
+ TRX_SYS_WSREP_XID_MAGIC_N,
+ MLOG_4BYTES, mtr);
+ }
+
+ mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_FORMAT,
+ (int)xid->formatID,
+ MLOG_4BYTES, mtr);
+ mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_GTRID_LEN,
+ (int)xid->gtrid_length,
+ MLOG_4BYTES, mtr);
+ mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_BQUAL_LEN,
+ (int)xid->bqual_length,
+ MLOG_4BYTES, mtr);
+ mlog_write_string(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_DATA,
+ (const unsigned char*) xid->data,
+ XIDDATASIZE, mtr);
+
+}
+
+void
+trx_sys_read_wsrep_checkpoint(XID* xid)
+/*===================================*/
+{
+ trx_sysf_t* sys_header;
+ mtr_t mtr;
+ ulint magic;
+
+ ut_ad(xid);
+
+ mtr_start(&mtr);
+
+ sys_header = trx_sysf_get(&mtr);
+
+ if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD))
+ != TRX_SYS_WSREP_XID_MAGIC_N) {
+ memset(xid, 0, sizeof(*xid));
+ xid->formatID = -1;
+ trx_sys_update_wsrep_checkpoint(xid, &mtr);
+ mtr_commit(&mtr);
+ return;
+ }
+
+ xid->formatID = (int)mach_read_from_4(
+ sys_header
+ + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT);
+ xid->gtrid_length = (int)mach_read_from_4(
+ sys_header
+ + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN);
+ xid->bqual_length = (int)mach_read_from_4(
+ sys_header
+ + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN);
+ ut_memcpy(xid->data,
+ sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
+ XIDDATASIZE);
+
+ mtr_commit(&mtr);
+}
+
+#endif /* WITH_WSREP */
+
/*****************************************************************//**
Prints to stderr the MySQL master log offset info in the trx system header if
the magic number shows it valid. */
diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
index 85246aa6d1f..23cac1e11db 100644
--- a/storage/innobase/trx/trx0trx.c
+++ b/storage/innobase/trx/trx0trx.c
@@ -714,6 +714,11 @@ trx_start_low(
trx->id = trx_sys_get_new_trx_id();
+#ifdef WITH_WSREP
+ memset(&trx->xid, 0, sizeof(trx->xid));
+ trx->xid.formatID = -1;
+#endif /* WITH_WSREP */
+
/* The initial value for trx->no: IB_ULONGLONG_MAX is used in
read_view_open_now: */
@@ -867,6 +872,14 @@ trx_write_serialisation_history(
mutex_exit(&rseg->mutex);
+#ifdef WITH_WSREP
+ /* Update latest MySQL wsrep XID in trx sys header. */
+ if (wsrep_is_wsrep_xid(&trx->xid))
+ {
+ trx_sys_update_wsrep_checkpoint(&trx->xid, &mtr);
+ }
+#endif /* WITH_WSREP */
+
/* Update the latest MySQL binlog name and offset info
in trx sys header if MySQL binlogging is on or the database
server is a MySQL replication slave */
@@ -1064,6 +1077,12 @@ trx_commit_off_kernel(
ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ trx->was_chosen_as_deadlock_victim) {
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
trx->error_state = DB_SUCCESS;
diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c
index 516b6e927e0..0d484dc44cd 100644
--- a/storage/xtradb/dict/dict0dict.c
+++ b/storage/xtradb/dict/dict0dict.c
@@ -2560,7 +2560,26 @@ next_rec:
return(NULL);
}
-
+#ifdef WITH_WSREP
+dict_index_t*
+wsrep_dict_foreign_find_index(
+/*====================*/
+ dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ dict_index_t* types_idx, /*!< in: NULL or an index to whose types the
+ column types must match */
+ ibool check_charsets,
+ /*!< in: whether to check charsets.
+ only has an effect if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of the columns must
+ be declared NOT NULL */
+{
+ return dict_foreign_find_index(
+ table, columns, n_cols, types_idx, check_charsets, check_null);
+}
+#endif /* WITH_WSREP */
/**********************************************************************//**
Find an index that is equivalent to the one passed in and is not marked
for deletion.
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 5cf3b021510..59acbdb504d 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -98,6 +98,12 @@ extern "C" {
#include "ha_prototypes.h"
#include "ut0mem.h"
#include "ibuf0ibuf.h"
+#ifdef WITH_WSREP
+#include "../storage/innobase/include/ut0byte.h"
+#ifndef EXTRA_DEBUG
+ //#include "../storage/innobase/include/ut0byte.ic"
+#endif /* EXTRA_DEBUG */
+#endif /* WITH_WSREP */
}
#include "ha_innodb.h"
@@ -115,6 +121,32 @@ extern ib_int64_t trx_sys_mysql_relay_log_pos;
# define MYSQL_PLUGIN_IMPORT /* nothing */
# endif /* MYSQL_PLUGIN_IMPORT */
+#ifdef WITH_WSREP
+#include <wsrep_mysqld.h>
+#include <my_md5.h>
+extern my_bool wsrep_certify_nonPK;
+class binlog_trx_data;
+extern handlerton *binlog_hton;
+
+extern MYSQL_PLUGIN_IMPORT MYSQL_BIN_LOG mysql_bin_log;
+extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_wsrep_rollback;
+extern MYSQL_PLUGIN_IMPORT mysql_cond_t COND_wsrep_rollback;
+extern MYSQL_PLUGIN_IMPORT wsrep_aborting_thd_t wsrep_aborting_thd;
+
+static inline wsrep_trx_handle_t*
+wsrep_trx_handle(THD* thd, const trx_t* trx) {
+ return wsrep_trx_handle_for_id(wsrep_thd_trx_handle(thd),
+ (wsrep_trx_id_t)trx->id);
+}
+
+extern bool wsrep_prepare_key_for_innodb(const uchar *cache_key,
+ size_t cache_key_len,
+ const uchar* row_id,
+ size_t row_id_len,
+ wsrep_key_part_t* key,
+ size_t* key_len);
+
+#endif /* WITH_WSREP */
/** to protect innobase_open_files */
static mysql_mutex_t innobase_share_mutex;
static ulong commit_threads = 0;
@@ -1041,6 +1073,15 @@ thd_to_trx(
{
return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr));
}
+#ifdef WITH_WSREP
+ulonglong
+thd_to_trx_id(
+/*=======*/
+ THD* thd) /*!< in: MySQL thread */
+{
+ return(thd_to_trx(thd)->id);
+}
+#endif
/********************************************************************//**
Call this function when mysqld passes control to the client. That is to
@@ -1071,6 +1112,13 @@ innobase_release_temporary_latches(
return(0);
}
+#ifdef WITH_WSREP
+static int
+wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd,
+ my_bool signal);
+static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid);
+static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid);
+#endif
/********************************************************************//**
Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
time calls srv_active_wake_master_thread. This function should be used
@@ -1523,6 +1571,9 @@ int
innobase_mysql_tmpfile(void)
/*========================*/
{
+#ifdef WITH_INNODB_DISALLOW_WRITES
+ os_event_wait(srv_allow_writes_event);
+#endif /* WITH_INNODB_DISALLOW_WRITES */
int fd2 = -1;
File fd = mysql_tmpfile("ib");
if (fd >= 0) {
@@ -2627,6 +2678,11 @@ innobase_init(
innobase_hton->flags=HTON_NO_FLAGS;
innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
innobase_hton->alter_table_flags = innobase_alter_table_flags;
+#ifdef WITH_WSREP
+ innobase_hton->wsrep_abort_transaction=wsrep_abort_transaction;
+ innobase_hton->wsrep_set_checkpoint=innobase_wsrep_set_checkpoint;
+ innobase_hton->wsrep_get_checkpoint=innobase_wsrep_get_checkpoint;
+#endif /* WITH_WSREP */
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
@@ -3279,6 +3335,27 @@ innobase_commit_low(
trx_commit_for_mysql(trx);
}
+#ifdef WITH_WSREP
+ THD* thd = (THD*)trx->mysql_thd;
+ const char* tmp = 0;
+ if (wsrep_on((void*)thd)) {
+#ifdef WSREP_PROC_INFO
+ char info[64];
+ info[sizeof(info) - 1] = '\0';
+ snprintf(info, sizeof(info) - 1,
+ "innobase_commit_low():trx_commit_for_mysql(%lld)",
+ (long long) wsrep_thd_trx_seqno(thd));
+ tmp = thd_proc_info(thd, info);
+
+#else
+ tmp = thd_proc_info(thd, "innobase_commit_low()");
+#endif /* WSREP_PROC_INFO */
+ }
+#endif /* WITH_WSREP */
+ trx_commit_for_mysql(trx);
+#ifdef WITH_WSREP
+ if (wsrep_on((void*)thd)) { thd_proc_info(thd, tmp); }
+#endif /* WITH_WSREP */
}
/*****************************************************************//**
@@ -3901,7 +3978,11 @@ ha_innobase::max_supported_key_length() const
therefore set to slightly less than 1 / 4 of page size which
is 16 kB; but currently MySQL does not work with keys whose
size is > MAX_KEY_LENGTH */
+#ifdef WITH_WSREP
+ return(3500);
+#else
return(3500);
+#endif
}
/****************************************************************//**
@@ -5002,7 +5083,96 @@ innobase_mysql_cmp(
return(0);
}
+#ifdef WITH_WSREP
+extern "C" UNIV_INTERN
+void
+wsrep_innobase_mysql_sort(
+/*===============*/
+ /* out: str contains sort string */
+ int mysql_type, /* in: MySQL type */
+ uint charset_number, /* in: number of the charset */
+ unsigned char* str, /* in: data field */
+ unsigned int str_length) /* in: data field length,
+ not UNIV_SQL_NULL */
+{
+ CHARSET_INFO* charset;
+ enum_field_types mysql_tp;
+
+ DBUG_ASSERT(str_length != UNIV_SQL_NULL);
+
+ mysql_tp = (enum_field_types) mysql_type;
+
+ switch (mysql_tp) {
+
+ case MYSQL_TYPE_BIT:
+ case MYSQL_TYPE_STRING:
+ case MYSQL_TYPE_VAR_STRING:
+ case MYSQL_TYPE_TINY_BLOB:
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ case MYSQL_TYPE_BLOB:
+ case MYSQL_TYPE_LONG_BLOB:
+ case MYSQL_TYPE_VARCHAR:
+ {
+ uchar tmp_str[REC_VERSION_56_MAX_INDEX_COL_LEN];
+ uint tmp_length = REC_VERSION_56_MAX_INDEX_COL_LEN;
+ /* Use the charset number to pick the right charset struct for
+ the comparison. Since the MySQL function get_charset may be
+ slow before Bar removes the mutex operation there, we first
+ look at 2 common charsets directly. */
+
+ if (charset_number == default_charset_info->number) {
+ charset = default_charset_info;
+ } else if (charset_number == my_charset_latin1.number) {
+ charset = &my_charset_latin1;
+ } else {
+ charset = get_charset(charset_number, MYF(MY_WME));
+
+ if (charset == NULL) {
+ sql_print_error("InnoDB needs charset %lu for doing "
+ "a comparison, but MySQL cannot "
+ "find that charset.",
+ (ulong) charset_number);
+ ut_a(0);
+ }
+ }
+
+ ut_a(str_length <= tmp_length);
+ memcpy(tmp_str, str, str_length);
+
+ tmp_length = charset->coll->strnxfrm(charset, str, str_length,
+ tmp_str, tmp_length);
+ DBUG_ASSERT(tmp_length == str_length);
+
+ break;
+ }
+ case MYSQL_TYPE_DECIMAL :
+ case MYSQL_TYPE_TINY :
+ case MYSQL_TYPE_SHORT :
+ case MYSQL_TYPE_LONG :
+ case MYSQL_TYPE_FLOAT :
+ case MYSQL_TYPE_DOUBLE :
+ case MYSQL_TYPE_NULL :
+ case MYSQL_TYPE_TIMESTAMP :
+ case MYSQL_TYPE_LONGLONG :
+ case MYSQL_TYPE_INT24 :
+ case MYSQL_TYPE_DATE :
+ case MYSQL_TYPE_TIME :
+ case MYSQL_TYPE_DATETIME :
+ case MYSQL_TYPE_YEAR :
+ case MYSQL_TYPE_NEWDATE :
+ case MYSQL_TYPE_NEWDECIMAL :
+ case MYSQL_TYPE_ENUM :
+ case MYSQL_TYPE_SET :
+ case MYSQL_TYPE_GEOMETRY :
+ break;
+ default:
+ break;
+ }
+
+ return;
+}
+#endif // WITH_WSREP
/**************************************************************//**
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
@@ -5156,6 +5326,262 @@ innobase_read_from_2_little_endian(
/*******************************************************************//**
Stores a key value for a row to a buffer.
@return key value length as stored in buff */
+#ifdef WITH_WSREP
+UNIV_INTERN
+uint
+wsrep_store_key_val_for_row(
+/*===============================*/
+ TABLE* table,
+ uint keynr, /*!< in: key number */
+ char* buff, /*!< in/out: buffer for the key value (in MySQL
+ format) */
+ uint buff_len,/*!< in: buffer length */
+ const uchar* record,
+ ibool* key_is_null)/*!< out: full key was null */
+{
+ KEY* key_info = table->key_info + keynr;
+ KEY_PART_INFO* key_part = key_info->key_part;
+ KEY_PART_INFO* end = key_part + key_info->key_parts;
+ char* buff_start = buff;
+ enum_field_types mysql_type;
+ Field* field;
+
+ DBUG_ENTER("store_key_val_for_row");
+
+ bzero(buff, buff_len);
+ *key_is_null = TRUE;
+
+ for (; key_part != end; key_part++) {
+ uchar sorted[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
+ ibool part_is_null = FALSE;
+
+ if (key_part->null_bit) {
+ if (record[key_part->null_offset] &
+ key_part->null_bit) {
+ *buff = 1;
+ part_is_null = TRUE;
+ } else {
+ *buff = 0;
+ }
+ buff++;
+ }
+ if (!part_is_null) *key_is_null = FALSE;
+
+ field = key_part->field;
+ mysql_type = field->type();
+
+ if (mysql_type == MYSQL_TYPE_VARCHAR) {
+ /* >= 5.0.3 true VARCHAR */
+ ulint lenlen;
+ ulint len;
+ const byte* data;
+ ulint key_len;
+ ulint true_len;
+ CHARSET_INFO* cs;
+ int error=0;
+
+ key_len = key_part->length;
+
+ if (part_is_null) {
+ buff += key_len + 2;
+
+ continue;
+ }
+ cs = field->charset();
+
+ lenlen = (ulint)
+ (((Field_varstring*)field)->length_bytes);
+
+ data = row_mysql_read_true_varchar(&len,
+ (byte*) (record
+ + (ulint)get_field_offset(table, field)),
+ lenlen);
+
+ true_len = len;
+
+ /* For multi byte character sets we need to calculate
+ the true length of the key */
+
+ if (len > 0 && cs->mbmaxlen > 1) {
+ true_len = (ulint) cs->cset->well_formed_len(cs,
+ (const char *) data,
+ (const char *) data + len,
+ (uint) (key_len /
+ cs->mbmaxlen),
+ &error);
+ }
+
+ /* In a column prefix index, we may need to truncate
+ the stored value: */
+
+ if (true_len > key_len) {
+ true_len = key_len;
+ }
+
+ memcpy(sorted, data, true_len);
+ wsrep_innobase_mysql_sort(
+ mysql_type, cs->number, sorted, true_len);
+
+ if (wsrep_protocol_version > 1) {
+ memcpy(buff, sorted, true_len);
+ /* Note that we always reserve the maximum possible
+ length of the true VARCHAR in the key value, though
+ only len first bytes after the 2 length bytes contain
+ actual data. The rest of the space was reset to zero
+ in the bzero() call above. */
+ buff += true_len;
+ } else {
+ buff += key_len;
+ }
+ } else if (mysql_type == MYSQL_TYPE_TINY_BLOB
+ || mysql_type == MYSQL_TYPE_MEDIUM_BLOB
+ || mysql_type == MYSQL_TYPE_BLOB
+ || mysql_type == MYSQL_TYPE_LONG_BLOB
+ /* MYSQL_TYPE_GEOMETRY data is treated
+ as BLOB data in innodb. */
+ || mysql_type == MYSQL_TYPE_GEOMETRY) {
+
+ CHARSET_INFO* cs;
+ ulint key_len;
+ ulint true_len;
+ int error=0;
+ ulint blob_len;
+ const byte* blob_data;
+
+ ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
+
+ key_len = key_part->length;
+
+ if (part_is_null) {
+ buff += key_len + 2;
+
+ continue;
+ }
+
+ cs = field->charset();
+
+ blob_data = row_mysql_read_blob_ref(&blob_len,
+ (byte*) (record
+ + (ulint)get_field_offset(table, field)),
+ (ulint) field->pack_length());
+
+ true_len = blob_len;
+
+ ut_a(get_field_offset(table, field)
+ == key_part->offset);
+
+ /* For multi byte character sets we need to calculate
+ the true length of the key */
+
+ if (blob_len > 0 && cs->mbmaxlen > 1) {
+ true_len = (ulint) cs->cset->well_formed_len(cs,
+ (const char *) blob_data,
+ (const char *) blob_data
+ + blob_len,
+ (uint) (key_len /
+ cs->mbmaxlen),
+ &error);
+ }
+
+ /* All indexes on BLOB and TEXT are column prefix
+ indexes, and we may need to truncate the data to be
+ stored in the key value: */
+
+ if (true_len > key_len) {
+ true_len = key_len;
+ }
+
+ memcpy(sorted, blob_data, true_len);
+ wsrep_innobase_mysql_sort(
+ mysql_type, cs->number, sorted, true_len);
+
+ memcpy(buff, sorted, true_len);
+
+ /* Note that we always reserve the maximum possible
+ length of the BLOB prefix in the key value. */
+ if (wsrep_protocol_version > 1) {
+ buff += true_len;
+ } else {
+ buff += key_len;
+ }
+ } else {
+ /* Here we handle all other data types except the
+ true VARCHAR, BLOB and TEXT. Note that the column
+ value we store may be also in a column prefix
+ index. */
+
+ CHARSET_INFO* cs;
+ ulint true_len;
+ ulint key_len;
+ const uchar* src_start;
+ int error=0;
+ enum_field_types real_type;
+
+ key_len = key_part->length;
+
+ if (part_is_null) {
+ buff += key_len;
+
+ continue;
+ }
+
+ src_start = record + key_part->offset;
+ real_type = field->real_type();
+ true_len = key_len;
+
+ /* Character set for the field is defined only
+ to fields whose type is string and real field
+ type is not enum or set. For these fields check
+ if character set is multi byte. */
+
+ if (real_type != MYSQL_TYPE_ENUM
+ && real_type != MYSQL_TYPE_SET
+ && ( mysql_type == MYSQL_TYPE_VAR_STRING
+ || mysql_type == MYSQL_TYPE_STRING)) {
+
+ cs = field->charset();
+
+ /* For multi byte character sets we need to
+ calculate the true length of the key */
+
+ if (key_len > 0 && cs->mbmaxlen > 1) {
+
+ true_len = (ulint)
+ cs->cset->well_formed_len(cs,
+ (const char *)src_start,
+ (const char *)src_start
+ + key_len,
+ (uint) (key_len /
+ cs->mbmaxlen),
+ &error);
+ }
+ memcpy(sorted, src_start, true_len);
+ wsrep_innobase_mysql_sort(
+ mysql_type, cs->number, sorted, true_len);
+ memcpy(buff, sorted, true_len);
+ } else {
+ memcpy(buff, src_start, true_len);
+ }
+ buff += true_len;
+
+ /* Pad the unused space with spaces. */
+
+ if (true_len < key_len) {
+ ulint pad_len = key_len - true_len;
+ ut_a(!(pad_len % cs->mbminlen));
+
+ cs->cset->fill(cs, buff, pad_len,
+ 0x20 /* space */);
+ buff += pad_len;
+ }
+ }
+ }
+
+ ut_a(buff <= buff_start + buff_len);
+
+ DBUG_RETURN((uint)(buff - buff_start));
+}
+#endif /* WITH_WSREP */
UNIV_INTERN
uint
ha_innobase::store_key_val_for_row(
@@ -5966,6 +6392,9 @@ ha_innobase::write_row(
ulint error = 0;
int error_result= 0;
ibool auto_inc_used= FALSE;
+#ifdef WITH_WSREP
+ ibool auto_inc_inserted= FALSE; /* if NULL was inserted */
+#endif
ulint sql_command;
trx_t* trx = thd_to_trx(user_thd);
@@ -6000,8 +6429,17 @@ ha_innobase::write_row(
if ((sql_command == SQLCOM_ALTER_TABLE
|| sql_command == SQLCOM_OPTIMIZE
|| sql_command == SQLCOM_CREATE_INDEX
+#ifdef WITH_WSREP
+ || (wsrep_on(user_thd) && sql_command == SQLCOM_LOAD)
+#endif /* WITH_WSREP */
|| sql_command == SQLCOM_DROP_INDEX)
&& num_write_row >= 10000) {
+#ifdef WITH_WSREP
+ if (wsrep_on(user_thd) && sql_command == SQLCOM_LOAD) {
+ WSREP_DEBUG("forced trx split for LOAD: %s",
+ wsrep_thd_query(user_thd));
+ }
+#endif /* WITH_WSREP */
/* ALTER TABLE is COMMITted at every 10000 copied rows.
The IX table lock for the original table has to be re-issued.
As this method will be called on a temporary table where the
@@ -6073,7 +6511,9 @@ no_commit:
/* Reset the error code before calling
innobase_get_auto_increment(). */
prebuilt->autoinc_error = DB_SUCCESS;
-
+#ifdef WITH_WSREP
+ auto_inc_inserted= (table->next_number_field->val_int() == 0);
+#endif
if ((error = update_auto_increment())) {
/* We don't want to mask autoinc overflow errors. */
@@ -6160,6 +6600,30 @@ no_commit:
case SQLCOM_REPLACE_SELECT:
goto set_max_autoinc;
+#ifdef WITH_WSREP
+ /* workaround for LP bug #355000, retrying the insert */
+ case SQLCOM_INSERT:
+ if (wsrep_on(current_thd) &&
+ auto_inc_inserted &&
+ wsrep_drupal_282555_workaround &&
+ !thd_test_options(current_thd,
+ OPTION_NOT_AUTOCOMMIT |
+ OPTION_BEGIN)) {
+ WSREP_DEBUG(
+ "retrying insert: %s",
+ (*wsrep_thd_query(current_thd)) ?
+ wsrep_thd_query(current_thd) :
+ (char *)"void");
+ error= DB_SUCCESS;
+ wsrep_thd_set_conflict_state(
+ current_thd, MUST_ABORT);
+ innodb_srv_conc_exit_innodb(prebuilt->trx);
+ /* jump straight to func exit over
+ * later wsrep hooks */
+ goto func_exit;
+ }
+ break;
+#endif
default:
break;
}
@@ -6208,6 +6672,20 @@ report_error:
error_result = convert_error_code_to_mysql((int) error,
prebuilt->table->flags,
user_thd);
+#ifdef WITH_WSREP
+ if (!error_result && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE &&
+ wsrep_on(user_thd) && !wsrep_consistency_check(user_thd) &&
+ (sql_command != SQLCOM_LOAD ||
+ thd_binlog_format(user_thd) == BINLOG_FORMAT_ROW)) {
+
+ if (wsrep_append_keys(user_thd, false, record, NULL)) {
+ DBUG_PRINT("wsrep", ("row key failed"));
+ error_result = HA_ERR_INTERNAL_ERROR;
+ goto wsrep_error;
+ }
+ }
+wsrep_error:
+#endif
func_exit:
innobase_active_small();
@@ -6509,6 +6987,20 @@ ha_innobase::update_row(
DBUG_RETURN(HA_ERR_CRASHED);
}
+#ifdef WITH_WSREP
+ if (!error && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE &&
+ wsrep_on(user_thd)) {
+
+ DBUG_PRINT("wsrep", ("update row key"));
+
+ if (wsrep_append_keys(user_thd, false, old_row, new_row)) {
+ DBUG_PRINT("wsrep", ("row key failed"));
+ error = HA_ERR_INTERNAL_ERROR;
+ goto wsrep_error;
+ }
+ }
+wsrep_error:
+#endif
DBUG_RETURN(error);
}
@@ -6564,6 +7056,18 @@ ha_innobase::delete_row(
DBUG_RETURN(HA_ERR_CRASHED);
}
+#ifdef WITH_WSREP
+ if (!error && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE &&
+ wsrep_on(user_thd)) {
+
+ if (wsrep_append_keys(user_thd, false, record, NULL)) {
+ DBUG_PRINT("wsrep", ("delete fail"));
+ error = HA_ERR_INTERNAL_ERROR;
+ goto wsrep_error;
+ }
+ }
+wsrep_error:
+#endif
DBUG_RETURN(error);
}
@@ -7383,7 +7887,354 @@ ha_innobase::rnd_pos(
DBUG_RETURN(error);
}
+#ifdef WITH_WSREP
+extern "C" {
+dict_index_t*
+wsrep_dict_foreign_find_index(
+ dict_table_t* table,
+ const char** columns,
+ ulint n_cols,
+ dict_index_t* types_idx,
+ ibool check_charsets,
+ ulint check_null);
+ulint
+wsrep_append_foreign_key(
+/*===========================*/
+ trx_t* trx, /*!< in: trx */
+ dict_foreign_t* foreign, /*!< in: foreign key constraint */
+ const rec_t* rec, /*!<in: clustered index record */
+ dict_index_t* index, /*!<in: clustered index */
+ ibool referenced, /*!<in: is check for referenced table */
+ ibool shared) /*!<in: is shared access */
+{
+ ut_a(trx);
+ THD* thd = (THD*)trx->mysql_thd;
+ ulint rcode = DB_SUCCESS;
+ char cache_key[513] = {'\0'};
+ int cache_key_len;
+
+ if (!wsrep_on(trx->mysql_thd) ||
+ wsrep_thd_exec_mode(thd) != LOCAL_STATE)
+ return DB_SUCCESS;
+
+ if (!thd || !foreign ||
+ (!foreign->referenced_table && !foreign->foreign_table))
+ {
+ WSREP_INFO("FK: %s missing in: %s",
+ (!thd) ? "thread" :
+ ((!foreign) ? "constraint" :
+ ((!foreign->referenced_table) ?
+ "referenced table" : "foreign table")),
+ (thd && wsrep_thd_query(thd)) ?
+ wsrep_thd_query(thd) : "void");
+ return DB_ERROR;
+ }
+
+ if ( !((referenced) ?
+ foreign->referenced_table : foreign->foreign_table))
+ {
+ WSREP_DEBUG("pulling %s table into cache",
+ (referenced) ? "referenced" : "foreign");
+ mutex_enter(&(dict_sys->mutex));
+ if (referenced)
+ {
+ foreign->referenced_table =
+ dict_table_check_if_in_cache_low(
+ foreign->referenced_table_name_lookup);
+ foreign->referenced_index =
+ wsrep_dict_foreign_find_index(
+ foreign->referenced_table,
+ foreign->referenced_col_names,
+ foreign->n_fields,
+ foreign->foreign_index,
+ TRUE, FALSE);
+ }
+ else
+ {
+ foreign->foreign_table =
+ dict_table_check_if_in_cache_low(
+ foreign->foreign_table_name_lookup);
+ foreign->foreign_index =
+ wsrep_dict_foreign_find_index(
+ foreign->foreign_table,
+ foreign->foreign_col_names,
+ foreign->n_fields,
+ foreign->referenced_index,
+ TRUE, FALSE);
+
+ }
+ mutex_exit(&(dict_sys->mutex));
+ }
+
+ if ( !((referenced) ?
+ foreign->referenced_table : foreign->foreign_table))
+ {
+ WSREP_WARN("FK: %s missing in query: %s",
+ (!foreign->referenced_table) ?
+ "referenced table" : "foreign table",
+ (wsrep_thd_query(thd)) ?
+ wsrep_thd_query(thd) : "void");
+ return DB_ERROR;
+ }
+ byte key[WSREP_MAX_SUPPORTED_KEY_LENGTH+1];
+ ulint len = WSREP_MAX_SUPPORTED_KEY_LENGTH;
+
+ dict_index_t *idx_target = (referenced) ?
+ foreign->referenced_index : foreign->foreign_index;
+ dict_index_t *idx = (referenced) ?
+ UT_LIST_GET_FIRST(foreign->referenced_table->indexes) :
+ UT_LIST_GET_FIRST(foreign->foreign_table->indexes);
+ int i = 0;
+ while (idx != NULL && idx != idx_target) {
+ idx = UT_LIST_GET_NEXT(indexes, idx);
+ i++;
+ }
+ ut_a(idx);
+ key[0] = (char)i;
+
+ rcode = wsrep_rec_get_primary_key(
+ &key[1], &len, rec, index,
+ wsrep_protocol_version > 1);
+ if (rcode != DB_SUCCESS) {
+ WSREP_ERROR(
+ "FK key set failed: %lu (%lu %lu), index: %s %s, %s",
+ rcode, referenced, shared,
+ (index && index->name) ? index->name :
+ "void index",
+ (index && index->table_name) ? index->table_name :
+ "void table",
+ wsrep_thd_query(thd));
+ return rcode;
+ }
+ strncpy(cache_key,
+ (wsrep_protocol_version > 1) ?
+ ((referenced) ?
+ foreign->referenced_table->name :
+ foreign->foreign_table->name) :
+ foreign->foreign_table->name, sizeof(cache_key) - 1);
+ cache_key_len = strlen(cache_key);
+#ifdef WSREP_DEBUG_PRINT
+ ulint j;
+ fprintf(stderr, "FK parent key, table: %s %s len: %lu ",
+ cache_key, (shared) ? "shared" : "exclusive", len+1);
+ for (j=0; j<len+1; j++) {
+ fprintf(stderr, " %hhX, ", key[j]);
+ }
+ fprintf(stderr, "\n");
+#endif
+ char *p = strchr(cache_key, '/');
+ if (p) {
+ *p = '\0';
+ } else {
+ WSREP_WARN("unexpected foreign key table %s %s",
+ foreign->referenced_table->name,
+ foreign->foreign_table->name);
+ }
+
+ wsrep_key_part_t wkey_part[3];
+ wsrep_key_t wkey = {wkey_part, 3};
+ if (!wsrep_prepare_key_for_innodb(
+ (const uchar*)cache_key,
+ cache_key_len + 1,
+ (const uchar*)key, len+1,
+ wkey_part,
+ &wkey.key_parts_len)) {
+ WSREP_WARN("key prepare failed for cascaded FK: %s",
+ (wsrep_thd_query(thd)) ?
+ wsrep_thd_query(thd) : "void");
+ return DB_ERROR;
+ }
+ rcode = (int)wsrep->append_key(
+ wsrep,
+ wsrep_trx_handle(thd, trx),
+ &wkey,
+ 1,
+ shared);
+ if (rcode) {
+ DBUG_PRINT("wsrep", ("row key failed: %lu", rcode));
+ WSREP_ERROR("Appending cascaded fk row key failed: %s, %lu",
+ (wsrep_thd_query(thd)) ?
+ wsrep_thd_query(thd) : "void", rcode);
+ return DB_ERROR;
+ }
+
+ return DB_SUCCESS;
+}
+}
+
+static int
+wsrep_append_key(
+/*==================*/
+ THD *thd,
+ trx_t *trx,
+ TABLE_SHARE *table_share,
+ TABLE *table,
+ const char* key,
+ uint16_t key_len,
+ bool shared
+)
+{
+ DBUG_ENTER("wsrep_append_key");
+#ifdef WSREP_DEBUG_PRINT
+ fprintf(stderr, "%s conn %ld, trx %llu, keylen %d, table %s ",
+ (shared) ? "Shared" : "Exclusive",
+ wsrep_thd_thread_id(thd), trx->id, key_len,
+ table_share->table_name.str);
+ for (int i=0; i<key_len; i++) {
+ fprintf(stderr, "%hhX, ", key[i]);
+ }
+ fprintf(stderr, "\n");
+#endif
+ wsrep_key_part_t wkey_part[3];
+ wsrep_key_t wkey = {wkey_part, 3};
+ if (!wsrep_prepare_key_for_innodb(
+ (const uchar*)table_share->table_cache_key.str,
+ table_share->table_cache_key.length,
+ (const uchar*)key, key_len,
+ wkey_part,
+ &wkey.key_parts_len)) {
+ WSREP_WARN("key prepare failed for: %s",
+ (wsrep_thd_query(thd)) ?
+ wsrep_thd_query(thd) : "void");
+ DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ }
+
+ int rcode = (int)wsrep->append_key(
+ wsrep,
+ wsrep_trx_handle(thd, trx),
+ &wkey,
+ 1,
+ shared);
+ if (rcode) {
+ DBUG_PRINT("wsrep", ("row key failed: %d", rcode));
+ WSREP_WARN("Appending row key failed: %s, %d",
+ (wsrep_thd_query(thd)) ?
+ wsrep_thd_query(thd) : "void", rcode);
+ DBUG_RETURN(rcode);
+ }
+ DBUG_RETURN(0);
+}
+
+ibool
+wsrep_is_cascding_foreign_key_parent(
+ dict_table_t* table, /*!< in: InnoDB table */
+ dict_index_t* index /*!< in: InnoDB index */
+) {
+ // return referenced_by_foreign_key();
+ dict_foreign_t* fk = dict_table_get_referenced_constraint(table, index);
+ if (fk &&
+ (fk->type & DICT_FOREIGN_ON_UPDATE_CASCADE ||
+ fk->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)
+ ) {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+int
+ha_innobase::wsrep_append_keys(
+/*==================*/
+ THD *thd,
+ bool shared,
+ const uchar* record0, /* in: row in MySQL format */
+ const uchar* record1) /* in: row in MySQL format */
+{
+ DBUG_ENTER("wsrep_append_keys");
+ trx_t *trx = thd_to_trx(thd);
+
+ /* if no PK, calculate hash of full row, to be the key value */
+ if (prebuilt->clust_index_was_generated && wsrep_certify_nonPK) {
+ uchar digest[16];
+ int rcode;
+
+ MY_MD5_HASH(digest, (uchar *)record0, table->s->reclength);
+ if ((rcode = wsrep_append_key(thd, trx, table_share, table,
+ (const char*) digest, 16,
+ shared))) {
+ DBUG_RETURN(rcode);
+ }
+ if (record1) {
+ MY_MD5_HASH(digest, (uchar *)record1, table->s->reclength);
+ if ((rcode = wsrep_append_key(thd, trx, table_share,
+ table,
+ (const char*) digest,
+ 16, shared))) {
+ DBUG_RETURN(rcode);
+ }
+ }
+ }
+ if (wsrep_protocol_version == 0) {
+ uint len;
+ char keyval[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
+ char *key = &keyval[0];
+ KEY *key_info = table->key_info;
+ ibool is_null;
+
+ len = wsrep_store_key_val_for_row(
+ table, 0, key, key_info->key_length, record0, &is_null);
+
+ if (!is_null) {
+ int rcode = wsrep_append_key(
+ thd, trx, table_share, table, keyval,
+ len, shared);
+ if (rcode) DBUG_RETURN(rcode);
+ }
+ else
+ {
+ WSREP_DEBUG("NULL key skipped (proto 0): %s",
+ wsrep_thd_query(thd));
+ }
+ } else {
+ ut_a(table->s->keys <= 256);
+ uint i;
+ for (i=0; i<table->s->keys; ++i) {
+ uint len;
+ char keyval0[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
+ char keyval1[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
+ char *key0 = &keyval0[1];
+ char *key1 = &keyval1[1];
+ KEY *key_info = table->key_info + i;
+ ibool is_null;
+
+ keyval0[0] = (char)i;
+ keyval1[0] = (char)i;
+
+ if (key_info->flags & HA_NOSAME ||
+ referenced_by_foreign_key()) {
+
+ len = wsrep_store_key_val_for_row(
+ table, i, key0, key_info->key_length,
+ record0, &is_null);
+ if (!is_null) {
+ int rcode = wsrep_append_key(
+ thd, trx, table_share, table,
+ keyval0, len+1, shared);
+ if (rcode) DBUG_RETURN(rcode);
+ }
+ else
+ {
+ WSREP_DEBUG("NULL key skipped: %s",
+ wsrep_thd_query(thd));
+ }
+ if (record1) {
+ len = wsrep_store_key_val_for_row(
+ table, i, key1, key_info->key_length,
+ record1, &is_null);
+ if (!is_null && memcmp(key0, key1, len)) {
+ int rcode = wsrep_append_key(
+ thd, trx, table_share,
+ table,
+ keyval1, len+1, shared);
+ if (rcode) DBUG_RETURN(rcode);
+ }
+ }
+ }
+ }
+ }
+ DBUG_RETURN(0);
+}
+#endif
/*********************************************************************//**
Stores a reference to the current row to 'ref' field of the handle. Note
that in the case where we have generated the clustered index for the
@@ -10352,11 +11203,18 @@ ha_innobase::external_lock(
/* used by test case */
DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = 1;);
if (!skip) {
+#ifdef WITH_WSREP
+ if (!wsrep_on(thd) || wsrep_thd_exec_mode(thd) == LOCAL_STATE)
+ {
+#endif /* WITH_WSREP */
my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0),
" InnoDB is limited to row-logging when "
"transaction isolation level is "
"READ COMMITTED or READ UNCOMMITTED.");
DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE);
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
}
}
@@ -12520,6 +13378,259 @@ static SHOW_VAR innodb_status_variables_export[]= {
static struct st_mysql_storage_engine innobase_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
+#ifdef WITH_WSREP
+void
+wsrep_abort_slave_trx(wsrep_seqno_t bf_seqno, wsrep_seqno_t victim_seqno)
+{
+ WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be "
+ "caused by:\n\t"
+ "1) unsupported configuration options combination, please check documentation.\n\t"
+ "2) a bug in the code.\n\t"
+ "3) a database corruption.\n Node consistency compromized, "
+ "need to abort. Restart the node to resync with cluster.",
+ (long long)bf_seqno, (long long)victim_seqno);
+ abort();
+}
+int
+wsrep_innobase_kill_one_trx(trx_t *bf_trx, trx_t *victim_trx, ibool signal)
+{
+ DBUG_ENTER("wsrep_innobase_kill_one_trx");
+ THD *thd = (THD *) victim_trx->mysql_thd;
+ THD *bf_thd = (bf_trx) ? (THD *)bf_trx->mysql_thd : NULL;
+ int64_t bf_seqno = (bf_thd) ? wsrep_thd_trx_seqno(bf_thd) : 0;
+
+ if (!thd) {
+ DBUG_PRINT("wsrep", ("no thd for conflicting lock"));
+ WSREP_WARN("no THD for trx: %llu", victim_trx->id);
+ DBUG_RETURN(1);
+ }
+
+ WSREP_DEBUG("BF kill (%lu, seqno: %lld), victim: (%lu) trx: %llu",
+ signal, (long long)bf_seqno,
+ wsrep_thd_thread_id(thd),
+ victim_trx->id);
+
+ WSREP_DEBUG("Aborting query: %s",
+ (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void");
+
+ wsrep_thd_LOCK(thd);
+
+ if (wsrep_thd_query_state(thd) == QUERY_EXITING) {
+ WSREP_DEBUG("kill trx EXITING for %llu", victim_trx->id);
+ wsrep_thd_UNLOCK(thd);
+ DBUG_RETURN(0);
+ }
+ if(wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
+ WSREP_DEBUG("withdraw for BF trx: %llu, state: %d",
+ victim_trx->id,
+ wsrep_thd_conflict_state(thd));
+ }
+
+ switch (wsrep_thd_conflict_state(thd)) {
+ case NO_CONFLICT:
+ wsrep_thd_set_conflict_state(thd, MUST_ABORT);
+ break;
+ case MUST_ABORT:
+ WSREP_DEBUG("victim %llu in MUST ABORT state",
+ victim_trx->id);
+ wsrep_thd_UNLOCK(thd);
+ wsrep_thd_awake(thd, signal);
+ DBUG_RETURN(0);
+ break;
+ case ABORTED:
+ case ABORTING: // fall through
+ default:
+ WSREP_DEBUG("victim %llu in state %d",
+ victim_trx->id, wsrep_thd_conflict_state(thd));
+ wsrep_thd_UNLOCK(thd);
+ DBUG_RETURN(0);
+ break;
+ }
+
+ switch (wsrep_thd_query_state(thd)) {
+ case QUERY_COMMITTING:
+ enum wsrep_status rcode;
+
+ WSREP_DEBUG("kill query for: %ld",
+ wsrep_thd_thread_id(thd));
+ wsrep_thd_awake(thd, signal);
+ WSREP_DEBUG("kill trx QUERY_COMMITTING for %llu",
+ victim_trx->id);
+
+ if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
+ wsrep_abort_slave_trx(bf_seqno,
+ wsrep_thd_trx_seqno(thd));
+ } else {
+ rcode = wsrep->abort_pre_commit(
+ wsrep, bf_seqno,
+ (wsrep_trx_id_t)victim_trx->id
+ );
+
+ switch (rcode) {
+ case WSREP_WARNING:
+ WSREP_DEBUG("cancel commit warning: %llu",
+ victim_trx->id);
+ wsrep_thd_UNLOCK(thd);
+ DBUG_RETURN(1);
+ break;
+ case WSREP_OK:
+ break;
+ default:
+ WSREP_ERROR(
+ "cancel commit bad exit: %d %llu",
+ rcode,
+ victim_trx->id);
+ /* unable to interrupt, must abort */
+ /* note: kill_mysql() will block, if we cannot.
+ * kill the lock holder first.
+ */
+ abort();
+ break;
+ }
+ }
+ break;
+ case QUERY_EXEC:
+ /* it is possible that victim trx is itself waiting for some
+ * other lock. We need to cancel this waiting
+ */
+ WSREP_DEBUG("kill trx QUERY_EXEC for %llu", victim_trx->id);
+
+ victim_trx->was_chosen_as_deadlock_victim= TRUE;
+ if (victim_trx->wait_lock) {
+ WSREP_DEBUG("victim has wait flag: %ld",
+ wsrep_thd_thread_id(thd));
+ lock_t* wait_lock = victim_trx->wait_lock;
+ if (wait_lock) {
+ WSREP_DEBUG("canceling wait lock");
+ victim_trx->was_chosen_as_deadlock_victim= TRUE;
+ lock_cancel_waiting_and_release(wait_lock);
+ }
+
+ wsrep_thd_awake(thd, signal);
+ } else {
+ /* abort currently executing query */
+ DBUG_PRINT("wsrep",("sending KILL_QUERY to: %ld",
+ wsrep_thd_thread_id(thd)));
+ WSREP_DEBUG("kill query for: %ld",
+ wsrep_thd_thread_id(thd));
+ wsrep_thd_awake(thd, signal);
+
+ /* for BF thd, we need to prevent him from committing */
+ if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
+ wsrep_abort_slave_trx(bf_seqno,
+ wsrep_thd_trx_seqno(thd));
+ }
+ }
+ break;
+ case QUERY_IDLE:
+ {
+ bool skip_abort= false;
+ wsrep_aborting_thd_t abortees;
+
+ WSREP_DEBUG("kill IDLE for %llu", victim_trx->id);
+
+ if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
+ WSREP_DEBUG("kill BF IDLE, seqno: %lld",
+ (long long)wsrep_thd_trx_seqno(thd));
+ wsrep_thd_UNLOCK(thd);
+ wsrep_abort_slave_trx(bf_seqno,
+ wsrep_thd_trx_seqno(thd));
+ DBUG_RETURN(0);
+ }
+ /* This will lock thd from proceeding after net_read() */
+ wsrep_thd_set_conflict_state(thd, ABORTING);
+
+ mysql_mutex_lock(&LOCK_wsrep_rollback);
+
+ abortees = wsrep_aborting_thd;
+ while (abortees && !skip_abort) {
+ /* check if we have a kill message for this already */
+ if (abortees->aborting_thd == thd) {
+ skip_abort = true;
+ WSREP_WARN("duplicate thd aborter %lu",
+ wsrep_thd_thread_id(thd));
+ }
+ abortees = abortees->next;
+ }
+ if (!skip_abort) {
+ wsrep_aborting_thd_t aborting = (wsrep_aborting_thd_t)
+ my_malloc(sizeof(struct wsrep_aborting_thd),
+ MYF(0));
+ aborting->aborting_thd = thd;
+ aborting->next = wsrep_aborting_thd;
+ wsrep_aborting_thd = aborting;
+ DBUG_PRINT("wsrep",("enqueuing trx abort for %lu",
+ wsrep_thd_thread_id(thd)));
+ WSREP_DEBUG("enqueuing trx abort for (%lu)",
+ wsrep_thd_thread_id(thd));
+ }
+
+ DBUG_PRINT("wsrep",("signalling wsrep rollbacker"));
+ WSREP_DEBUG("signaling aborter");
+ mysql_cond_signal(&COND_wsrep_rollback);
+ mysql_mutex_unlock(&LOCK_wsrep_rollback);
+
+ break;
+ }
+ default:
+ WSREP_WARN("bad wsrep query state: %d",
+ wsrep_thd_query_state(thd));
+ break;
+ }
+ wsrep_thd_UNLOCK(thd);
+
+ DBUG_RETURN(0);
+}
+static int
+wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd,
+ my_bool signal)
+{
+ DBUG_ENTER("wsrep_innobase_abort_thd");
+ trx_t* victim_trx = thd_to_trx(victim_thd);
+ trx_t* bf_trx = (bf_thd) ? thd_to_trx(bf_thd) : NULL;
+ WSREP_DEBUG("abort transaction: BF: %s victim: %s",
+ wsrep_thd_query(bf_thd),
+ wsrep_thd_query(victim_thd));
+
+ if (victim_trx)
+ {
+ mutex_enter(&kernel_mutex);
+ int rcode = wsrep_innobase_kill_one_trx(bf_trx, victim_trx,
+ signal);
+ mutex_exit(&kernel_mutex);
+ DBUG_RETURN(rcode);
+ } else {
+ WSREP_DEBUG("victim does not have transaction");
+ wsrep_thd_LOCK(victim_thd);
+ wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT);
+ wsrep_thd_UNLOCK(victim_thd);
+ wsrep_thd_awake(victim_thd, signal);
+ }
+ DBUG_RETURN(-1);
+}
+
+static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid)
+{
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+ if (wsrep_is_wsrep_xid(xid)) {
+ mtr_t mtr;
+ mtr_start(&mtr);
+ trx_sys_update_wsrep_checkpoint(xid, &mtr);
+ mtr_commit(&mtr);
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid)
+{
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+ trx_sys_read_wsrep_checkpoint(xid);
+ return 0;
+}
+
+#endif /* WITH_WSREP */
/* plugin options */
static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
@@ -12976,6 +14087,40 @@ static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
NULL, NULL, 0, 0, 1, 0);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+#ifdef WITH_INNODB_DISALLOW_WRITES
+/*******************************************************
+ * innobase_disallow_writes variable definition *
+ *******************************************************/
+
+/* Must always init to FALSE. */
+static my_bool innobase_disallow_writes = FALSE;
+
+/**************************************************************************
+An "update" method for innobase_disallow_writes variable. */
+static
+void
+innobase_disallow_writes_update(
+/*============================*/
+ THD* thd, /* in: thread handle */
+ st_mysql_sys_var* var, /* in: pointer to system
+ variable */
+ void* var_ptr, /* out: pointer to dynamic
+ variable */
+ const void* save) /* in: temporary storage */
+{
+ *(my_bool*)var_ptr = *(my_bool*)save;
+ ut_a(srv_allow_writes_event);
+ if (*(my_bool*)var_ptr)
+ os_event_reset(srv_allow_writes_event);
+ else
+ os_event_set(srv_allow_writes_event);
+}
+
+static MYSQL_SYSVAR_BOOL(disallow_writes, innobase_disallow_writes,
+ PLUGIN_VAR_NOCMDOPT,
+ "Tell InnoDB to stop any writes to disk",
+ NULL, innobase_disallow_writes_update, FALSE);
+#endif /* WITH_INNODB_DISALLOW_WRITES */
static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
PLUGIN_VAR_NOCMDARG,
"Whether to use read ahead for random access within an extent.",
@@ -13259,6 +14404,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
MYSQL_SYSVAR(change_buffering_debug),
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+#ifdef WITH_INNODB_DISALLOW_WRITES
+ MYSQL_SYSVAR(disallow_writes),
+#endif /* WITH_INNODB_DISALLOW_WRITES */
MYSQL_SYSVAR(random_read_ahead),
MYSQL_SYSVAR(read_ahead_threshold),
MYSQL_SYSVAR(io_capacity),
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
index 933d75cf0d2..a21639eda78 100644
--- a/storage/xtradb/handler/ha_innodb.h
+++ b/storage/xtradb/handler/ha_innodb.h
@@ -113,6 +113,10 @@ class ha_innobase: public handler
dict_index_t* innobase_get_index(uint keynr);
int info_low(uint flag, bool called_from_analyze);
+#ifdef WITH_WSREP
+ int wsrep_append_keys(THD *thd, bool shared,
+ const uchar* record0, const uchar* record1);
+#endif
/* Init values for the class: */
public:
ha_innobase(handlerton *hton, TABLE_SHARE *table_arg);
@@ -370,6 +374,37 @@ bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd);
*/
extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
+#ifdef WITH_WSREP
+#include <wsrep_mysqld.h>
+//extern "C" int wsrep_trx_order_before(void *thd1, void *thd2);
+
+extern "C" bool wsrep_thd_is_wsrep_on(THD *thd);
+
+extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd);
+extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd);
+extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd);
+extern "C" wsrep_trx_handle_t* wsrep_thd_trx_handle(THD *thd);
+
+extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode);
+extern "C" void wsrep_thd_set_query_state(
+ THD *thd, enum wsrep_query_state state);
+extern "C" void wsrep_thd_set_conflict_state(
+ THD *thd, enum wsrep_conflict_state state);
+
+extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id);
+
+extern "C"void wsrep_thd_LOCK(THD *thd);
+extern "C"void wsrep_thd_UNLOCK(THD *thd);
+extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd);
+extern "C" time_t wsrep_thd_query_start(THD *thd);
+extern "C" my_thread_id wsrep_thd_thread_id(THD *thd);
+extern "C" int64_t wsrep_thd_trx_seqno(THD *thd);
+extern "C" query_id_t wsrep_thd_query_id(THD *thd);
+extern "C" char * wsrep_thd_query(THD *thd);
+extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd);
+extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id);
+extern "C" void wsrep_thd_awake(THD *thd, my_bool signal);
+#endif
typedef struct trx_struct trx_t;
/********************************************************************//**
@file handler/ha_innodb.h
@@ -410,3 +445,6 @@ innobase_index_name_is_reserved(
ulint num_of_keys); /*!< in: Number of indexes to
be created. */
+#ifdef WITH_WSREP
+extern "C" int wsrep_trx_is_aborting(void *thd_ptr);
+#endif
diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc
index 0496cb98080..823f1c56313 100644
--- a/storage/xtradb/handler/handler0alter.cc
+++ b/storage/xtradb/handler/handler0alter.cc
@@ -37,6 +37,10 @@ extern "C" {
#include "handler0alter.h"
}
+#ifdef WITH_WSREP
+//#include "wsrep_api.h"
+#include <sql_acl.h> // PROCESS_ACL
+#endif
#include "ha_innodb.h"
/*************************************************************//**
diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h
index 54593a0b9c7..50c6e1ee142 100644
--- a/storage/xtradb/include/dict0mem.h
+++ b/storage/xtradb/include/dict0mem.h
@@ -343,6 +343,9 @@ barracuda format, the length could be REC_VERSION_56_MAX_INDEX_COL_LEN
/** Defines the maximum fixed length column size */
#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN
+#ifdef WITH_WSREP
+#define WSREP_MAX_SUPPORTED_KEY_LENGTH 3500
+#endif /* WITH_WSREP */
/** Data structure for a field in an index */
struct dict_field_struct{
diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h
index 51e331c3b5d..075b4ff2617 100644
--- a/storage/xtradb/include/ha_prototypes.h
+++ b/storage/xtradb/include/ha_prototypes.h
@@ -300,6 +300,19 @@ thd_flush_log_at_trx_commit(
/*================================*/
void* thd);
+#ifdef WITH_WSREP
+UNIV_INTERN
+int
+wsrep_innobase_kill_one_trx(trx_t *bf_trx, trx_t *victim_trx, ibool signal);
+int wsrep_thd_is_brute_force(void *thd_ptr);
+int wsrep_trx_order_before(void *thd1, void *thd2);
+void wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
+ unsigned char* str, unsigned int str_length);
+//UNIV_INTERN
+int
+wsrep_on(void *thd_ptr);
+int wsrep_is_wsrep_xid(const void*);
+#endif /* WITH_WSREP */
/**********************************************************************//**
Get the current setting of the lower_case_table_names global parameter from
mysqld.cc. We do a dirty read because for one there is no synchronization
diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h
index ea636f985b4..f7c3d82e495 100644
--- a/storage/xtradb/include/lock0lock.h
+++ b/storage/xtradb/include/lock0lock.h
@@ -798,6 +798,7 @@ lock_rec_get_page_no(
remains set when the waiting lock is granted,
or if the lock is inherited to a neighboring
record */
+#define WSREP_BF 4096
#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
# error
#endif
diff --git a/storage/xtradb/include/rem0rec.h b/storage/xtradb/include/rem0rec.h
index 10b74d18c13..30ef3048ff1 100644
--- a/storage/xtradb/include/rem0rec.h
+++ b/storage/xtradb/include/rem0rec.h
@@ -832,6 +832,14 @@ are given in one byte (resp. two byte) format. */
two upmost bits in a two byte offset for special purposes */
#define REC_MAX_DATA_SIZE (16 * 1024)
+#ifdef WITH_WSREP
+int wsrep_rec_get_primary_key(
+ byte *buf, /* out: extracted key */
+ ulint *buf_len, /* in/out: length of buf */
+ const rec_t* rec, /* in: physical record */
+ dict_index_t* index, /* in: record descriptor */
+ ibool new_protocol); /* in: protocol > 1 */
+#endif /* WITH_WSREP */
#ifndef UNIV_NONINL
#include "rem0rec.ic"
#endif
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index a95eb8a1d58..b99585ede77 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -165,6 +165,10 @@ extern ulint srv_log_buffer_size;
extern char srv_use_global_flush_log_at_trx_commit;
extern char srv_adaptive_flushing;
+#ifdef WITH_INNODB_DISALLOW_WRITES
+/* When this event is reset we do not allow any file writes to take place. */
+extern os_event_t srv_allow_writes_event;
+#endif /* WITH_INNODB_DISALLOW_WRITES */
/* If this flag is TRUE, then we will load the indexes' (and tables') metadata
even if they are marked as "corrupted". Mostly it is for DBA to process
corrupted index and table */
diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h
index cba21ae97a9..7ae3ba74478 100644
--- a/storage/xtradb/include/trx0sys.h
+++ b/storage/xtradb/include/trx0sys.h
@@ -41,6 +41,9 @@ Created 3/26/1996 Heikki Tuuri
#include "ut0bh.h"
#include "read0types.h"
#include "page0types.h"
+#ifdef WITH_WSREP
+#include "trx0xa.h"
+#endif /* WITH_WSREP */
/** In a MySQL replication slave, in crash recovery we store the master log
file name and position here. */
@@ -341,6 +344,17 @@ UNIV_INTERN
void
trx_sys_print_mysql_binlog_offset(void);
/*===================================*/
+#ifdef WITH_WSREP
+/** Update WSREP checkpoint XID in sys header. */
+void
+trx_sys_update_wsrep_checkpoint(
+ const XID* xid, /*!< in: WSREP XID */
+ mtr_t* mtr); /*!< in: mtr */
+void
+/** Read WSREP checkpoint XID from sys header. */
+trx_sys_read_wsrep_checkpoint(
+ XID* xid); /*!< out: WSREP XID */
+#endif /* WITH_WSREP */
/*****************************************************************//**
Prints to stderr the MySQL master log offset info in the trx system header
COMMIT set of fields if the magic number shows it valid and stores it
@@ -564,6 +578,22 @@ crash recovery rollbacks a PREPAREd transaction, they are copied back. */
within that file */
#define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */
+#ifdef WITH_WSREP
+/* We hijack TRX_SYS_MYSQL_MASTER_LOG_INFO, it seems to be completely unused
+ otherwise (see comments for MySQL bug #34058). */
+/** */
+#define TRX_SYS_WSREP_XID_INFO TRX_SYS_MYSQL_MASTER_LOG_INFO
+#define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0
+#define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265
+
+/* XID field: formatID, gtrid_len, bqual_len, xid_data */
+#define TRX_SYS_WSREP_XID_LEN (4 + 4 + 4 + XIDDATASIZE)
+#define TRX_SYS_WSREP_XID_FORMAT 4
+#define TRX_SYS_WSREP_XID_GTRID_LEN 8
+#define TRX_SYS_WSREP_XID_BQUAL_LEN 12
+#define TRX_SYS_WSREP_XID_DATA 16
+#endif /* WITH_WSREP*/
+
/** Doublewrite buffer */
/* @{ */
/** The offset of the doublewrite buffer header on the trx system header page */
diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c
index 414d3ae2c49..eadd52f462a 100644
--- a/storage/xtradb/lock/lock0lock.c
+++ b/storage/xtradb/lock/lock0lock.c
@@ -40,6 +40,9 @@ Created 5/7/1996 Heikki Tuuri
#include "trx0sys.h"
#include "btr0btr.h"
+#ifdef WITH_WSREP
+extern my_bool wsrep_debug;
+#endif
/* Restricts the length of search we will do in the waits-for
graph of transactions */
#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000
@@ -922,6 +925,11 @@ lock_rec_has_to_wait(
if (trx != lock2->trx
&& !lock_mode_compatible(LOCK_MODE_MASK & type_mode,
lock_get_mode(lock2))) {
+#ifdef WITH_WSREP
+ if ((type_mode & WSREP_BF) && (lock2->type_mode & WSREP_BF)) {
+ return FALSE;
+ }
+#endif /* WITH_WSREP */
/* We have somewhat complex rules when gap type record locks
cause waits */
@@ -1451,6 +1459,11 @@ lock_rec_has_expl(
return(NULL);
}
+#ifdef WITH_WSREP
+static
+void
+lock_rec_discard(lock_t* in_lock);
+#endif
#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks if some other transaction has a lock request in the queue.
@@ -1500,6 +1513,27 @@ lock_rec_other_has_expl_req(
}
#endif /* UNIV_DEBUG */
+#ifdef WITH_WSREP
+static void
+wsrep_kill_victim(trx_t *trx, lock_t *lock) {
+ int bf_this = wsrep_thd_is_brute_force(trx->mysql_thd);
+ int bf_other =
+ wsrep_thd_is_brute_force(lock->trx->mysql_thd);
+ if ((bf_this && !bf_other) ||
+ (bf_this && bf_other && wsrep_trx_order_before(
+ trx->mysql_thd, lock->trx->mysql_thd))) {
+
+ if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
+ if (wsrep_debug)
+ fprintf(stderr, "WSREP: BF victim waiting\n");
+ /* cannot release lock, until our lock
+ is in the queue*/
+ } else if (lock->trx != trx) {
+ wsrep_innobase_kill_one_trx(trx, lock->trx, TRUE);
+ }
+ }
+}
+#endif
/*********************************************************************//**
Checks if some other transaction has a conflicting explicit lock request
in the queue, so that we have to wait.
@@ -1529,6 +1563,9 @@ lock_rec_other_has_conflicting(
do {
if (lock_rec_has_to_wait(trx, mode, lock,
TRUE)) {
+#ifdef WITH_WSREP
+ wsrep_kill_victim(trx, lock);
+#endif
return(lock);
}
@@ -1539,6 +1576,9 @@ lock_rec_other_has_conflicting(
do {
if (lock_rec_has_to_wait(trx, mode, lock,
FALSE)) {
+#ifdef WITH_WSREP
+ wsrep_kill_victim(trx, lock);
+#endif
return(lock);
}
@@ -1670,6 +1710,9 @@ static
lock_t*
lock_rec_create(
/*============*/
+#ifdef WITH_WSREP
+ lock_t* c_lock, /* conflicting lock */
+#endif
ulint type_mode,/*!< in: lock mode and wait
flag, type is ignored and
replaced by LOCK_REC */
@@ -1715,6 +1758,11 @@ lock_rec_create(
lock->trx = trx;
lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;
+#ifdef WITH_WSREP
+ if (wsrep_thd_is_brute_force(trx->mysql_thd)) {
+ lock->type_mode |= WSREP_BF;
+ }
+#endif /* WITH_WSREP */
lock->index = index;
lock->un_member.rec_lock.space = space;
@@ -1729,8 +1777,56 @@ lock_rec_create(
/* Set the bit corresponding to rec */
lock_rec_set_nth_bit(lock, heap_no);
+#ifdef WITH_WSREP
+ if (c_lock && wsrep_thd_is_brute_force(trx->mysql_thd)) {
+ lock_t *hash = c_lock->hash;
+ lock_t *prev = NULL;
+
+ while (hash &&
+ wsrep_thd_is_brute_force(hash->trx->mysql_thd) &&
+ wsrep_trx_order_before(hash->trx->mysql_thd, trx->mysql_thd)){
+ prev = hash;
+ hash = hash->hash;
+ }
+ lock->hash = hash;
+ if (prev) {
+ prev->hash = lock;
+ } else {
+ c_lock->hash = lock;
+ }
+ /*
+ * delayed conflict resolution '...kill_one_trx' was not called,
+ * if victim was waiting for some other lock
+ */
+ if (c_lock && c_lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
+ c_lock->trx->was_chosen_as_deadlock_victim = TRUE;
+ trx->que_state = TRX_QUE_LOCK_WAIT;
+ lock_set_lock_and_trx_wait(lock, trx);
+
+ lock_cancel_waiting_and_release(c_lock->trx->wait_lock);
+
+ /* trx might not wait for c_lock, but some other lock */
+ if (wsrep_debug && c_lock->trx->wait_lock != c_lock) {
+ fprintf(stderr, "WSREP: c_lock != wait lock\n");
+ }
+ if (c_lock->trx->wait_lock == c_lock) {
+ lock_reset_lock_and_trx_wait(lock);
+ }
+
+ if (wsrep_debug)
+ fprintf(stderr, "WSREP: c_lock canceled %llu\n",
+ (ulonglong) c_lock->trx->id);
+ /* have to bail out here to avoid lock_set_lock... */
+ return(lock);
+ }
+ } else {
+ HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
+ lock_rec_fold(space, page_no), lock);
+ }
+#else
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
+#endif
lock_sys->rec_num++;
if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
@@ -1751,6 +1847,9 @@ static
enum db_err
lock_rec_enqueue_waiting(
/*=====================*/
+#ifdef WITH_WSREP
+ lock_t* c_lock, /* conflicting lock */
+#endif
ulint type_mode,/*!< in: lock mode this
transaction is requesting:
LOCK_S or LOCK_X, possibly
@@ -1804,8 +1903,16 @@ lock_rec_enqueue_waiting(
}
/* Enqueue the lock request that will wait to be granted */
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) && trx->was_chosen_as_deadlock_victim) {
+ return(DB_DEADLOCK);
+ }
+ lock = lock_rec_create(c_lock, type_mode | LOCK_WAIT,
+ block, heap_no, index, trx);
+#else
lock = lock_rec_create(type_mode | LOCK_WAIT,
block, heap_no, index, trx);
+#endif
/* Check if a deadlock occurs: if yes, remove the lock request and
return an error code */
@@ -1887,7 +1994,19 @@ lock_rec_add_to_queue(
lock_t* other_lock
= lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT,
block, heap_no, trx);
+#ifdef WITH_WSREP
+ /* this can potentionally assert with wsrep */
+ if (wsrep_on(trx->mysql_thd)) {
+ if (wsrep_debug && other_lock) {
+ fprintf(stderr,
+ "WSREP: InnoDB assert ignored\n");
+ }
+ } else {
+ ut_a(!other_lock);
+ }
+#else
ut_a(!other_lock);
+#endif /* WITH_WSREP */
}
#endif /* UNIV_DEBUG */
@@ -1940,7 +2059,11 @@ lock_rec_add_to_queue(
}
somebody_waits:
+#ifdef WITH_WSREP
+ return(lock_rec_create(NULL, type_mode, block, heap_no, index, trx));
+#else
return(lock_rec_create(type_mode, block, heap_no, index, trx));
+#endif
}
/** Record locking request status */
@@ -1990,6 +2113,11 @@ lock_rec_lock_fast(
|| (LOCK_MODE_MASK & mode) == LOCK_X);
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0
+#ifdef WITH_WSREP
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == 0
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_GAP
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_REC_NOT_GAP
+#endif /* WITH_WSREP */
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
lock = lock_rec_get_first_on_page(block);
@@ -1998,7 +2126,11 @@ lock_rec_lock_fast(
if (lock == NULL) {
if (!impl) {
+#ifdef WITH_WSREP
+ lock_rec_create(NULL, mode, block, heap_no, index, trx);
+#else
lock_rec_create(mode, block, heap_no, index, trx);
+#endif
}
return(LOCK_REC_SUCCESS_CREATED);
@@ -2054,6 +2186,9 @@ lock_rec_lock_slow(
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
+#ifdef WITH_WSREP
+ lock_t *c_lock;
+#endif
ut_ad(mutex_own(&kernel_mutex));
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
@@ -2064,6 +2199,11 @@ lock_rec_lock_slow(
|| (LOCK_MODE_MASK & mode) == LOCK_X);
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0
+#ifdef WITH_WSREP
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == 0
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_GAP
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_REC_NOT_GAP
+#endif /* WITH_WSREP */
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
trx = thr_get_trx(thr);
@@ -2072,14 +2212,24 @@ lock_rec_lock_slow(
/* The trx already has a strong enough lock on rec: do
nothing */
+#ifdef WITH_WSREP
+ } else if ((c_lock = lock_rec_other_has_conflicting(
+ mode, block, heap_no, trx))) {
+#else
} else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) {
+#endif
/* If another transaction has a non-gap conflicting request in
the queue, as this transaction does not have a lock strong
enough already granted on the record, we have to wait. */
+#ifdef WITH_WSREP
+ return(lock_rec_enqueue_waiting(c_lock,mode, block, heap_no,
+ index, thr));
+#else
return(lock_rec_enqueue_waiting(mode, block, heap_no,
index, thr));
+#endif
} else if (!impl) {
/* Set the requested lock on the record */
@@ -2125,8 +2275,17 @@ lock_rec_lock(
|| (LOCK_MODE_MASK & mode) == LOCK_X);
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
+#ifdef WITH_WSREP
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == 0
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_GAP
+ || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_REC_NOT_GAP
+#endif /* WITH_WSREP */
|| mode - (LOCK_MODE_MASK & mode) == 0);
-
+#ifdef WITH_WSREP
+ if (wsrep_thd_is_brute_force(thr_get_trx(thr)->mysql_thd)) {
+ mode |= WSREP_BF;
+ }
+#endif
/* We try a simplified and faster subroutine for the most
common cases */
switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
@@ -3498,6 +3657,34 @@ lock_deadlock_recursive(
stderr);
}
#endif /* UNIV_DEBUG */
+#ifdef WITH_WSREP
+ if (wsrep_debug)
+ fputs("WSREP: Deadlock detected\n", ef);
+ if (wsrep_thd_is_brute_force(start->mysql_thd) &&
+ wsrep_thd_is_brute_force(
+ wait_lock->trx->mysql_thd) &&
+ (start != wait_lock->trx)) {
+
+ if (wsrep_trx_order_before(
+ start->mysql_thd,
+ wait_lock->trx->mysql_thd)) {
+
+ wait_lock->trx->was_chosen_as_deadlock_victim = TRUE;
+ lock_cancel_waiting_and_release(wait_lock);
+ return(LOCK_VICTIM_IS_OTHER);
+ } else {
+ return(LOCK_VICTIM_IS_START);
+ }
+ }
+#endif
+ if (too_far) {
+
+ fputs("TOO DEEP OR LONG SEARCH"
+ " IN THE LOCK TABLE"
+ " WAITS-FOR GRAPH\n", ef);
+
+ return(LOCK_VICTIM_IS_START);
+ }
if (trx_weight_ge(wait_lock->trx, start)) {
/* Our recursion starting point
@@ -3505,8 +3692,21 @@ lock_deadlock_recursive(
choose 'start' as the victim and roll
back it */
+#ifdef WITH_WSREP
+ if (!wsrep_thd_is_brute_force(
+ start->mysql_thd)) {
+ return(LOCK_VICTIM_IS_START);
+ }
+#else
return(LOCK_VICTIM_IS_START);
+#endif
}
+#ifdef WITH_WSREP
+ if (wsrep_thd_is_brute_force(
+ wait_lock->trx->mysql_thd)) {
+ return(LOCK_VICTIM_IS_START);
+ }
+#endif
lock_deadlock_found = TRUE;
@@ -3591,6 +3791,9 @@ UNIV_INLINE
lock_t*
lock_table_create(
/*==============*/
+#ifdef WITH_WSREP
+ lock_t* c_lock, /* conflicting lock */
+#endif
dict_table_t* table, /*!< in: database table in dictionary cache */
ulint type_mode,/*!< in: lock mode possibly ORed with
LOCK_WAIT */
@@ -3626,7 +3829,25 @@ lock_table_create(
lock->un_member.tab_lock.table = table;
+#ifdef WITH_WSREP
+ if (c_lock && wsrep_thd_is_brute_force(trx->mysql_thd)) {
+ UT_LIST_INSERT_AFTER(
+ un_member.tab_lock.locks, table->locks, c_lock, lock);
+ } else {
+ UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
+ }
+
+ if (c_lock && c_lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
+ if (wsrep_debug)
+ fprintf(stderr, "WSREP: table c_lock in wait: %llu\n",
+ (ulonglong) lock->trx->id);
+ c_lock->trx->was_chosen_as_deadlock_victim = TRUE;
+ lock_cancel_waiting_and_release(c_lock);
+ }
+
+#else
UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
+#endif
if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
@@ -3772,6 +3993,9 @@ static
ulint
lock_table_enqueue_waiting(
/*=======================*/
+#ifdef WITH_WSREP
+ lock_t* c_lock, /* conflicting lock */
+#endif
ulint mode, /*!< in: lock mode this transaction is
requesting */
dict_table_t* table, /*!< in: table */
@@ -3815,7 +4039,14 @@ lock_table_enqueue_waiting(
/* Enqueue the lock request that will wait to be granted */
+#ifdef WITH_WSREP
+ if (trx->was_chosen_as_deadlock_victim) {
+ return(DB_DEADLOCK);
+ }
+ lock = lock_table_create(c_lock, table, mode | LOCK_WAIT, trx);
+#else
lock = lock_table_create(table, mode | LOCK_WAIT, trx);
+#endif
/* Check if a deadlock occurs: if yes, remove the lock request and
return an error code */
@@ -3878,7 +4109,32 @@ lock_table_other_has_incompatible(
&& (!lock_mode_compatible(lock_get_mode(lock), mode))
&& (wait || !(lock_get_wait(lock)))) {
+#ifdef WITH_WSREP
+ int bf_this = wsrep_thd_is_brute_force(trx->mysql_thd);
+ int bf_other = wsrep_thd_is_brute_force(
+ lock->trx->mysql_thd);
+ if ((bf_this && !bf_other) ||
+ (bf_this && bf_other &&
+ wsrep_trx_order_before(
+ trx->mysql_thd, lock->trx->mysql_thd)
+ )
+ ) {
+ if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
+ if (wsrep_debug) fprintf(stderr,
+ "WSREP: BF victim waiting");
+ return(lock);
+ } else {
+ if (bf_this && bf_other)
+ wsrep_innobase_kill_one_trx(
+ (trx_t *)trx, lock->trx, TRUE);
+ return(lock);
+ }
+ } else {
+ return(lock);
+ }
+#else
return(lock);
+#endif
}
lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
@@ -3901,6 +4157,9 @@ lock_table(
enum lock_mode mode, /*!< in: lock mode */
que_thr_t* thr) /*!< in: query thread */
{
+#ifdef WITH_WSREP
+ lock_t *c_lock;
+#endif
trx_t* trx;
ulint err;
@@ -3933,19 +4192,32 @@ lock_table(
/* We have to check if the new lock is compatible with any locks
other transactions have in the table lock queue. */
+#ifdef WITH_WSREP
+ if ((c_lock = (lock_t *)lock_table_other_has_incompatible(
+ trx, LOCK_WAIT, table, mode))) {
+#else
if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) {
+#endif
/* Another trx has a request on the table in an incompatible
mode: this trx may have to wait */
+#ifdef WITH_WSREP
+ err = lock_table_enqueue_waiting(c_lock, mode | flags, table, thr);
+#else
err = lock_table_enqueue_waiting(mode | flags, table, thr);
+#endif
lock_mutex_exit_kernel();
return(err);
}
+#ifdef WITH_WSREP
+ lock_table_create(c_lock, table, mode | flags, trx);
+#else
lock_table_create(table, mode | flags, trx);
+#endif
ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
@@ -4863,6 +5135,7 @@ lock_rec_queue_validate(
if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
+#ifndef WITH_WSREP
enum lock_mode mode;
if (lock_get_mode(lock) == LOCK_S) {
@@ -4872,6 +5145,7 @@ lock_rec_queue_validate(
}
ut_a(!lock_rec_other_has_expl_req(
mode, 0, 0, block, heap_no, lock->trx));
+#endif /* WITH_WSREP */
} else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
@@ -5146,6 +5420,9 @@ lock_rec_insert_check_and_lock(
lock_t* lock;
ulint err;
ulint next_rec_heap_no;
+#ifdef WITH_WSREP
+ lock_t *c_lock;
+#endif
ut_ad(block->frame == page_align(rec));
@@ -5203,15 +5480,28 @@ lock_rec_insert_check_and_lock(
had to wait for their insert. Both had waiting gap type lock requests
on the successor, which produced an unnecessary deadlock. */
+#ifdef WITH_WSREP
+ if ((c_lock = lock_rec_other_has_conflicting(
+ LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION | WSREP_BF,
+ block, next_rec_heap_no, trx))) {
+#else
if (lock_rec_other_has_conflicting(
LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
block, next_rec_heap_no, trx)) {
+#endif
/* Note that we may get DB_SUCCESS also here! */
+#ifdef WITH_WSREP
+ err = lock_rec_enqueue_waiting(c_lock, LOCK_X | LOCK_GAP
+ | LOCK_INSERT_INTENTION,
+ block, next_rec_heap_no,
+ index, thr);
+#else
err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
| LOCK_INSERT_INTENTION,
block, next_rec_heap_no,
index, thr);
+#endif
} else {
err = DB_SUCCESS;
}
diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c
index 2555c010027..af50cfa1a24 100644
--- a/storage/xtradb/os/os0file.c
+++ b/storage/xtradb/os/os0file.c
@@ -96,6 +96,12 @@ UNIV_INTERN os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
/* In simulated aio, merge at most this many consecutive i/os */
#define OS_AIO_MERGE_N_CONSECUTIVE 64
+#ifdef WITH_INNODB_DISALLOW_WRITES
+#define WAIT_ALLOW_WRITES() os_event_wait(srv_allow_writes_event)
+#else
+#define WAIT_ALLOW_WRITES() do { } while (0)
+#endif /* WITH_INNODB_DISALLOW_WRITES */
+
/**********************************************************************
InnoDB AIO Implementation:
@@ -819,7 +825,9 @@ os_file_create_tmpfile(void)
/*========================*/
{
FILE* file = NULL;
- int fd = innobase_mysql_tmpfile();
+ int fd;
+ WAIT_ALLOW_WRITES();
+ fd = innobase_mysql_tmpfile();
if (fd >= 0) {
file = fdopen(fd, "w+b");
@@ -1138,6 +1146,7 @@ os_file_create_directory(
return (TRUE);
#else
int rcode;
+ WAIT_ALLOW_WRITES();
rcode = mkdir(pathname, 0770);
@@ -1239,6 +1248,8 @@ try_again:
os_file_t file;
int create_flag;
ibool retry;
+ if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW)
+ WAIT_ALLOW_WRITES();
try_again:
ut_a(name);
@@ -1371,6 +1382,8 @@ os_file_create_simple_no_error_handling_func(
int create_flag;
ut_a(name);
+ if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW)
+ WAIT_ALLOW_WRITES();
if (create_mode == OS_FILE_OPEN) {
if (access_type == OS_FILE_READ_ONLY) {
@@ -1617,6 +1630,8 @@ try_again:
int create_flag;
ibool retry;
const char* mode_str = NULL;
+ if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW)
+ WAIT_ALLOW_WRITES();
try_again:
ut_a(name);
@@ -1778,6 +1793,7 @@ loop:
goto loop;
#else
int ret;
+ WAIT_ALLOW_WRITES();
ret = unlink(name);
@@ -1841,6 +1857,7 @@ loop:
goto loop;
#else
int ret;
+ WAIT_ALLOW_WRITES();
ret = unlink(name);
@@ -1881,6 +1898,7 @@ os_file_rename_func(
return(FALSE);
#else
int ret;
+ WAIT_ALLOW_WRITES();
ret = rename(oldpath, newpath);
@@ -2144,6 +2162,7 @@ os_file_set_eof(
HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
return(SetEndOfFile(h));
#else /* __WIN__ */
+ WAIT_ALLOW_WRITES();
return(!ftruncate(fileno(file), ftell(file)));
#endif /* __WIN__ */
}
@@ -2271,6 +2290,7 @@ os_file_flush_func(
return(FALSE);
#else
int ret;
+ WAIT_ALLOW_WRITES();
#if defined(HAVE_DARWIN_THREADS)
# ifndef F_FULLFSYNC
@@ -2946,6 +2966,7 @@ retry:
return(FALSE);
#else
ssize_t ret;
+ WAIT_ALLOW_WRITES();
ret = os_file_pwrite(file, buf, n, offset, offset_high);
diff --git a/storage/xtradb/rem/rem0rec.c b/storage/xtradb/rem/rem0rec.c
index 30fc28561fa..0cee1ffb3c8 100644
--- a/storage/xtradb/rem/rem0rec.c
+++ b/storage/xtradb/rem/rem0rec.c
@@ -31,6 +31,9 @@ Created 5/30/1994 Heikki Tuuri
#include "mtr0mtr.h"
#include "mtr0log.h"
+#ifdef WITH_WSREP
+#include <ha_prototypes.h>
+#endif /* WITH_WSREP */
/* PHYSICAL RECORD (OLD STYLE)
===========================
@@ -1772,3 +1775,123 @@ rec_print(
}
}
#endif /* !UNIV_HOTBACKUP */
+#ifdef WITH_WSREP
+int
+wsrep_rec_get_primary_key(
+ byte *buf, /* out: extracted key */
+ ulint *buf_len, /* in/out: length of buf */
+ const rec_t* rec, /* in: physical record */
+ dict_index_t* index, /* in: record descriptor */
+ ibool new_protocol) /* in: protocol > 1 */
+{
+ const byte* data;
+ ulint len;
+ ulint key_len = 0;
+ ulint i;
+ uint key_parts;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ const ulint* offsets;
+
+ ut_ad(index);
+
+ rec_offs_init(offsets_);
+ offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap);
+
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+ ut_ad(rec);
+
+ key_parts = dict_index_get_n_unique_in_tree(index);
+ for (i = 0;
+ i < key_parts && (index->type & DICT_CLUSTERED || i < key_parts - 1);
+ i++) {
+
+ dict_field_t* field = dict_index_get_nth_field(index, i);
+ const dict_col_t* col = dict_field_get_col(field);
+
+ data = rec_get_nth_field(rec, offsets, i, &len);
+ if (key_len + len > ((col->prtype & DATA_NOT_NULL) ?
+ *buf_len : *buf_len - 1)) {
+ fprintf (stderr,
+ "WSREP: FK key len exceeded %lu %lu %lu\n",
+ key_len, len, *buf_len);
+ goto err_out;
+ }
+
+ if (len == UNIV_SQL_NULL) {
+ ut_a(!(col->prtype & DATA_NOT_NULL));
+ *buf++ = 1;
+ key_len++;
+ } else if (!new_protocol) {
+ if (!(col->prtype & DATA_NOT_NULL)) {
+ *buf++ = 0;
+ key_len++;
+ }
+ memcpy(buf, data, len);
+ wsrep_innobase_mysql_sort(
+ (int)(col->prtype & DATA_MYSQL_TYPE_MASK),
+ (uint)dtype_get_charset_coll(col->prtype),
+ buf, len);
+ } else { /* new protocol */
+ if (!(col->prtype & DATA_NOT_NULL)) {
+ *buf++ = 0;
+ key_len++;
+ }
+ switch (col->mtype) {
+ case DATA_INT: {
+ byte* ptr = buf+len;
+ for (;;) {
+ ptr--;
+ *ptr = *data;
+ if (ptr == buf) {
+ break;
+ }
+ data++;
+ }
+
+ if (!(col->prtype & DATA_UNSIGNED)) {
+ buf[len-1] = (byte) (buf[len-1] ^ 128);
+ }
+
+ break;
+ }
+ case DATA_VARCHAR:
+ case DATA_VARMYSQL:
+ case DATA_BINARY:
+ /* Copy the actual data */
+ ut_memcpy(buf, data, len);
+ wsrep_innobase_mysql_sort(
+ (int)(col->prtype & DATA_MYSQL_TYPE_MASK),
+ (uint)dtype_get_charset_coll(col->prtype),
+ buf, len);
+ break;
+ case DATA_BLOB:
+ case DATA_MYSQL:
+ memcpy(buf, data, len);
+ break;
+ default:
+ break;
+ }
+
+ key_len += len;
+ buf += len;
+ }
+ }
+
+ rec_validate(rec, offsets);
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ *buf_len = key_len;
+ return DB_SUCCESS;
+
+ err_out:
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ return DB_ERROR;
+}
+#endif // WITH_WSREP
diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c
index 3ae4c227ddc..c648722870c 100644
--- a/storage/xtradb/row/row0ins.c
+++ b/storage/xtradb/row/row0ins.c
@@ -756,6 +756,14 @@ row_ins_invalidate_query_cache(
innobase_invalidate_query_cache(thr_get_trx(thr), buf, len);
mem_free(buf);
}
+#ifdef WITH_WSREP
+ulint wsrep_append_foreign_key(trx_t *trx,
+ dict_foreign_t* foreign,
+ const rec_t* clust_rec,
+ dict_index_t* clust_index,
+ ibool referenced,
+ ibool shared);
+#endif /* WITH_WSREP */
/*********************************************************************//**
Perform referential actions or checks when a parent row is deleted or updated
@@ -1072,6 +1080,16 @@ row_ins_foreign_check_on_constraint(
err = row_update_cascade_for_mysql(thr, cascade,
foreign->foreign_table);
+#ifdef WITH_WSREP
+ if (err == DB_SUCCESS) {
+ err = wsrep_append_foreign_key(
+ thr_get_trx(thr),
+ foreign,
+ clust_rec,
+ clust_index,
+ FALSE, FALSE);
+ }
+#endif /* WITH_WSREP */
if (foreign->foreign_table->n_foreign_key_checks_running == 0) {
fprintf(stderr,
"InnoDB: error: table %s has the counter 0"
@@ -1408,7 +1426,14 @@ run_again:
if (check_ref) {
err = DB_SUCCESS;
-
+#ifdef WITH_WSREP
+ err = wsrep_append_foreign_key(
+ thr_get_trx(thr),
+ foreign,
+ rec,
+ check_index,
+ check_ref, TRUE);
+#endif /* WITH_WSREP */
goto end_scan;
} else if (foreign->type != 0) {
/* There is an ON UPDATE or ON DELETE
@@ -1665,6 +1690,9 @@ row_ins_scan_sec_index_for_duplicate(
dtuple_t* entry, /*!< in: index entry */
que_thr_t* thr) /*!< in: query thread */
{
+#ifdef WITH_WSREP
+ trx_t* trx = thr_get_trx(thr);
+#endif
ulint n_unique;
ulint i;
int cmp;
@@ -1718,7 +1746,14 @@ row_ins_scan_sec_index_for_duplicate(
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
+#ifdef WITH_WSREP
+ /* slave applier must not get duplicate error */
+ if (allow_duplicates ||
+ (wsrep_on(trx->mysql_thd) &&
+ wsrep_thd_is_brute_force(trx->mysql_thd))) {
+#else
if (allow_duplicates) {
+#endif
/* If the SQL-query will update or replace
duplicate key we will take X-lock for
@@ -1836,7 +1871,13 @@ row_ins_duplicate_error_in_clust(
sure that in roll-forward we get the same duplicate
errors as in original execution */
+#ifdef WITH_WSREP
+ if (trx->duplicates ||
+ (wsrep_on(trx->mysql_thd) &&
+ wsrep_thd_is_brute_force(trx->mysql_thd))) {
+#else
if (trx->duplicates) {
+#endif
/* If the SQL-query will update or replace
duplicate key we will take X-lock for
@@ -1880,7 +1921,13 @@ row_ins_duplicate_error_in_clust(
offsets = rec_get_offsets(rec, cursor->index, offsets,
ULINT_UNDEFINED, &heap);
+#ifdef WITH_WSREP
+ if (trx->duplicates ||
+ (wsrep_on(trx->mysql_thd) &&
+ wsrep_thd_is_brute_force(trx->mysql_thd))) {
+#else
if (trx->duplicates) {
+#endif
/* If the SQL-query will update or replace
duplicate key we will take X-lock for
diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c
index 859b3d415ab..6453277c8d6 100644
--- a/storage/xtradb/row/row0upd.c
+++ b/storage/xtradb/row/row0upd.c
@@ -170,6 +170,46 @@ func_exit:
return(is_referenced);
}
+#ifdef WITH_WSREP
+ulint wsrep_append_foreign_key(trx_t *trx,
+ dict_foreign_t* foreign,
+ const rec_t* clust_rec,
+ dict_index_t* clust_index,
+ ibool referenced,
+ ibool shared);
+
+static
+void
+wsrep_append_fk_reference(
+/*=================================*/
+ upd_node_t* node, /*!< in: row update node */
+ dict_table_t* table, /*!< in: table in question */
+ dict_index_t* index, /*!< in: index of the cursor */
+ que_thr_t* thr, /*!< in: query thread */
+ const rec_t* rec
+) {
+ dict_foreign_t *foreign = UT_LIST_GET_FIRST(table->foreign_list);
+
+ while (foreign) {
+ if (foreign->foreign_index == index
+ && node->is_delete)
+ {
+ if (DB_SUCCESS != wsrep_append_foreign_key(
+ thr_get_trx(thr),
+ foreign,
+ rec,
+ index,
+ TRUE, TRUE)
+ ) {
+ fprintf(stderr,
+ "WSREP: FK key append failed\n");
+ }
+ }
+ foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
+ }
+}
+#endif /* WITH_WSREP */
+
/*********************************************************************//**
Checks if possible foreign key constraints hold after a delete of the record
under pcur.
@@ -283,7 +323,6 @@ row_upd_check_references_constraints(
}
err = DB_SUCCESS;
-
func_exit:
if (got_s_lock) {
row_mysql_unfreeze_data_dictionary(trx);
@@ -1665,6 +1704,12 @@ row_upd_sec_index_entry(
node, &pcur, index->table,
index, offsets, thr, &mtr);
}
+#ifdef WITH_WSREP
+ if (err == DB_SUCCESS && !referenced) {
+ wsrep_append_fk_reference(node, index->table,
+ index, thr, rec);
+ }
+#endif /* WITH_WSREP */
}
break;
}
@@ -1908,6 +1953,12 @@ err_exit:
goto err_exit;
}
}
+#ifdef WITH_WSREP
+ if (!referenced) {
+ wsrep_append_fk_reference(node, index->table,
+ index, thr, rec);
+ }
+#endif /* WITH_WSREP */
}
mtr_commit(mtr);
@@ -2114,6 +2165,9 @@ row_upd_del_mark_clust_rec(
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
ulint err;
+#ifdef WITH_WSREP
+ rec_t* rec;
+#endif /* WITH_WSREP */
ut_ad(node);
ut_ad(dict_index_is_clust(index));
@@ -2130,15 +2184,29 @@ row_upd_del_mark_clust_rec(
/* Mark the clustered index record deleted; we do not have to check
locks, because we assume that we have an x-lock on the record */
+#ifdef WITH_WSREP
+ rec = btr_cur_get_rec(btr_cur);
+#endif /* WITH_WSREP */
+
err = btr_cur_del_mark_set_clust_rec(
BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur),
+#ifdef WITH_WSREP
+ rec, index, offsets, TRUE, thr, mtr);
+#else
btr_cur_get_rec(btr_cur), index, offsets, TRUE, thr, mtr);
+#endif /* WITH_WSREP */
if (err == DB_SUCCESS && referenced) {
/* NOTE that the following call loses the position of pcur ! */
err = row_upd_check_references_constraints(
node, pcur, index->table, index, offsets, thr, mtr);
}
+#ifdef WITH_WSREP
+ if (err == DB_SUCCESS && !referenced) {
+ wsrep_append_fk_reference(node, index->table,
+ index, thr, rec);
+ }
+#endif /* WITH_WSREP */
mtr_commit(mtr);
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c
index 9d479ac6c87..afc3c7759d9 100644
--- a/storage/xtradb/srv/srv0srv.c
+++ b/storage/xtradb/srv/srv0srv.c
@@ -97,6 +97,10 @@ ulong innobase_thd_get_thread_id(const void* thd);
/* prototypes for new functions added to ha_innodb.cc */
ibool innobase_get_slow_log();
+#ifdef WITH_WSREP
+extern int wsrep_debug;
+extern int wsrep_trx_is_aborting(void *thd_ptr);
+#endif
/* The following counter is incremented whenever there is some user activity
in the server */
UNIV_INTERN ulint srv_activity_count = 0;
@@ -224,6 +228,10 @@ srv_printf_innodb_monitor() will request mutex acquisition
with mutex_enter(), which will wait until it gets the mutex. */
#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
+#ifdef WITH_INNODB_DISALLOW_WRITES
+UNIV_INTERN os_event_t srv_allow_writes_event;
+#endif /* WITH_INNODB_DISALLOW_WRITES */
+
/** The sort order table of the MySQL latin1_swedish_ci character set
collation */
UNIV_INTERN const byte* srv_latin1_ordering;
@@ -412,6 +420,9 @@ struct srv_conc_slot_struct{
free to proceed; but
reserved may still be
TRUE at that point */
+#ifdef WITH_WSREP
+ void *thd; /*!< to see priority */
+#endif
UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /*!< queue node */
};
@@ -1155,8 +1166,20 @@ srv_init(void)
conc_slot->reserved = FALSE;
conc_slot->event = os_event_create(NULL);
ut_a(conc_slot->event);
+#ifdef WITH_WSREP
+ conc_slot->thd = NULL;
+#endif /* WITH_WSREP */
}
+#ifdef WITH_INNODB_DISALLOW_WRITES
+ /* Writes have to be enabled on init or else we hang. Thus, we
+ always set the event here regardless of innobase_disallow_writes.
+ That flag will always be 0 at this point because it isn't settable
+ via my.cnf or command line arg. */
+ srv_allow_writes_event = os_event_create(NULL);
+ os_event_set(srv_allow_writes_event);
+#endif /* WITH_INNODB_DISALLOW_WRITES */
+
/* Initialize some INFORMATION SCHEMA internal structures */
trx_i_s_cache_init(trx_i_s_cache);
}
@@ -1322,6 +1345,18 @@ srv_conc_enter_innodb(
}
#endif
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ wsrep_thd_is_brute_force(trx->mysql_thd)) {
+ srv_conc_force_enter_innodb(trx);
+ return;
+ }
+ if (wsrep_on(trx->mysql_thd) &&
+ wsrep_trx_is_aborting(trx->mysql_thd)) {
+ srv_conc_force_enter_innodb(trx);
+ return;
+ }
+#endif
os_fast_mutex_lock(&srv_conc_mutex);
retry:
if (trx->declared_to_be_inside_innodb) {
@@ -1415,6 +1450,9 @@ retry:
/* Add to the queue */
slot->reserved = TRUE;
slot->wait_ended = FALSE;
+#ifdef WITH_WSREP
+ slot->thd = trx->mysql_thd;
+#endif
UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
@@ -1461,6 +1499,9 @@ retry:
incremented the thread counter on behalf of this thread */
slot->reserved = FALSE;
+#ifdef WITH_WSREP
+ slot->thd = NULL;
+#endif
UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
@@ -1546,6 +1587,9 @@ srv_conc_force_exit_innodb(
trx->n_tickets_to_enter_innodb = 0;
if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
+#ifdef WITH_WSREP
+ srv_conc_slot_t* wsrep_slot;
+#endif
/* Look for a slot where a thread is waiting and no other
thread has yet released the thread */
@@ -1555,6 +1599,19 @@ srv_conc_force_exit_innodb(
slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
}
+#ifdef WITH_WSREP
+ /* look for aborting trx, they must be released asap */
+ wsrep_slot= slot;
+ while (wsrep_slot && (wsrep_slot->wait_ended == TRUE ||
+ !wsrep_trx_is_aborting(wsrep_slot->thd))) {
+ wsrep_slot = UT_LIST_GET_NEXT(srv_conc_queue, wsrep_slot);
+ }
+ if (wsrep_slot) {
+ slot = wsrep_slot;
+ if (wsrep_debug)
+ fprintf(stderr, "WSREP: releasing aborting thd\n");
+ }
+#endif
if (slot != NULL) {
slot->wait_ended = TRUE;
@@ -1927,7 +1984,20 @@ srv_suspend_mysql_thread(
if (lock_wait_timeout < 100000000
&& wait_time > (double) lock_wait_timeout) {
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ wsrep_thd_is_brute_force(trx->mysql_thd)) {
+ fprintf(stderr,
+ "WSREP: BF long lock wait ended after %.f sec\n",
+ wait_time);
+ srv_print_innodb_monitor = FALSE;
+ srv_print_innodb_lock_monitor = FALSE;
+ } else {
+#endif
trx->error_state = DB_LOCK_WAIT_TIMEOUT;
+#ifdef WITH_WSREP
+ }
+#endif
}
if (trx_is_interrupted(trx)) {
@@ -2660,6 +2730,27 @@ exit_func:
OS_THREAD_DUMMY_RETURN;
}
+#ifdef WITH_WSREP
+/*********************************************************************//**
+check if lock timeout was for priority thread,
+as a side effect trigger lock monitor
+@return false for regular lock timeout */
+static ibool
+wsrep_is_BF_lock_timeout(
+/*====================*/
+ srv_slot_t* slot) /* in: lock slot to check for lock priority */
+{
+ if (wsrep_on(thr_get_trx(slot->thr)->mysql_thd) &&
+ wsrep_thd_is_brute_force((thr_get_trx(slot->thr))->mysql_thd)) {
+ fprintf(stderr, "WSREP: BF lock wait long\n");
+ srv_print_innodb_monitor = TRUE;
+ srv_print_innodb_lock_monitor = TRUE;
+ os_event_set(srv_lock_timeout_thread_event);
+ return TRUE;
+ }
+ return FALSE;
+ }
+#endif /* WITH_WSREP */
/*********************************************************************//**
A thread which wakes up threads whose lock wait may have lasted too long.
@return a dummy parameter */
@@ -2728,8 +2819,14 @@ loop:
granted: in that case do nothing */
if (trx->wait_lock) {
+#ifdef WITH_WSREP
+ if (!wsrep_is_BF_lock_timeout(slot)) {
+#endif
lock_cancel_waiting_and_release(
trx->wait_lock);
+#ifdef WITH_WSREP
+ }
+#endif
}
}
}
@@ -2846,7 +2943,20 @@ loop:
if (sync_array_print_long_waits(&waiter, &sema)
&& sema == old_sema && os_thread_eq(waiter, old_waiter)) {
+#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
+ if (srv_allow_writes_event->is_set) {
+#endif /* WITH_WSREP */
fatal_cnt++;
+#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
+ } else {
+ fprintf(stderr,
+ "WSREP: avoiding InnoDB self crash due to long "
+ "semaphore wait of > %lu seconds\n"
+ "Server is processing SST donor operation, "
+ "fatal_cnt now: %lu",
+ (ulong) srv_fatal_semaphore_wait_threshold, fatal_cnt);
+ }
+#endif /* WITH_WSREP */
if (fatal_cnt > 10) {
fprintf(stderr,
diff --git a/storage/xtradb/trx/trx0roll.c b/storage/xtradb/trx/trx0roll.c
index b55471959ce..3db63469cbe 100644
--- a/storage/xtradb/trx/trx0roll.c
+++ b/storage/xtradb/trx/trx0roll.c
@@ -42,6 +42,9 @@ Created 3/26/1996 Heikki Tuuri
#include "row0mysql.h"
#include "lock0lock.h"
#include "pars0pars.h"
+#ifdef WITH_WSREP
+#include "ha_prototypes.h"
+#endif /* WITH_WSREP */
/** This many pages must be undone before a truncate is tried within
rollback */
@@ -147,6 +150,12 @@ trx_rollback_for_mysql(
trx->op_info = "";
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ trx->was_chosen_as_deadlock_victim) {
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
return(err);
}
@@ -174,6 +183,12 @@ trx_rollback_last_sql_stat_for_mysql(
trx->op_info = "";
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ trx->was_chosen_as_deadlock_victim) {
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
return(err);
}
@@ -1123,6 +1138,12 @@ trx_rollback(
srv_que_task_enqueue_low(thr);
/* srv_que_task_enqueue_low(thr2); */
}
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ trx->was_chosen_as_deadlock_victim) {
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
}
/****************************************************************//**
@@ -1281,6 +1302,12 @@ trx_finish_rollback_off_kernel(
sig = next_sig;
}
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ trx->was_chosen_as_deadlock_victim) {
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
}
/*********************************************************************//**
diff --git a/storage/xtradb/trx/trx0sys.c b/storage/xtradb/trx/trx0sys.c
index c40bf02a7a2..59b7da4083d 100644
--- a/storage/xtradb/trx/trx0sys.c
+++ b/storage/xtradb/trx/trx0sys.c
@@ -44,6 +44,10 @@ Created 3/26/1996 Heikki Tuuri
#include "os0file.h"
#include "read0read.h"
+#ifdef WITH_WSREP
+#include "ha_prototypes.h" /* wsrep_is_wsrep_xid() */
+#endif /* */
+
/** The file format tag structure with id and name. */
struct file_format_struct {
ulint id; /*!< id of the file format */
@@ -958,6 +962,89 @@ trx_sys_print_mysql_binlog_offset(void)
mtr_commit(&mtr);
}
+#ifdef WITH_WSREP
+
+void
+trx_sys_update_wsrep_checkpoint(
+ const XID* xid, /*!< in: transaction XID */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ trx_sysf_t* sys_header;
+
+ ut_ad(xid && mtr);
+ ut_a(xid->formatID == -1 || wsrep_is_wsrep_xid(xid));
+
+ sys_header = trx_sysf_get(mtr);
+ if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD)
+ != TRX_SYS_WSREP_XID_MAGIC_N) {
+ mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD,
+ TRX_SYS_WSREP_XID_MAGIC_N,
+ MLOG_4BYTES, mtr);
+ }
+
+ mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_FORMAT,
+ (int)xid->formatID,
+ MLOG_4BYTES, mtr);
+ mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_GTRID_LEN,
+ (int)xid->gtrid_length,
+ MLOG_4BYTES, mtr);
+ mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_BQUAL_LEN,
+ (int)xid->bqual_length,
+ MLOG_4BYTES, mtr);
+ mlog_write_string(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_DATA,
+ (const unsigned char*) xid->data,
+ XIDDATASIZE, mtr);
+
+}
+
+void
+trx_sys_read_wsrep_checkpoint(XID* xid)
+/*===================================*/
+{
+ trx_sysf_t* sys_header;
+ mtr_t mtr;
+ ulint magic;
+
+ ut_ad(xid);
+
+ mtr_start(&mtr);
+
+ sys_header = trx_sysf_get(&mtr);
+
+ if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD))
+ != TRX_SYS_WSREP_XID_MAGIC_N) {
+ memset(xid, 0, sizeof(*xid));
+ xid->formatID = -1;
+ trx_sys_update_wsrep_checkpoint(xid, &mtr);
+ mtr_commit(&mtr);
+ return;
+ }
+
+ xid->formatID = (int)mach_read_from_4(
+ sys_header
+ + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT);
+ xid->gtrid_length = (int)mach_read_from_4(
+ sys_header
+ + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN);
+ xid->bqual_length = (int)mach_read_from_4(
+ sys_header
+ + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN);
+ ut_memcpy(xid->data,
+ sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
+ XIDDATASIZE);
+
+ mtr_commit(&mtr);
+}
+
+#endif /* WITH_WSREP */
+
/*****************************************************************//**
Reads the log coordinates at the given offset in the trx sys header. */
static
diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c
index eea8e861387..521fc96d35c 100644
--- a/storage/xtradb/trx/trx0trx.c
+++ b/storage/xtradb/trx/trx0trx.c
@@ -755,6 +755,11 @@ trx_start_low(
trx->id = trx_sys_get_new_trx_id();
+#ifdef WITH_WSREP
+ memset(&trx->xid, 0, sizeof(trx->xid));
+ trx->xid.formatID = -1;
+#endif /* WITH_WSREP */
+
/* The initial value for trx->no: IB_ULONGLONG_MAX is used in
read_view_open_now: */
@@ -909,6 +914,14 @@ trx_write_serialisation_history(
mutex_exit(&rseg->mutex);
+#ifdef WITH_WSREP
+ /* Update latest MySQL wsrep XID in trx sys header. */
+ if (wsrep_is_wsrep_xid(&trx->xid))
+ {
+ trx_sys_update_wsrep_checkpoint(&trx->xid, &mtr);
+ }
+#endif /* WITH_WSREP */
+
/* Update the latest MySQL binlog name and offset info
in trx sys header if MySQL binlogging is on or the database
server is a MySQL replication slave */
@@ -1138,6 +1151,12 @@ trx_commit_off_kernel(
ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ trx->was_chosen_as_deadlock_victim) {
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
trx->error_state = DB_SUCCESS;
diff --git a/support-files/CMakeLists.txt b/support-files/CMakeLists.txt
index fde4746df16..6fbd52a64eb 100644
--- a/support-files/CMakeLists.txt
+++ b/support-files/CMakeLists.txt
@@ -41,7 +41,7 @@ ELSE()
SET(inst_location ${INSTALL_SUPPORTFILESDIR})
ENDIF()
-FOREACH(inifile my-huge my-innodb-heavy-4G my-large my-medium my-small)
+FOREACH(inifile my-huge my-innodb-heavy-4G my-large my-medium my-small wsrep)
CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/${inifile}.cnf.sh
${CMAKE_CURRENT_BINARY_DIR}/${inifile}.${ini_file_extension} @ONLY)
INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/${inifile}.${ini_file_extension}
@@ -93,6 +93,12 @@ IF(UNIX)
DESTINATION ${inst_location} COMPONENT SupportFiles
PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ
GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
+ CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/wsrep_notify.sh
+ ${CMAKE_CURRENT_BINARY_DIR}/wsrep_notify @ONLY)
+ INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/wsrep_notify
+ DESTINATION ${inst_location} COMPONENT SupportFiles
+ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ
+ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
IF (INSTALL_SYSCONFDIR)
INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/mysql-log-rotate DESTINATION ${INSTALL_SYSCONFDIR}/logrotate.d
diff --git a/support-files/build-tags b/support-files/build-tags
index b5386dc79c3..f42fae218d2 100755
--- a/support-files/build-tags
+++ b/support-files/build-tags
@@ -1,12 +1,17 @@
#! /bin/sh
rm -f TAGS
-filter='\.cc$\|\.c$\|\.h$\|\.yy$'
+filter='\.cc$\|\.c$\|\.h$\|\.yy\|\.ic\|.ih$'
-list="find . -type f"
+#list="find . -type f"
bzr root >/dev/null 2>/dev/null && list="bzr ls --from-root -R --kind=file --versioned"
$list |grep $filter |while read f;
do
etags -o TAGS --append $f
done
+
+(cd storage/galera && svn ls -R) | grep $filter | while read f;
+do
+ etags -o TAGS --append storage/galera/$f
+done
diff --git a/support-files/mysql.spec.sh b/support-files/mysql.spec.sh
index 0d673e5b339..aeedd652d73 100644
--- a/support-files/mysql.spec.sh
+++ b/support-files/mysql.spec.sh
@@ -68,6 +68,14 @@
#
# ----------------------------------------------------------------------------
+# wsrep builds
+# ----------------------------------------------------------------------------
+%if %{defined with_wsrep}
+%define mysql_version @VERSION@_wsrep_@WSREP_API_VERSION@.@WSREP_PATCH_VERSION@
+%define wsrep_version @WSREP_VERSION@
+%endif
+
+# ----------------------------------------------------------------------------
# Commercial builds
# ----------------------------------------------------------------------------
%if %{undefined commercial}
@@ -115,6 +123,13 @@
%endif
# ----------------------------------------------------------------------------
+# Packager
+# ----------------------------------------------------------------------------
+%if %{undefined mysql_packager}
+%define mysql_packager MySQL Build Team <build@mysql.com>
+%endif
+
+# ----------------------------------------------------------------------------
# Distribution support
# ----------------------------------------------------------------------------
%if %{undefined distro_specific}
@@ -283,6 +298,10 @@ documentation and the manual for more information.
##############################################################################
%package -n MySQL-server%{product_suffix}
+%if %{defined with_wsrep}
+Version: %{mysql_version}
+#Release: %{wsrep_version}.%{release}
+%endif
Summary: MySQL: a very fast and reliable SQL database server
Group: Applications/Databases
Requires: %{distro_requires}
@@ -312,6 +331,9 @@ and the manual for more information.
This package includes the MySQL server binary as well as related utilities
to run and administer a MySQL server.
+%if %{defined with_wsrep}
+Built with wsrep patch %{wsrep_version}.
+%endif
If you want to access and work with the database, you have to install
package "MySQL-client%{product_suffix}" as well!
@@ -377,6 +399,7 @@ This package contains the shared libraries (*.so*) which certain languages
and applications need to dynamically load and use MySQL.
# ----------------------------------------------------------------------------
+%if %{undefined with_wsrep}
%package -n MySQL-embedded%{product_suffix}
Summary: MySQL - Embedded library
Group: Applications/Databases
@@ -398,6 +421,7 @@ The API is identical for the embedded MySQL version and the
client/server version.
For a description of MySQL see the base MySQL RPM or http://www.mysql.com/
+%endif
##############################################################################
%prep
@@ -473,6 +497,9 @@ mkdir debug
-DMYSQL_UNIX_ADDR="%{mysqldatadir}/mysql.sock" \
-DFEATURE_SET="%{feature_set}" \
-DCOMPILATION_COMMENT="%{compilation_comment_debug}" \
+%if %{defined with_wsrep}
+ -DWITH_WSREP=1 \
+%endif
-DMYSQL_SERVER_SUFFIX="%{server_suffix}"
echo BEGIN_DEBUG_CONFIG ; egrep '^#define' include/config.h ; echo END_DEBUG_CONFIG
make ${MAKE_JFLAG} VERBOSE=1
@@ -488,6 +515,9 @@ mkdir release
-DMYSQL_UNIX_ADDR="%{mysqldatadir}/mysql.sock" \
-DFEATURE_SET="%{feature_set}" \
-DCOMPILATION_COMMENT="%{compilation_comment_release}" \
+%if %{defined with_wsrep}
+ -DWITH_WSREP=1 \
+%endif
-DMYSQL_SERVER_SUFFIX="%{server_suffix}"
echo BEGIN_NORMAL_CONFIG ; egrep '^#define' include/config.h ; echo END_NORMAL_CONFIG
make ${MAKE_JFLAG} VERBOSE=1
@@ -550,11 +580,20 @@ install -m 755 $MBD/release/support-files/mysql.server $RBR%{_sysconfdir}/init.d
# Create a symlink "rcmysql", pointing to the init.script. SuSE users
# will appreciate that, as all services usually offer this.
-ln -s %{_sysconfdir}/init.d/mysql $RBR%{_sbindir}/rcmysql
+ln -sf %{_sysconfdir}/init.d/mysql $RBR%{_sbindir}/rcmysql
+
+%if %{defined with_wsrep}
+# Create a wsrep_sst_rsync_wan symlink.
+install -d $RBR%{_bindir}
+ln -sf wsrep_sst_rsync $RBR%{_bindir}/wsrep_sst_rsync_wan
+%endif
# Touch the place where the my.cnf config file might be located
# Just to make sure it's in the file list and marked as a config file
touch $RBR%{_sysconfdir}/my.cnf
+%if %{defined with_wsrep}
+touch $RBR%{_sysconfdir}/wsrep.cnf
+%endif
# Install SELinux files in datadir
install -m 600 $MBD/%{src_dir}/support-files/RHEL4-SElinux/mysql.{fc,te} \
@@ -1008,6 +1047,11 @@ echo "=====" >> $STATUS_HISTORY
%doc %{src_dir}/Docs/INFO_SRC*
%doc release/Docs/INFO_BIN*
%doc release/support-files/my-*.cnf
+%if %{defined with_wsrep}
+%doc %{src_dir}/Docs/README-wsrep
+%doc release/support-files/wsrep.cnf
+%doc release/support-files/wsrep_notify
+%endif
%doc %attr(644, root, root) %{_infodir}/mysql.info*
@@ -1041,6 +1085,9 @@ echo "=====" >> $STATUS_HISTORY
%doc %attr(644, root, man) %{_mandir}/man1/resolveip.1*
%ghost %config(noreplace,missingok) %{_sysconfdir}/my.cnf
+%if %{defined with_wsrep}
+%ghost %config(noreplace,missingok) %{_sysconfdir}/wsrep.cnf
+%endif
%attr(755, root, root) %{_bindir}/innochecksum
%attr(755, root, root) %{_bindir}/my_print_defaults
@@ -1067,6 +1114,13 @@ echo "=====" >> $STATUS_HISTORY
%attr(755, root, root) %{_bindir}/replace
%attr(755, root, root) %{_bindir}/resolve_stack_dump
%attr(755, root, root) %{_bindir}/resolveip
+%if %{defined with_wsrep}
+%attr(755, root, root) %{_bindir}/wsrep_sst_common
+%attr(755, root, root) %{_bindir}/wsrep_sst_mysqldump
+%attr(755, root, root) %{_bindir}/wsrep_sst_rsync
+%attr(755, root, root) %{_bindir}/wsrep_sst_rsync_wan
+%attr(755, root, root) %{_bindir}/wsrep_sst_xtrabackup
+%endif
%attr(755, root, root) %{_sbindir}/mysqld
%attr(755, root, root) %{_sbindir}/mysqld-debug
@@ -1145,8 +1199,10 @@ echo "=====" >> $STATUS_HISTORY
%defattr(-, root, root, 0755)
%attr(-, root, root) %{_datadir}/mysql-test
%attr(755, root, root) %{_bindir}/mysql_client_test
+%if %{undefined with_wsrep}
%attr(755, root, root) %{_bindir}/mysql_client_test_embedded
%attr(755, root, root) %{_bindir}/mysqltest_embedded
+%endif
%doc %attr(644, root, man) %{_mandir}/man1/mysql_client_test.1*
%doc %attr(644, root, man) %{_mandir}/man1/mysql-stress-test.pl.1*
%doc %attr(644, root, man) %{_mandir}/man1/mysql-test-run.pl.1*
@@ -1154,11 +1210,13 @@ echo "=====" >> $STATUS_HISTORY
%doc %attr(644, root, man) %{_mandir}/man1/mysqltest_embedded.1*
# ----------------------------------------------------------------------------
+%if %{undefined with_wsrep}
%files -n MySQL-embedded%{product_suffix}
%defattr(-, root, root, 0755)
%attr(755, root, root) %{_bindir}/mysql_embedded
%attr(644, root, root) %{_libdir}/mysql/libmysqld.a
%attr(644, root, root) %{_libdir}/mysql/libmysqld-debug.a
+%endif
##############################################################################
# The spec file changelog only includes changes made to the spec file
@@ -1174,6 +1232,10 @@ echo "=====" >> $STATUS_HISTORY
--define "runselftest 0"
Failures of the test suite will NOT make the RPM build fail!
+* Wed Dec 07 2011 Alexey Yurchenko <alexey.yurchenko@codership.com>
+
+- wsrep-related cleanups.
+
* Wed Sep 28 2011 Joerg Bruehe <joerg.bruehe@oracle.com>
- Fix duplicate mentioning of "mysql_plugin" and its manual page,
@@ -1226,7 +1288,6 @@ echo "=====" >> $STATUS_HISTORY
- Fix bug#12561297: Added the MySQL embedded binary
* Thu Jul 07 2011 Joerg Bruehe <joerg.bruehe@oracle.com>
-
- Fix bug#45415: "rpm upgrade recreates test database"
Let the creation of the "test" database happen only during a new installation,
not in an RPM upgrade.
diff --git a/support-files/wsrep.cnf.sh b/support-files/wsrep.cnf.sh
new file mode 100644
index 00000000000..507f83324b9
--- /dev/null
+++ b/support-files/wsrep.cnf.sh
@@ -0,0 +1,129 @@
+# This file contains wsrep-related mysqld options. It should be included
+# in the main MySQL configuration file.
+#
+# Options that need to be customized:
+# - wsrep_provider
+# - wsrep_cluster_address
+# - wsrep_sst_auth
+# The rest of defaults should work out of the box.
+
+##
+## mysqld options _MANDATORY_ for correct opration of the cluster
+##
+[mysqld]
+
+# (This must be substituted by wsrep_format)
+binlog_format=ROW
+
+# Currently only InnoDB storage engine is supported
+default-storage-engine=innodb
+
+# to avoid issues with 'bulk mode inserts' using autoinc
+innodb_autoinc_lock_mode=2
+
+# This is a must for paralell applying
+innodb_locks_unsafe_for_binlog=1
+
+# Query Cache is not supported with wsrep
+query_cache_size=0
+query_cache_type=0
+
+# Override bind-address
+# In some systems bind-address defaults to 127.0.0.1, and with mysqldump SST
+# it will have (most likely) disastrous consequences on donor node
+bind-address=0.0.0.0
+
+##
+## WSREP options
+##
+
+# Full path to wsrep provider library or 'none'
+wsrep_provider=none
+
+# Provider specific configuration options
+#wsrep_provider_options=
+
+# Logical cluster name. Should be the same for all nodes.
+wsrep_cluster_name="my_wsrep_cluster"
+
+# Group communication system handle
+#wsrep_cluster_address="dummy://"
+
+# Human-readable node name (non-unique). Hostname by default.
+#wsrep_node_name=
+
+# Base replication <address|hostname>[:port] of the node.
+# The values supplied will be used as defaults for state transfer receiving,
+# listening ports and so on. Default: address of the first network interface.
+#wsrep_node_address=
+
+# Address for incoming client connections. Autodetect by default.
+#wsrep_node_incoming_address=
+
+# How many threads will process writesets from other nodes
+wsrep_slave_threads=1
+
+# DBUG options for wsrep provider
+#wsrep_dbug_option
+
+# Generate fake primary keys for non-PK tables (required for multi-master
+# and parallel applying operation)
+wsrep_certify_nonPK=1
+
+# Maximum number of rows in write set
+wsrep_max_ws_rows=131072
+
+# Maximum size of write set
+wsrep_max_ws_size=1073741824
+
+# to enable debug level logging, set this to 1
+wsrep_debug=0
+
+# convert locking sessions into transactions
+wsrep_convert_LOCK_to_trx=0
+
+# how many times to retry deadlocked autocommits
+wsrep_retry_autocommit=1
+
+# change auto_increment_increment and auto_increment_offset automatically
+wsrep_auto_increment_control=1
+
+# retry autoinc insert, which failed for duplicate key error
+wsrep_drupal_282555_workaround=0
+
+# enable "strictly synchronous" semantics for read operations
+wsrep_causal_reads=0
+
+# Command to call when node status or cluster membership changes.
+# Will be passed all or some of the following options:
+# --status - new status of this node
+# --uuid - UUID of the cluster
+# --primary - whether the component is primary or not ("yes"/"no")
+# --members - comma-separated list of members
+# --index - index of this node in the list
+wsrep_notify_cmd=
+
+##
+## WSREP State Transfer options
+##
+
+# State Snapshot Transfer method
+wsrep_sst_method=mysqldump
+
+# Address which donor should send State Snapshot to.
+# Should be the address of THIS node. DON'T SET IT TO DONOR ADDRESS!!!
+# (SST method dependent. Defaults to the first IP of the first interface)
+#wsrep_sst_receive_address=
+
+# SST authentication string. This will be used to send SST to joining nodes.
+# Depends on SST method. For mysqldump method it is root:<root password>
+wsrep_sst_auth=root:
+
+# Desired SST donor name.
+#wsrep_sst_donor=
+
+# Reject client queries when donating SST (false)
+#wsrep_sst_donor_rejects_queries=0
+
+# Protocol version to use
+# wsrep_protocol_version=
diff --git a/support-files/wsrep_notify.sh b/support-files/wsrep_notify.sh
new file mode 100644
index 00000000000..bdbe3d12a39
--- /dev/null
+++ b/support-files/wsrep_notify.sh
@@ -0,0 +1,102 @@
+#!/bin/sh -eu
+
+# This is a simple example of wsrep notification script (wsrep_notify_cmd).
+# It will create 'wsrep' schema and two tables in it: 'membeship' and 'status'
+# and fill them on every membership or node status change.
+#
+# Edit parameters below to specify the address and login to server.
+
+USER=root
+PSWD=rootpass
+HOST=127.0.0.1
+PORT=3306
+
+SCHEMA="wsrep"
+MEMB_TABLE="$SCHEMA.membership"
+STATUS_TABLE="$SCHEMA.status"
+
+BEGIN="
+SET wsrep_on=0;
+DROP SCHEMA IF EXISTS $SCHEMA; CREATE SCHEMA $SCHEMA;
+CREATE TABLE $MEMB_TABLE (
+ idx INT UNIQUE PRIMARY KEY,
+ uuid CHAR(40) UNIQUE, /* node UUID */
+ name VARCHAR(32), /* node name */
+ addr VARCHAR(256) /* node address */
+) ENGINE=MEMORY;
+CREATE TABLE $STATUS_TABLE (
+ size INT, /* component size */
+ idx INT, /* this node index */
+ status CHAR(16), /* this node status */
+ uuid CHAR(40), /* cluster UUID */
+ prim BOOLEAN /* if component is primary */
+) ENGINE=MEMORY;
+BEGIN;
+DELETE FROM $MEMB_TABLE;
+DELETE FROM $STATUS_TABLE;
+"
+END="COMMIT;"
+
+configuration_change()
+{
+ echo "$BEGIN;"
+
+ local idx=0
+
+ for NODE in $(echo $MEMBERS | sed s/,/\ /g)
+ do
+ echo "INSERT INTO $MEMB_TABLE VALUES ( $idx, "
+ # Don't forget to properly quote string values
+ echo "'$NODE'" | sed s/\\//\',\'/g
+ echo ");"
+ idx=$(( $idx + 1 ))
+ done
+
+ echo "INSERT INTO $STATUS_TABLE VALUES($idx, $INDEX, '$STATUS', '$CLUSTER_UUID', $PRIMARY);"
+
+ echo "$END"
+}
+
+status_update()
+{
+ echo "SET wsrep_on=0; BEGIN; UPDATE $STATUS_TABLE SET status='$STATUS'; COMMIT;"
+}
+
+COM=status_update # not a configuration change by default
+
+while [ $# -gt 0 ]
+do
+ case $1 in
+ --status)
+ STATUS=$2
+ shift
+ ;;
+ --uuid)
+ CLUSTER_UUID=$2
+ shift
+ ;;
+ --primary)
+ [ "$2" = "yes" ] && PRIMARY="1" || PRIMARY="0"
+ COM=configuration_change
+ shift
+ ;;
+ --index)
+ INDEX=$2
+ shift
+ ;;
+ --members)
+ MEMBERS=$2
+ shift
+ ;;
+ esac
+ shift
+done
+
+# Undefined means node is shutting down
+if [ "$STATUS" != "Undefined" ]
+then
+ $COM | mysql -B -u$USER -p$PSWD -h$HOST -P$PORT
+fi
+
+exit 0
+#
diff --git a/wsrep/CMakeLists.txt b/wsrep/CMakeLists.txt
new file mode 100644
index 00000000000..11d0e34d1b0
--- /dev/null
+++ b/wsrep/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Copyright (c) 2012, Codership Oy. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+INCLUDE_DIRECTORIES( "." )
+
+SET(WSREP_SOURCES wsrep_uuid.c wsrep_loader.c wsrep_dummy.c)
+
+ADD_CONVENIENCE_LIBRARY(wsrep ${WSREP_SOURCES})
+DTRACE_INSTRUMENT(wsrep)
+
+#ADD_EXECUTABLE(listener wsrep_listener.c ${WSREP_SOURCES})
+#TARGET_LINK_LIBRARIES(listener ${LIBDL})
diff --git a/wsrep/Makefile.am b/wsrep/Makefile.am
new file mode 100644
index 00000000000..40e4b501e86
--- /dev/null
+++ b/wsrep/Makefile.am
@@ -0,0 +1,2 @@
+noinst_LIBRARIES = libwsrep.a
+libwsrep_a_SOURCES = wsrep_api.h wsrep_loader.c wsrep_dummy.c wsrep_uuid.c
diff --git a/wsrep/wsrep_api.h b/wsrep/wsrep_api.h
new file mode 100644
index 00000000000..2cd10afc7ff
--- /dev/null
+++ b/wsrep/wsrep_api.h
@@ -0,0 +1,875 @@
+/* Copyright (C) 2009-2011 Codership Oy <info@codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef WSREP_H
+#define WSREP_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*!
+ * wsrep replication API
+ */
+
+#define WSREP_INTERFACE_VERSION "23"
+
+/*!
+ * Certain provider capabilities application may need to know
+ */
+#define WSREP_CAP_MULTI_MASTER ( 1ULL << 0 )
+#define WSREP_CAP_CERTIFICATION ( 1ULL << 1 )
+#define WSREP_CAP_PARALLEL_APPLYING ( 1ULL << 2 )
+#define WSREP_CAP_TRX_REPLAY ( 1ULL << 3 )
+#define WSREP_CAP_ISOLATION ( 1ULL << 4 )
+#define WSREP_CAP_PAUSE ( 1ULL << 5 )
+#define WSREP_CAP_CAUSAL_READS ( 1ULL << 6 )
+#define WSREP_CAP_CAUSAL_TRX ( 1ULL << 7 )
+#define WSREP_CAP_WRITE_SET_INCREMENTS ( 1ULL << 8 )
+#define WSREP_CAP_SESSION_LOCKS ( 1ULL << 9 )
+#define WSREP_CAP_DISTRIBUTED_LOCKS ( 1ULL << 10 )
+#define WSREP_CAP_CONSISTENCY_CHECK ( 1ULL << 11 )
+
+/*!
+ * Write set replication flags
+ */
+#define WSREP_FLAG_PA_SAFE ( 1ULL << 0 )
+
+/* Empty backend spec */
+#define WSREP_NONE "none"
+
+typedef uint64_t wsrep_trx_id_t; //!< application transaction ID
+typedef uint64_t wsrep_conn_id_t; //!< application connection ID
+typedef int64_t wsrep_seqno_t; //!< sequence number of a writeset, etc.
+
+/*! undefined seqno */
+#define WSREP_SEQNO_UNDEFINED (-1)
+
+/*! wsrep status codes */
+typedef enum wsrep_status {
+ WSREP_OK = 0, //!< success
+ WSREP_WARNING, //!< minor warning, error logged
+ WSREP_TRX_MISSING, //!< transaction is not known by wsrep
+ WSREP_TRX_FAIL, //!< transaction aborted, server can continue
+ WSREP_BF_ABORT, //!< trx was victim of brute force abort
+ WSREP_CONN_FAIL, //!< error in client connection, must abort
+ WSREP_NODE_FAIL, //!< error in node state, wsrep must reinit
+ WSREP_FATAL, //!< fatal error, server must abort
+ WSREP_NOT_IMPLEMENTED //!< feature not implemented
+} wsrep_status_t;
+
+/*!
+ * @brief log severity levels, passed as first argument to log handler
+ */
+typedef enum wsrep_log_level
+{
+ WSREP_LOG_FATAL, //!< Unrecoverable error, application must quit.
+ WSREP_LOG_ERROR, //!< Operation failed, must be repeated.
+ WSREP_LOG_WARN, //!< Unexpected condition, but no operational failure.
+ WSREP_LOG_INFO, //!< Informational message.
+ WSREP_LOG_DEBUG //!< Debug message. Shows only of compiled with debug.
+} wsrep_log_level_t;
+
+/*!
+ * @brief error log handler
+ *
+ * All messages from wsrep library are directed to this
+ * handler, if present.
+ *
+ * @param level log level
+ * @param message log message
+ */
+typedef void (*wsrep_log_cb_t)(wsrep_log_level_t, const char *);
+
+/*!
+ * UUID type - for all unique IDs
+ */
+typedef struct wsrep_uuid {
+ uint8_t uuid[16];
+} wsrep_uuid_t;
+
+/*! Undefined UUID */
+static const wsrep_uuid_t WSREP_UUID_UNDEFINED = {{0,}};
+
+/*!
+ * Scan UUID from string
+ * @return length of UUID string representation or negative error code
+ */
+extern ssize_t
+wsrep_uuid_scan (const char* str, size_t str_len, wsrep_uuid_t* uuid);
+
+/*!
+ * Print UUID to string
+ * @return length of UUID string representation or negative error code
+ */
+extern ssize_t
+wsrep_uuid_print (const wsrep_uuid_t* uuid, char* str, size_t str_len);
+
+#define WSREP_MEMBER_NAME_LEN 32 //!< maximum logical member name length
+#define WSREP_INCOMING_LEN 256 //!< max Domain Name length + 0x00
+
+/*!
+ * member status
+ */
+typedef enum wsrep_member_status {
+ WSREP_MEMBER_UNDEFINED, //!< undefined state
+ WSREP_MEMBER_JOINER, //!< incomplete state, requested state transfer
+ WSREP_MEMBER_DONOR, //!< complete state, donates state transfer
+ WSREP_MEMBER_JOINED, //!< complete state
+ WSREP_MEMBER_SYNCED, //!< complete state, synchronized with group
+ WSREP_MEMBER_ERROR, //!< this and above is provider-specific error code
+ WSREP_MEMBER_MAX
+} wsrep_member_status_t;
+
+/*!
+ * static information about a group member (some fields are tentative yet)
+ */
+typedef struct wsrep_member_info {
+ wsrep_uuid_t id; //!< group-wide unique member ID
+ char name[WSREP_MEMBER_NAME_LEN]; //!< human-readable name
+ char incoming[WSREP_INCOMING_LEN]; //!< address for client requests
+} wsrep_member_info_t;
+
+/*!
+ * group status
+ */
+typedef enum wsrep_view_status {
+ WSREP_VIEW_PRIMARY, //!< primary group configuration (quorum present)
+ WSREP_VIEW_NON_PRIMARY, //!< non-primary group configuration (quorum lost)
+ WSREP_VIEW_DISCONNECTED, //!< not connected to group, retrying.
+ WSREP_VIEW_MAX
+} wsrep_view_status_t;
+
+/*!
+ * view of the group
+ */
+typedef struct wsrep_view_info {
+ wsrep_uuid_t uuid; //!< global state UUID
+ wsrep_seqno_t seqno; //!< global state seqno
+ wsrep_seqno_t view; //!< global view number
+ wsrep_view_status_t status; //!< view status
+ bool state_gap; //!< gap between global and local states
+ int my_idx; //!< index of this member in the view
+ int memb_num; //!< number of members in the view
+ int proto_ver; //!< application protocol agreed on in the view
+ wsrep_member_info_t members[1]; //!< array of member information
+} wsrep_view_info_t;
+
+/*!
+ * Magic string to tell provider to engage into trivial (empty) state transfer.
+ * No data will be passed, but the node shall be considered JOINED.
+ * Should be passed in sst_req parameter of wsrep_view_cb_t.
+ */
+#define WSREP_STATE_TRANSFER_TRIVIAL "trivial"
+
+/*!
+ * Magic string to tell provider not to engage in state transfer at all.
+ * The member will stay in WSREP_MEMBER_UNDEFINED state but will keep on
+ * receiving all writesets.
+ * Should be passed in sst_req parameter of wsrep_view_cb_t.
+ */
+#define WSREP_STATE_TRANSFER_NONE "none"
+
+/*!
+ * @brief group view handler
+ *
+ * This handler is called in total order corresponding to the group
+ * configuration change. It is to provide a vital information about
+ * new group view. If view info indicates existence of discontinuity
+ * between group and member states, state transfer request message
+ * should be filled in by the callback implementation.
+ *
+ * @note Currently it is assumed that sst_req is allocated using
+ * malloc()/calloc()/realloc() and it will be freed by
+ * wsrep implementation.
+ *
+ * @param app_ctx application context
+ * @param recv_ctx receiver context
+ * @param view new view on the group
+ * @param state current state
+ * @param state_len lenght of current state
+ * @param sst_req location to store SST request
+ * @param sst_req_len location to store SST request length or error code
+ * value of 0 means no SST.
+ */
+typedef void (*wsrep_view_cb_t) (void* app_ctx,
+ void* recv_ctx,
+ const wsrep_view_info_t* view,
+ const char* state,
+ size_t state_len,
+ void** sst_req,
+ ssize_t* sst_req_len);
+
+/*!
+ * @brief apply callback
+ *
+ * This handler is called from wsrep library to apply replicated write set
+ * Must support brute force applying for multi-master operation
+ *
+ * @param recv_ctx receiver context pointer provided by the application
+ * @param data data buffer containing the write set
+ * @param size data buffer size
+ * @param seqno global seqno part of the write set to be applied
+ *
+ * @return success code:
+ * @retval WSREP_OK
+ * @retval WSREP_NOT_IMPLEMENTED appl. does not support the write set format
+ * @retval WSREP_ERROR failed to apply the write set
+ */
+typedef enum wsrep_status (*wsrep_apply_cb_t) (void* recv_ctx,
+ const void* data,
+ size_t size,
+ wsrep_seqno_t seqno);
+
+/*!
+ * @brief commit callback
+ *
+ * This handler is called to commit the changes made by apply callback.
+ *
+ * @param recv_ctx receiver context pointer provided by the application
+ * @param seqno global seqno part of the write set to be committed
+ * @param commit true - commit writeset, false - rollback writeset
+ *
+ * @return success code:
+ * @retval WSREP_OK
+ * @retval WSREP_ERROR call failed
+ */
+typedef enum wsrep_status (*wsrep_commit_cb_t) (void* recv_ctx,
+ wsrep_seqno_t seqno,
+ bool commit);
+
+/*!
+ * @brief a callback to donate state snapshot
+ *
+ * This handler is called from wsrep library when it needs this node
+ * to deliver state to a new cluster member.
+ * No state changes will be committed for the duration of this call.
+ * Wsrep implementation may provide internal state to be transmitted
+ * to new cluster member for initial state.
+ *
+ * @param app_ctx application context
+ * @param recv_ctx receiver context
+ * @param msg state transfer request message
+ * @param msg_len state transfer request message length
+ * @param uuid current state uuid on this node
+ * @param seqno current state seqno on this node
+ * @param state current wsrep internal state buffer
+ * @param state_len current wsrep internal state buffer len
+ * @param bypass bypass snapshot transfer, only transfer uuid:seqno pair
+ * @return 0 for success or negative error code
+ */
+typedef int (*wsrep_sst_donate_cb_t) (void* app_ctx,
+ void* recv_ctx,
+ const void* msg,
+ size_t msg_len,
+ const wsrep_uuid_t* uuid,
+ wsrep_seqno_t seqno,
+ const char* state,
+ size_t state_len,
+ bool bypass);
+
+/*!
+ * @brief a callback to signal application that wsrep state is synced
+ * with cluster
+ *
+ * This callback is called after wsrep library has got in sync with
+ * rest of the cluster.
+ *
+ * @param app_ctx application context
+ */
+typedef void (*wsrep_synced_cb_t)(void* app_ctx);
+
+
+/*!
+ * Initialization parameters for wsrep, used as arguments for wsrep_init()
+ */
+struct wsrep_init_args
+{
+ void* app_ctx; //!< Application context for callbacks
+
+ /* Configuration parameters */
+ const char* node_name; //!< Symbolic name of this node (e.g. hostname)
+ const char* node_address; //!< Address to be used by wsrep provider
+ const char* node_incoming; //!< Address for incoming client connections
+ const char* data_dir; //!< Directory where wsrep files are kept if any
+ const char* options; //!< Provider-specific configuration string
+ int proto_ver; //!< Max supported application protocol version
+
+ /* Application initial state information. */
+ const wsrep_uuid_t* state_uuid; //!< Application state sequence UUID
+ wsrep_seqno_t state_seqno; //!< Applicaiton state sequence number
+ const char* state; //!< Initial state for wsrep implementation
+ size_t state_len; //!< Length of state buffer
+
+ /* Application callbacks */
+ wsrep_log_cb_t logger_cb; //!< logging handler
+ wsrep_view_cb_t view_handler_cb; //!< group view change handler
+
+ /* applier callbacks */
+ wsrep_apply_cb_t apply_cb; //!< apply callback
+ wsrep_commit_cb_t commit_cb; //!< commit callback
+
+ /* state snapshot transfer callbacks */
+ wsrep_sst_donate_cb_t sst_donate_cb; //!< starting to donate
+ wsrep_synced_cb_t synced_cb; //!< synced with group
+};
+
+/*! Type of the stats variable value in struct wsrep_status_var */
+typedef enum wsrep_var_type
+{
+ WSREP_VAR_STRING, //!< pointer to null-terminated string
+ WSREP_VAR_INT64, //!< int64_t
+ WSREP_VAR_DOUBLE //!< double
+}
+wsrep_var_type_t;
+
+/*! Generalized stats variable representation */
+struct wsrep_stats_var
+{
+ const char* name; //!< variable name
+ wsrep_var_type_t type; //!< variable value type
+ union {
+ int64_t _int64;
+ double _double;
+ const char* _string;
+ } value; //!< variable value
+};
+
+
+/*! Key part structure */
+typedef struct wsrep_key_part_
+{
+ const void* buf; /*!< Buffer containing key part data */
+ size_t buf_len; /*!< Length of buffer */
+} wsrep_key_part_t;
+
+/*! Key struct used to pass certification keys for transaction handling calls.
+ * A key consists of zero or more key parts. */
+typedef struct wsrep_key_
+{
+ const wsrep_key_part_t* key_parts; /*!< Array of key parts */
+ size_t key_parts_len; /*!< Length of key parts array */
+} wsrep_key_t;
+
+/*! Transaction handle struct passed for wsrep transaction handling calls */
+typedef struct wsrep_trx_handle_
+{
+ wsrep_trx_id_t trx_id; //!< transaction ID
+ void* opaque; //!< opaque provider transaction context data
+} wsrep_trx_handle_t;
+
+/*!
+ * @brief Helper method to reset trx handle state when trx id changes
+ *
+ * Instead of passing wsrep_trx_handle_t directly for wsrep calls,
+ * wrapping handle with this call offloads bookkeeping from
+ * application.
+ */
+static inline wsrep_trx_handle_t* wsrep_trx_handle_for_id(
+ wsrep_trx_handle_t* trx_handle,
+ wsrep_trx_id_t trx_id)
+{
+ if (trx_handle->trx_id != trx_id)
+ {
+ trx_handle->trx_id = trx_id;
+ trx_handle->opaque = NULL;
+ }
+ return trx_handle;
+}
+
+
+typedef struct wsrep_ wsrep_t;
+/*!
+ * wsrep interface for dynamically loadable libraries
+ */
+struct wsrep_ {
+
+ const char *version; //!< interface version string
+
+ /*!
+ * @brief Initializes wsrep provider
+ *
+ * @param wsrep this wsrep handle
+ * @param args wsrep initialization parameters
+ */
+ wsrep_status_t (*init) (wsrep_t* wsrep,
+ const struct wsrep_init_args* args);
+
+ /*!
+ * @brief Returns provider capabilities flag bitmap
+ *
+ * @param wsrep this wsrep handle
+ */
+ uint64_t (*capabilities) (wsrep_t* wsrep);
+
+ /*!
+ * @brief Passes provider-specific configuration string to provider.
+ *
+ * @param wsrep this wsrep handle
+ * @param conf configuration string
+ *
+ * @retval WSREP_OK configuration string was parsed successfully
+ * @retval WSREP_WARNING could't not parse conf string, no action taken
+ */
+ wsrep_status_t (*options_set) (wsrep_t* wsrep, const char* conf);
+
+ /*!
+ * @brief Returns provider-specific string with current configuration values.
+ *
+ * @param wsrep this wsrep handle
+ *
+ * @return a dynamically allocated string with current configuration
+ * parameter values
+ */
+ char* (*options_get) (wsrep_t* wsrep);
+
+ /*!
+ * @brief Opens connection to cluster
+ *
+ * Returns when either node is ready to operate as a part of the clsuter
+ * or fails to reach operating status.
+ *
+ * @param wsrep this wsrep handle
+ * @param cluster_name unique symbolic cluster name
+ * @param cluster_url URL-like cluster address (backend://address)
+ * @param state_donor name of the node to be asked for state transfer.
+ */
+ wsrep_status_t (*connect) (wsrep_t* wsrep,
+ const char* cluster_name,
+ const char* cluster_url,
+ const char* state_donor);
+
+ /*!
+ * @brief Closes connection to cluster.
+ *
+ * If state_uuid and/or state_seqno is not NULL, will store final state
+ * in there.
+ *
+ * @param wsrep this wsrep handler
+ */
+ wsrep_status_t (*disconnect)(wsrep_t* wsrep);
+
+ /*!
+ * @brief start receiving replication events
+ *
+ * This function never returns
+ *
+ * @param wsrep this wsrep handle
+ * @param recv_ctx receiver context
+ */
+ wsrep_status_t (*recv)(wsrep_t* wsrep, void* recv_ctx);
+
+ /*!
+ * @brief Replicates/logs result of transaction to other nodes and allocates
+ * required resources.
+ *
+ * Must be called before transaction commit. Returns success code, which
+ * caller must check.
+ * In case of WSREP_OK, starts commit critical section, transaction can
+ * commit. Otherwise transaction must rollback.
+ *
+ * @param wsrep this wsrep handle
+ * @param trx_handle transaction which is committing
+ * @param conn_id connection ID
+ * @param app_data application specific applying data
+ * @param data_len the size of the applying data
+ * @param flags fine tuning the replication WSREP_FLAG_*
+ * @param seqno seqno part of the global transaction ID
+ *
+ * @retval WSREP_OK cluster-wide commit succeeded
+ * @retval WSREP_TRX_FAIL must rollback transaction
+ * @retval WSREP_CONN_FAIL must close client connection
+ * @retval WSREP_NODE_FAIL must close all connections and reinit
+ */
+ wsrep_status_t (*pre_commit)(wsrep_t* wsrep,
+ wsrep_conn_id_t conn_id,
+ wsrep_trx_handle_t* trx_handle,
+ const void* app_data,
+ size_t data_len,
+ uint64_t flags,
+ wsrep_seqno_t* seqno);
+
+ /*!
+ * @brief Releases resources after transaction commit.
+ *
+ * Ends commit critical section.
+ *
+ * @param wsrep this wsrep handle
+ * @param trx_handle transaction which is committing
+ * @retval WSREP_OK post_commit succeeded
+ */
+ wsrep_status_t (*post_commit) (wsrep_t* wsrep,
+ wsrep_trx_handle_t* trx_handle);
+
+ /*!
+ * @brief Releases resources after transaction rollback.
+ *
+ * @param wsrep this wsrep handle
+ * @param trx_handle transaction which is committing
+ * @retval WSREP_OK post_rollback succeeded
+ */
+ wsrep_status_t (*post_rollback)(wsrep_t* wsrep,
+ wsrep_trx_handle_t* trx_handle);
+
+ /*!
+ * @brief Replay trx as a slave write set
+ *
+ * If local trx has been aborted by brute force, and it has already
+ * replicated before this abort, we must try if we can apply it as
+ * slave trx. Note that slave nodes see only trx write sets and certification
+ * test based on write set content can be different to DBMS lock conflicts.
+ *
+ * @param wsrep this wsrep handle
+ * @param trx_handle transaction which is committing
+ * @param trx_ctx transaction context
+ *
+ * @retval WSREP_OK cluster commit succeeded
+ * @retval WSREP_TRX_FAIL must rollback transaction
+ * @retval WSREP_BF_ABORT brute force abort happened after trx replicated
+ * must rollback transaction and try to replay
+ * @retval WSREP_CONN_FAIL must close client connection
+ * @retval WSREP_NODE_FAIL must close all connections and reinit
+ */
+ wsrep_status_t (*replay_trx)(wsrep_t* wsrep,
+ wsrep_trx_handle_t* trx_handle,
+ void* trx_ctx);
+
+ /*!
+ * @brief Abort pre_commit() call of another thread.
+ *
+ * It is possible, that some high-priority transaction needs to abort
+ * another transaction which is in pre_commit() call waiting for resources.
+ *
+ * The kill routine checks that abort is not attmpted against a transaction
+ * which is front of the caller (in total order).
+ *
+ * @param wsrep this wsrep handle
+ * @param bf_seqno seqno of brute force trx, running this cancel
+ * @param victim_trx transaction to be aborted, and which is committing
+ *
+ * @retval WSREP_OK abort secceded
+ * @retval WSREP_WARNING abort failed
+ */
+ wsrep_status_t (*abort_pre_commit)(wsrep_t* wsrep,
+ wsrep_seqno_t bf_seqno,
+ wsrep_trx_id_t victim_trx);
+
+ /*!
+ * @brief Appends a query in transaction's write set
+ *
+ * @param wsrep this wsrep handle
+ * @param trx_handle transaction handle
+ * @param query SQL statement string
+ * @param timeval time to use for time functions
+ * @param randseed seed for rand
+ */
+ wsrep_status_t (*append_query)(wsrep_t* wsrep,
+ wsrep_trx_handle_t* trx_handle,
+ const char* query,
+ time_t timeval,
+ uint32_t randseed);
+
+ /*!
+ * @brief Appends a row reference in transaction's write set
+ *
+ * @param wsrep this wsrep handle
+ * @param trx_handle transaction handle
+ * @param key array of keys
+ * @param key_len length of the array of keys
+ * @param shared boolean denoting if key corresponds to shared resource
+ */
+ wsrep_status_t (*append_key)(wsrep_t* wsrep,
+ wsrep_trx_handle_t* trx_handle,
+ const wsrep_key_t* key,
+ size_t key_len,
+ bool shared);
+ /*!
+ * @brief Appends data in transaction's write set
+ *
+ * This method can be called any time before commit and it
+ * appends data block into transaction's write set.
+ *
+ * @param wsrep this wsrep handle
+ * @param trx_handle transaction handle
+ * @param data data buffer
+ * @param data_len data buffer length
+ */
+ wsrep_status_t (*append_data)(wsrep_t* wsrep,
+ wsrep_trx_handle_t* trx_handle,
+ const void* data,
+ size_t data_len);
+
+
+ /*!
+ * @brief Get causal ordering for read operation
+ *
+ * This call will block until causal ordering with all possible
+ * preceding writes in the cluster is guaranteed. If pointer to
+ * seqno is non-null, the call stores the global transaction ID
+ * of the last transaction which is guaranteed to be ordered
+ * causally before this call.
+ *
+ * @param wsrep this wsrep handle
+ * @param seqno location to store global transaction ID
+ */
+ wsrep_status_t (*causal_read)(wsrep_t* wsrep, wsrep_seqno_t* seqno);
+
+ /*!
+ * @brief Clears allocated connection context.
+ *
+ * Whenever a new connection ID is passed to wsrep provider through
+ * any of the API calls, a connection context is allocated for this
+ * connection. This call is to explicitly notify provider fo connection
+ * closing.
+ *
+ * @param wsrep this wsrep handle
+ * @param conn_id connection ID
+ * @param query the 'set database' query
+ * @param query_len length of query (does not end with 0)
+ */
+ wsrep_status_t (*free_connection)(wsrep_t* wsrep,
+ wsrep_conn_id_t conn_id);
+
+ /*!
+ * @brief Replicates a query and starts "total order isolation" section.
+ *
+ * Replicates the query and returns success code, which
+ * caller must check. Total order isolation continues
+ * until to_execute_end() is called.
+ *
+ * @param wsrep this wsrep handle
+ * @param conn_id connection ID
+ * @param key array of keys
+ * @param key_len lenght of the array of keys
+ * @param query query to be executed
+ * @param query_len length of the query string
+ * @param seqno seqno part of the action ID
+ *
+ * @retval WSREP_OK cluster commit succeeded
+ * @retval WSREP_CONN_FAIL must close client connection
+ * @retval WSREP_NODE_FAIL must close all connections and reinit
+ */
+ wsrep_status_t (*to_execute_start)(wsrep_t* wsrep,
+ wsrep_conn_id_t conn_id,
+ const wsrep_key_t* key,
+ size_t key_len,
+ const void* query,
+ size_t query_len,
+ wsrep_seqno_t* seqno);
+
+ /*!
+ * @brief Ends the total order isolation section.
+ *
+ * Marks the end of total order isolation. TO locks are freed
+ * and other transactions are free to commit from this point on.
+ *
+ * @param wsrep this wsrep handle
+ * @param conn_id connection ID
+ *
+ * @retval WSREP_OK cluster commit succeeded
+ * @retval WSREP_CONN_FAIL must close client connection
+ * @retval WSREP_NODE_FAIL must close all connections and reinit
+ */
+ wsrep_status_t (*to_execute_end)(wsrep_t* wsrep, wsrep_conn_id_t conn_id);
+
+ /*!
+ * @brief Signals to wsrep provider that state snapshot has been sent to
+ * joiner.
+ *
+ * @param wsrep this wsrep handle
+ * @param uuid sequence UUID (group UUID)
+ * @param seqno sequence number or negative error code of the operation
+ */
+ wsrep_status_t (*sst_sent)(wsrep_t* wsrep,
+ const wsrep_uuid_t* uuid,
+ wsrep_seqno_t seqno);
+
+ /*!
+ * @brief Signals to wsrep provider that new state snapshot has been received.
+ * May deadlock if called from sst_prepare_cb.
+ *
+ * @param wsrep this wsrep handle
+ * @param uuid sequence UUID (group UUID)
+ * @param seqno sequence number or negative error code of the operation
+ * @param state initial state provided by SST donor
+ * @param state_len length of state buffer
+ */
+ wsrep_status_t (*sst_received)(wsrep_t* wsrep,
+ const wsrep_uuid_t* uuid,
+ wsrep_seqno_t seqno,
+ const char* state,
+ size_t state_len);
+
+
+ /*!
+ * @brief Generate request for consistent snapshot.
+ *
+ * If successfull, this call will generate internally SST request
+ * which in turn triggers calling SST donate callback on the nodes
+ * specified in donor_spec. If donor_spec is null, callback is
+ * called only locally. This call will block until sst_sent is called
+ * from callback.
+ *
+ * @param wsrep this wsrep handle
+ * @param msg context message for SST donate callback
+ * @param msg_len length of context message
+ * @param donor_spec list of snapshot donors
+ */
+ wsrep_status_t (*snapshot)(wsrep_t* wsrep,
+ const void* msg,
+ size_t msg_len,
+ const char* donor_spec);
+
+ /*!
+ * @brief Returns an array fo status variables.
+ * Array is terminated by Null variable name.
+ *
+ * @param wsrep this wsrep handle
+ * @return array of struct wsrep_status_var.
+ */
+ struct wsrep_stats_var* (*stats_get) (wsrep_t* wsrep);
+
+ /*!
+ * @brief Release resources that might be associated with the array.
+ *
+ * @param wsrep this wsrep handle.
+ */
+ void (*stats_free) (wsrep_t* wsrep, struct wsrep_stats_var* var_array);
+
+ /*!
+ * @brief Pauses writeset applying/committing.
+ *
+ * @return global sequence number of the paused state or negative error code.
+ */
+ wsrep_seqno_t (*pause) (wsrep_t* wsrep);
+
+ /*!
+ * @brief Resumes writeset applying/committing.
+ */
+ wsrep_status_t (*resume) (wsrep_t* wsrep);
+
+ /*!
+ * @brief Desynchronize from cluster
+ *
+ * Effectively turns off flow control for this node, allowing it
+ * to fall behind the cluster.
+ */
+ wsrep_status_t (*desync) (wsrep_t* wsrep);
+
+ /*!
+ * @brief Request to resynchronize with cluster.
+ *
+ * Effectively turns on flow control. Asynchronous - actual synchronization
+ * event to be deliverred via sync_cb.
+ */
+ wsrep_status_t (*resync) (wsrep_t* wsrep);
+
+ /*!
+ * @brief Acquire global named lock
+ *
+ * @param wsrep wsrep provider handle
+ * @param name lock name
+ * @param owner 64-bit owner ID
+ * @param tout timeout in nanoseconds.
+ * 0 - return immediately, -1 wait forever.
+ * @return wsrep status or negative error code
+ * @retval -EDEADLK lock was already acquired by this thread
+ * @retval -EBUSY lock was busy
+ */
+ wsrep_status_t (*lock) (wsrep_t* wsrep, const char* name, int64_t owner,
+ int64_t tout);
+
+ /*!
+ * @brief Release global named lock
+ *
+ * @param wsrep wsrep provider handle
+ * @param name lock name
+ * @param owner 64-bit owner ID
+ * @return wsrep status or negative error code
+ * @retval -EPERM lock does not belong to this owner
+ */
+ wsrep_status_t (*unlock) (wsrep_t* wsrep, const char* name, int64_t owner);
+
+ /*!
+ * @brief Check if global named lock is locked
+ *
+ * @param wsrep wsrep provider handle
+ * @param name lock name
+ * @param owner if not NULL will contain 64-bit owner ID
+ * @param node if not NULL will contain owner's node UUID
+ * @return true if lock is locked
+ */
+ bool (*is_locked) (wsrep_t* wsrep, const char* name, int64_t* conn,
+ wsrep_uuid_t* node);
+
+ /*!
+ * wsrep provider name
+ */
+ const char* provider_name;
+
+ /*!
+ * wsrep provider version
+ */
+ const char* provider_version;
+
+ /*!
+ * wsrep provider vendor name
+ */
+ const char* provider_vendor;
+
+ /*!
+ * @brief Frees allocated resources before unloading the library.
+ * @param wsrep this wsrep handle
+ */
+ void (*free)(wsrep_t* wsrep);
+
+ void *dlh; //!< reserved for future use
+ void *ctx; //!< reserved for implemetation private context
+};
+
+typedef int (*wsrep_loader_fun)(wsrep_t*);
+
+/*!
+ *
+ * @brief Loads wsrep library
+ *
+ * @param spec path to wsrep library. If NULL or WSREP_NONE initialises dummy
+ * pass-through implementation.
+ * @param hptr wsrep handle
+ * @param log_cb callback to handle loader messages. Otherwise writes to stderr.
+ *
+ * @return zero on success, errno on failure
+ */
+int wsrep_load(const char* spec, wsrep_t** hptr, wsrep_log_cb_t log_cb);
+
+/*!
+ * @brief Unloads wsrep library and frees associated resources
+ *
+ * @param hptr wsrep handler pointer
+ */
+void wsrep_unload(wsrep_t* hptr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* WSREP_H */
diff --git a/wsrep/wsrep_dummy.c b/wsrep/wsrep_dummy.c
new file mode 100644
index 00000000000..6d01ce14b4e
--- /dev/null
+++ b/wsrep/wsrep_dummy.c
@@ -0,0 +1,368 @@
+/* Copyright (C) 2009-2010 Codership Oy <info@codersihp.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*! @file Dummy wsrep API implementation. */
+
+#include <errno.h>
+
+#include "wsrep_api.h"
+
+/*! Dummy backend context. */
+typedef struct wsrep_dummy
+{
+ wsrep_log_cb_t log_fn;
+} wsrep_dummy_t;
+
+/* Get pointer to wsrep_dummy context from wsrep_t pointer */
+#define WSREP_DUMMY(_p) ((wsrep_dummy_t *) (_p)->ctx)
+
+/* Trace function usage a-la DBUG */
+#define WSREP_DBUG_ENTER(_w) do { \
+ if (WSREP_DUMMY(_w)) { \
+ if (WSREP_DUMMY(_w)->log_fn) \
+ WSREP_DUMMY(_w)->log_fn(WSREP_LOG_DEBUG, __FUNCTION__); \
+ } \
+ } while (0)
+
+
+static void dummy_free(wsrep_t *w)
+{
+ WSREP_DBUG_ENTER(w);
+ free(w->ctx);
+ w->ctx = NULL;
+}
+
+static wsrep_status_t dummy_init (wsrep_t* w,
+ const struct wsrep_init_args* args)
+{
+ WSREP_DUMMY(w)->log_fn = args->logger_cb;
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static uint64_t dummy_capabilities (wsrep_t* w __attribute__((unused)))
+{
+ return 0;
+}
+
+static wsrep_status_t dummy_options_set(
+ wsrep_t* w,
+ const char* conf __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static char* dummy_options_get (wsrep_t* w)
+{
+ WSREP_DBUG_ENTER(w);
+ return NULL;
+}
+
+static wsrep_status_t dummy_connect(
+ wsrep_t* w,
+ const char* name __attribute__((unused)),
+ const char* url __attribute__((unused)),
+ const char* donor __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_disconnect(wsrep_t* w)
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_recv(wsrep_t* w,
+ void* recv_ctx __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_pre_commit(
+ wsrep_t* w,
+ const wsrep_conn_id_t conn_id __attribute__((unused)),
+ wsrep_trx_handle_t* trx_handle __attribute__((unused)),
+ const void* query __attribute__((unused)),
+ const size_t query_len __attribute__((unused)),
+ uint64_t flags __attribute__((unused)),
+ wsrep_seqno_t* seqno __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_post_commit(
+ wsrep_t* w,
+ wsrep_trx_handle_t* trx_handle __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_post_rollback(
+ wsrep_t* w,
+ wsrep_trx_handle_t* trx_handle __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_replay_trx(
+ wsrep_t* w,
+ wsrep_trx_handle_t* trx_handle __attribute__((unused)),
+ void* trx_ctx __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_abort_pre_commit(
+ wsrep_t* w,
+ const wsrep_seqno_t bf_seqno __attribute__((unused)),
+ const wsrep_trx_id_t trx_id __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_append_query(
+ wsrep_t* w,
+ wsrep_trx_handle_t* trx_handle __attribute__((unused)),
+ const char* query __attribute__((unused)),
+ const time_t timeval __attribute__((unused)),
+ const uint32_t randseed __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_append_row_key(
+ wsrep_t* w,
+ wsrep_trx_handle_t* trx_handle __attribute__((unused)),
+ const wsrep_key_t* key __attribute__((unused)),
+ const size_t key_len __attribute__((unused)),
+ const bool shared __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_append_data(
+ wsrep_t* w,
+ wsrep_trx_handle_t* trx_handle __attribute__((unused)),
+ const void* data __attribute__((unused)),
+ size_t data_len __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_causal_read(
+ wsrep_t* w,
+ wsrep_seqno_t* seqno __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_free_connection(
+ wsrep_t* w,
+ const wsrep_conn_id_t conn_id __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_to_execute_start(
+ wsrep_t* w,
+ const wsrep_conn_id_t conn_id __attribute__((unused)),
+ const wsrep_key_t* key __attribute__((unused)),
+ const size_t key_len __attribute__((unused)),
+ const void* query __attribute__((unused)),
+ const size_t query_len __attribute__((unused)),
+ wsrep_seqno_t* seqno __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_to_execute_end(
+ wsrep_t* w,
+ const wsrep_conn_id_t conn_id __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_sst_sent(
+ wsrep_t* w,
+ const wsrep_uuid_t* uuid __attribute__((unused)),
+ wsrep_seqno_t seqno __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_sst_received(
+ wsrep_t* w,
+ const wsrep_uuid_t* uuid __attribute__((unused)),
+ const wsrep_seqno_t seqno __attribute__((unused)),
+ const char* state __attribute__((unused)),
+ const size_t state_len __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_snapshot(
+ wsrep_t* w,
+ const void* msg __attribute__((unused)),
+ const size_t msg_len __attribute__((unused)),
+ const char* donor_spec __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static struct wsrep_stats_var dummy_stats[] = {
+ { NULL, WSREP_VAR_STRING, { 0 } }
+};
+
+static struct wsrep_stats_var* dummy_stats_get (wsrep_t* w)
+{
+ WSREP_DBUG_ENTER(w);
+ return dummy_stats;
+}
+
+static void dummy_stats_free (
+ wsrep_t* w,
+ struct wsrep_stats_var* stats __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+}
+
+static wsrep_seqno_t dummy_pause (wsrep_t* w)
+{
+ WSREP_DBUG_ENTER(w);
+ return -ENOSYS;
+}
+
+static wsrep_status_t dummy_resume (wsrep_t* w)
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_desync (wsrep_t* w)
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_NOT_IMPLEMENTED;
+}
+
+static wsrep_status_t dummy_resync (wsrep_t* w)
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static wsrep_status_t dummy_lock (wsrep_t* w,
+ const char* s __attribute__((unused)),
+ int64_t o __attribute__((unused)),
+ int64_t t __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_NOT_IMPLEMENTED;
+}
+
+static wsrep_status_t dummy_unlock (wsrep_t* w,
+ const char* s __attribute__((unused)),
+ int64_t o __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return WSREP_OK;
+}
+
+static bool dummy_is_locked (wsrep_t* w,
+ const char* s __attribute__((unused)),
+ int64_t* o __attribute__((unused)),
+ wsrep_uuid_t* t __attribute__((unused)))
+{
+ WSREP_DBUG_ENTER(w);
+ return false;
+}
+
+static wsrep_t dummy_iface = {
+ WSREP_INTERFACE_VERSION,
+ &dummy_init,
+ &dummy_capabilities,
+ &dummy_options_set,
+ &dummy_options_get,
+ &dummy_connect,
+ &dummy_disconnect,
+ &dummy_recv,
+ &dummy_pre_commit,
+ &dummy_post_commit,
+ &dummy_post_rollback,
+ &dummy_replay_trx,
+ &dummy_abort_pre_commit,
+ &dummy_append_query,
+ &dummy_append_row_key,
+ &dummy_append_data,
+ &dummy_causal_read,
+ &dummy_free_connection,
+ &dummy_to_execute_start,
+ &dummy_to_execute_end,
+ &dummy_sst_sent,
+ &dummy_sst_received,
+ &dummy_snapshot,
+ &dummy_stats_get,
+ &dummy_stats_free,
+ &dummy_pause,
+ &dummy_resume,
+ &dummy_desync,
+ &dummy_resync,
+ &dummy_lock,
+ &dummy_unlock,
+ &dummy_is_locked,
+ WSREP_NONE,
+ WSREP_INTERFACE_VERSION,
+ "Codership Oy <info@codership.com>",
+ &dummy_free,
+ NULL,
+ NULL
+};
+
+int wsrep_dummy_loader(wsrep_t* w)
+{
+ if (!w)
+ return EINVAL;
+
+ *w = dummy_iface;
+
+ // allocate private context
+ if (!(w->ctx = malloc(sizeof(wsrep_dummy_t))))
+ return ENOMEM;
+
+ // initialize private context
+ WSREP_DUMMY(w)->log_fn = NULL;
+
+ return 0;
+}
+
diff --git a/wsrep/wsrep_loader.c b/wsrep/wsrep_loader.c
new file mode 100644
index 00000000000..b4460658f80
--- /dev/null
+++ b/wsrep/wsrep_loader.c
@@ -0,0 +1,199 @@
+/* Copyright (C) 2009-2011 Codership Oy <info@codersihp.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*! @file wsrep implementation loader */
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "wsrep_api.h"
+
+// Logging stuff for the loader
+static const char* log_levels[] = {"FATAL", "ERROR", "WARN", "INFO", "DEBUG"};
+
+static void default_logger (wsrep_log_level_t lvl, const char* msg)
+{
+ fprintf (stderr, "wsrep loader: [%s] %s\n", log_levels[lvl], msg);
+}
+
+static wsrep_log_cb_t logger = default_logger;
+
+/**************************************************************************
+ * Library loader
+ **************************************************************************/
+
+static int verify(const wsrep_t *wh, const char *iface_ver)
+{
+ const size_t msg_len = 128;
+ char msg[msg_len];
+
+#define VERIFY(_p) if (!(_p)) { \
+ snprintf(msg, msg_len, "wsrep_load(): verify(): %s\n", # _p); \
+ logger (WSREP_LOG_ERROR, msg); \
+ return EINVAL; \
+ }
+
+ VERIFY(wh);
+ VERIFY(wh->version);
+
+ if (strcmp(wh->version, iface_ver)) {
+ snprintf (msg, msg_len,
+ "provider interface version mismatch: need '%s', found '%s'",
+ iface_ver, wh->version);
+ logger (WSREP_LOG_ERROR, msg);
+ return EINVAL;
+ }
+
+ VERIFY(wh->init);
+ VERIFY(wh->options_set);
+ VERIFY(wh->options_get);
+ VERIFY(wh->connect);
+ VERIFY(wh->disconnect);
+ VERIFY(wh->recv);
+ VERIFY(wh->pre_commit);
+ VERIFY(wh->post_commit);
+ VERIFY(wh->post_rollback);
+ VERIFY(wh->replay_trx);
+ VERIFY(wh->abort_pre_commit);
+ VERIFY(wh->append_query);
+ VERIFY(wh->append_key);
+ VERIFY(wh->free_connection);
+ VERIFY(wh->to_execute_start);
+ VERIFY(wh->to_execute_end);
+ VERIFY(wh->sst_sent);
+ VERIFY(wh->sst_received);
+ VERIFY(wh->stats_get);
+ VERIFY(wh->stats_free);
+ VERIFY(wh->pause);
+ VERIFY(wh->resume);
+ VERIFY(wh->desync);
+ VERIFY(wh->resync);
+ VERIFY(wh->lock);
+ VERIFY(wh->unlock);
+ VERIFY(wh->is_locked);
+ VERIFY(wh->provider_name);
+ VERIFY(wh->provider_version);
+ VERIFY(wh->provider_vendor);
+ VERIFY(wh->free);
+ return 0;
+}
+
+
+static wsrep_loader_fun wsrep_dlf(void *dlh, const char *sym)
+{
+ union {
+ wsrep_loader_fun dlfun;
+ void *obj;
+ } alias;
+ alias.obj = dlsym(dlh, sym);
+ return alias.dlfun;
+}
+
+extern int wsrep_dummy_loader(wsrep_t *w);
+
+int wsrep_load(const char *spec, wsrep_t **hptr, wsrep_log_cb_t log_cb)
+{
+ int ret = 0;
+ void *dlh = NULL;
+ wsrep_loader_fun dlfun;
+ const size_t msg_len = 1024;
+ char msg[msg_len + 1];
+ msg[msg_len] = 0;
+
+ if (NULL != log_cb)
+ logger = log_cb;
+
+ if (!(spec && hptr))
+ return EINVAL;
+
+ snprintf (msg, msg_len,
+ "wsrep_load(): loading provider library '%s'", spec);
+ logger (WSREP_LOG_INFO, msg);
+
+ if (!(*hptr = malloc(sizeof(wsrep_t)))) {
+ logger (WSREP_LOG_FATAL, "wsrep_load(): out of memory");
+ return ENOMEM;
+ }
+
+ if (!spec || strcmp(spec, WSREP_NONE) == 0) {
+ if ((ret = wsrep_dummy_loader(*hptr)) != 0) {
+ free (*hptr);
+ *hptr = NULL;
+ }
+ return ret;
+ }
+
+ if (!(dlh = dlopen(spec, RTLD_NOW | RTLD_LOCAL))) {
+ snprintf(msg, msg_len, "wsrep_load(): dlopen(): %s", dlerror());
+ logger (WSREP_LOG_ERROR, msg);
+ ret = EINVAL;
+ goto out;
+ }
+
+ if (!(dlfun = wsrep_dlf(dlh, "wsrep_loader"))) {
+ ret = EINVAL;
+ goto out;
+ }
+
+ if ((ret = (*dlfun)(*hptr)) != 0) {
+ snprintf(msg, msg_len, "wsrep_load(): loader failed: %s",
+ strerror(ret));
+ logger (WSREP_LOG_ERROR, msg);
+ goto out;
+ }
+
+ if ((ret = verify(*hptr, WSREP_INTERFACE_VERSION)) != 0) {
+ snprintf (msg, msg_len,
+ "wsrep_load(): interface version mismatch: my version %s, "
+ "provider version %s", WSREP_INTERFACE_VERSION,
+ (*hptr)->version);
+ logger (WSREP_LOG_ERROR, msg);
+ goto out;
+ }
+
+ (*hptr)->dlh = dlh;
+
+out:
+ if (ret != 0) {
+ if (dlh) dlclose(dlh);
+ free(*hptr);
+ *hptr = NULL;
+ } else {
+ snprintf (msg, msg_len,
+ "wsrep_load(): %s %s by %s loaded succesfully.",
+ (*hptr)->provider_name, (*hptr)->provider_version,
+ (*hptr)->provider_vendor);
+ logger (WSREP_LOG_INFO, msg);
+ }
+
+ return ret;
+}
+
+void wsrep_unload(wsrep_t *hptr)
+{
+ if (!hptr) {
+ logger (WSREP_LOG_WARN, "wsrep_unload(): null pointer.");
+ } else {
+ if (hptr->free)
+ hptr->free(hptr);
+ if (hptr->dlh)
+ dlclose(hptr->dlh);
+ free(hptr);
+ }
+}
+
diff --git a/wsrep/wsrep_uuid.c b/wsrep/wsrep_uuid.c
new file mode 100644
index 00000000000..c99240cc071
--- /dev/null
+++ b/wsrep/wsrep_uuid.c
@@ -0,0 +1,78 @@
+/* Copyright (C) 2009 Codership Oy <info@codersihp.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*! @file Helper functions to deal with history UUID string representations */
+
+#include <errno.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#include "wsrep_api.h"
+
+/*!
+ * Read UUID from string
+ * @return length of UUID string representation or -EINVAL in case of error
+ */
+ssize_t
+wsrep_uuid_scan (const char* str, size_t str_len, wsrep_uuid_t* uuid)
+{
+ size_t uuid_len = 0;
+ size_t uuid_offt = 0;
+
+ while (uuid_len + 1 < str_len) {
+ if ((4 == uuid_offt || 6 == uuid_offt || 8 == uuid_offt ||
+ 10 == uuid_offt) && str[uuid_len] == '-') {
+ // skip dashes after 4th, 6th, 8th and 10th positions
+ uuid_len += 1;
+ continue;
+ }
+ if (isxdigit(str[uuid_len]) && isxdigit(str[uuid_len + 1])) {
+ // got hex digit
+ sscanf (str + uuid_len, "%2hhx", uuid->uuid + uuid_offt);
+ uuid_len += 2;
+ uuid_offt += 1;
+ if (sizeof (uuid->uuid) == uuid_offt)
+ return uuid_len;
+ }
+ else {
+ break;
+ }
+ }
+
+ *uuid = WSREP_UUID_UNDEFINED;
+ return -EINVAL;
+}
+
+/*!
+ * Write UUID to string
+ * @return length of UUID string representation or -EMSGSIZE if string is too
+ * short
+ */
+ssize_t
+wsrep_uuid_print (const wsrep_uuid_t* uuid, char* str, size_t str_len)
+{
+ if (str_len > 36) {
+ const unsigned char* u = uuid->uuid;
+ return snprintf(str, str_len, "%02x%02x%02x%02x-%02x%02x-%02x%02x-"
+ "%02x%02x-%02x%02x%02x%02x%02x%02x",
+ u[ 0], u[ 1], u[ 2], u[ 3], u[ 4], u[ 5], u[ 6], u[ 7],
+ u[ 8], u[ 9], u[10], u[11], u[12], u[13], u[14], u[15]);
+ }
+ else {
+ return -EMSGSIZE;
+ }
+}
+