diff options
author | Julius Goryavsky <julius.goryavsky@mariadb.com> | 2021-12-23 10:21:32 +0100 |
---|---|---|
committer | Julius Goryavsky <julius.goryavsky@mariadb.com> | 2021-12-23 14:00:25 +0100 |
commit | 991b3d34ec71d466fada9c86fb3cf00dc57c3d0d (patch) | |
tree | 9d03b9e08bfc6ada367c982cd375ab07b871589d | |
parent | 98a2369eb925b2281d5286c073c89ddadea29b17 (diff) | |
download | mariadb-git-bb-10.3-julius.tar.gz |
MDEV-24097: galera[_3nodes] suite tests in MTR sporadically failsbb-10.3-julius
This commit contains a fix for instability when testing Galera
and when restarting nodes quickly:
1) Protection against a "stuck" old SST process during the execution
of the new SST (after restarting the node) is now implemented for
mariabackup / xtrabackup, which should help to avoid almost all
conflicts due to the use of the same ports - both during testing
with mtr, so and when restarting nodes quickly in a production
environment.
2) Added more protection to scripts against unexpected return of
the rc != 0 (in the commands for deleting temporary files, etc).
3) Added protection against unexpected crashes during binlog transfer
(in SST scripts for rsync).
4) Spaces and some special characters in binlog filenames shouldn't
be a problem now (at the script level).
5) Daemon process termination tracking has been made more robust
against crashes due to unexpected termination of the previous SST
process while new scripts are running.
6) Reading ssl encryption parameters has been moved from specific
SST scripts to a common wsrep_sst_common.sh script, which allows
unified error handling, unified diagnostics and simplifies script
revisions in the future.
7) Improved diagnostics of errors related to the use of openssl.
8) Corrections have been made for xtrabackup-v2 (both in tests and in
the script code) that restore the work of xtrabackup with updated
versions of innodb.
9) Fixed some tests for galera_3nodes, although the complete solution
for the problem of starting three nodes at the same time on fast
machines will be done in a separate commit.
No additional tests are required as this commit fixes problems with
existing tests.
MDEV-24097: Additional test result correction
-rw-r--r-- | mysql-test/suite/galera/disabled.def | 2 | ||||
-rw-r--r-- | mysql-test/suite/galera_3nodes/r/galera_ist_gcache_rollover.result | 4 | ||||
-rw-r--r-- | mysql-test/suite/galera_3nodes/t/galera_ist_gcache_rollover.test | 10 | ||||
-rw-r--r-- | scripts/wsrep_sst_common.sh | 142 | ||||
-rw-r--r-- | scripts/wsrep_sst_mariabackup.sh | 109 | ||||
-rw-r--r-- | scripts/wsrep_sst_rsync.sh | 161 |
6 files changed, 214 insertions, 214 deletions
diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def index 955a2c82ebc..84babda2fa0 100644 --- a/mysql-test/suite/galera/disabled.def +++ b/mysql-test/suite/galera/disabled.def @@ -28,5 +28,3 @@ query_cache: MDEV-15805 Test failure on galera.query_cache versioning_trx_id: MDEV-18590: galera.versioning_trx_id: Test failure: mysqltest: Result content mismatch galera_wsrep_provider_unset_set: wsrep_provider is read-only for security reasons pxc-421: wsrep_provider is read-only for security reasons -galera_sst_xtrabackup-v2: Test fails due to innodb issues -galera_sst_xtrabackup-v2_data_dir: Test fails due to innodb issues diff --git a/mysql-test/suite/galera_3nodes/r/galera_ist_gcache_rollover.result b/mysql-test/suite/galera_3nodes/r/galera_ist_gcache_rollover.result index aa3e349eda7..996363a1ea8 100644 --- a/mysql-test/suite/galera_3nodes/r/galera_ist_gcache_rollover.result +++ b/mysql-test/suite/galera_3nodes/r/galera_ist_gcache_rollover.result @@ -6,10 +6,10 @@ CREATE TABLE t1 (f1 INTEGER PRIMARY KEY); INSERT INTO t1 VALUES (01), (02), (03), (04), (05); connection node_2; Unloading wsrep provider ... -SET GLOBAL wsrep_provider = 'none'; +SET GLOBAL wsrep_cluster_address = ''; connection node_3; Unloading wsrep provider ... -SET GLOBAL wsrep_provider = 'none'; +SET GLOBAL wsrep_cluster_address = ''; connection node_1; INSERT INTO t1 VALUES (11), (12), (13), (14), (15); INSERT INTO t1 VALUES (21), (22), (23), (24), (25); diff --git a/mysql-test/suite/galera_3nodes/t/galera_ist_gcache_rollover.test b/mysql-test/suite/galera_3nodes/t/galera_ist_gcache_rollover.test index 0032f8d6ee1..2719316fb84 100644 --- a/mysql-test/suite/galera_3nodes/t/galera_ist_gcache_rollover.test +++ b/mysql-test/suite/galera_3nodes/t/galera_ist_gcache_rollover.test @@ -29,9 +29,11 @@ INSERT INTO t1 VALUES (01), (02), (03), (04), (05); # Disconnect nodes #2 and #3 --connection node_2 +--let $wsrep_cluster_address_orig2 = `select @@wsrep_cluster_address` --source suite/galera/include/galera_stop_replication.inc --connection node_3 +--let $wsrep_cluster_address_orig3 = `select @@wsrep_cluster_address` --source suite/galera/include/galera_stop_replication.inc --connection node_1 @@ -51,8 +53,8 @@ INSERT INTO t1 VALUES (21), (22), (23), (24), (25); # ... and restart providers to force IST --connection node_2 --disable_query_log ---eval SET GLOBAL wsrep_provider = '$wsrep_provider_orig'; ---eval SET GLOBAL wsrep_cluster_address = '$wsrep_cluster_address_orig'; +SET GLOBAL wsrep_cluster_address=''; +--eval SET GLOBAL wsrep_cluster_address = '$wsrep_cluster_address_orig2'; --enable_query_log --connection node_1 @@ -60,8 +62,8 @@ INSERT INTO t1 VALUES (31), (32), (33), (34), (35); --connection node_3 --disable_query_log ---eval SET GLOBAL wsrep_provider = '$wsrep_provider_orig'; ---eval SET GLOBAL wsrep_cluster_address = '$wsrep_cluster_address_orig'; +SET GLOBAL wsrep_cluster_address=''; +--eval SET GLOBAL wsrep_cluster_address = '$wsrep_cluster_address_orig3'; --enable_query_log --connection node_1 diff --git a/scripts/wsrep_sst_common.sh b/scripts/wsrep_sst_common.sh index 040cd31caa9..deebe7cf820 100644 --- a/scripts/wsrep_sst_common.sh +++ b/scripts/wsrep_sst_common.sh @@ -17,7 +17,7 @@ # This is a common command line parser to be sourced by other SST scripts -set -u +set -ue # Setting the path for some utilities on CentOS export PATH="$PATH:/usr/sbin:/usr/bin:/sbin:/bin" @@ -879,9 +879,9 @@ fi wsrep_cleanup_progress_file() { - [ -n "$SST_PROGRESS_FILE" -a \ - -f "$SST_PROGRESS_FILE" ] && \ - rm -f "$SST_PROGRESS_FILE" 2>/dev/null || : + if [ -n "$SST_PROGRESS_FILE" -a -f "$SST_PROGRESS_FILE" ]; then + rm -f "$SST_PROGRESS_FILE" 2>/dev/null || : + fi } wsrep_check_program() @@ -897,13 +897,10 @@ wsrep_check_program() wsrep_check_programs() { local ret=0 - - while [ $# -gt 0 ] - do + while [ $# -gt 0 ]; do wsrep_check_program "$1" || ret=$? shift done - return $ret } @@ -1028,11 +1025,11 @@ check_sockets_utils() # check_port() { - local pid="$1" + local pid="${1:-0}" local port="$2" local utils="$3" - [ -z "$pid" ] || [ $pid -eq 0 ] && pid='[0-9]+' + [ $pid -le 0 ] && pid='[0-9]+' local rc=1 @@ -1070,14 +1067,20 @@ check_for_dhparams() if [ ! -r "$ssl_dhparams" ]; then get_openssl if [ -n "$OPENSSL_BINARY" ]; then - wsrep_log_info "Could not find dhparams file, creating $ssl_dhparams" - if ! "$OPENSSL_BINARY" dhparam -out "$ssl_dhparams" 2048 >/dev/null 2>&1 - then + wsrep_log_info \ + "Could not find dhparams file, creating $ssl_dhparams" + local bug=0 + local errmsg + errmsg=$("$OPENSSL_BINARY" \ + dhparam -out "$ssl_dhparams" 2048 2>&1) || bug=1 + if [ $bug -ne 0 ]; then + wsrep_log_info "run: \"$OPENSSL_BINARY\" dhparam -out \"$ssl_dhparams\" 2048" + wsrep_log_info "output: $errmsg" wsrep_log_error "******** ERROR *****************************************" wsrep_log_error "* Could not create the dhparams.pem file with OpenSSL. *" wsrep_log_error "********************************************************" ssl_dhparams="" - fi + fi else # Rollback: if openssl is not installed, then use # the default parameters: @@ -1099,17 +1102,9 @@ verify_ca_matches_cert() local ca="$2" local cap="$3" - # If the openssl utility is not installed, then - # we will not do this certificate check: - get_openssl - if [ -z "$OPENSSL_BINARY" ]; then - wsrep_log_info "openssl utility not found" - return - fi - local readable=1; [ ! -r "$cert" ] && readable=0 - [ -n "$ca" ] && [ ! -r "$ca" ] && readable=0 - [ -n "$cap" ] && [ ! -r "$cap" ] && readable=0 + [ -n "$ca" -a ! -r "$ca" ] && readable=0 + [ -n "$cap" -a ! -r "$cap" ] && readable=0 if [ $readable -eq 0 ]; then wsrep_log_error \ @@ -1117,12 +1112,20 @@ verify_ca_matches_cert() exit 22 fi + # If the openssl utility is not installed, then + # we will not do this certificate check: + get_openssl + if [ -z "$OPENSSL_BINARY" ]; then + wsrep_log_info "openssl utility not found" + return + fi + local not_match=0 local errmsg errmsg=$("$OPENSSL_BINARY" verify -verbose \ - ${ca:+ -CAfile} ${ca:+ "$ca"} \ - ${cap:+ -CApath} ${cap:+ "$cap"} \ - "$cert" 2>&1) || not_match=1 + ${ca:+ -CAfile} ${ca:+ "$ca"} \ + ${cap:+ -CApath} ${cap:+ "$cap"} \ + "$cert" 2>&1) || not_match=1 if [ $not_match -eq 1 ]; then wsrep_log_info "run: \"$OPENSSL_BINARY\" verify -verbose${ca:+ -CAfile \"$ca\"}${cap:+ -CApath \"$cap\"} \"$cert\"" @@ -1158,6 +1161,7 @@ verify_cert_matches_key() # If the diff utility is not installed, then # we will not do this certificate check: if [ -z "$(commandex diff)" ]; then + wsrep_log_info "diff utility not found" return fi @@ -1165,6 +1169,7 @@ verify_cert_matches_key() # we will not do this certificate check: get_openssl if [ -z "$OPENSSL_BINARY" ]; then + wsrep_log_info "openssl utility not found" return fi @@ -1253,18 +1258,18 @@ check_pid() { local pid_file="$1" if [ -r "$pid_file" ]; then - local pid=$(cat "$pid_file" 2>/dev/null) + local pid=$(cat "$pid_file" 2>/dev/null || :) if [ -n "$pid" ]; then - if [ $pid -ne 0 ]; then - if ps -p "$pid" >/dev/null 2>&1; then + if [ $pid -gt 0 ]; then + if ps -p $pid >/dev/null 2>&1; then CHECK_PID=$pid return 0 fi fi fi local remove=${2:-0} - if [ $remove -eq 1 ]; then - rm -f "$pid_file" + if [ $remove -ne 0 ]; then + rm -f "$pid_file" || : fi fi CHECK_PID=0 @@ -1289,25 +1294,25 @@ cleanup_pid() local pid_file="${2:-}" local config="${3:-}" - if [ $pid -ne 0 ]; then + if [ $pid -gt 0 ]; then if ps -p $pid >/dev/null 2>&1; then if kill $pid >/dev/null 2>&1; then sleep 0.5 local round=0 local force=0 while ps -p $pid >/dev/null 2>&1; do - sleep 1 - round=$(( round+1 )) - if [ $round -eq 16 ]; then - if [ $force -eq 0 ]; then - round=8 - force=1 - kill -9 $pid >/dev/null 2>&1 - sleep 0.5 - else - return 1 - fi - fi + sleep 1 + round=$(( round+1 )) + if [ $round -eq 16 ]; then + if [ $force -eq 0 ]; then + round=8 + force=1 + kill -9 $pid >/dev/null 2>&1 || : + sleep 0.5 + else + return 1 + fi + fi done elif ps -p $pid >/dev/null 2>&1; then wsrep_log_warning "Unable to kill PID=$pid ($pid_file)" @@ -1316,8 +1321,8 @@ cleanup_pid() fi fi - [ -n "$pid_file" ] && [ -f "$pid_file" ] && rm -f "$pid_file" - [ -n "$config" ] && [ -f "$config" ] && rm -f "$config" + [ -n "$pid_file" -a -f "$pid_file" ] && rm -f "$pid_file" || : + [ -n "$config" -a -f "$config" ] && rm -f "$config" || : return 0 } @@ -1339,3 +1344,46 @@ get_proc() fi fi } + +check_server_ssl_config() +{ + # backward-compatible behavior: + tcert=$(parse_cnf 'sst' 'tca') + tcap=$(parse_cnf 'sst' 'tcapath') + tpem=$(parse_cnf 'sst' 'tcert') + tkey=$(parse_cnf 'sst' 'tkey') + # reading new ssl configuration options: + local tcert2=$(parse_cnf "$encgroups" 'ssl-ca') + local tcap2=$(parse_cnf "$encgroups" 'ssl-capath') + local tpem2=$(parse_cnf "$encgroups" 'ssl-cert') + local tkey2=$(parse_cnf "$encgroups" 'ssl-key') + # if there are no old options, then we take new ones: + if [ -z "$tcert" -a -z "$tcap" -a -z "$tpem" -a -z "$tkey" ]; then + tcert="$tcert2" + tcap="$tcap2" + tpem="$tpem2" + tkey="$tkey2" + # checking for presence of the new-style SSL configuration: + elif [ -n "$tcert2" -o -n "$tcap2" -o -n "$tpem2" -o -n "$tkey2" ]; then + if [ "$tcert" != "$tcert2" -o \ + "$tcap" != "$tcap2" -o \ + "$tpem" != "$tpem2" -o \ + "$tkey" != "$tkey2" ] + then + wsrep_log_info \ + "new ssl configuration options (ssl-ca[path], ssl-cert" \ + "and ssl-key) are ignored by SST due to presence" \ + "of the tca[path], tcert and/or tkey in the [sst] section" + fi + fi + if [ -n "$tcert" ]; then + tcert=$(trim_string "$tcert") + if [ "${tcert%/}" != "$tcert" -o -d "$tcert" ]; then + tcap="$tcert" + tcert="" + fi + fi + if [ -n "$tcap" ]; then + tcap=$(trim_string "$tcap") + fi +} diff --git a/scripts/wsrep_sst_mariabackup.sh b/scripts/wsrep_sst_mariabackup.sh index 4bca785fcad..aa9442b0601 100644 --- a/scripts/wsrep_sst_mariabackup.sh +++ b/scripts/wsrep_sst_mariabackup.sh @@ -30,7 +30,6 @@ eformat="" ekey="" ekeyfile="" encrypt=0 -ecode=0 ssyslog="" ssystag="" BACKUP_PID="" @@ -465,49 +464,6 @@ adjust_progress() encgroups='--mysqld|sst|xtrabackup' -check_server_ssl_config() -{ - # backward-compatible behavior: - tcert=$(parse_cnf 'sst' 'tca') - tcap=$(parse_cnf 'sst' 'tcapath') - tpem=$(parse_cnf 'sst' 'tcert') - tkey=$(parse_cnf 'sst' 'tkey') - # reading new ssl configuration options: - local tcert2=$(parse_cnf "$encgroups" 'ssl-ca') - local tcap2=$(parse_cnf "$encgroups" 'ssl-capath') - local tpem2=$(parse_cnf "$encgroups" 'ssl-cert') - local tkey2=$(parse_cnf "$encgroups" 'ssl-key') - # if there are no old options, then we take new ones: - if [ -z "$tcert" -a -z "$tcap" -a -z "$tpem" -a -z "$tkey" ]; then - tcert="$tcert2" - tcap="$tcap2" - tpem="$tpem2" - tkey="$tkey2" - # checking for presence of the new-style SSL configuration: - elif [ -n "$tcert2" -o -n "$tcap2" -o -n "$tpem2" -o -n "$tkey2" ]; then - if [ "$tcert" != "$tcert2" -o \ - "$tcap" != "$tcap2" -o \ - "$tpem" != "$tpem2" -o \ - "$tkey" != "$tkey2" ] - then - wsrep_log_info \ - "new ssl configuration options (ssl-ca[path], ssl-cert" \ - "and ssl-key) are ignored by SST due to presence" \ - "of the tca[path], tcert and/or tkey in the [sst] section" - fi - fi - if [ -n "$tcert" ]; then - tcert=$(trim_string "$tcert") - if [ "${tcert%/}" != "$tcert" ] || [ -d "$tcert" ]; then - tcap="$tcert" - tcert="" - fi - fi - if [ -n "$tcap" ]; then - tcap=$(trim_string "$tcap") - fi -} - read_cnf() { sfmt=$(parse_cnf sst streamfmt 'mbstream') @@ -647,7 +603,7 @@ cleanup_at_exit() cleanup_pid $CHECK_PID "$BACKUP_PID" fi fi - [ -f "$DATA/$IST_FILE" ] && rm -f "$DATA/$IST_FILE" + [ -f "$DATA/$IST_FILE" ] && rm -f "$DATA/$IST_FILE" || : fi if [ -n "$progress" -a -p "$progress" ]; then @@ -658,27 +614,31 @@ cleanup_at_exit() wsrep_log_info "Cleaning up temporary directories" if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then - if [ -n "$STATDIR" ]; then - [ -d "$STATDIR" ] && rm -rf "$STATDIR" - fi + [ -n "$STATDIR" -a -d "$STATDIR" ] && rm -rf "$STATDIR" || : else [ -n "$xtmpdir" -a -d "$xtmpdir" ] && rm -rf "$xtmpdir" || : [ -n "$itmpdir" -a -d "$itmpdir" ] && rm -rf "$itmpdir" || : fi # Final cleanup - pgid=$(ps -o pgid= $$ | grep -o '[0-9]*') + pgid=$(ps -o pgid= $$ 2>/dev/null | grep -o '[0-9]*' || :) # This means no setsid done in mysqld. # We don't want to kill mysqld here otherwise. - if [ $$ -eq $pgid ]; then - # This means a signal was delivered to the process. - # So, more cleanup. - if [ $estatus -ge 128 ]; then - kill -KILL -- -$$ || : + if [ -n "$pgid" ]; then + if [ $$ -eq $pgid ]; then + # This means a signal was delivered to the process. + # So, more cleanup. + if [ $estatus -ge 128 ]; then + kill -KILL -- -$$ || : + fi fi fi + if [ -n "${SST_PID:-}" ]; then + [ -f "$SST_PID" ] && rm -f "$SST_PID" || : + fi + exit $estatus } @@ -967,7 +927,7 @@ setup_commands() fi INNOAPPLY="$BACKUP_BIN --prepare$disver$recovery${iapts:+ }$iapts$INNOEXTRA --target-dir='$DATA' --datadir='$DATA'$mysqld_args $INNOAPPLY" INNOMOVE="$BACKUP_BIN$WSREP_SST_OPT_CONF --move-back$disver${impts:+ }$impts --force-non-empty-directories --target-dir='$DATA' --datadir='${TDATA:-$DATA}' $INNOMOVE" - INNOBACKUP="$BACKUP_BIN$WSREP_SST_OPT_CONF --backup$disver${iopts:+ }$iopts $tmpopts$INNOEXTRA --galera-info --stream=$sfmt --target-dir='$itmpdir' --datadir='$DATA'$mysqld_args $INNOBACKUP" + INNOBACKUP="$BACKUP_BIN$WSREP_SST_OPT_CONF --backup$disver${iopts:+ }$iopts$tmpopts$INNOEXTRA --galera-info --stream=$sfmt --target-dir='$itmpdir' --datadir='$DATA'$mysqld_args $INNOBACKUP" } get_stream @@ -995,7 +955,7 @@ then fi wsrep_log_info "Using '$xtmpdir' as mariabackup temporary directory" - tmpopts="--tmpdir='$xtmpdir'" + tmpopts=" --tmpdir='$xtmpdir'" itmpdir="$(mktemp -d)" wsrep_log_info "Using '$itmpdir' as mariabackup working directory" @@ -1161,10 +1121,23 @@ then impts="--parallel=$backup_threads${impts:+ }$impts" fi - stagemsg='Joiner-Recv' + SST_PID="$WSREP_SST_OPT_DATA/wsrep_sst.pid" + + # give some time for previous SST to complete: + check_round=0 + while check_pid "$SST_PID" 0; do + wsrep_log_info "previous SST is not completed, waiting for it to exit" + check_round=$(( check_round + 1 )) + if [ $check_round -eq 10 ]; then + wsrep_log_error "previous SST script still running." + exit 114 # EALREADY + fi + sleep 1 + done + + echo $$ > "$SST_PID" - sencrypted=1 - nthreads=1 + stagemsg='Joiner-Recv' MODULE="xtrabackup_sst" @@ -1208,7 +1181,7 @@ then fi get_keys - if [ $encrypt -eq 1 -a $sencrypted -eq 1 ]; then + if [ $encrypt -eq 1 ]; then strmcmd="$ecmd | $strmcmd" fi @@ -1263,12 +1236,14 @@ then if [ -n "$WSREP_SST_OPT_BINLOG" ]; then binlog_dir=$(dirname "$WSREP_SST_OPT_BINLOG") - cd "$binlog_dir" - wsrep_log_info "Cleaning the binlog directory $binlog_dir as well" - rm -fv "$WSREP_SST_OPT_BINLOG".[0-9]* 1>&2 \+ || : - [ -f "$WSREP_SST_OPT_BINLOG_INDEX" ] && \ - rm -fv "$WSREP_SST_OPT_BINLOG_INDEX" 1>&2 \+ || : - cd "$OLD_PWD" + if [ -d "$binlog_dir" ]; then + cd "$binlog_dir" + wsrep_log_info "Cleaning the binlog directory $binlog_dir as well" + rm -fv "$WSREP_SST_OPT_BINLOG".[0-9]* 1>&2 \+ || : + [ -f "$WSREP_SST_OPT_BINLOG_INDEX" ] && \ + rm -fv "$WSREP_SST_OPT_BINLOG_INDEX" 1>&2 \+ + cd "$OLD_PWD" + fi fi TDATA="$DATA" @@ -1285,7 +1260,7 @@ then fi # Compact backups are not supported by mariabackup - if grep -q -F 'compact = 1' "$DATA/xtrabackup_checkpoints"; then + if grep -qw -F 'compact = 1' "$DATA/xtrabackup_checkpoints"; then wsrep_log_info "Index compaction detected" wsrel_log_error "Compact backups are not supported by mariabackup" exit 2 diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh index b0cc8cb3066..28dfed18218 100644 --- a/scripts/wsrep_sst_rsync.sh +++ b/scripts/wsrep_sst_rsync.sh @@ -17,7 +17,7 @@ # Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston # MA 02110-1335 USA. -# This is a reference script for rsync-based state snapshot tansfer +# This is a reference script for rsync-based state snapshot transfer RSYNC_REAL_PID=0 # rsync process id STUNNEL_REAL_PID=0 # stunnel process id @@ -41,7 +41,7 @@ cleanup_joiner() if cleanup_pid $STUNNEL_REAL_PID "$STUNNEL_PID" "$STUNNEL_CONF"; then if [ $RSYNC_REAL_PID -eq 0 ]; then if [ -r "$RSYNC_PID" ]; then - RSYNC_REAL_PID=$(cat "$RSYNC_PID" 2>/dev/null) + RSYNC_REAL_PID=$(cat "$RSYNC_PID" 2>/dev/null || :) if [ -z "$RSYNC_REAL_PID" ]; then RSYNC_REAL_PID=0 fi @@ -79,7 +79,7 @@ check_pid_and_port() local utils='rsync|stunnel' - if ! check_port "$pid" "$port" "$utils"; then + if ! check_port $pid "$port" "$utils"; then local port_info local busy=0 @@ -90,7 +90,7 @@ check_pid_and_port() grep -q -E "[[:space:]](\\*|\\[?::\\]?):$port[[:space:]]" && busy=1 else local filter='([^[:space:]]+[[:space:]]+){4}[^[:space:]]+' - if [ $sockstat_available -eq 1 ]; then + if [ $sockstat_available -ne 0 ]; then local opts='-p' if [ "$OS" = 'FreeBSD' ]; then # sockstat on FreeBSD requires the "-s" option @@ -110,18 +110,20 @@ check_pid_and_port() fi if [ $busy -eq 0 ]; then - if echo "$port_info" | grep -qw -F "[$addr]:$port" || \ - echo "$port_info" | grep -qw -F -- "$addr:$port" + if ! echo "$port_info" | grep -qw -F "[$addr]:$port" && \ + ! echo "$port_info" | grep -qw -F -- "$addr:$port" then - busy=1 + if ! ps -p $pid >/dev/null 2>&1; then + wsrep_log_error \ + "rsync or stunnel daemon (PID: $pid)" \ + "terminated unexpectedly." + exit 16 # EBUSY + fi + return 1 fi fi - if [ $busy -eq 0 ]; then - return 1 - fi - - if ! check_port "$pid" "$port" "$utils"; then + if ! check_port $pid "$port" "$utils"; then wsrep_log_error "rsync or stunnel daemon port '$port'" \ "has been taken by another program" exit 16 # EBUSY @@ -197,60 +199,16 @@ INNODB_UNDO_DIR=$(pwd -P) cd "$OLD_PWD" -# Old filter - include everything except selected -# FILTER=(--exclude '*.err' --exclude '*.pid' --exclude '*.sock' \ -# --exclude '*.conf' --exclude core --exclude 'galera.*' \ -# --exclude grastate.txt --exclude '*.pem' \ -# --exclude '*.[0-9][0-9][0-9][0-9][0-9][0-9]' --exclude '*.index') - -# New filter - exclude everything except dirs (schemas) and innodb files -FILTER="-f '- /lost+found' - -f '- /.zfs' - -f '- /.fseventsd' - -f '- /.Trashes' - -f '- /.pid' - -f '- /.conf' - -f '+ /wsrep_sst_binlog.tar' - -f '- $INNODB_DATA_HOME_DIR/ib_lru_dump' - -f '- $INNODB_DATA_HOME_DIR/ibdata*' - -f '+ $INNODB_UNDO_DIR/undo*' - -f '+ /*/' - -f '- /*'" - -# old-style SSL config -SSTKEY=$(parse_cnf 'sst' 'tkey') -SSTCERT=$(parse_cnf 'sst' 'tcert') -SSTCA=$(parse_cnf 'sst' 'tca') -SSTCAP=$(parse_cnf 'sst' 'tcapath') +encgroups='--mysqld|sst' -SST_SECTIONS="--mysqld|sst" +check_server_ssl_config -check_server_ssl_config() -{ - SSTKEY=$(parse_cnf "$SST_SECTIONS" 'ssl-key') - SSTCERT=$(parse_cnf "$SST_SECTIONS" 'ssl-cert') - SSTCA=$(parse_cnf "$SST_SECTIONS" 'ssl-ca') - SSTCAP=$(parse_cnf "$SST_SECTIONS" 'ssl-capath') -} +SSTKEY="$tkey" +SSTCERT="$tpem" +SSTCA="$tcert" +SSTCAP="$tcap" -SSLMODE=$(parse_cnf "$SST_SECTIONS" 'ssl-mode' | tr [:lower:] [:upper:]) - -# no old-style SSL config in [sst], check for new one: -if [ -z "$SSTKEY" -a -z "$SSTCERT" -a -z "$SSTCA" -a -z "$SSTCAP" ]; then - check_server_ssl_config -fi - -if [ -n "$SSTCA" ]; then - SSTCA=$(trim_string "$SSTCA") - if [ "${SSTCA%/}" != "$SSTCA" ] || [ -d "$SSTCA" ]; then - SSTCAP="$SSTCA" - SSTCA="" - fi -fi - -if [ -n "$SSTCAP" ]; then - SSTCAP=$(trim_string "$SSTCAP") -fi +SSLMODE=$(parse_cnf "$encgroups" 'ssl-mode' | tr [:lower:] [:upper:]) if [ -z "$SSLMODE" ]; then # Implicit verification if CA is set and the SSL mode @@ -266,7 +224,7 @@ if [ -z "$SSLMODE" ]; then fi fi -if [ -n "$SSTCERT" -a -n "$SSTKEY" ]; then +if [ -n "$SSTKEY" -a -n "$SSTCERT" ]; then verify_cert_matches_key "$SSTCERT" "$SSTKEY" fi @@ -287,8 +245,7 @@ fi VERIFY_OPT="" CHECK_OPT="" CHECK_OPT_LOCAL="" -if [ "${SSLMODE#VERIFY}" != "$SSLMODE" ] -then +if [ "${SSLMODE#VERIFY}" != "$SSLMODE" ]; then case "$SSLMODE" in 'VERIFY_IDENTITY') VERIFY_OPT='verifyPeer = yes' @@ -364,8 +321,9 @@ EOF [ -f "$STUNNEL_CONF" ] && rm -f "$STUNNEL_CONF" fi - if [ $WSREP_SST_OPT_BYPASS -eq 0 ] - then + RC=0 + + if [ $WSREP_SST_OPT_BYPASS -eq 0 ]; then FLUSHED="$WSREP_SST_OPT_DATA/tables_flushed" ERROR="$WSREP_SST_OPT_DATA/sst_error" @@ -380,11 +338,11 @@ EOF # (b) Cluster state ID & wsrep_gtid_domain_id to be written to the file, OR # (c) ERROR file, in case flush tables operation failed. - while [ ! -r "$FLUSHED" ] && ! grep -q -F ':' "$FLUSHED" >/dev/null 2>&1 + while [ ! -r "$FLUSHED" ] && \ + ! grep -q -F ':' '--' "$FLUSHED" >/dev/null 2>&1 do # Check whether ERROR file exists. - if [ -f "$ERROR" ] - then + if [ -f "$ERROR" ]; then # Flush tables operation failed. rm -f "$ERROR" exit 255 @@ -397,7 +355,7 @@ EOF sync - if [ -n "$WSREP_SST_OPT_BINLOG" ] + if [ -n "$WSREP_SST_OPT_BINLOG" -a -d "${BINLOG_DIRNAME:-}" ] then # Prepare binlog files cd "$BINLOG_DIRNAME" @@ -405,16 +363,14 @@ EOF binlog_files_full=$(tail -n $BINLOG_N_FILES \ "$WSREP_SST_OPT_BINLOG_INDEX") binlog_files="" - for ii in $binlog_files_full - do - binlog_file=$(basename "$ii") - binlog_files="$binlog_files $binlog_file" + for file in $binlog_files_full; do + binlog_file=$(basename "$file") + binlog_files="$binlog_files${binlog_files:+ }'$binlog_file'" done - if [ -n "$binlog_files" ] - then + if [ -n "$binlog_files" ]; then wsrep_log_info "Preparing binlog files for transfer:" - tar -cvf "$BINLOG_TAR_FILE" $binlog_files >&2 + eval tar -cvf "'$BINLOG_TAR_FILE'" $binlog_files >&2 fi cd "$OLD_PWD" @@ -427,9 +383,28 @@ EOF WHOLE_FILE_OPT="--whole-file" fi +# Old filter - include everything except selected +# FILTER=(--exclude '*.err' --exclude '*.pid' --exclude '*.sock' \ +# --exclude '*.conf' --exclude core --exclude 'galera.*' \ +# --exclude grastate.txt --exclude '*.pem' \ +# --exclude '*.[0-9][0-9][0-9][0-9][0-9][0-9]' --exclude '*.index') + +# New filter - exclude everything except dirs (schemas) and innodb files +FILTER="-f '- /lost+found' + -f '- /.zfs' + -f '- /.fseventsd' + -f '- /.Trashes' + -f '- /.pid' + -f '- /.conf' + -f '+ /wsrep_sst_binlog.tar' + -f '- $INNODB_DATA_HOME_DIR/ib_lru_dump' + -f '- $INNODB_DATA_HOME_DIR/ibdata*' + -f '+ $INNODB_UNDO_DIR/undo*' + -f '+ /*/' + -f '- /*'" + # first, the normal directories, so that we can detect # incompatible protocol: - RC=0 eval rsync ${STUNNEL:+"--rsh='$STUNNEL'"} \ --owner --group --perms --links --specials \ --ignore-times --inplace --dirs --delete --quiet \ @@ -484,7 +459,7 @@ EOF cd "$WSREP_SST_OPT_DATA" - backup_threads=$(parse_cnf "--mysqld|sst" 'backup-threads') + backup_threads=$(parse_cnf '--mysqld|sst' 'backup-threads') if [ -z "$backup_threads" ]; then get_proc backup_threads=$nproc @@ -527,7 +502,12 @@ EOF rsync ${STUNNEL:+--rsh="$STUNNEL"} \ --archive --quiet --checksum "$MAGIC_FILE" \ - "rsync://$WSREP_SST_OPT_ADDR" + "rsync://$WSREP_SST_OPT_ADDR" >&2 || RC=$? + + if [ $RC -ne 0 ]; then + wsrep_log_error "rsync $MAGIC_FILE returned code $RC:" + exit 255 # unknown error + fi echo "done $STATE" @@ -540,12 +520,11 @@ elif [ "$WSREP_SST_OPT_ROLE" = 'joiner' ] then check_sockets_utils - SST_PID="$WSREP_SST_OPT_DATA/wsrep_rsync_sst.pid" + SST_PID="$WSREP_SST_OPT_DATA/wsrep_sst.pid" # give some time for previous SST to complete: check_round=0 - while check_pid "$SST_PID" 0 - do + while check_pid "$SST_PID" 0 'wsrep_sst_'; do wsrep_log_info "previous SST is not completed, waiting for it to exit" check_round=$(( check_round + 1 )) if [ $check_round -eq 10 ]; then @@ -555,10 +534,11 @@ then sleep 1 done + echo $$ > "$SST_PID" + # give some time for stunnel from the previous SST to complete: check_round=0 - while check_pid "$STUNNEL_PID" 1 - do + while check_pid "$STUNNEL_PID" 1; do wsrep_log_info "Lingering stunnel daemon found at startup," \ "waiting for it to exit" check_round=$(( check_round + 1 )) @@ -575,8 +555,7 @@ then # give some time for rsync from the previous SST to complete: check_round=0 - while check_pid "$RSYNC_PID" 1 - do + while check_pid "$RSYNC_PID" 1; do wsrep_log_info "Lingering rsync daemon found at startup," \ "waiting for it to exit" check_round=$(( check_round + 1 )) @@ -590,7 +569,7 @@ then [ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE" [ -f "$BINLOG_TAR_FILE" ] && rm -f "$BINLOG_TAR_FILE" - [ -z "$STUNNEL" ] && [ -f "$STUNNEL_CONF" ] && rm -f "$STUNNEL_CONF" + [ -z "$STUNNEL" -a -f "$STUNNEL_CONF" ] && rm -f "$STUNNEL_CONF" ADDR="$WSREP_SST_OPT_ADDR" RSYNC_PORT="$WSREP_SST_OPT_PORT" @@ -639,8 +618,6 @@ EOF RSYNC_ADDR="*" fi - echo $$ > "$SST_PID" - if [ -z "$STUNNEL" ]; then rsync --daemon --no-detach --port "$RSYNC_PORT" \ --config "$RSYNC_CONF" $RSYNC_EXTRA_ARGS & |