summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrei Elkin <andrei.elkin@oracle.com>2011-09-29 14:14:43 +0300
committerAndrei Elkin <andrei.elkin@oracle.com>2011-09-29 14:14:43 +0300
commit593c9457cd8533091ac1bad1e85234e79c0ce6a3 (patch)
tree9f54d8b7daf7b56d906514d99346b56d29b20c2b
parent586c0c0ef6e23c4ce3c63f796187178f15e7010a (diff)
downloadmariadb-git-593c9457cd8533091ac1bad1e85234e79c0ce6a3.tar.gz
Bug#11747416 : 32228 A disk full makes binary log corrupt
Binary log of master can get a partially logged event if the server runs out of disk space and, while waiting for some space to be freed, is shut down (or crashes). If the server is not stopped, it will just wait endlessly for space to be freed, thus no partial event anomaly occurs. The restarted master server has had a dubious policy to send the incomplete event to slave which it apparently can't handle. Although an error was printed out the fact of sending with unclear error message is a source of confusion. Actually the problem of presence an incomplete event in the binary log was already fixed by WL 5493 (which was merged to our current trunk branch, major version 5.6). The fix makes the server truncate the binary log on server restart and recovery. However 5.5 master can't do that. So the current issue is a problem of sending incomplete events to the slave by 5.5 master. It is fixed in this patch by changing the policy so that only complete events are pushed by the dump thread to the IO thread. In addition, the error text that master sends to the slave when an incomplete event is found, now states that incomplete event may have been caused by an out-of-disk space situation and provides coordinates of the first and the last event bytes read. mysql-test/std_data/bug11747416_32228_binlog.000001: a binlog is added with the last event written partly. mysql-test/suite/rpl/r/rpl_cant_read_event_incident.result: new result file is added. mysql-test/suite/rpl/r/rpl_log_pos.result: results updated. mysql-test/suite/rpl/r/rpl_manual_change_index_file.result: results updated. mysql-test/suite/rpl/r/rpl_packet.result: results updated. mysql-test/suite/rpl/t/rpl_cant_read_event_incident.test: regression test for bug#11747416 : 32228 A disk full makes binary log corrupt is added. sql/share/errmsg-utf8.txt: Increasing the explanatory part of ER_MASTER_FATAL_ERROR_READING_BINLOG error message twice in order to fit to the updated version which carries some more info. sql/sql_repl.cc: Error text indicating a failure of reading from binlog that master delivers to the slave is made more clear; A policy to regard a partial event to send it out to the slave anyway is removed.
-rw-r--r--mysql-test/std_data/bug11747416_32228_binlog.000001bin0 -> 8192 bytes
-rw-r--r--mysql-test/suite/rpl/r/rpl_cant_read_event_incident.result19
-rw-r--r--mysql-test/suite/rpl/r/rpl_log_pos.result2
-rw-r--r--mysql-test/suite/rpl/r/rpl_manual_change_index_file.result2
-rw-r--r--mysql-test/suite/rpl/r/rpl_packet.result2
-rw-r--r--mysql-test/suite/rpl/t/rpl_cant_read_event_incident.test64
-rw-r--r--sql/share/errmsg-utf8.txt16
-rw-r--r--sql/sql_repl.cc32
8 files changed, 110 insertions, 27 deletions
diff --git a/mysql-test/std_data/bug11747416_32228_binlog.000001 b/mysql-test/std_data/bug11747416_32228_binlog.000001
new file mode 100644
index 00000000000..2596b5b0237
--- /dev/null
+++ b/mysql-test/std_data/bug11747416_32228_binlog.000001
Binary files differ
diff --git a/mysql-test/suite/rpl/r/rpl_cant_read_event_incident.result b/mysql-test/suite/rpl/r/rpl_cant_read_event_incident.result
new file mode 100644
index 00000000000..f8ff9594dfa
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_cant_read_event_incident.result
@@ -0,0 +1,19 @@
+include/master-slave.inc
+[connection master]
+call mtr.add_suppression("Error in Log_event::read_log_event()");
+include/rpl_stop_server.inc [server_number=1]
+include/rpl_start_server.inc [server_number=1]
+show binlog events;
+ERROR HY000: Error when executing command SHOW BINLOG EVENTS: Wrong offset or I/O error
+call mtr.add_suppression("Slave I/O: Got fatal error 1236 from master when reading data from binary log");
+stop slave;
+reset slave;
+start slave;
+include/wait_for_slave_param.inc [Last_IO_Errno]
+Last_IO_Errno = '1236'
+Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'binlog truncated in the middle of event; consider out of disk space on master; the last event was read from ./master-bin.000001 at 316, the last byte read was read from ./master-bin.000001 at 335.''
+reset master;
+stop slave;
+reset slave;
+drop table t;
+End of the tests
diff --git a/mysql-test/suite/rpl/r/rpl_log_pos.result b/mysql-test/suite/rpl/r/rpl_log_pos.result
index 91d307008f0..753eb86be44 100644
--- a/mysql-test/suite/rpl/r/rpl_log_pos.result
+++ b/mysql-test/suite/rpl/r/rpl_log_pos.result
@@ -9,7 +9,7 @@ change master to master_log_pos=MASTER_LOG_POS;
Read_Master_Log_Pos = '75'
start slave;
include/wait_for_slave_io_error.inc [errno=1236]
-Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'log event entry exceeded max_allowed_packet; Increase max_allowed_packet on master''
+Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'log event entry exceeded max_allowed_packet; Increase max_allowed_packet on master; the last event was read from ./master-bin.000001 at 75, the last byte read was read from ./master-bin.000001 at 94.''
include/stop_slave_sql.inc
show master status;
File Position Binlog_Do_DB Binlog_Ignore_DB
diff --git a/mysql-test/suite/rpl/r/rpl_manual_change_index_file.result b/mysql-test/suite/rpl/r/rpl_manual_change_index_file.result
index 23238d9c97b..479f84ef648 100644
--- a/mysql-test/suite/rpl/r/rpl_manual_change_index_file.result
+++ b/mysql-test/suite/rpl/r/rpl_manual_change_index_file.result
@@ -5,7 +5,7 @@ CREATE TABLE t1(c1 INT);
FLUSH LOGS;
call mtr.add_suppression('Got fatal error 1236 from master when reading data from binary log: .*could not find next log');
include/wait_for_slave_io_error.inc [errno=1236]
-Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'could not find next log''
+Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'could not find next log; the last event was read from ./master-bin.000002 at 237, the last byte read was read from ./master-bin.000002 at 237.''
CREATE TABLE t2(c1 INT);
FLUSH LOGS;
CREATE TABLE t3(c1 INT);
diff --git a/mysql-test/suite/rpl/r/rpl_packet.result b/mysql-test/suite/rpl/r/rpl_packet.result
index 6190f458367..03b2e3131e1 100644
--- a/mysql-test/suite/rpl/r/rpl_packet.result
+++ b/mysql-test/suite/rpl/r/rpl_packet.result
@@ -37,7 +37,7 @@ DROP TABLE t1;
CREATE TABLE t1 (f1 int PRIMARY KEY, f2 LONGTEXT, f3 LONGTEXT) ENGINE=MyISAM;
INSERT INTO t1(f1, f2, f3) VALUES(1, REPEAT('a', @@global.max_allowed_packet), REPEAT('b', @@global.max_allowed_packet));
include/wait_for_slave_io_error.inc [errno=1236]
-Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'log event entry exceeded max_allowed_packet; Increase max_allowed_packet on master''
+Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'log event entry exceeded max_allowed_packet; Increase max_allowed_packet on master; the last event was read from ./master-bin.000001 at 463, the last byte read was read from ./master-bin.000001 at 482.''
STOP SLAVE;
RESET SLAVE;
RESET MASTER;
diff --git a/mysql-test/suite/rpl/t/rpl_cant_read_event_incident.test b/mysql-test/suite/rpl/t/rpl_cant_read_event_incident.test
new file mode 100644
index 00000000000..71445be55e6
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_cant_read_event_incident.test
@@ -0,0 +1,64 @@
+#
+# Bug#11747416 : 32228 A disk full makes binary log corrupt.
+#
+#
+# The test demonstrates reading from binlog error propagation to slave
+# and reporting there.
+# Conditions for the bug include a crash at time of the last event to
+# the binlog was written partly. With the fixes the event is not sent out
+# any longer, but rather the dump thread sends out a sound error message.
+#
+# Crash is not simulated. A binlog with partly written event in its end is installed
+# and replication is started from it.
+#
+
+--source include/master-slave.inc
+--source include/have_binlog_format_mixed.inc
+
+call mtr.add_suppression("Error in Log_event::read_log_event()");
+
+--connection master
+--let $datadir= `SELECT @@datadir`
+
+--let $rpl_server_number= 1
+--source include/rpl_stop_server.inc
+
+--remove_file $datadir/master-bin.000001
+--copy_file $MYSQL_TEST_DIR/std_data/bug11747416_32228_binlog.000001 $datadir/master-bin.000001
+
+--let $rpl_server_number= 1
+--source include/rpl_start_server.inc
+
+--source include/wait_until_connected_again.inc
+
+# evidence of the partial binlog
+--error ER_ERROR_WHEN_EXECUTING_COMMAND
+show binlog events;
+
+--connection slave
+call mtr.add_suppression("Slave I/O: Got fatal error 1236 from master when reading data from binary log");
+stop slave;
+reset slave;
+start slave;
+
+# ER_MASTER_FATAL_ERROR_READING_BINLOG 1236
+--let $slave_param=Last_IO_Errno
+--let $slave_param_value=1236
+--source include/wait_for_slave_param.inc
+
+--let $status_items= Last_IO_Errno, Last_IO_Error
+--source include/show_slave_status.inc
+
+#
+# Cleanup
+#
+
+--connection master
+reset master;
+
+--connection slave
+stop slave;
+reset slave;
+drop table t; # table was created from binlog. it does not exist on master.
+
+--echo End of the tests
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
index da176bd5233..06d67db7aab 100644
--- a/sql/share/errmsg-utf8.txt
+++ b/sql/share/errmsg-utf8.txt
@@ -4701,14 +4701,14 @@ ER_NOT_SUPPORTED_YET 42000
spa "Esta versión de MySQL no soporta todavia '%s'"
swe "Denna version av MySQL kan ännu inte utföra '%s'"
ER_MASTER_FATAL_ERROR_READING_BINLOG
- nla "Kreeg fatale fout %d: '%-.128s' van master tijdens lezen van data uit binaire log"
- eng "Got fatal error %d from master when reading data from binary log: '%-.128s'"
- ger "Schwerer Fehler %d: '%-.128s vom Master beim Lesen des binären Logs"
- ita "Errore fatale %d: '%-.128s' dal master leggendo i dati dal log binario"
- por "Obteve fatal erro %d: '%-.128s' do master quando lendo dados do binary log"
- rus "Получена неисправимая ошибка %d: '%-.128s' от головного сервера в процессе выборки данных из двоичного журнала"
- spa "Recibió fatal error %d: '%-.128s' del master cuando leyendo datos del binary log"
- swe "Fick fatalt fel %d: '%-.128s' från master vid läsning av binärloggen"
+ nla "Kreeg fatale fout %d: '%-.256s' van master tijdens lezen van data uit binaire log"
+ eng "Got fatal error %d from master when reading data from binary log: '%-.256s'"
+ ger "Schwerer Fehler %d: '%-.256s vom Master beim Lesen des binären Logs"
+ ita "Errore fatale %d: '%-.256s' dal master leggendo i dati dal log binario"
+ por "Obteve fatal erro %d: '%-.256s' do master quando lendo dados do binary log"
+ rus "Получена неисправимая ошибка %d: '%-.256s' от головного сервера в процессе выборки данных из двоичного журнала"
+ spa "Recibió fatal error %d: '%-.256s' del master cuando leyendo datos del binary log"
+ swe "Fick fatalt fel %d: '%-.256s' från master vid läsning av binärloggen"
ER_SLAVE_IGNORED_TABLE
eng "Slave SQL thread ignored the query because of replicate-*-table rules"
ger "Slave-SQL-Thread hat die Abfrage aufgrund von replicate-*-table-Regeln ignoriert"
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 8653607263f..c0d9432c71a 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -352,7 +352,7 @@ Increase max_allowed_packet on master";
*errmsg = "memory allocation failed reading log event";
break;
case LOG_READ_TRUNC:
- *errmsg = "binlog truncated in the middle of event";
+ *errmsg = "binlog truncated in the middle of event; consider out of disk space on master";
break;
default:
*errmsg = "unknown error reading log event on the master";
@@ -447,6 +447,9 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
String* packet = &thd->packet;
int error;
const char *errmsg = "Unknown error";
+ const char *fmt= "%s; the last event was read from %s at %s, the last byte read was read from %s at %s.";
+ char llbuff1[22], llbuff2[22];
+ char error_text[MAX_SLAVE_ERRMSG]; // to be send to slave via my_message()
NET* net = &thd->net;
mysql_mutex_t *log_lock;
mysql_cond_t *log_cond;
@@ -677,10 +680,8 @@ impossible position";
if (reset_transmit_packet(thd, flags, &ev_offset, &errmsg))
goto err;
- my_off_t prev_pos= pos;
- while (!(error = Log_event::read_log_event(&log, packet, log_lock)))
+ while (!(error= Log_event::read_log_event(&log, packet, log_lock)))
{
- prev_pos= my_b_tell(&log);
#ifndef DBUG_OFF
if (max_binlog_dump_events && !left_events--)
{
@@ -767,17 +768,6 @@ impossible position";
}
/*
- here we were reading binlog that was not closed properly (as a result
- of a crash ?). treat any corruption as EOF
- */
- if (binlog_can_be_corrupted &&
- error != LOG_READ_MEM && error != LOG_READ_EOF)
- {
- my_b_seek(&log, prev_pos);
- error=LOG_READ_EOF;
- }
-
- /*
TODO: now that we are logging the offset, check to make sure
the recorded offset and the actual match.
Guilhem 2003-06: this is not true if this master is a slave
@@ -1021,6 +1011,16 @@ end:
err:
thd_proc_info(thd, "Waiting to finalize termination");
+ if (my_errno == ER_MASTER_FATAL_ERROR_READING_BINLOG && my_b_inited(&log))
+ /*
+ detailing the fatal error message with coordinates
+ of the last position read.
+ */
+ my_snprintf(error_text, sizeof(error_text), fmt, errmsg,
+ coord->file_name, (llstr(coord->pos, llbuff1), llbuff1),
+ log_file_name, (llstr(my_b_tell(&log), llbuff2), llbuff2));
+ else
+ strcpy(error_text, errmsg);
end_io_cache(&log);
RUN_HOOK(binlog_transmit, transmit_stop, (thd, flags));
/*
@@ -1037,7 +1037,7 @@ err:
mysql_file_close(file, MYF(MY_WME));
thd->variables.max_allowed_packet= old_max_allowed_packet;
- my_message(my_errno, errmsg, MYF(0));
+ my_message(my_errno, error_text, MYF(0));
DBUG_VOID_RETURN;
}