summaryrefslogtreecommitdiff
path: root/innobase
diff options
context:
space:
mode:
authorunknown <heikki@hundin.mysql.fi>2003-05-03 02:29:40 +0300
committerunknown <heikki@hundin.mysql.fi>2003-05-03 02:29:40 +0300
commit8d8f52e902699c260f455075650906025f59f010 (patch)
tree2c18eac77e10f9bfcac7dfb438fb46b3a81d5e67 /innobase
parent870397892be8a35afdb343c209be91cba117cee6 (diff)
downloadmariadb-git-8d8f52e902699c260f455075650906025f59f010.tar.gz
Many files:
Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released sql/log.cc: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released sql/handler.cc: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released sql/handler.h: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released sql/ha_innodb.cc: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released sql/ha_innodb.h: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/include/log0log.h: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/include/trx0trx.h: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/os/os0file.c: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/buf/buf0flu.c: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/trx/trx0trx.c: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/log/log0log.c: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/srv/srv0srv.c: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/row/row0mysql.c: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released
Diffstat (limited to 'innobase')
-rw-r--r--innobase/buf/buf0flu.c2
-rw-r--r--innobase/include/log0log.h65
-rw-r--r--innobase/include/trx0trx.h14
-rw-r--r--innobase/log/log0log.c158
-rw-r--r--innobase/os/os0file.c7
-rw-r--r--innobase/row/row0mysql.c2
-rw-r--r--innobase/srv/srv0srv.c17
-rw-r--r--innobase/trx/trx0trx.c48
8 files changed, 179 insertions, 134 deletions
diff --git a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c
index 516056b5174..4d998f8306f 100644
--- a/innobase/buf/buf0flu.c
+++ b/innobase/buf/buf0flu.c
@@ -398,7 +398,7 @@ buf_flush_write_block_low(
"Warning: cannot force log to disk in the log debug version!\n");
#else
/* Force the log to the disk before writing the modified block */
- log_flush_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS);
+ log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
#endif
buf_flush_init_for_writing(block->frame, block->newest_modification,
block->space, block->offset);
diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h
index f200371de9d..4e1404b15fe 100644
--- a/innobase/include/log0log.h
+++ b/innobase/include/log0log.h
@@ -20,7 +20,7 @@ typedef struct log_group_struct log_group_t;
extern ibool log_do_write;
extern ibool log_debug_writes;
-/* Wait modes for log_flush_up_to */
+/* Wait modes for log_write_up_to */
#define LOG_NO_WAIT 91
#define LOG_WAIT_ONE_GROUP 92
#define LOG_WAIT_ALL_GROUPS 93
@@ -157,26 +157,21 @@ log_io_complete(
/*============*/
log_group_t* group); /* in: log group */
/**********************************************************
-Flushes the log files to the disk, using, for example, the Unix fsync.
-This function does the flush even if the user has set
-srv_flush_log_at_trx_commit = FALSE. */
-
-void
-log_flush_to_disk(void);
-/*===================*/
-/**********************************************************
This function is called, e.g., when a transaction wants to commit. It checks
-that the log has been flushed to disk up to the last log entry written by the
-transaction. If there is a flush running, it waits and checks if the flush
-flushed enough. If not, starts a new flush. */
+that the log has been written to the log file up to the last log entry written
+by the transaction. If there is a flush running, it waits and checks if the
+flush flushed enough. If not, starts a new flush. */
void
-log_flush_up_to(
+log_write_up_to(
/*============*/
dulint lsn, /* in: log sequence number up to which the log should
- be flushed, ut_dulint_max if not specified */
- ulint wait); /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ be written, ut_dulint_max if not specified */
+ ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
or LOG_WAIT_ALL_GROUPS */
+ ibool flush_to_disk);
+ /* in: TRUE if we want the written log also to be
+ flushed to disk */
/********************************************************************
Advances the smallest lsn for which there are unflushed dirty blocks in the
buffer pool and also may make a new checkpoint. NOTE: this function may only
@@ -741,27 +736,37 @@ struct log_struct{
be advanced, it is enough that the
write i/o has been completed for all
log groups */
- dulint flush_lsn; /* end lsn for the current flush */
- ulint flush_end_offset;/* the data in buffer has been flushed
+ dulint write_lsn; /* end lsn for the current running
+ write */
+ ulint write_end_offset;/* the data in buffer has been written
up to this offset when the current
- flush ends: this field will then
+ write ends: this field will then
be copied to buf_next_to_write */
- ulint n_pending_writes;/* number of currently pending flush
- writes */
+ dulint current_flush_lsn;/* end lsn for the current running
+ write + flush operation */
+ dulint flushed_to_disk_lsn;
+ /* how far we have written the log
+ AND flushed to disk */
+ ulint n_pending_writes;/* number of currently pending flushes
+ or writes */
+ /* NOTE on the 'flush' in names of the fields below: starting from
+ 4.0.14, we separate the write of the log file and the actual fsync()
+ or other method to flush it to disk. The names below should really
+ be 'flush_or_write'! */
os_event_t no_flush_event; /* this event is in the reset state
- when a flush is running; a thread
- should wait for this without owning
- the log mutex, but NOTE that to set or
- reset this event, the thread MUST own
- the log mutex! */
+ when a flush or a write is running;
+ a thread should wait for this without
+ owning the log mutex, but NOTE that
+ to set or reset this event, the
+ thread MUST own the log mutex! */
ibool one_flushed; /* during a flush, this is first FALSE
and becomes TRUE when one log group
- has been flushed */
+ has been written or flushed */
os_event_t one_flushed_event;/* this event is reset when the
- flush has not yet completed for any
- log group; e.g., this means that a
- transaction has been committed when
- this is set; a thread should wait
+ flush or write has not yet completed
+ for any log group; e.g., this means
+ that a transaction has been committed
+ when this is set; a thread should wait
for this without owning the log mutex,
but NOTE that to set or reset this
event, the thread MUST own the log
diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h
index be96519c4ea..39229923375 100644
--- a/innobase/include/trx0trx.h
+++ b/innobase/include/trx0trx.h
@@ -157,6 +157,15 @@ trx_commit_for_mysql(
/* out: 0 or error number */
trx_t* trx); /* in: trx handle */
/**************************************************************************
+If required, flushes the log to disk. Meant to be called after
+trx_commit_for_mysql() was run with trx->flush_log_later == TRUE. */
+
+ulint
+trx_commit_complete_for_mysql(
+/*==========================*/
+ /* out: 0 or error number */
+ trx_t* trx); /* in: trx handle */
+/**************************************************************************
Marks the latest SQL statement ended. */
void
@@ -343,6 +352,11 @@ struct trx_struct{
dulint no; /* transaction serialization number ==
max trx id when the transaction is
moved to COMMITTED_IN_MEMORY state */
+ ibool flush_log_later;/* when we commit the transaction
+ in MySQL's binlog write, we will
+ flush the log to disk later in
+ a separate call */
+ dulint commit_lsn; /* lsn at the time of the commit */
ibool dict_operation; /* TRUE if the trx is used to create
a table, create an index, or drop a
table */
diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c
index 539cde337bd..25cc666e802 100644
--- a/innobase/log/log0log.c
+++ b/innobase/log/log0log.c
@@ -178,7 +178,7 @@ loop:
/* Not enough free space, do a syncronous flush of the log
buffer */
- log_flush_up_to(ut_dulint_max, LOG_WAIT_ALL_GROUPS);
+ log_write_up_to(ut_dulint_max, LOG_WAIT_ALL_GROUPS, TRUE);
count++;
@@ -675,7 +675,9 @@ log_init(void)
log_sys->buf_next_to_write = 0;
- log_sys->flush_lsn = ut_dulint_zero;
+ log_sys->write_lsn = ut_dulint_zero;
+ log_sys->current_flush_lsn = ut_dulint_zero;
+ log_sys->flushed_to_disk_lsn = ut_dulint_zero;
log_sys->written_to_some_lsn = log_sys->lsn;
log_sys->written_to_all_lsn = log_sys->lsn;
@@ -867,7 +869,7 @@ log_group_check_flush_completion(
printf("Log flushed first to group %lu\n", group->id);
}
- log_sys->written_to_some_lsn = log_sys->flush_lsn;
+ log_sys->written_to_some_lsn = log_sys->write_lsn;
log_sys->one_flushed = TRUE;
return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
@@ -896,15 +898,15 @@ log_sys_check_flush_completion(void)
if (log_sys->n_pending_writes == 0) {
- log_sys->written_to_all_lsn = log_sys->flush_lsn;
- log_sys->buf_next_to_write = log_sys->flush_end_offset;
+ log_sys->written_to_all_lsn = log_sys->write_lsn;
+ log_sys->buf_next_to_write = log_sys->write_end_offset;
- if (log_sys->flush_end_offset > log_sys->max_buf_free / 2) {
+ if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
/* Move the log buffer content to the start of the
buffer */
move_start = ut_calc_align_down(
- log_sys->flush_end_offset,
+ log_sys->write_end_offset,
OS_FILE_LOG_BLOCK_SIZE);
move_end = ut_calc_align(log_sys->buf_free,
OS_FILE_LOG_BLOCK_SIZE);
@@ -982,57 +984,6 @@ log_io_complete(
}
/**********************************************************
-Flushes the log files to the disk, using, for example, the Unix fsync.
-This function does the flush even if the user has set
-srv_flush_log_at_trx_commit = FALSE. */
-
-void
-log_flush_to_disk(void)
-/*===================*/
-{
- log_group_t* group;
-loop:
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->n_pending_writes > 0) {
- /* A log file write is running */
-
- mutex_exit(&(log_sys->mutex));
-
- /* Wait for the log file write to complete and try again */
-
- os_event_wait(log_sys->no_flush_event);
-
- goto loop;
- }
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- log_sys->n_pending_writes++;
- group->n_pending_writes++;
-
- os_event_reset(log_sys->no_flush_event);
- os_event_reset(log_sys->one_flushed_event);
-
- mutex_exit(&(log_sys->mutex));
-
- fil_flush(group->space_id);
-
- mutex_enter(&(log_sys->mutex));
-
- ut_a(group->n_pending_writes == 1);
- ut_a(log_sys->n_pending_writes == 1);
-
- group->n_pending_writes--;
- log_sys->n_pending_writes--;
-
- os_event_set(log_sys->no_flush_event);
- os_event_set(log_sys->one_flushed_event);
-
- mutex_exit(&(log_sys->mutex));
-}
-
-/**********************************************************
Writes a log file header to a log file space. */
static
void
@@ -1205,12 +1156,15 @@ by the transaction. If there is a flush running, it waits and checks if the
flush flushed enough. If not, starts a new flush. */
void
-log_flush_up_to(
+log_write_up_to(
/*============*/
dulint lsn, /* in: log sequence number up to which the log should
be written, ut_dulint_max if not specified */
- ulint wait) /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
or LOG_WAIT_ALL_GROUPS */
+ ibool flush_to_disk)
+ /* in: TRUE if we want the written log also to be
+ flushed to disk */
{
log_group_t* group;
ulint start_offset;
@@ -1239,9 +1193,18 @@ loop:
mutex_enter(&(log_sys->mutex));
- if ((ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0)
- || ((ut_dulint_cmp(log_sys->written_to_some_lsn, lsn) >= 0)
- && (wait != LOG_WAIT_ALL_GROUPS))) {
+ if (flush_to_disk
+ && ut_dulint_cmp(log_sys->flushed_to_disk_lsn, lsn) >= 0) {
+
+ mutex_exit(&(log_sys->mutex));
+
+ return;
+ }
+
+ if (!flush_to_disk
+ && (ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0
+ || (ut_dulint_cmp(log_sys->written_to_some_lsn, lsn) >= 0
+ && wait != LOG_WAIT_ALL_GROUPS))) {
mutex_exit(&(log_sys->mutex));
@@ -1249,10 +1212,19 @@ loop:
}
if (log_sys->n_pending_writes > 0) {
- /* A flush is running */
+ /* A write (+ possibly flush to disk) is running */
+
+ if (flush_to_disk
+ && ut_dulint_cmp(log_sys->current_flush_lsn, lsn) >= 0) {
+ /* The write + flush will write enough: wait for it to
+ complete */
+
+ goto do_waits;
+ }
- if (ut_dulint_cmp(log_sys->flush_lsn, lsn) >= 0) {
- /* The flush will flush enough: wait for it to
+ if (!flush_to_disk
+ && ut_dulint_cmp(log_sys->write_lsn, lsn) >= 0) {
+ /* The write will write enough: wait for it to
complete */
goto do_waits;
@@ -1260,16 +1232,17 @@ loop:
mutex_exit(&(log_sys->mutex));
- /* Wait for the flush to complete and try to start a new
- flush */
+ /* Wait for the write to complete and try to start a new
+ write */
os_event_wait(log_sys->no_flush_event);
goto loop;
}
- if (log_sys->buf_free == log_sys->buf_next_to_write) {
- /* Nothing to flush */
+ if (!flush_to_disk
+ && log_sys->buf_free == log_sys->buf_next_to_write) {
+ /* Nothing to write and no flush to disk requested */
mutex_exit(&(log_sys->mutex));
@@ -1277,7 +1250,7 @@ loop:
}
if (log_debug_writes) {
- printf("Flushing log from %lu %lu up to lsn %lu %lu\n",
+ printf("Writing log from %lu %lu up to lsn %lu %lu\n",
ut_dulint_get_high(log_sys->written_to_all_lsn),
ut_dulint_get_low(log_sys->written_to_all_lsn),
ut_dulint_get_high(log_sys->lsn),
@@ -1301,7 +1274,12 @@ loop:
ut_ad(area_end - area_start > 0);
- log_sys->flush_lsn = log_sys->lsn;
+ log_sys->write_lsn = log_sys->lsn;
+
+ if (flush_to_disk) {
+ log_sys->current_flush_lsn = log_sys->lsn;
+ }
+
log_sys->one_flushed = FALSE;
log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
@@ -1318,10 +1296,12 @@ loop:
OS_FILE_LOG_BLOCK_SIZE);
log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
- log_sys->flush_end_offset = log_sys->buf_free;
+ log_sys->write_end_offset = log_sys->buf_free;
group = UT_LIST_GET_FIRST(log_sys->log_groups);
+ /* Do the write to the log files */
+
while (group) {
log_group_write_buf(LOG_FLUSH, group,
log_sys->buf + area_start,
@@ -1330,20 +1310,25 @@ loop:
OS_FILE_LOG_BLOCK_SIZE),
start_offset - area_start);
- log_group_set_fields(group, log_sys->flush_lsn);
+ log_group_set_fields(group, log_sys->write_lsn);
group = UT_LIST_GET_NEXT(log_groups, group);
}
mutex_exit(&(log_sys->mutex));
- if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
- && srv_flush_log_at_trx_commit != 2) {
+ if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
+ /* O_DSYNC means the OS did not buffer the log file at all:
+ so we have also flushed to disk what we have written */
+
+ log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
+
+ } else if (flush_to_disk) {
group = UT_LIST_GET_FIRST(log_sys->log_groups);
fil_flush(group->space_id);
+ log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
}
mutex_enter(&(log_sys->mutex));
@@ -1403,7 +1388,7 @@ log_flush_margin(void)
mutex_exit(&(log->mutex));
if (do_flush) {
- log_flush_up_to(ut_dulint_max, LOG_NO_WAIT);
+ log_write_up_to(ut_dulint_max, LOG_NO_WAIT, FALSE);
}
}
@@ -1555,7 +1540,8 @@ log_group_checkpoint(
buf = group->checkpoint_buf;
mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
- mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
+ mach_write_to_8(buf + LOG_CHECKPOINT_LSN,
+ log_sys->next_checkpoint_lsn);
mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
log_group_calc_lsn_offset(
@@ -1664,8 +1650,10 @@ log_reset_first_header_and_checkpoint(
lsn = ut_dulint_add(start, LOG_BLOCK_HDR_SIZE);
/* Write the label of ibbackup --restore */
- sprintf((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "ibbackup ");
- ut_sprintf_timestamp((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
+ sprintf((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
+ "ibbackup ");
+ ut_sprintf_timestamp(
+ (char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
+ strlen("ibbackup "));
buf = hdr_buf + LOG_CHECKPOINT_1;
@@ -1773,7 +1761,7 @@ log_checkpoint(
write-ahead-logging algorithm ensures that the log has been flushed
up to oldest_lsn. */
- log_flush_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS);
+ log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
mutex_enter(&(log_sys->mutex));
@@ -2466,7 +2454,7 @@ loop:
mutex_exit(&(log_sys->mutex));
- log_flush_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS);
+ log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
calc_new_limit = FALSE;
@@ -3104,8 +3092,8 @@ log_print(
"Last checkpoint at %lu %lu\n",
ut_dulint_get_high(log_sys->lsn),
ut_dulint_get_low(log_sys->lsn),
- ut_dulint_get_high(log_sys->written_to_some_lsn),
- ut_dulint_get_low(log_sys->written_to_some_lsn),
+ ut_dulint_get_high(log_sys->flushed_to_disk_lsn),
+ ut_dulint_get_low(log_sys->flushed_to_disk_lsn),
ut_dulint_get_high(log_sys->last_checkpoint_lsn),
ut_dulint_get_low(log_sys->last_checkpoint_lsn));
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c
index 1d1d84adda7..46129e3de79 100644
--- a/innobase/os/os0file.c
+++ b/innobase/os/os0file.c
@@ -521,10 +521,11 @@ try_again:
}
#endif
#ifdef UNIV_NON_BUFFERED_IO
- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
+ if (type == OS_LOG_FILE) {
/* Do not use unbuffered i/o to log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
+ group commit with MySQL binlogging requires that we
+ separate the log file write from the log file flush
+ to disk. */
} else {
if (srv_win_file_flush_method ==
SRV_WIN_IO_UNBUFFERED) {
diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c
index db1119a2abc..428e4d568f3 100644
--- a/innobase/row/row0mysql.c
+++ b/innobase/row/row0mysql.c
@@ -1664,7 +1664,7 @@ row_drop_table_for_mysql_in_background(
the InnoDB data dictionary get out-of-sync if the user runs
with innodb_flush_log_at_trx_commit = 0 */
- log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
trx_commit_for_mysql(trx);
diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
index d90b818ad4b..07df708e5fb 100644
--- a/innobase/srv/srv0srv.c
+++ b/innobase/srv/srv0srv.c
@@ -2812,8 +2812,7 @@ loop:
at transaction commit */
srv_main_thread_op_info = (char*)"flushing log";
- log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
- log_flush_to_disk();
+ log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
/* If there were less than 10 i/os during the
one second sleep, we assume that there is free
@@ -2831,8 +2830,8 @@ loop:
srv_main_thread_op_info =
(char*)"flushing log";
- log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
- log_flush_to_disk();
+ log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP,
+ TRUE);
}
if (srv_activity_count == old_activity_count) {
@@ -2867,8 +2866,7 @@ loop:
buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
srv_main_thread_op_info = (char*) "flushing log";
- log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
- log_flush_to_disk();
+ log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
}
/* We run a batch of insert buffer merge every 10 seconds,
@@ -2878,8 +2876,7 @@ loop:
ibuf_contract_for_n_pages(TRUE, 5);
srv_main_thread_op_info = (char*)"flushing log";
- log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
- log_flush_to_disk();
+ log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
/* We run a full purge every 10 seconds, even if the server
were active */
@@ -2903,8 +2900,8 @@ loop:
if (difftime(current_time, last_flush_time) > 1) {
srv_main_thread_op_info = (char*) "flushing log";
- log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
- log_flush_to_disk();
+ log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP,
+ TRUE);
last_flush_time = current_time;
}
}
diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c
index 4ce2236f78a..e6ef400bb40 100644
--- a/innobase/trx/trx0trx.c
+++ b/innobase/trx/trx0trx.c
@@ -89,6 +89,8 @@ trx_create(
trx->check_foreigns = TRUE;
trx->check_unique_secondary = TRUE;
+ trx->flush_log_later = FALSE;
+
trx->dict_operation = FALSE;
trx->mysql_thd = NULL;
@@ -780,13 +782,26 @@ trx_commit_off_kernel(
/*-------------------------------------*/
- /* Most MySQL users run with srv_flush_.. set to FALSE: */
+ /* Most MySQL users run with srv_flush_.. set to 0: */
- if (srv_flush_log_at_trx_commit) {
-
- log_flush_up_to(lsn, LOG_WAIT_ONE_GROUP);
+ if (srv_flush_log_at_trx_commit != 0) {
+ if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC
+ && srv_flush_log_at_trx_commit != 2
+ && !trx->flush_log_later) {
+
+ /* Write the log to the log files AND flush
+ them to disk */
+
+ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
+ } else {
+ /* Write the log but do not flush it to disk */
+
+ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
+ }
}
+ trx->commit_lsn = lsn;
+
/*-------------------------------------*/
mutex_enter(&kernel_mutex);
@@ -1468,6 +1483,31 @@ trx_commit_for_mysql(
}
/**************************************************************************
+If required, flushes the log to disk. Meant to be called after
+trx_commit_for_mysql() was run with trx->flush_log_later == TRUE. */
+
+ulint
+trx_commit_complete_for_mysql(
+/*==========================*/
+ /* out: 0 or error number; currently always 0 */
+ trx_t* trx) /* in: trx handle */
+{
+ ut_a(trx);
+
+ if (srv_flush_log_at_trx_commit == 1
+ && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
+ trx->op_info = (char *) "flushing log";
+
+ /* Flush the log files to disk up to this trx's commit lsn */
+ log_write_up_to(trx->commit_lsn, LOG_WAIT_ONE_GROUP, TRUE);
+
+ trx->op_info = (char *) "";
+ }
+
+ return(0);
+}
+
+/**************************************************************************
Marks the latest SQL statement ended. */
void