summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marko Mäkelä <marko.makela@mariadb.com> 2017-02-20 17:58:42 +0200
committer: Marko Mäkelä <marko.makela@mariadb.com> 2017-02-21 16:45:03 +0200
commit: 978179a9d4933d3d8d2ac99028798e8a07095dd4 (patch)
tree: a4f21dbbd812e8c347413f7ead578b1677d08691
parent: 2bfe83adec576a27aed2d87ff65cebddc3430d2e (diff)
download: mariadb-git-978179a9d4933d3d8d2ac99028798e8a07095dd4.tar.gz
MDEV-11520 Extending an InnoDB data file unnecessarily allocates a large memory buffer on Windows

fil_extend_space_to_desired_size(), os_file_set_size(): Use calloc() for memory allocation, and handle failures. Properly check the return status of posix_fallocate().

On Windows, instead of extending the file by at most 1 megabyte at a time, write a zero-filled page at the end of the file. According to the Microsoft blog post https://blogs.msdn.microsoft.com/oldnewthing/20110922-00/?p=9573 this will physically extend the file by writing zero bytes. (InnoDB never uses DeviceIoControl() to set the file sparse.)

For innodb_plugin, port the XtraDB fix for MySQL Bug#56433 (introducing fil_system->file_extend_mutex). The bug was fixed differently in MySQL 5.6 (and MariaDB Server 10.0).
-rw-r--r--  storage/innobase/fil/fil0fil.c        85
-rw-r--r--  storage/innobase/include/sync0sync.h   1
-rw-r--r--  storage/innobase/os/os0file.c         82
-rw-r--r--  storage/xtradb/fil/fil0fil.c          84
-rw-r--r--  storage/xtradb/os/os0file.c           76
5 files changed, 162 insertions, 166 deletions
diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
index d7ac3dd14eb..195fa7adde9 100644
--- a/storage/innobase/fil/fil0fil.c
+++ b/storage/innobase/fil/fil0fil.c
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -248,6 +249,7 @@ the ib_logfiles form a 'space' and it is handled here */
struct fil_system_struct {
#ifndef UNIV_HOTBACKUP
mutex_t mutex; /*!< The mutex protecting the cache */
+ mutex_t file_extend_mutex;
#endif /* !UNIV_HOTBACKUP */
hash_table_t* spaces; /*!< The hash table of spaces in the
system; they are hashed on the space
@@ -1658,6 +1660,8 @@ fil_init(
mutex_create(fil_system_mutex_key,
&fil_system->mutex, SYNC_ANY_LATCH);
+ mutex_create(fil_system_mutex_key,
+ &fil_system->file_extend_mutex, SYNC_OUTER_ANY_LATCH);
fil_system->spaces = hash_create(hash_size);
fil_system->name_hash = hash_create(hash_size);
@@ -4096,6 +4100,10 @@ fil_extend_space_to_desired_size(
ulint page_size;
ibool success = TRUE;
+ /* fil_system->file_extend_mutex is for http://bugs.mysql.com/56433
+ to prevent concurrent fil_extend_space_to_desired_size()
+ while fil_system->mutex is temporarily released */
+ mutex_enter(&fil_system->file_extend_mutex);
fil_mutex_enter_and_prepare_for_io(space_id);
space = fil_space_get_by_id(space_id);
@@ -4107,6 +4115,7 @@ fil_extend_space_to_desired_size(
*actual_size = space->size;
mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system->file_extend_mutex);
return(TRUE);
}
@@ -4123,22 +4132,24 @@ fil_extend_space_to_desired_size(
start_page_no = space->size;
file_start_page_no = space->size - node->size;
+ mutex_exit(&fil_system->mutex);
+
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
ib_int64_t start_offset = start_page_no * page_size;
ib_int64_t end_offset = (size_after_extend - start_page_no) * page_size;
ib_int64_t desired_size = size_after_extend*page_size;
+ int err = posix_fallocate(
+ node->handle, start_offset, end_offset);
- mutex_exit(&fil_system->mutex);
+ success = !err;
- if (posix_fallocate(node->handle, start_offset, end_offset) == -1) {
- fprintf(stderr, "InnoDB: Error: preallocating file "
- "space for file \'%s\' failed. Current size "
- " %lld, len %lld, desired size %lld\n",
- node->name, start_offset, end_offset, desired_size);
- success = FALSE;
- } else {
- success = TRUE;
+ if (!success) {
+ fprintf(stderr,
+ "InnoDB: Error: extending file %s"
+ " from %lld to %lld bytes"
+ " failed with error %d\n",
+ node->name, start_offset, end_offset, err);
}
mutex_enter(&fil_system->mutex);
@@ -4154,14 +4165,25 @@ fil_extend_space_to_desired_size(
}
#endif
+#ifdef _WIN32
+ /* Write 1 page of zeroes at the desired end. */
+ start_page_no = size_after_extend - 1;
+ buf_size = page_size;
+#else
/* Extend at most 64 pages at a time */
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
- buf2 = mem_alloc(buf_size + page_size);
+#endif
+ buf2 = calloc(1, buf_size + page_size);
+ if (!buf2) {
+ fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF
+ " bytes to extend file\n",
+ buf_size + page_size);
+ mutex_exit(&fil_system->file_extend_mutex);
+ return(FALSE);
+ }
buf = ut_align(buf2, page_size);
- memset(buf, 0, buf_size);
-
- while (start_page_no < size_after_extend) {
+ for (;;) {
ulint n_pages = ut_min(buf_size / page_size,
size_after_extend - start_page_no);
@@ -4170,6 +4192,7 @@ fil_extend_space_to_desired_size(
offset_low = ((start_page_no - file_start_page_no)
% (4096 * ((1024 * 1024) / page_size)))
* page_size;
+
#ifdef UNIV_HOTBACKUP
success = os_file_write(node->name, node->handle, buf,
offset_low, offset_high,
@@ -4181,34 +4204,37 @@ fil_extend_space_to_desired_size(
page_size * n_pages,
NULL, NULL);
#endif
- if (success) {
- node->size += n_pages;
- space->size += n_pages;
- os_has_said_disk_full = FALSE;
- } else {
- /* Let us measure the size of the file to determine
- how much we were able to extend it */
+ /* Let us measure the size of the file to determine
+ how much we were able to extend it */
- n_pages = ((ulint)
- (os_file_get_size_as_iblonglong(
- node->handle)
- / page_size)) - node->size;
+ n_pages = (ulint) (os_file_get_size_as_iblonglong(node->handle)
+ / page_size);
- node->size += n_pages;
- space->size += n_pages;
+ mutex_enter(&fil_system->mutex);
+ ut_a(n_pages >= node->size);
+
+ start_page_no += n_pages - node->size;
+ space->size += n_pages - node->size;
+ node->size = n_pages;
+ if (success) {
+ os_has_said_disk_full = FALSE;
+ }
+
+ if (!success || start_page_no >= size_after_extend) {
break;
}
- start_page_no += n_pages;
+ mutex_exit(&fil_system->mutex);
}
- mem_free(buf2);
-
+ free(buf2);
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
+#ifdef HAVE_POSIX_FALLOCATE
complete_io:
+#endif /* HAVE_POSIX_FALLOCATE */
*actual_size = space->size;
@@ -4228,6 +4254,7 @@ complete_io:
printf("Extended %s to %lu, actual size %lu pages\n", space->name,
size_after_extend, *actual_size); */
mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system->file_extend_mutex);
fil_flush(space_id);
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index bc8d0d27be3..f074ca2f189 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -675,6 +675,7 @@ or row lock! */
#define SYNC_BUF_BLOCK 146 /* Block mutex */
#define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
#define SYNC_DOUBLEWRITE 140
+#define SYNC_OUTER_ANY_LATCH 136
#define SYNC_ANY_LATCH 135
#define SYNC_MEM_HASH 131
#define SYNC_MEM_POOL 130
diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
index d792e7a61d8..72b9651f596 100644
--- a/storage/innobase/os/os0file.c
+++ b/storage/innobase/os/os0file.c
@@ -2,6 +2,7 @@
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
+Copyright (c) 2012, 2017, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -2027,48 +2028,44 @@ os_file_set_size(
ut_a(size == (size & 0xFFFFFFFF));
- current_size = 0;
desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
#ifdef HAVE_POSIX_FALLOCATE
- if (srv_use_posix_fallocate) {
- if (posix_fallocate(file, current_size, desired_size) == -1) {
+ if (srv_use_posix_fallocate) {
+ int err = posix_fallocate(file, 0, desired_size);
+ if (err) {
fprintf(stderr,
- "InnoDB: Error: preallocating data for"
- " file %s failed at\n"
- "InnoDB: offset 0 size %lld %lld. Operating system"
- " error number %d.\n"
- "InnoDB: Check that the disk is not full"
- " or a disk quota exceeded.\n"
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- REFMAN "operating-system-error-codes.html\n",
- name, (long long)size_high, (long long)size, errno);
-
- return (FALSE);
+ "InnoDB: Error: preallocating %lld bytes for"
+ " file %s failed with error %d.\n",
+ desired_size, name, err);
}
- return (TRUE);
+ return(!err);
}
#endif
+#ifdef _WIN32
+ /* Write 1 page of zeroes at the desired end. */
+ buf_size = UNIV_PAGE_SIZE;
+ current_size = desired_size - buf_size;
+#else
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
- buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
+ current_size = 0;
+#endif
+ buf2 = calloc(1, buf_size + UNIV_PAGE_SIZE);
+
+ if (!buf2) {
+ fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF
+ " bytes to extend file\n",
+ buf_size + UNIV_PAGE_SIZE);
+ return(FALSE);
+ }
/* Align the buffer for possible raw i/o */
buf = ut_align(buf2, UNIV_PAGE_SIZE);
- /* Write buffer full of zeros */
- memset(buf, 0, buf_size);
-
- if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
-
- fprintf(stderr, "InnoDB: Progress in MB:");
- }
-
- while (current_size < desired_size) {
+ do {
ulint n_bytes;
if (desired_size - current_size < (ib_int64_t) buf_size) {
@@ -2082,37 +2079,14 @@ os_file_set_size(
(ulint)(current_size >> 32),
n_bytes);
if (!ret) {
- ut_free(buf2);
- goto error_handling;
- }
-
- /* Print about progress for each 100 MB written */
- if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024)
- != current_size / (ib_int64_t)(100 * 1024 * 1024)) {
-
- fprintf(stderr, " %lu00",
- (ulong) ((current_size + n_bytes)
- / (ib_int64_t)(100 * 1024 * 1024)));
+ break;
}
current_size += n_bytes;
- }
-
- if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
-
- fprintf(stderr, "\n");
- }
+ } while (current_size < desired_size);
- ut_free(buf2);
-
- ret = os_file_flush(file);
-
- if (ret) {
- return(TRUE);
- }
-
-error_handling:
- return(FALSE);
+ free(buf2);
+ return(ret && os_file_flush(file));
}
/***********************************************************************//**
diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c
index 86e00dc22e4..3f9103c521f 100644
--- a/storage/xtradb/fil/fil0fil.c
+++ b/storage/xtradb/fil/fil0fil.c
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -4934,9 +4935,9 @@ fil_extend_space_to_desired_size(
ulint page_size;
ibool success = TRUE;
- /* file_extend_mutex is for http://bugs.mysql.com/56433 */
- /* to protect from the other fil_extend_space_to_desired_size() */
- /* during temprary releasing &fil_system->mutex */
+ /* fil_system->file_extend_mutex is for http://bugs.mysql.com/56433
+ to prevent concurrent fil_extend_space_to_desired_size()
+ while fil_system->mutex is temporarily released */
mutex_enter(&fil_system->file_extend_mutex);
fil_mutex_enter_and_prepare_for_io(space_id);
@@ -4966,6 +4967,8 @@ fil_extend_space_to_desired_size(
start_page_no = space->size;
file_start_page_no = space->size - node->size;
+ mutex_exit(&fil_system->mutex);
+
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
@@ -4973,19 +4976,19 @@ fil_extend_space_to_desired_size(
= file_start_page_no * page_size;
ib_int64_t end_offset
= (size_after_extend - file_start_page_no) * page_size;
+ int err = posix_fallocate(
+ node->handle, start_offset, end_offset);
- mutex_exit(&fil_system->mutex);
- success = (posix_fallocate(node->handle, start_offset,
- end_offset) == 0);
- if (!success)
- {
+ success = !err;
+
+ if (!success) {
fprintf(stderr,
- "InnoDB: Error: preallocating file space for "
- "file \'%s\' failed. Current size %lld, "
- "len %lld, desired size %lld\n", node->name,
- start_offset, end_offset,
- start_offset + end_offset);
+ "InnoDB: Error: extending file %s"
+ " from %lld to %lld bytes"
+ " failed with error %d\n",
+ node->name, start_offset, end_offset, err);
}
+
mutex_enter(&fil_system->mutex);
if (success) {
@@ -4999,14 +5002,25 @@ fil_extend_space_to_desired_size(
}
#endif
+#ifdef _WIN32
+ /* Write 1 page of zeroes at the desired end. */
+ start_page_no = size_after_extend - 1;
+ buf_size = page_size;
+#else
/* Extend at most 64 pages at a time */
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
- buf2 = mem_alloc(buf_size + page_size);
+#endif
+ buf2 = calloc(1, buf_size + page_size);
+ if (!buf2) {
+ fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF
+ " bytes to extend file\n",
+ buf_size + page_size);
+ mutex_exit(&fil_system->file_extend_mutex);
+ return(FALSE);
+ }
buf = ut_align(buf2, page_size);
- memset(buf, 0, buf_size);
-
- while (start_page_no < size_after_extend) {
+ for (;;) {
ulint n_pages = ut_min(buf_size / page_size,
size_after_extend - start_page_no);
@@ -5016,7 +5030,6 @@ fil_extend_space_to_desired_size(
% (4096 * ((1024 * 1024) / page_size)))
* page_size;
- mutex_exit(&fil_system->mutex);
#ifdef UNIV_HOTBACKUP
success = os_file_write(node->name, node->handle, buf,
offset_low, offset_high,
@@ -5028,36 +5041,37 @@ fil_extend_space_to_desired_size(
page_size * n_pages,
NULL, NULL, space_id, NULL);
#endif
- mutex_enter(&fil_system->mutex);
- if (success) {
- node->size += n_pages;
- space->size += n_pages;
+ /* Let us measure the size of the file to determine
+ how much we were able to extend it */
- os_has_said_disk_full = FALSE;
- } else {
- /* Let us measure the size of the file to determine
- how much we were able to extend it */
+ n_pages = (ulint) (os_file_get_size_as_iblonglong(node->handle)
+ / page_size);
- n_pages = ((ulint)
- (os_file_get_size_as_iblonglong(
- node->handle)
- / page_size)) - node->size;
+ mutex_enter(&fil_system->mutex);
+ ut_a(n_pages >= node->size);
- node->size += n_pages;
- space->size += n_pages;
+ start_page_no += n_pages - node->size;
+ space->size += n_pages - node->size;
+ node->size = n_pages;
+ if (success) {
+ os_has_said_disk_full = FALSE;
+ }
+
+ if (!success || start_page_no >= size_after_extend) {
break;
}
- start_page_no += n_pages;
+ mutex_exit(&fil_system->mutex);
}
- mem_free(buf2);
-
+ free(buf2);
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
+#ifdef HAVE_POSIX_FALLOCATE
complete_io:
+#endif /* HAVE_POSIX_FALLOCATE */
*actual_size = space->size;
diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c
index cca5ffa4772..201e4487ada 100644
--- a/storage/xtradb/os/os0file.c
+++ b/storage/xtradb/os/os0file.c
@@ -2,6 +2,7 @@
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
+Copyright (c) 2011, 2017, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -2184,42 +2185,44 @@ os_file_set_size(
ut_a(size == (size & 0xFFFFFFFF));
- current_size = 0;
desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
-
- if (posix_fallocate(file, current_size, desired_size) == -1) {
-
- fprintf(stderr, "InnoDB: Error: preallocating file "
- "space for file \'%s\' failed. Current size "
- "%lld, desired size %lld\n",
- name, current_size, desired_size);
- os_file_handle_error_no_exit(name, "posix_fallocate");
- return(FALSE);
+ int err = posix_fallocate(file, 0, desired_size);
+ if (err) {
+ fprintf(stderr,
+ "InnoDB: Error: preallocating %lld bytes for"
+ " file %s failed with error %d.\n",
+ desired_size, name, err);
}
- return(TRUE);
+ return(!err);
}
#endif
+#ifdef _WIN32
+ /* Write 1 page of zeroes at the desired end. */
+ buf_size = UNIV_PAGE_SIZE;
+ current_size = desired_size - buf_size;
+#else
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
- buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
+ current_size = 0;
+#endif
+ buf2 = calloc(1, buf_size + UNIV_PAGE_SIZE);
+
+ if (!buf2) {
+ fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF
+ " bytes to extend file\n",
+ buf_size + UNIV_PAGE_SIZE);
+ return(FALSE);
+ }
/* Align the buffer for possible raw i/o */
buf = ut_align(buf2, UNIV_PAGE_SIZE);
- /* Write buffer full of zeros */
- memset(buf, 0, buf_size);
-
- if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
-
- fprintf(stderr, "InnoDB: Progress in MB:");
- }
-
- while (current_size < desired_size) {
+ do {
ulint n_bytes;
if (desired_size - current_size < (ib_int64_t) buf_size) {
@@ -2233,37 +2236,14 @@ os_file_set_size(
(ulint)(current_size >> 32),
n_bytes);
if (!ret) {
- ut_free(buf2);
- goto error_handling;
- }
-
- /* Print about progress for each 100 MB written */
- if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024)
- != current_size / (ib_int64_t)(100 * 1024 * 1024)) {
-
- fprintf(stderr, " %lu00",
- (ulong) ((current_size + n_bytes)
- / (ib_int64_t)(100 * 1024 * 1024)));
+ break;
}
current_size += n_bytes;
- }
-
- if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
-
- fprintf(stderr, "\n");
- }
-
- ut_free(buf2);
-
- ret = os_file_flush(file, TRUE);
-
- if (ret) {
- return(TRUE);
- }
+ } while (current_size < desired_size);
-error_handling:
- return(FALSE);
+ free(buf2);
+ return(ret && os_file_flush(file, TRUE));
}
/***********************************************************************//**