diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2021-03-18 14:43:08 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2021-03-18 14:43:08 +0200 |
commit | 126725421e56293d7c8b816e066271606b59dcd5 (patch) | |
tree | 4210f684a907d615da1c34b280218449d1118141 | |
parent | 39c015b77e0b5adced487ef634f87b39b9b4abc0 (diff) | |
download | mariadb-git-126725421e56293d7c8b816e066271606b59dcd5.tar.gz |
MDEV-25121: innodb_flush_method=O_DIRECT fails on compressed tables
Tests with 4096-byte sector size confirm that it is
safe to use O_DIRECT with page_compressed tables.
That had been disabled on Linux, in an attempt to fix MDEV-21584
which had been filed for the O_DIRECT problems earlier.
The fil_node_t::block_size was being set mostly correctly until
commit 10dd290b4b8b8b235c8cf42e100f0a4415629e79 (MDEV-17380)
introduced a regression in MariaDB Server 10.4.4.
fil_node_open_file(): Only avoid setting O_DIRECT on
ROW_FORMAT=COMPRESSED tables that use KEY_BLOCK_SIZE=1 or 2
(1024 or 2048 bytes).
fil_ibd_create(): Avoid setting O_DIRECT on ROW_FORMAT=COMPRESSED tables
that use KEY_BLOCK_SIZE=1 or 2 (1024 or 2048 bytes).
fil_node_t::find_metadata(): Require fstat() to be always invoked
outside Microsoft Windows, so that fil_node_t::block_size can be set.
fil_node_t::read_page0(): Rely on find_metadata() to assign block_size.
Thanks to Vladislav Vaintroub for testing this on Microsoft Windows
using an old-fashioned rotational hard disk with 4KiB sector size.
Reviewed by: Vladislav Vaintroub
This is a port of commit 00f620b27e960c4b96a8392b27742fd5e41a69e3
and commit 6505662c23ba81331d91f65c18e06a759d6f148f from 10.2.
-rw-r--r-- | storage/innobase/fil/fil0fil.cc | 41 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 2 | ||||
-rw-r--r-- | storage/innobase/os/os0file.cc | 29 |
3 files changed, 41 insertions, 31 deletions
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index a8b64f836cc..40a0744dd3d 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -488,12 +488,16 @@ static bool fil_node_open_file(fil_node_t* node) const bool first_time_open = node->size == 0; - bool o_direct_possible = !FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags); - if (const ulint ssize = FSP_FLAGS_GET_ZIP_SSIZE(space->flags)) { - compile_time_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096); - if (ssize < 3) { - o_direct_possible = false; - } + ulint type; + static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096, + "compatibility"); + switch (FSP_FLAGS_GET_ZIP_SSIZE(space->flags)) { + case 1: + case 2: + type = OS_DATA_FILE_NO_O_DIRECT; + break; + default: + type = OS_DATA_FILE; } if (first_time_open @@ -514,9 +518,7 @@ retry: ? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT : OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_AIO, - o_direct_possible - ? OS_DATA_FILE - : OS_DATA_FILE_NO_O_DIRECT, + type, read_only_mode, &success); @@ -556,9 +558,7 @@ fail: ? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT : OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_AIO, - o_direct_possible - ? OS_DATA_FILE - : OS_DATA_FILE_NO_O_DIRECT, + type, read_only_mode, &success); } @@ -2904,13 +2904,22 @@ fil_ibd_create( return NULL; } + ulint type; + static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096, + "compatibility"); + switch (FSP_FLAGS_GET_ZIP_SSIZE(flags)) { + case 1: + case 2: + type = OS_DATA_FILE_NO_O_DIRECT; + break; + default: + type = OS_DATA_FILE; + } + file = os_file_create( innodb_data_file_key, path, OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, - OS_FILE_NORMAL, - OS_DATA_FILE, - srv_read_only_mode, - &success); + OS_FILE_NORMAL, type, srv_read_only_mode, &success); if (!success) { /* The following call will print an error message */ diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 3001817a78c..873fcd67a3a 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -637,7 +637,7 @@ struct fil_node_t { /** Determine some file metadata when creating or reading the file. @param file the file that is being created, or OS_FILE_CLOSED */ void find_metadata(os_file_t file = OS_FILE_CLOSED -#ifdef UNIV_LINUX +#ifndef _WIN32 , struct stat* statbuf = NULL #endif ); diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index f96ff6b5171..bf38252578b 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2,7 +2,7 @@ Copyright (c) 1995, 2019, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2020, MariaDB Corporation. +Copyright (c) 2013, 2021, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are @@ -4137,7 +4137,9 @@ os_file_create_func( case SRV_ALL_O_DIRECT_FSYNC: /*Traditional Windows behavior, no buffering for any files.*/ - attributes |= FILE_FLAG_NO_BUFFERING; + if (type != OS_DATA_FILE_NO_O_DIRECT) { + attributes |= FILE_FLAG_NO_BUFFERING; + } break; case SRV_FSYNC: @@ -7707,7 +7709,7 @@ static bool is_file_on_ssd(char *file_path) /** Determine some file metadata when creating or reading the file. @param file the file that is being created, or OS_FILE_CLOSED */ void fil_node_t::find_metadata(os_file_t file -#ifdef UNIV_LINUX +#ifndef _WIN32 , struct stat* statbuf #endif ) @@ -7747,18 +7749,18 @@ void fil_node_t::find_metadata(os_file_t file block_size = 512; } #else - on_ssd = space->atomic_write_supported; -# ifdef UNIV_LINUX - if (!on_ssd) { - struct stat sbuf; - if (!statbuf && !fstat(file, &sbuf)) { - statbuf = &sbuf; - } - if (statbuf && fil_system.is_ssd(statbuf->st_dev)) { - on_ssd = true; - } + struct stat sbuf; + if (!statbuf && !fstat(file, &sbuf)) { + statbuf = &sbuf; } + if (statbuf) { + block_size = statbuf->st_blksize; + } + on_ssd = space->atomic_write_supported +# ifdef UNIV_LINUX + || (statbuf && fil_system.is_ssd(statbuf->st_dev)) # endif + ; #endif if (!space->atomic_write_supported) { space->atomic_write_supported = atomic_write @@ -7794,7 +7796,6 @@ bool fil_node_t::read_page0(bool first) if (fstat(handle, &statbuf)) { return false; } - block_size = statbuf.st_blksize; os_offset_t size_bytes = statbuf.st_size; #else os_offset_t size_bytes = os_file_get_size(handle); |