summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Lindström <jan.lindstrom@mariadb.com>2017-01-24 14:40:58 +0200
committerJan Lindström <jan.lindstrom@mariadb.com>2017-01-24 14:40:58 +0200
commit6495806e59cc27313375fa8d431b7b8e777f73ff (patch)
treea3782ed0bd8ab6278e9b6d8d9f00b0587b69815b
parent0d107a85b3dd6969e66cc9cb4bd29e1cc92a7d18 (diff)
downloadmariadb-git-6495806e59cc27313375fa8d431b7b8e777f73ff.tar.gz
MDEV-11254: innodb-use-trim has no effect in 10.2
The problem was that the implementation merged from 10.1 was incompatible with InnoDB 5.7. buf0buf.cc: Add functions that return whether we should punch a hole and how big it should be. buf0flu.cc: Add the written page to IORequest. fil0fil.cc: Remove an unneeded status call and, when a tablespace is created, test whether sparse files and punch hole are supported by the file system. Add a call to get the file system block size. The file node in use is added to IORequest. Add functions to check whether punch hole is supported and to set punch hole. ha_innodb.cc: Remove unneeded status variables (trim512-32768) and trim_op_saved. Deprecate innodb_use_trim and set it ON by default. Add a function to set innodb-use-trim dynamically. db0err.h: Add error code DB_IO_NO_PUNCH_HOLE for the case where the punch hole operation fails. fil0fil.h: Add punch_hole variable to fil_space_t and block size to fil_node_t. os0api.h: Header for helper functions implemented in buf0buf.cc and fil0fil.cc and used by os0file.h. os0file.h: Remove unneeded m_block_size from IORequest; add bpage to IORequest to know the actual size of the block, and m_fil_node to know the tablespace file system block size and whether it supports punch hole. os0file.cc: Add function punch_hole() to IORequest to do the punch_hole operation, get the file system block size, and determine whether the file system supports sparse files (for punch hole). page0size.h: Remove the implicit copy disable and use this implicit copy to implement the copy_from() function. buf0dblwr.cc, buf0flu.cc, buf0rea.cc, fil0fil.cc, fil0fil.h, os0file.h, os0file.cc, log0log.cc, log0recv.cc: Remove unneeded write_size parameter from fil_io calls. srv0mon.h, srv0srv.h, srv0mon.cc: Remove unneeded trim512-trim32768 status variables. Removed these from monitor tests.
-rw-r--r--mysql-test/suite/encryption/t/innodb-discard-import-change.opt1
-rw-r--r--mysql-test/suite/innodb/include/have_innodb_punchhole.inc4
-rw-r--r--mysql-test/suite/innodb/r/innodb-trim.result20
-rw-r--r--mysql-test/suite/innodb/r/innodb_monitor.result8
-rw-r--r--mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result8
-rw-r--r--mysql-test/suite/innodb/t/innodb-trim.opt1
-rw-r--r--mysql-test/suite/innodb/t/innodb-trim.test44
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result8
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result8
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result8
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result8
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result8
-rw-r--r--mysql-test/suite/sys_vars/r/sysvars_innodb.result6
-rw-r--r--storage/innobase/buf/buf0buf.cc27
-rw-r--r--storage/innobase/buf/buf0dblwr.cc18
-rw-r--r--storage/innobase/buf/buf0flu.cc4
-rw-r--r--storage/innobase/buf/buf0rea.cc2
-rw-r--r--storage/innobase/fil/fil0fil.cc87
-rw-r--r--storage/innobase/handler/ha_innodb.cc46
-rw-r--r--storage/innobase/include/db0err.h3
-rw-r--r--storage/innobase/include/fil0fil.h18
-rw-r--r--storage/innobase/include/os0api.h75
-rw-r--r--storage/innobase/include/os0file.h203
-rw-r--r--storage/innobase/include/os0file.ic8
-rw-r--r--storage/innobase/include/page0size.h8
-rw-r--r--storage/innobase/include/srv0mon.h8
-rw-r--r--storage/innobase/include/srv0srv.h32
-rw-r--r--storage/innobase/include/univ.i6
-rw-r--r--storage/innobase/log/log0log.cc10
-rw-r--r--storage/innobase/log/log0recv.cc2
-rw-r--r--storage/innobase/os/os0file.cc461
-rw-r--r--storage/innobase/srv/srv0mon.cc64
-rw-r--r--storage/innobase/srv/srv0srv.cc5
-rw-r--r--storage/innobase/ut/ut0ut.cc2
34 files changed, 885 insertions, 336 deletions
diff --git a/mysql-test/suite/encryption/t/innodb-discard-import-change.opt b/mysql-test/suite/encryption/t/innodb-discard-import-change.opt
new file mode 100644
index 00000000000..ebf13f41150
--- /dev/null
+++ b/mysql-test/suite/encryption/t/innodb-discard-import-change.opt
@@ -0,0 +1 @@
+--loose-innodb-use-trim=0 \ No newline at end of file
diff --git a/mysql-test/suite/innodb/include/have_innodb_punchhole.inc b/mysql-test/suite/innodb/include/have_innodb_punchhole.inc
new file mode 100644
index 00000000000..74cd5c4e0f2
--- /dev/null
+++ b/mysql-test/suite/innodb/include/have_innodb_punchhole.inc
@@ -0,0 +1,4 @@
+if (!`SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_punch_hole' AND variable_value = 'ON'`)
+{
+ --skip Test requires InnoDB compiled with fallocate(FALLOC_PUNCH_HOLE| FALLOC_KEEP_SIZE)
+}
diff --git a/mysql-test/suite/innodb/r/innodb-trim.result b/mysql-test/suite/innodb/r/innodb-trim.result
new file mode 100644
index 00000000000..40eac2f8b40
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb-trim.result
@@ -0,0 +1,20 @@
+set global innodb_compression_algorithm = 1;
+create table innodb_page_compressed (c1 int not null primary key auto_increment, b char(200), c char(200), d char(200)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+Level Code Message
+create procedure innodb_insert_proc (repeat_count int)
+begin
+declare current_num int;
+set current_num = 0;
+while current_num < repeat_count do
+insert into innodb_page_compressed values (NULL,repeat('A',150),repeat('AB',75),repeat('B', 175));
+set current_num = current_num + 1;
+end while;
+end//
+commit;
+set autocommit=0;
+call innodb_insert_proc(16000);
+commit;
+set autocommit=1;
+DROP PROCEDURE innodb_insert_proc;
+DROP TABLE innodb_page_compressed;
diff --git a/mysql-test/suite/innodb/r/innodb_monitor.result b/mysql-test/suite/innodb/r/innodb_monitor.result
index 0a163193b58..263da6070b2 100644
--- a/mysql-test/suite/innodb/r/innodb_monitor.result
+++ b/mysql-test/suite/innodb/r/innodb_monitor.result
@@ -181,16 +181,8 @@ compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
compress_saved disabled
-compress_trim_sect512 disabled
-compress_trim_sect1024 disabled
-compress_trim_sect2048 disabled
-compress_trim_sect4096 disabled
-compress_trim_sect8192 disabled
-compress_trim_sect16384 disabled
-compress_trim_sect32768 disabled
compress_pages_page_compressed disabled
compress_page_compressed_trim_op disabled
-compress_page_compressed_trim_op_saved disabled
compress_pages_page_decompressed disabled
compress_pages_page_compression_error disabled
compress_pages_encrypted disabled
diff --git a/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result b/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result
index 4875dfaeb2a..f515cb047f1 100644
--- a/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result
+++ b/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result
@@ -216,16 +216,8 @@ compress_pages_decompressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL N
compression_pad_increments compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times padding is incremented to avoid compression failures
compression_pad_decrements compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times padding is decremented due to good compressibility
compress_saved compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of bytes saved by page compression
-compress_trim_sect512 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-512 TRIMed by page compression
-compress_trim_sect1024 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-1024 TRIMed by page compression
-compress_trim_sect2048 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-2048 TRIMed by page compression
-compress_trim_sect4096 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-4K TRIMed by page compression
-compress_trim_sect8192 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-8K TRIMed by page compression
-compress_trim_sect16384 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-16K TRIMed by page compression
-compress_trim_sect32768 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-32K TRIMed by page compression
compress_pages_page_compressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages compressed by page compression
compress_page_compressed_trim_op compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of TRIM operation performed by page compression
-compress_page_compressed_trim_op_saved compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of TRIM operation saved by page compression
compress_pages_page_decompressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages decompressed by page compression
compress_pages_page_compression_error compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of page compression errors
compress_pages_encrypted compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages encrypted
diff --git a/mysql-test/suite/innodb/t/innodb-trim.opt b/mysql-test/suite/innodb/t/innodb-trim.opt
new file mode 100644
index 00000000000..c33d075b002
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-trim.opt
@@ -0,0 +1 @@
+--loose-innodb-use-trim=1
diff --git a/mysql-test/suite/innodb/t/innodb-trim.test b/mysql-test/suite/innodb/t/innodb-trim.test
new file mode 100644
index 00000000000..1b64321116c
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb-trim.test
@@ -0,0 +1,44 @@
+--source include/have_innodb.inc
+--source include/have_innodb_punchhole.inc
+
+--disable_query_log
+--disable_warnings
+let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`;
+--enable_warnings
+--enable_query_log
+
+# zlib
+set global innodb_compression_algorithm = 1;
+
+create table innodb_page_compressed (c1 int not null primary key auto_increment, b char(200), c char(200), d char(200)) engine=innodb page_compressed=1 page_compression_level=9;
+show warnings;
+
+delimiter //;
+create procedure innodb_insert_proc (repeat_count int)
+begin
+ declare current_num int;
+ set current_num = 0;
+ while current_num < repeat_count do
+ insert into innodb_page_compressed values (NULL,repeat('A',150),repeat('AB',75),repeat('B', 175));
+ set current_num = current_num + 1;
+ end while;
+end//
+delimiter ;//
+commit;
+
+set autocommit=0;
+call innodb_insert_proc(16000);
+commit;
+set autocommit=1;
+
+let $wait_condition= SELECT variable_value > 5 FROM information_schema.global_status WHERE variable_name = 'innodb_num_page_compressed_trim_op';
+--source include/wait_condition.inc
+
+DROP PROCEDURE innodb_insert_proc;
+DROP TABLE innodb_page_compressed;
+
+--disable_query_log
+--disable_warnings
+EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig;
+--enable_warnings
+--enable_query_log
diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result
index 2a66a0d0931..ccd8e482756 100644
--- a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result
@@ -181,16 +181,8 @@ compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
compress_saved disabled
-compress_trim_sect512 disabled
-compress_trim_sect1024 disabled
-compress_trim_sect2048 disabled
-compress_trim_sect4096 disabled
-compress_trim_sect8192 disabled
-compress_trim_sect16384 disabled
-compress_trim_sect32768 disabled
compress_pages_page_compressed disabled
compress_page_compressed_trim_op disabled
-compress_page_compressed_trim_op_saved disabled
compress_pages_page_decompressed disabled
compress_pages_page_compression_error disabled
compress_pages_encrypted disabled
diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result
index 2a66a0d0931..ccd8e482756 100644
--- a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result
@@ -181,16 +181,8 @@ compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
compress_saved disabled
-compress_trim_sect512 disabled
-compress_trim_sect1024 disabled
-compress_trim_sect2048 disabled
-compress_trim_sect4096 disabled
-compress_trim_sect8192 disabled
-compress_trim_sect16384 disabled
-compress_trim_sect32768 disabled
compress_pages_page_compressed disabled
compress_page_compressed_trim_op disabled
-compress_page_compressed_trim_op_saved disabled
compress_pages_page_decompressed disabled
compress_pages_page_compression_error disabled
compress_pages_encrypted disabled
diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result
index 2a66a0d0931..ccd8e482756 100644
--- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result
@@ -181,16 +181,8 @@ compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
compress_saved disabled
-compress_trim_sect512 disabled
-compress_trim_sect1024 disabled
-compress_trim_sect2048 disabled
-compress_trim_sect4096 disabled
-compress_trim_sect8192 disabled
-compress_trim_sect16384 disabled
-compress_trim_sect32768 disabled
compress_pages_page_compressed disabled
compress_page_compressed_trim_op disabled
-compress_page_compressed_trim_op_saved disabled
compress_pages_page_decompressed disabled
compress_pages_page_compression_error disabled
compress_pages_encrypted disabled
diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result
index 2a66a0d0931..ccd8e482756 100644
--- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result
@@ -181,16 +181,8 @@ compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
compress_saved disabled
-compress_trim_sect512 disabled
-compress_trim_sect1024 disabled
-compress_trim_sect2048 disabled
-compress_trim_sect4096 disabled
-compress_trim_sect8192 disabled
-compress_trim_sect16384 disabled
-compress_trim_sect32768 disabled
compress_pages_page_compressed disabled
compress_page_compressed_trim_op disabled
-compress_page_compressed_trim_op_saved disabled
compress_pages_page_decompressed disabled
compress_pages_page_compression_error disabled
compress_pages_encrypted disabled
diff --git a/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result b/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result
index 63292f5d3c8..6ab0a19fb57 100644
--- a/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result
@@ -1,12 +1,14 @@
SET @start_use_trim = @@global.innodb_use_trim;
SELECT @start_use_trim;
@start_use_trim
-0
+1
SELECT COUNT(@@GLOBAL.innodb_use_trim);
COUNT(@@GLOBAL.innodb_use_trim)
1
1 Expected
SET @@GLOBAL.innodb_use_trim=1;
+Warnings:
+Warning 131 Using innodb_use_trim is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html
SELECT COUNT(@@GLOBAL.innodb_use_trim);
COUNT(@@GLOBAL.innodb_use_trim)
1
@@ -28,6 +30,8 @@ COUNT(VARIABLE_VALUE)
1
1 Expected
SET @@global.innodb_use_trim = @start_use_trim;
+Warnings:
+Warning 131 Using innodb_use_trim is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html
SELECT @@global.innodb_use_trim;
@@global.innodb_use_trim
-0
+1
diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result
index b4c7b2cc1fb..5954b057d2f 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result
@@ -2612,12 +2612,12 @@ READ_ONLY YES
COMMAND_LINE_ARGUMENT NONE
VARIABLE_NAME INNODB_USE_TRIM
SESSION_VALUE NULL
-GLOBAL_VALUE OFF
+GLOBAL_VALUE ON
GLOBAL_VALUE_ORIGIN COMPILE-TIME
-DEFAULT_VALUE OFF
+DEFAULT_VALUE ON
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BOOLEAN
-VARIABLE_COMMENT Use trim. Default FALSE.
+VARIABLE_COMMENT Deallocate (punch_hole|trim) unused portions of the page compressed page (on by default)
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 315c951a9fe..0f1d170b172 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -35,6 +35,7 @@ Created 11/5/1995 Heikki Tuuri
#include "page0size.h"
#include "buf0buf.h"
+#include "os0api.h"
#ifdef UNIV_NONINL
#include "buf0buf.ic"
@@ -7659,4 +7660,30 @@ buf_page_decrypt_after_read(
return (success);
}
+
+/**
+Decide whether to punch a hole to deallocate the unused portion of a page.
+@param[in] bpage Page control block
+@return true if bpage->real_size differs from the physical page size, false if not */
+bool
+buf_page_should_punch_hole(
+ const buf_page_t* bpage)
+{
+ return (bpage->real_size != bpage->size.physical());
+}
+
+/**
+Calculate the length of the trim (punch_hole) operation for a page write.
+@param[in] bpage Page control block
+@param[in] write_length Write length
+@return physical page size minus write_length. NOTE(review): not clamped; confirm callers keep write_length <= physical size. */
+ulint
+buf_page_get_trim_length(
+ const buf_page_t* bpage,
+ ulint write_length)
+{
+ return (bpage->size.physical() - write_length);
+}
+
+
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index 4deee54d97f..4f83921a553 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -591,7 +591,7 @@ buf_dblwr_process(void)
dberr_t err = fil_io(
request, true,
page_id, page_size,
- 0, page_size.physical(), read_buf, NULL, NULL);
+ 0, page_size.physical(), read_buf, NULL);
if (err != DB_SUCCESS) {
ib::warn()
@@ -679,7 +679,7 @@ buf_dblwr_process(void)
fil_io(write_request, true, page_id, page_size,
0, page_size.physical(),
- const_cast<byte*>(page), NULL, NULL);
+ const_cast<byte*>(page), NULL);
ib::info() << "Recovered page " << page_id
<< " from the doublewrite buffer.";
@@ -912,7 +912,7 @@ buf_dblwr_write_block_to_datafile(
type |= IORequest::DO_NOT_WAKE;
}
- IORequest request(type);
+ IORequest request(type, const_cast<buf_page_t*>(bpage));
/* We request frame here to get correct buffer in case of
encryption and/or page compression */
@@ -924,7 +924,7 @@ buf_dblwr_write_block_to_datafile(
fil_io(request, sync, bpage->id, bpage->size, 0,
bpage->size.physical(),
(void*) frame,
- (void*) bpage, NULL);
+ (void*) bpage);
} else {
ut_ad(!bpage->size.is_compressed());
@@ -938,8 +938,8 @@ buf_dblwr_write_block_to_datafile(
buf_dblwr_check_page_lsn(block->frame);
fil_io(request,
- sync, bpage->id, bpage->size, 0, bpage->size.physical(),
- frame, block, (ulint *)&bpage->write_size);
+ sync, bpage->id, bpage->size, 0, bpage->real_size,
+ frame, block);
}
}
@@ -1041,7 +1041,7 @@ try_again:
fil_io(IORequestWrite, true,
page_id_t(TRX_SYS_SPACE, buf_dblwr->block1), univ_page_size,
- 0, len, (void*) write_buf, NULL, NULL);
+ 0, len, (void*) write_buf, NULL);
if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
/* No unwritten pages in the second block. */
@@ -1057,7 +1057,7 @@ try_again:
fil_io(IORequestWrite, true,
page_id_t(TRX_SYS_SPACE, buf_dblwr->block2), univ_page_size,
- 0, len, (void*) write_buf, NULL, NULL);
+ 0, len, (void*) write_buf, NULL);
flush:
/* increment the doublewrite flushed pages counter */
@@ -1292,7 +1292,6 @@ retry:
0,
univ_page_size.physical(),
(void *)(buf_dblwr->write_buf + univ_page_size.physical() * i),
- NULL,
NULL);
} else {
/* It is a regular page. Write it directly to the
@@ -1304,7 +1303,6 @@ retry:
0,
univ_page_size.physical(),
(void*) frame,
- NULL,
NULL);
}
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 2738fbd0ec7..5fdb735e0d3 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -1093,11 +1093,11 @@ buf_flush_write_block_low(
ulint type = IORequest::WRITE | IORequest::DO_NOT_WAKE;
- IORequest request(type);
+ IORequest request(type, bpage);
fil_io(request,
sync, bpage->id, bpage->size, 0, bpage->size.physical(),
- frame, bpage, NULL);
+ frame, bpage);
} else {
if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
buf_dblwr_write_single_page(bpage, sync);
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index ea3c1ceccf9..4d68ad5ac51 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -182,7 +182,7 @@ buf_read_page_low(
*err = fil_io(
request, sync, page_id, page_size, 0, page_size.physical(),
- dst, bpage, NULL);
+ dst, bpage);
if (sync) {
thd_wait_end(NULL);
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index a0442808eaa..75067bc075e 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -58,6 +58,7 @@ Created 10/25/1995 Heikki Tuuri
#include "srv0start.h"
#include "trx0purge.h"
#include "ut0new.h"
+#include "os0api.h"
/** Tries to close a file in the LRU list. The caller must hold the fil_sys
mutex.
@@ -280,7 +281,7 @@ fil_read(
void* buf)
{
return(fil_io(IORequestRead, true, page_id, page_size,
- byte_offset, len, buf, NULL, NULL));
+ byte_offset, len, buf, NULL));
}
/** Writes data to a space from a buffer. Remember that the possible incomplete
@@ -308,7 +309,7 @@ fil_write(
ut_ad(!srv_read_only_mode);
return(fil_io(IORequestWrite, true, page_id, page_size,
- byte_offset, len, buf, NULL, NULL));
+ byte_offset, len, buf, NULL));
}
/*******************************************************************//**
@@ -524,20 +525,6 @@ fil_node_create_low(
node->space = space;
- os_file_stat_t stat_info;
-
-#ifdef UNIV_DEBUG
- dberr_t err =
-#endif /* UNIV_DEBUG */
-
- os_file_get_status(
- node->name, &stat_info, false,
- fsp_is_system_temporary(space->id) ? true : srv_read_only_mode);
-
- ut_ad(err == DB_SUCCESS);
-
- node->block_size = stat_info.block_size;
-
node->atomic_write = atomic_write;
UT_LIST_ADD_LAST(space->chain, node);
@@ -1043,7 +1030,7 @@ fil_write_zeros(
err = os_aio(
request, OS_AIO_SYNC, node->name,
node->handle, buf, offset, n_bytes, read_only_mode,
- NULL, NULL, NULL);
+ NULL, NULL);
if (err != DB_SUCCESS) {
break;
@@ -3758,12 +3745,31 @@ fil_ibd_create(
success = true;
}
#endif /* HAVE_POSIX_FALLOCATE */
- if (!success)
- {
+
+ if (!success) {
success = os_file_set_size(
path, file, size * UNIV_PAGE_SIZE, srv_read_only_mode);
}
+ /* Note: We are actually punching a hole, previous contents will
+ be lost after this call, if it succeeds. In this case the file
+ should be full of NULs. */
+
+ bool punch_hole = os_is_sparse_file_supported(path, file);
+
+ if (punch_hole) {
+
+ dberr_t punch_err;
+
+ punch_err = os_file_punch_hole(file, 0, size * UNIV_PAGE_SIZE);
+
+ if (punch_err != DB_SUCCESS) {
+ punch_hole = false;
+ }
+ }
+
+ ulint block_size = os_file_get_block_size(file, path);
+
if (!success) {
os_file_close(file);
os_file_delete(innodb_data_file_key, path);
@@ -3866,7 +3872,13 @@ fil_ibd_create(
space = fil_space_create(name, space_id, flags, FIL_TYPE_TABLESPACE,
crypt_data, true);
- if (!fil_node_create_low(path, size, space, false, true)) {
+ fil_node_t* node = NULL;
+
+ if (space) {
+ node = fil_node_create_low(path, size, space, false, true);
+ }
+
+ if (!space || !node) {
if (crypt_data) {
free(crypt_data);
}
@@ -3883,6 +3895,9 @@ fil_ibd_create(
fil_name_write(space, 0, file, &mtr);
mtr.commit();
+ node->block_size = block_size;
+ space->punch_hole = punch_hole;
+
err = DB_SUCCESS;
}
@@ -5038,8 +5053,6 @@ fil_report_invalid_page_access(
aligned
@param[in] message message for aio handler if non-sync aio
used, else ignored
-@param[in] write_size actual payload size when written
- to avoid extra punch holes in compression
@return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED
if we are trying to do i/o on a tablespace which does not exist */
dberr_t
@@ -5051,8 +5064,7 @@ fil_io(
ulint byte_offset,
ulint len,
void* buf,
- void* message,
- ulint* write_size)
+ void* message)
{
os_offset_t offset;
IORequest req_type(type);
@@ -5285,7 +5297,7 @@ fil_io(
const char* name = node->name == NULL ? space->name : node->name;
- req_type.block_size(node->block_size);
+ req_type.set_fil_node(node);
/* Queue the aio request */
dberr_t err = os_aio(
@@ -5293,7 +5305,7 @@ fil_io(
mode, name, node->handle, buf, offset, len,
space->purpose != FIL_TYPE_TEMPORARY
&& srv_read_only_mode,
- node, message, write_size);
+ node, message);
/* We an try to recover the page from the double write buffer if
the decompression fails or the page is corrupt. */
@@ -6973,3 +6985,26 @@ fil_system_exit(void)
ut_ad(mutex_own(&fil_system->mutex));
mutex_exit(&fil_system->mutex);
}
+
+/**
+Check whether punch hole should be tried on the tablespace owning a file node.
+@param[in] node File node
+@return value of node->space->punch_hole: true if punch hole should be tried, false if not. */
+bool
+fil_node_should_punch_hole(
+ const fil_node_t* node)
+{
+ return (node->space->punch_hole);
+}
+
+/**
+Set the punch_hole flag of the tablespace owning the given file node.
+@param[in] node File node; the flag is stored in node->space, not in the node
+@param[in] val value to be set. */
+void
+fil_space_set_punch_hole(
+ fil_node_t* node,
+ bool val)
+{
+ node->space->punch_hole = val;
+}
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index a5fd7788af2..271abc3e86d 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -927,6 +927,7 @@ static ibool innodb_have_lz4=IF_LZ4(1, 0);
static ibool innodb_have_lzma=IF_LZMA(1, 0);
static ibool innodb_have_bzip2=IF_BZIP2(1, 0);
static ibool innodb_have_snappy=IF_SNAPPY(1, 0);
+static ibool innodb_have_punch_hole=IF_PUNCH_HOLE(1, 0);
static
int
@@ -1134,20 +1135,6 @@ static SHOW_VAR innodb_status_variables[]= {
/* Status variables for page compression */
{"page_compression_saved",
(char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG},
- {"page_compression_trim_sect512",
- (char*) &export_vars.innodb_page_compression_trim_sect512, SHOW_LONGLONG},
- {"page_compression_trim_sect1024",
- (char*) &export_vars.innodb_page_compression_trim_sect1024, SHOW_LONGLONG},
- {"page_compression_trim_sect2048",
- (char*) &export_vars.innodb_page_compression_trim_sect2048, SHOW_LONGLONG},
- {"page_compression_trim_sect4096",
- (char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG},
- {"page_compression_trim_sect8192",
- (char*) &export_vars.innodb_page_compression_trim_sect8192, SHOW_LONGLONG},
- {"page_compression_trim_sect16384",
- (char*) &export_vars.innodb_page_compression_trim_sect16384, SHOW_LONGLONG},
- {"page_compression_trim_sect32768",
- (char*) &export_vars.innodb_page_compression_trim_sect32768, SHOW_LONGLONG},
{"num_index_pages_written",
(char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG},
{"num_non_index_pages_written",
@@ -1156,8 +1143,6 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG},
{"num_page_compressed_trim_op",
(char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG},
- {"num_page_compressed_trim_op_saved",
- (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG},
{"num_pages_page_decompressed",
(char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG},
{"num_pages_page_compression_error",
@@ -1176,6 +1161,8 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &innodb_have_bzip2, SHOW_BOOL},
{"have_snappy",
(char*) &innodb_have_snappy, SHOW_BOOL},
+ {"have_punch_hole",
+ (char*) &innodb_have_punch_hole, SHOW_BOOL},
/* Defragmentation */
{"defragment_compression_failures",
@@ -3830,6 +3817,10 @@ static const char* deprecated_file_format_check
static const char* deprecated_file_format_max
= DEPRECATED_FORMAT_PARAMETER("innodb_file_format_max");
+/** Deprecation message about innodb_use_trim */
+static const char* deprecated_use_trim
+ = DEPRECATED_FORMAT_PARAMETER("innodb_use_trim");
+
/** Update log_checksum_algorithm_ptr with a pointer to the function
corresponding to whether checksums are enabled.
@param[in] check whether redo log block checksums are enabled */
@@ -20660,6 +20651,25 @@ wsrep_fake_trx_id(
#endif /* WITH_WSREP */
+/** Update the innodb_use_trim parameter and push a deprecation warning.
+@param[in] thd thread handle, receives the warning
+@param[in] var system variable (not read in this function)
+@param[out] var_ptr current value (not written here; srv_use_trim is assigned directly)
+@param[in] save immediate result from check function, read as my_bool */
+static
+void
+innodb_use_trim_update(
+ THD* thd,
+ struct st_mysql_sys_var* var,
+ void* var_ptr,
+ const void* save)
+{
+ srv_use_trim = *static_cast<const my_bool*>(save);
+
+ push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_WRONG_COMMAND, deprecated_use_trim);
+}
+
/* plugin options */
static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm,
@@ -21761,8 +21771,8 @@ static MYSQL_SYSVAR_BOOL(force_primary_key,
static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim,
PLUGIN_VAR_OPCMDARG,
- "Use trim. Default FALSE.",
- NULL, NULL, FALSE);
+ "Deallocate (punch_hole|trim) unused portions of the page compressed page (on by default)",
+ NULL, innodb_use_trim_update, TRUE);
static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 };
static TYPELIB page_compression_algorithms_typelib=
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
index 316d90bec34..972d99553b9 100644
--- a/storage/innobase/include/db0err.h
+++ b/storage/innobase/include/db0err.h
@@ -160,6 +160,9 @@ enum dberr_t {
placed on the base column of
stored column */
+ DB_IO_NO_PUNCH_HOLE, /*!< Punch hole not supported by
+ file system. */
+
/* The following are partial failure codes */
DB_FAIL = 1000,
DB_OVERFLOW,
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 7428ff2c936..bd6067fbbee 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -185,6 +185,10 @@ struct fil_space_t {
@param[in] n_reserved number of reserved extents */
void release_free_extents(ulint n_reserved);
+ /** True if file system storing this tablespace supports
+ punch hole */
+ bool punch_hole;
+
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
};
@@ -229,12 +233,12 @@ struct fil_node_t {
/** link to the fil_system->LRU list (keeping track of open files) */
UT_LIST_NODE_T(fil_node_t) LRU;
- /** block size to use for punching holes */
- ulint block_size;
-
/** whether this file could use atomic write (data file) */
bool atomic_write;
+ /** Filesystem block size */
+ ulint block_size;
+
/** FIL_NODE_MAGIC_N */
ulint magic_n;
};
@@ -1129,11 +1133,6 @@ fil_space_get_n_reserved_extents(
aligned
@param[in] message message for aio handler if non-sync aio
used, else ignored
-@param[in,out] write_size Actual write size initialized
- after fist successfull trim
- operation for this page and if
- nitialized we do not trim again if
- Actual page
@return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED
if we are trying to do i/o on a tablespace which does not exist */
@@ -1146,8 +1145,7 @@ fil_io(
ulint byte_offset,
ulint len,
void* buf,
- void* message,
- ulint* write_size);
+ void* message);
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
diff --git a/storage/innobase/include/os0api.h b/storage/innobase/include/os0api.h
new file mode 100644
index 00000000000..ea2a113bdec
--- /dev/null
+++ b/storage/innobase/include/os0api.h
@@ -0,0 +1,75 @@
+/***********************************************************************
+
+Copyright (c) 2017, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file os0api.h
+The interface to the helper functions.
+These functions are used in os0file.h, where
+including the full header is not feasible, and are
+implemented in buf0buf.cc and fil0fil.cc.
+*******************************************************/
+
+#ifndef OS_API_H
+#define OS_API_H 1
+
+/** Page control block */
+struct buf_page_t;
+
+/** File Node */
+struct fil_node_t;
+
+/**
+Decide whether a hole should be punched to deallocate the unused
+portion of the page.
+@param[in] bpage Page control block
+@return true if punch hole should be used, false if not */
+bool
+buf_page_should_punch_hole(
+ const buf_page_t* bpage)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/**
+Calculate the length of the trim (punch hole) operation.
+@param[in] bpage Page control block
+@param[in] write_length Write length
+@return length of the trim, or zero */
+ulint
+buf_page_get_trim_length(
+ const buf_page_t* bpage,
+ ulint write_length)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/**
+Check whether punch hole should be tried for the given file node.
+@param[in] node File node
+@return true, if punch hole should be tried, false if not. */
+bool
+fil_node_should_punch_hole(
+ const fil_node_t* node)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/**
+Set the punch hole setting to the given value. Despite the fil_space_
+prefix, the function takes a file node.
+NOTE(review): presumably this updates the punch_hole flag of the
+tablespace owning the node; confirm against the definition in fil0fil.cc.
+@param[in,out] node File node
+@param[in] val value to be set. */
+void
+fil_space_set_punch_hole(
+ fil_node_t* node,
+ bool val);
+
+#endif /* OS_API_H */
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index 57ee015dfdd..6a97ff3aa53 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -36,7 +36,8 @@ Created 10/21/1995 Heikki Tuuri
#ifndef os0file_h
#define os0file_h
-#include "univ.i"
+#include "page0size.h"
+#include "os0api.h"
#ifndef _WIN32
#include <dirent.h>
@@ -46,8 +47,10 @@ Created 10/21/1995 Heikki Tuuri
/** File node of a tablespace or the log data space */
struct fil_node_t;
+struct fil_space_t;
extern bool os_has_said_disk_full;
+extern my_bool srv_use_trim;
/** Number of pending read operations */
extern ulint os_n_pending_reads;
@@ -177,6 +180,8 @@ static const ulint OS_FILE_ERROR_MAX = 200;
#define IORequestLogRead IORequest(IORequest::LOG | IORequest::READ)
#define IORequestLogWrite IORequest(IORequest::LOG | IORequest::WRITE)
+
+
/**
The IO Context that is passed down to the low level IO code */
class IORequest {
@@ -211,12 +216,16 @@ public:
/** Ignore failed reads of non-existent pages */
IGNORE_MISSING = 128,
+
+ /** Use punch hole if available*/
+ PUNCH_HOLE = 256,
};
/** Default constructor */
IORequest()
:
- m_block_size(UNIV_SECTOR_SIZE),
+ m_bpage(NULL),
+ m_fil_node(NULL),
m_type(READ)
{
/* No op */
@@ -227,9 +236,32 @@ public:
ORed from the above enum */
explicit IORequest(ulint type)
:
- m_block_size(UNIV_SECTOR_SIZE),
+ m_bpage(NULL),
+ m_fil_node(NULL),
+ m_type(static_cast<uint16_t>(type))
+ {
+ if (!is_punch_hole_supported() || !srv_use_trim) {
+ clear_punch_hole();
+ }
+ }
+
+ /**
+ @param[in] type Request type, can be a value that is
+ ORed from the above enum
+ @param[in] bpage Page to be written */
+ IORequest(ulint type, buf_page_t* bpage)
+ :
+ m_bpage(bpage),
+ m_fil_node(NULL),
m_type(static_cast<uint16_t>(type))
{
+ if (bpage && buf_page_should_punch_hole(bpage)) {
+ set_punch_hole();
+ }
+
+ if (!is_punch_hole_supported() || !srv_use_trim) {
+ clear_punch_hole();
+ }
}
/** Destructor */
@@ -270,6 +302,12 @@ public:
return((m_type & DO_NOT_WAKE) == 0);
}
+	/** Remove the PUNCH_HOLE bit from the request type flags */
+	void clear_punch_hole()
+	{
+		m_type = static_cast<uint16_t>(m_type & ~PUNCH_HOLE);
+	}
+
/** @return true if partial read warning disabled */
bool is_partial_io_warning_disabled() const
MY_ATTRIBUTE((warn_unused_result))
@@ -291,6 +329,13 @@ public:
return(ignore_missing(m_type));
}
+	/** @return true if the PUNCH_HOLE flag is set on this request */
+	bool punch_hole() const
+		MY_ATTRIBUTE((warn_unused_result))
+	{
+		return((m_type & PUNCH_HOLE) != 0);
+	}
+
/** @return true if the read should be validated */
bool validate() const
MY_ATTRIBUTE((warn_unused_result))
@@ -298,24 +343,30 @@ public:
return(is_read() ^ is_write());
}
+	/** Set the punch hole flag. The flag is left untouched when
+	punch hole is not supported or srv_use_trim is off. */
+	void set_punch_hole()
+	{
+		if (!is_punch_hole_supported() || !srv_use_trim) {
+			return;
+		}
+
+		m_type |= PUNCH_HOLE;
+	}
+
/** Clear the do not wake flag */
void clear_do_not_wake()
{
m_type &= ~DO_NOT_WAKE;
}
- /** @return the block size to use for IO */
- ulint block_size() const
- MY_ATTRIBUTE((warn_unused_result))
+ /** Set the pointer to file node for IO
+ @param[in] node File node */
+ void set_fil_node(fil_node_t* node)
{
- return(m_block_size);
- }
+ if (!srv_use_trim ||
+ (node && !fil_node_should_punch_hole(node))) {
+ clear_punch_hole();
+ }
- /** Set the block size for IO
- @param[in] block_size Block size to set */
- void block_size(ulint block_size)
- {
- m_block_size = static_cast<uint32_t>(block_size);
+ m_fil_node = node;
}
/** Compare two requests
@@ -338,9 +389,59 @@ public:
return((m_type & DBLWR_RECOVER) == DBLWR_RECOVER);
}
+	/** @return true if punch hole is supported in this build (or if
+	the "ignore_punch_hole" debug keyword is active) */
+	static bool is_punch_hole_supported()
+	{
+		/* With the "ignore_punch_hole" debug keyword we pretend
+		that punch hole is supported; the actual hole punching
+		calls are then skipped. This keeps Transparent Page
+		Compression under test. */
+		DBUG_EXECUTE_IF("ignore_punch_hole",
+			return(true);
+		);
+
+#if !defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) && !defined(_WIN32)
+		return(false);
+#else
+		return(true);
+#endif /* !HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE && !_WIN32 */
+	}
+
+	/** Get the length of the trim (punch hole) operation for the
+	page attached to this request.
+	@param[in]	write_length	Write length
+	@return result of buf_page_get_trim_length() for the page, or
+	zero if no page is attached */
+	ulint get_trim_length(ulint write_length) const
+	{
+		if (m_bpage == NULL) {
+			return(0);
+		}
+
+		return(buf_page_get_trim_length(m_bpage, write_length));
+	}
+
+	/** @return what fil_node_should_punch_hole() reports for the
+	file node attached to this request, false if none is attached */
+	bool should_punch_hole() const
+	{
+		return(m_fil_node != NULL
+		       && fil_node_should_punch_hole(m_fil_node));
+	}
+
+	/** Set punch hole to false, through fil_space_set_punch_hole(),
+	for the file node attached to this request. Does nothing when
+	no file node is attached. */
+	void space_no_punch_hole() const
+	{
+		if (m_fil_node == NULL) {
+			return;
+		}
+
+		fil_space_set_punch_hole(m_fil_node, false);
+	}
+
+	/** Punch a hole in the file if it was a write
+	@param[in]	fh	Open file handle
+	@param[in]	off	Starting offset of the hole (SEEK_SET)
+	@param[in]	len	Compressed buffer length for write
+	@return DB_SUCCESS or error code */
+
+	/* The parameter types must match the out-of-line definition in
+	os0file.cc, which takes os_offset_t for both off and len. */
+	dberr_t punch_hole(
+		os_file_t	fh,
+		os_offset_t	off,
+		os_offset_t	len);
+
private:
- /* File system best block size */
- uint32_t m_block_size;
+ /** Page to be written on write operation. */
+ buf_page_t* m_bpage;
+
+ /** File node */
+ fil_node_t* m_fil_node;
/** Request type bit flags */
uint16_t m_type;
@@ -706,10 +807,10 @@ The wrapper functions have the prefix of "innodb_". */
# define os_file_close(file) \
pfs_os_file_close_func(file, __FILE__, __LINE__)
-# define os_aio(type, mode, name, file, buf, offset, \
- n, read_only, message1, message2, wsize) \
- pfs_os_aio_func(type, mode, name, file, buf, offset, \
- n, read_only, message1, message2, wsize, \
+# define os_aio(type, mode, name, file, buf, offset, \
+ n, read_only, message1, message2) \
+ pfs_os_aio_func(type, mode, name, file, buf, offset, \
+ n, read_only, message1, message2, \
__FILE__, __LINE__)
# define os_file_read(type, file, buf, offset, n) \
@@ -721,7 +822,7 @@ The wrapper functions have the prefix of "innodb_". */
# define os_file_write(type, name, file, buf, offset, n) \
pfs_os_file_write_func(type, name, file, buf, offset, \
- n, __FILE__, __LINE__)
+ n,__FILE__, __LINE__)
# define os_file_flush(file) \
pfs_os_file_flush_func(file, __FILE__, __LINE__)
@@ -926,7 +1027,6 @@ pfs_os_aio_func(
bool read_only,
fil_node_t* m1,
void* m2,
- ulint* wsize,
const char* src_file,
ulint src_line);
@@ -1051,9 +1151,9 @@ to original un-instrumented file I/O APIs */
# define os_file_close(file) os_file_close_func(file)
# define os_aio(type, mode, name, file, buf, offset, \
- n, read_only, message1, message2, wsize) \
+ n, read_only, message1, message2) \
os_aio_func(type, mode, name, file, buf, offset, \
- n, read_only, message1, message2, wsize)
+ n, read_only, message1, message2)
# define os_file_read(type, file, buf, offset, n) \
os_file_read_func(type, file, buf, offset, n)
@@ -1061,7 +1161,7 @@ to original un-instrumented file I/O APIs */
# define os_file_read_no_error_handling(type, file, buf, offset, n, o) \
os_file_read_no_error_handling_func(type, file, buf, offset, n, o)
-# define os_file_write(type, name, file, buf, offset, n) \
+# define os_file_write(type, name, file, buf, offset, n) \
os_file_write_func(type, name, file, buf, offset, n)
# define os_file_flush(file) os_file_flush_func(file)
@@ -1324,8 +1424,7 @@ os_aio_func(
ulint n,
bool read_only,
fil_node_t* m1,
- void* m2,
- ulint* wsize);
+ void* m2);
/** Wakes up all async i/o threads so that they know to exit themselves in
shutdown. */
@@ -1427,6 +1526,48 @@ innobase_mysql_tmpfile(
void
os_file_set_umask(ulint umask);
+/** Check if the file system supports sparse files.
+
+Warning: On POSIX systems we try and punch a hole from offset 0 to
+the system configured page size. This should only be called on an empty
+file.
+
+Note: On Windows we use the name and on Unices we use the file handle.
+
+@param[in] path File name
+@param[in] fh File handle for the file - if opened
+@return true if the file system supports sparse files */
+bool
+os_is_sparse_file_supported(
+ const char* path,
+ os_file_t fh)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Free storage space associated with a section of the file.
+@param[in,out] type IO request (taken by non-const reference)
+@param[in] fh Open file handle
+@param[in] off Starting offset (SEEK_SET)
+@param[in] len Size of the hole
+@return DB_SUCCESS or error code */
+dberr_t
+os_file_punch_hole(
+ IORequest& type,
+ os_file_t fh,
+ os_offset_t off,
+ os_offset_t len)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Free storage space associated with a section of the file.
+@param[in] fh Open file handle
+@param[in] off Starting offset (SEEK_SET)
+@param[in] len Size of the hole
+@return DB_SUCCESS or error code */
+dberr_t
+os_file_punch_hole(
+ os_file_t fh,
+ os_offset_t off,
+ os_offset_t len)
+ MY_ATTRIBUTE((warn_unused_result));
+
/** Normalizes a directory path for the current OS:
On Windows, we convert '/' to '\', else we convert '\' to '/'.
@param[in,out] str A null-terminated directory and file path */
@@ -1454,6 +1595,16 @@ is_absolute_path(
return(false);
}
+/***********************************************************************//**
+Try to get number of bytes per sector from file system.
+@return file block size */
+UNIV_INTERN
+ulint
+os_file_get_block_size(
+/*===================*/
+ os_file_t file, /*!< in: handle to a file */
+ const char* name); /*!< in: file name */
+
#ifndef UNIV_NONINL
#include "os0file.ic"
#endif /* UNIV_NONINL */
diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
index 3e78b87a177..5c7c4d45ca6 100644
--- a/storage/innobase/include/os0file.ic
+++ b/storage/innobase/include/os0file.ic
@@ -219,11 +219,6 @@ an asynchronous i/o operation.
@param[in,out] m2 message for the AIO handler (can be used to
identify a completed AIO operation); ignored
if mode is OS_AIO_SYNC
-@param[in,out] write_size Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size
@param[in] src_file file name where func invoked
@param[in] src_line line where the func invoked
@return DB_SUCCESS if request was queued successfully, FALSE if fail */
@@ -240,7 +235,6 @@ pfs_os_aio_func(
bool read_only,
fil_node_t* m1,
void* m2,
- ulint* write_size,
const char* src_file,
ulint src_line)
{
@@ -256,7 +250,7 @@ pfs_os_aio_func(
src_file, src_line);
dberr_t result = os_aio_func(
- type, mode, name, file, buf, offset, n, read_only, m1, m2, write_size);
+ type, mode, name, file, buf, offset, n, read_only, m1, m2);
register_pfs_file_io_end(locker, n);
diff --git a/storage/innobase/include/page0size.h b/storage/innobase/include/page0size.h
index ab917e1ff05..ca173db9b6d 100644
--- a/storage/innobase/include/page0size.h
+++ b/storage/innobase/include/page0size.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -139,9 +140,7 @@ public:
@param[in] src page size object whose values to fetch */
inline void copy_from(const page_size_t& src)
{
- m_physical = src.physical();
- m_logical = src.logical();
- m_is_compressed = src.is_compressed();
+ *this = src;
}
/** Check if a given page_size_t object is equal to the current one.
@@ -156,9 +155,6 @@ public:
private:
- /* Disable implicit copying. */
- void operator=(const page_size_t&);
-
/* For non compressed tablespaces, physical page size is equal to
the logical page size and the data is stored in buf_page_t::frame
(and is also always equal to univ_page_size (--innodb-page-size=)).
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index e4e1394c2d3..5c19e735806 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -343,16 +343,8 @@ enum monitor_id_t {
MONITOR_PAD_DECREMENTS,
/* New monitor variables for page compression */
MONITOR_OVLD_PAGE_COMPRESS_SAVED,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768,
MONITOR_OVLD_PAGES_PAGE_COMPRESSED,
MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP,
- MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED,
MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED,
MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR,
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 227bcfb7781..ca81ad46b8b 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -114,20 +114,6 @@ struct srv_stats_t {
/** Number of bytes saved by page compression */
ulint_ctr_64_t page_compression_saved;
- /** Number of 512Byte TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect512;
- /** Number of 1K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect1024;
- /** Number of 2K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect2048;
- /** Number of 4K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect4096;
- /** Number of 8K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect8192;
- /** Number of 16K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect16384;
- /** Number of 32K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect32768;
/* Number of index pages written */
ulint_ctr_64_t index_pages_written;
/* Number of non index pages written */
@@ -136,8 +122,6 @@ struct srv_stats_t {
ulint_ctr_64_t pages_page_compressed;
/* Number of TRIM operations induced by page compression */
ulint_ctr_64_t page_compressed_trim_op;
- /* Number of TRIM operations saved by using actual write size knowledge */
- ulint_ctr_64_t page_compressed_trim_op_saved;
/* Number of pages decompressed with page compression */
ulint_ctr_64_t pages_page_decompressed;
/* Number of page compression errors */
@@ -1059,20 +1043,6 @@ struct export_var_t{
int64_t innodb_page_compression_saved;/*!< Number of bytes saved
by page compression */
- int64_t innodb_page_compression_trim_sect512;/*!< Number of 512b TRIM
- by page compression */
- int64_t innodb_page_compression_trim_sect1024;/*!< Number of 1K TRIM
- by page compression */
- int64_t innodb_page_compression_trim_sect2048;/*!< Number of 2K TRIM
- by page compression */
- int64_t innodb_page_compression_trim_sect4096;/*!< Number of 4K byte TRIM
- by page compression */
- int64_t innodb_page_compression_trim_sect8192;/*!< Number of 8K TRIM
- by page compression */
- int64_t innodb_page_compression_trim_sect16384;/*!< Number of 16K TRIM
- by page compression */
- int64_t innodb_page_compression_trim_sect32768;/*!< Number of 32K TRIM
- by page compression */
int64_t innodb_index_pages_written; /*!< Number of index pages
written */
int64_t innodb_non_index_pages_written; /*!< Number of non index pages
@@ -1081,8 +1051,6 @@ struct export_var_t{
compressed by page compression */
int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations
induced by page compression */
- int64_t innodb_page_compressed_trim_op_saved;/*!< Number of TRIM operations
- saved by page compression */
int64_t innodb_pages_page_decompressed;/*!< Number of pages
decompressed by page
compression */
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 35ea9fd51be..908fb60e956 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -366,6 +366,12 @@ typedef enum innodb_file_formats_enum innodb_file_formats_t;
#define IF_SNAPPY(A,B) B
#endif
+#if defined (HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
+#define IF_PUNCH_HOLE(A,B) A
+#else
+#define IF_PUNCH_HOLE(A,B) B
+#endif
+
/** The universal page size of the database */
#define UNIV_PAGE_SIZE ((ulint) srv_page_size)
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index cf7825bd542..39afc4e9680 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -1019,7 +1019,7 @@ log_group_file_header_flush(
page_id_t(group->space_id, page_no),
univ_page_size,
(ulint) (dest_offset % univ_page_size.physical()),
- OS_FILE_LOG_BLOCK_SIZE, buf, group, NULL);
+ OS_FILE_LOG_BLOCK_SIZE, buf, group);
srv_stats.os_log_pending_writes.dec();
}
@@ -1144,7 +1144,7 @@ loop:
page_id_t(group->space_id, page_no),
univ_page_size,
(ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
- group, NULL);
+ group);
srv_stats.os_log_pending_writes.dec();
@@ -1664,7 +1664,7 @@ log_group_checkpoint(
(log_sys->next_checkpoint_no & 1)
? LOG_CHECKPOINT_2 : LOG_CHECKPOINT_1,
OS_FILE_LOG_BLOCK_SIZE,
- buf, (byte*) group + 1, NULL);
+ buf, (byte*) group + 1);
ut_ad(((ulint) group & 0x1UL) == 0);
}
@@ -1686,7 +1686,7 @@ log_group_header_read(
fil_io(IORequestLogRead, true,
page_id_t(group->space_id, header / univ_page_size.physical()),
univ_page_size, header % univ_page_size.physical(),
- OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, NULL);
+ OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
}
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@@ -2038,7 +2038,7 @@ loop:
page_id_t(group->space_id, page_no),
univ_page_size,
(ulint) (source_offset % univ_page_size.physical()),
- len, buf, NULL, NULL);
+ len, buf, NULL);
#ifdef DEBUG_CRYPT
fprintf(stderr, "BEFORE DECRYPT: block: %lu checkpoint: %lu %.8lx %.8lx offset %lu\n",
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index e5aab543f5d..ce5b37565af 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -869,7 +869,7 @@ recv_log_format_0_recover(lsn_t lsn)
univ_page_size,
(ulint) ((source_offset & ~(OS_FILE_LOG_BLOCK_SIZE - 1))
% univ_page_size.physical()),
- OS_FILE_LOG_BLOCK_SIZE, buf, NULL, NULL);
+ OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
if (log_block_calc_checksum_format_0(buf)
!= log_block_get_checksum(buf)) {
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 71a9a856571..f305de38e01 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -44,6 +44,11 @@ Created 10/21/1995 Heikki Tuuri
#include "os0file.ic"
#endif
+#ifdef UNIV_LINUX
+#include <sys/types.h>
+#include <sys/stat.h>
+#endif
+
#include "srv0srv.h"
#include "srv0start.h"
#include "fil0fil.h"
@@ -63,17 +68,23 @@ Created 10/21/1995 Heikki Tuuri
#include <libaio.h>
#endif /* LINUX_NATIVE_AIO */
-#ifdef HAVE_LZ4
-#include <lz4.h>
-#endif
-
-#include <zlib.h>
+#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
+# include <fcntl.h>
+# include <linux/falloc.h>
+#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
#ifdef UNIV_DEBUG
/** Set when InnoDB has invoked exit(). */
bool innodb_calling_exit;
#endif /* UNIV_DEBUG */
+#if defined(UNIV_LINUX) && defined(HAVE_SYS_IOCTL_H)
+# include <sys/ioctl.h>
+# ifndef DFS_IOCTL_ATOMIC_WRITE_SET
+# define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint)
+# endif
+#endif
+
#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H)
#include <sys/statvfs.h>
#endif
@@ -82,12 +93,8 @@ bool innodb_calling_exit;
#include <linux/falloc.h>
#endif
-#ifdef HAVE_LZO
-#include "lzo/lzo1x.h"
-#endif
-
-#ifdef HAVE_SNAPPY
-#include "snappy-c.h"
+#ifdef _WIN32
+#include <winioctl.h>
#endif
/** Insert buffer segment id */
@@ -216,8 +223,6 @@ struct Slot {
/** buffer used in i/o */
byte* buf;
- ulint is_log; /*!< 1 if OS_FILE_LOG or 0 */
- ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */
/** Buffer pointer used for actual IO. We advance this
when partial IO is required and not buf */
@@ -286,7 +291,6 @@ struct Slot {
/** Length of the block before it was compressed */
uint32 original_len;
- ulint* write_size;
};
/** The asynchronous i/o array structure */
@@ -328,8 +332,7 @@ public:
const char* name,
void* buf,
os_offset_t offset,
- ulint len,
- ulint* write_size)
+ ulint len)
MY_ATTRIBUTE((warn_unused_result));
/** @return number of reserved slots */
@@ -759,6 +762,107 @@ os_aio_simulated_handler(
void** m2,
IORequest* type);
+#ifdef _WIN32
+static HANDLE win_get_syncio_event();
+#endif
+
+#ifdef _WIN32
+/**
+  Synchronous wrapper around the Windows DeviceIoControl() function.
+
+  An OVERLAPPED structure carrying the event from win_get_syncio_event()
+  is always supplied. If the call reports ERROR_IO_PENDING, completion
+  is awaited with GetOverlappedResult(), so the wrapper also works on a
+  handle opened for async access (i.e. with FILE_FLAG_OVERLAPPED).
+
+  Accepts the same parameters as DeviceIoControl(), except the
+  last parameter (OVERLAPPED).
+*/
+static
+BOOL
+os_win32_device_io_control(
+	HANDLE		handle,
+	DWORD		code,
+	LPVOID		inbuf,
+	DWORD		inbuf_size,
+	LPVOID		outbuf,
+	DWORD		outbuf_size,
+	LPDWORD		bytes_returned
+)
+{
+	OVERLAPPED	ov = { 0 };
+
+	ov.hEvent = win_get_syncio_event();
+
+	BOOL	ok = DeviceIoControl(handle, code, inbuf, inbuf_size, outbuf,
+				     outbuf_size, bytes_returned, &ov);
+
+	if (ok || GetLastError() != ERROR_IO_PENDING) {
+		/* Completed immediately, or failed for good. */
+		return ok;
+	}
+
+	/* Wait for async io to complete */
+	return GetOverlappedResult(handle, &ov, bytes_returned, TRUE);
+}
+
+#endif
+
+/***********************************************************************//**
+Try to get number of bytes per sector from file system.
+On Linux the value comes from fstat() (st_blksize); on Windows from
+IOCTL_STORAGE_QUERY_PROPERTY (BytesPerPhysicalSector). If the query fails,
+or on other platforms, the default of 512 is used.
+@return file block size, clamped to the range [512, 4096] */
+UNIV_INTERN
+ulint
+os_file_get_block_size(
+/*===================*/
+	os_file_t	file,	/*!< in: handle to a file */
+	const char*	name)	/*!< in: file name */
+{
+	ulint		fblock_size = 512;
+
+#if defined(UNIV_LINUX)
+	struct stat	local_stat;
+	int		err;
+
+	err = fstat((int)file, &local_stat);
+
+	if (err != 0) {
+		os_file_handle_error_no_exit(name, "fstat()", FALSE);
+	} else {
+		fblock_size = local_stat.st_blksize;
+	}
+#endif /* UNIV_LINUX */
+#ifdef _WIN32
+	DWORD				outsize;
+	STORAGE_PROPERTY_QUERY		storageQuery;
+	memset(&storageQuery, 0, sizeof(storageQuery));
+	storageQuery.PropertyId = StorageAccessAlignmentProperty;
+	storageQuery.QueryType = PropertyStandardQuery;
+	STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR	diskAlignment;
+
+	BOOL	result = os_win32_device_io_control(file,
+		IOCTL_STORAGE_QUERY_PROPERTY,
+		&storageQuery,
+		sizeof(STORAGE_PROPERTY_QUERY),
+		&diskAlignment,
+		sizeof(STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR),
+		&outsize);
+
+	if (!result) {
+		/* diskAlignment was not filled in: do not read it,
+		keep the default block size instead. */
+		os_file_handle_error_no_exit(name, "DeviceIoControl()", FALSE);
+	} else {
+		fblock_size = diskAlignment.BytesPerPhysicalSector;
+	}
+#endif /* _WIN32 */
+
+	/* Currently we support file block size up to 4Kb */
+	if (fblock_size < 512) {
+		fblock_size = 512;
+	} else if (fblock_size > 4096) {
+		fblock_size = 4096;
+	}
+
+	return fblock_size;
+}
+
#ifdef WIN_ASYNC_IO
/** This function is only used in Windows asynchronous i/o.
Waits for an aio operation to complete. This function is used to wait the
@@ -1443,6 +1547,48 @@ SyncFileIO::execute(const IORequest& request)
return(n_bytes);
}
+/** Free storage space associated with a section of the file.
+@param[in]	fh	Open file handle
+@param[in]	off	Starting offset (SEEK_SET)
+@param[in]	len	Size of the hole
+@return DB_SUCCESS on success, DB_IO_NO_PUNCH_HOLE if punch hole is
+not available (not compiled in, or fallocate() reported ENOTSUP),
+DB_IO_ERROR on any other fallocate() failure */
+static
+dberr_t
+os_file_punch_hole_posix(
+	os_file_t	fh,
+	os_offset_t	off,
+	os_offset_t	len)
+{
+
+#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
+	const int	mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+
+	if (fallocate(fh, mode, off, len) == 0) {
+		return(DB_SUCCESS);
+	}
+
+	/* Save errno right away: the logging below may overwrite it. */
+	const int	punch_errno = errno;
+
+	if (punch_errno == ENOTSUP) {
+		return(DB_IO_NO_PUNCH_HOLE);
+	}
+
+	ib::warn()
+		<< "fallocate(" << fh
+		<< ", FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, "
+		<< off << ", " << len << ") returned errno: "
+		<< punch_errno;
+
+	return(DB_IO_ERROR);
+
+#else
+
+	/* No punch hole implementation on this platform.
+	On Solaris (UNIV_SOLARIS) F_FREESP could be used here. */
+	return(DB_IO_NO_PUNCH_HOLE);
+
+#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
+}
#if defined(LINUX_NATIVE_AIO)
@@ -1734,7 +1880,18 @@ LinuxAIOHandler::collect()
/* We have not overstepped to next segment. */
ut_a(slot->pos < end_pos);
- slot->err = DB_SUCCESS;
+ /* Deallocate unused blocks from file system.
+ This is never done to page 0 or to log files.*/
+ if (slot->offset > 0
+ && !slot->skip_punch_hole
+ && !slot->type.is_log()
+ && slot->type.is_write()
+ && slot->type.punch_hole()) {
+
+ slot->err = AIOHandler::io_complete(slot);
+ } else {
+ slot->err = DB_SUCCESS;
+ }
/* Mark this request as completed. The error handling
will be done in the calling function. */
@@ -3353,6 +3510,76 @@ struct WinIoInit
/* Ensures proper initialization and shutdown */
static WinIoInit win_io_init;
+/** Check if the file system supports sparse files.
+The volume holding the file is looked up with GetVolumePathName() and
+its file system flags are read with GetVolumeInformation().
+@param[in]	filename	File name
+@return true if the file system supports sparse files, false if not
+or if the volume could not be queried */
+static
+bool
+os_is_sparse_file_supported_win32(const char* filename)
+{
+	char	volume[MAX_PATH];
+
+	if (!GetVolumePathName(filename, volume, MAX_PATH)) {
+
+		ib::error()
+			<< "os_is_sparse_file_supported: "
+			<< "Failed to get the volume path name for: "
+			<< filename
+			<< "- OS error number " << GetLastError();
+
+		return(false);
+	}
+
+	DWORD	fs_flags;
+
+	if (!GetVolumeInformation(
+		    volume, NULL, MAX_PATH, NULL, NULL,
+		    &fs_flags, NULL, MAX_PATH)) {
+
+		ib::error()
+			<< "os_is_sparse_file_supported: "
+			<< "Failed to get the volume info for: "
+			<< volume
+			<< "- OS error number " << GetLastError();
+
+		return(false);
+	}
+
+	return((fs_flags & FILE_SUPPORTS_SPARSE_FILES) != 0);
+}
+
+/** Free storage space associated with a section of the file.
+@param[in]	fh	Open file handle
+@param[in]	off	Starting offset (SEEK_SET)
+@param[in]	len	Size of the hole
+@return DB_SUCCESS, or DB_IO_NO_PUNCH_HOLE if the FSCTL_SET_ZERO_DATA
+request fails */
+static
+dberr_t
+os_file_punch_hole_win32(
+	os_file_t	fh,
+	os_offset_t	off,
+	os_offset_t	len)
+{
+	FILE_ZERO_DATA_INFORMATION	zero_range;
+
+	zero_range.FileOffset.QuadPart = off;
+	zero_range.BeyondFinalZero.QuadPart = off + len;
+
+	/* If lpOverlapped is NULL, lpBytesReturned cannot be NULL,
+	therefore a dummy output variable is passed. */
+	DWORD	unused_bytes;
+
+	if (os_win32_device_io_control(
+		    fh, FSCTL_SET_ZERO_DATA, &zero_range, sizeof(zero_range),
+		    NULL, 0, &unused_bytes)) {
+
+		return(DB_SUCCESS);
+	}
+
+	return(DB_IO_NO_PUNCH_HOLE);
+}
+
/** Check the existence and type of the given file.
@param[in] path path name of file
@param[out] exists true if the file exists
@@ -3661,9 +3888,9 @@ os_file_create_simple_func(
/* This is a best effort use case, if it fails then
we will find out when we try and punch the hole. */
- DeviceIoControl(
+ os_win32_device_io_control(
file, FSCTL_SET_SPARSE, NULL, 0, NULL, 0,
- &temp, NULL);
+ &temp);
}
} while (retry);
@@ -4020,9 +4247,9 @@ os_file_create_func(
/* This is a best effort use case, if it fails then
we will find out when we try and punch the hole. */
- DeviceIoControl(
+ os_win32_device_io_control(
file, FSCTL_SET_SPARSE, NULL, 0, NULL, 0,
- &temp, NULL);
+ &temp);
}
} while (retry);
@@ -4459,28 +4686,6 @@ os_file_get_status_win32(
}
stat_info->block_size = bytesPerSector * sectorsPerCluster;
-
- /* On Windows the block size is not used as the allocation
- unit for sparse files. The underlying infra-structure for
- sparse files is based on NTFS compression. The punch hole
- is done on a "compression unit". This compression unit
- is based on the cluster size. You cannot punch a hole if
- the cluster size >= 8K. For smaller sizes the table is
- as follows:
-
- Cluster Size Compression Unit
- 512 Bytes 8 KB
- 1 KB 16 KB
- 2 KB 32 KB
- 4 KB 64 KB
-
- Default NTFS cluster size is 4K, compression unit size of 64K.
- Therefore unless the user has created the file system with
- a smaller cluster size and used larger page sizes there is
- little benefit from compression out of the box. */
-
- stat_info->block_size = (stat_info->block_size <= 4096)
- ? stat_info->block_size * 16 : ULINT_UNDEFINED;
} else {
stat_info->type = OS_FILE_TYPE_UNKNOWN;
}
@@ -4615,7 +4820,18 @@ os_file_io(
} else if ((ulint) n_bytes + bytes_returned == n) {
bytes_returned += n_bytes;
- *err = DB_SUCCESS;
+
+ if (offset > 0
+ && !type.is_log()
+ && type.is_write()
+ && type.punch_hole()) {
+ *err = type.punch_hole(file,
+ static_cast<ulint>(offset),
+ n);
+
+ } else {
+ *err = DB_SUCCESS;
+ }
return(original_n);
}
@@ -4668,7 +4884,7 @@ ssize_t
os_file_pwrite(
IORequest& type,
os_file_t file,
- const void* buf,
+ const byte* buf,
ulint n,
os_offset_t offset,
dberr_t* err)
@@ -4680,7 +4896,7 @@ os_file_pwrite(
(void) my_atomic_addlint(&os_n_pending_writes, 1);
MONITOR_ATOMIC_INC(MONITOR_OS_PENDING_WRITES);
- ssize_t n_bytes = os_file_io(type, file, const_cast<void*>(buf),
+ ssize_t n_bytes = os_file_io(type, file, const_cast<byte*>(buf),
n, offset, err);
(void) my_atomic_addlint(&os_n_pending_writes, -1);
@@ -4696,8 +4912,9 @@ os_file_pwrite(
@param[in] offset file offset from the start where to read
@param[in] n number of bytes to read, starting from offset
@return DB_SUCCESS if request was successful, false if fail */
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
-os_file_write_func(
+os_file_write_page(
IORequest& type,
const char* name,
os_file_t file,
@@ -4711,7 +4928,7 @@ os_file_write_func(
ut_ad(type.validate());
ut_ad(n > 0);
- ssize_t n_bytes = os_file_pwrite(type, file, buf, n, offset, &err);
+ ssize_t n_bytes = os_file_pwrite(type, file, (byte*)buf, n, offset, &err);
if ((ulint) n_bytes != n && !os_has_said_disk_full) {
@@ -5195,6 +5412,31 @@ os_file_read_no_error_handling_func(
return(os_file_read_page(type, file, buf, offset, n, o, false));
}
+/** NOTE! Do not call this function directly; use the corresponding
+macro os_file_write() instead.
+Requests a synchronous write operation.
+@param[in,out]	type	IO request; asserted (debug builds) to be a valid
+			write request
+@param[in]	name	file name, passed through to os_file_write_page()
+@param[in]	file	handle to an open file
+@param[in]	buf	buffer from which to write
+@param[in]	offset	file offset from the start where to write
+@param[in]	n	number of bytes to write, starting from offset
+@return the dberr_t returned by os_file_write_page(): DB_SUCCESS if the
+request was successful, an error code otherwise */
+dberr_t
+os_file_write_func(
+	IORequest&	type,
+	const char*	name,
+	os_file_t	file,
+	const void*	buf,
+	os_offset_t	offset,
+	ulint		n)
+{
+	ut_ad(type.validate());
+	ut_ad(type.is_write());
+
+	/* The page writer works on byte pointers; only the static type of
+	the buffer pointer changes here. */
+	return(os_file_write_page(
+		type, name, file, static_cast<const byte*>(buf), offset, n));
+}
+
/** Check the existence and type of the given file.
@param[in] path path name of file
@param[out] exists true if the file exists
@@ -5213,6 +5455,110 @@ os_file_status(
#endif /* _WIN32 */
}
+/** Free storage space associated with a section of the file.
+Dispatches to the Windows or the POSIX implementation, depending on the
+platform the server is compiled for.
+@param[in]	fh	Open file handle
+@param[in]	off	Starting offset of the hole (SEEK_SET)
+@param[in]	len	Size of the hole
+@return DB_SUCCESS or the error code of the platform implementation */
+dberr_t
+os_file_punch_hole(
+	os_file_t	fh,
+	os_offset_t	off,
+	os_offset_t	len)
+{
+#ifdef _WIN32
+	return(os_file_punch_hole_win32(fh, off, len));
+#else
+	return(os_file_punch_hole_posix(fh, off, len));
+#endif /* _WIN32 */
+}
+
+/** Free the storage space behind the unused tail of a block that has
+just been written. The hole starts where the written data ends
+(off + len) and its size is get_trim_length(len); nothing is punched
+when that size is 0.
+@param[in]	fh	Open file handle
+@param[in]	off	Offset (SEEK_SET) at which the block was written
+@param[in]	len	Number of bytes that were written at off
+@return DB_SUCCESS (also when the file system reports that it cannot
+punch holes; the space is then flagged via space_no_punch_hole()),
+DB_IO_NO_PUNCH_HOLE if punching is not enabled for this request or
+srv_use_trim is off, or another error code from os_file_punch_hole() */
+dberr_t
+IORequest::punch_hole(
+	os_file_t	fh,
+	os_offset_t	off,
+	os_offset_t	len)
+{
+	/* In this debugging mode, we act as if punch hole is supported,
+	and then skip any calls to actually punch a hole here.
+	In this way, Transparent Page Compression is still being tested. */
+	DBUG_EXECUTE_IF("ignore_punch_hole",
+		return(DB_SUCCESS);
+	);
+
+	/* NOTE(review): get_trim_length() is assumed to return the number
+	of bytes after the written data that may be released - confirm. */
+	const ulint	trim_len = get_trim_length(len);
+
+	if (trim_len == 0) {
+		return(DB_SUCCESS);
+	}
+
+	/* Check does file system support punching holes for this
+	tablespace. */
+	if (!should_punch_hole() || !srv_use_trim) {
+		return(DB_IO_NO_PUNCH_HOLE);
+	}
+
+	/* The hole begins right after the written bytes. Its size has to
+	be trim_len, not len: punching len bytes from this position could
+	run past the end of the block into whatever data follows it. */
+	dberr_t	err = os_file_punch_hole(fh, off + len, trim_len);
+
+	if (err == DB_SUCCESS) {
+		srv_stats.page_compressed_trim_op.inc();
+	} else if (err == DB_IO_NO_PUNCH_HOLE) {
+		/* If punch hole is not supported,
+		set space so that it is not used. The write itself
+		succeeded, so this is not reported as a failure. */
+		space_no_punch_hole();
+		err = DB_SUCCESS;
+	}
+
+	return(err);
+}
+
+/** Check if the file system supports sparse files.
+
+Warning: On POSIX systems the check is done by actually punching a hole
+from offset 0 with a length of UNIV_PAGE_SIZE. This should only be
+called on an empty file.
+
+Note: On Windows the path is used and on Unices the file handle.
+
+@param[in]	path	File name (used on Windows only)
+@param[in]	fh	File handle for the file - if opened (used on
+			POSIX systems only)
+@return true if the file system supports sparse files */
+bool
+os_is_sparse_file_supported(const char* path, os_file_t fh)
+{
+	/* In this debugging mode, we act as if punch hole is supported,
+	then we skip any calls to actually punch a hole. In this way,
+	Transparent Page Compression is still being tested. */
+	DBUG_EXECUTE_IF("ignore_punch_hole",
+		return(true);
+	);
+
+#ifdef _WIN32
+	return(os_is_sparse_file_supported_win32(path));
+#else
+	/* The file system block size is not known here, so probe with
+	one page at the start of the file and let the file system sort
+	out the alignment. Support is assumed only if that succeeds. */
+	return(os_file_punch_hole_posix(fh, 0, UNIV_PAGE_SIZE)
+	       == DB_SUCCESS);
+#endif /* _WIN32 */
+}
+
/** This function returns information about the specified file
@param[in] path pathname of the file
@param[out] stat_info information of a file in a directory
@@ -5776,12 +6122,7 @@ AIO::reserve_slot(
const char* name,
void* buf,
os_offset_t offset,
- ulint len,
- ulint* write_size)/*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
+ ulint len)
{
#ifdef WIN_ASYNC_IO
ut_a((len & 0xFFFFFFFFUL) == len);
@@ -5871,8 +6212,6 @@ AIO::reserve_slot(
slot->ptr = slot->buf;
slot->offset = offset;
slot->err = DB_SUCCESS;
- slot->write_size = write_size;
- slot->is_log = type.is_log();
slot->original_len = static_cast<uint32>(len);
slot->io_already_done = false;
slot->buf = static_cast<byte*>(buf);
@@ -6225,6 +6564,7 @@ Requests an asynchronous i/o operation.
@param[in,out] m2 message for the AIO handler (can be used to
identify a completed AIO operation); ignored
if mode is OS_AIO_SYNC
+
@return DB_SUCCESS or error code */
dberr_t
os_aio_func(
@@ -6237,12 +6577,7 @@ os_aio_func(
ulint n,
bool read_only,
fil_node_t* m1,
- void* m2,
- ulint* write_size)/*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
+ void* m2)
{
#ifdef WIN_ASYNC_IO
BOOL ret = TRUE;
@@ -6278,7 +6613,7 @@ try_again:
Slot* slot;
- slot = array->reserve_slot(type, m1, m2, file, name, buf, offset, n, write_size);
+ slot = array->reserve_slot(type, m1, m2, file, name, buf, offset, n);
if (type.is_read()) {
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index 00b7bd14c98..b2722c2a9bf 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -989,41 +989,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED},
- {"compress_trim_sect512", "compression",
- "Number of sect-512 TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512},
-
- {"compress_trim_sect1024", "compression",
- "Number of sect-1024 TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024},
-
- {"compress_trim_sect2048", "compression",
- "Number of sect-2048 TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048},
-
- {"compress_trim_sect4096", "compression",
- "Number of sect-4K TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096},
-
- {"compress_trim_sect8192", "compression",
- "Number of sect-8K TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192},
-
- {"compress_trim_sect16384", "compression",
- "Number of sect-16K TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384},
-
- {"compress_trim_sect32768", "compression",
- "Number of sect-32K TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768},
-
{"compress_pages_page_compressed", "compression",
"Number of pages compressed by page compression",
MONITOR_NONE,
@@ -1034,11 +999,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP},
- {"compress_page_compressed_trim_op_saved", "compression",
- "Number of TRIM operation saved by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED},
-
{"compress_pages_page_decompressed", "compression",
"Number of pages decompressed by page compression",
MONITOR_NONE,
@@ -2073,36 +2033,12 @@ srv_mon_process_existing_counter(
case MONITOR_OVLD_PAGE_COMPRESS_SAVED:
value = srv_stats.page_compression_saved;
break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512:
- value = srv_stats.page_compression_trim_sect512;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024:
- value = srv_stats.page_compression_trim_sect1024;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048:
- value = srv_stats.page_compression_trim_sect2048;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096:
- value = srv_stats.page_compression_trim_sect4096;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192:
- value = srv_stats.page_compression_trim_sect8192;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384:
- value = srv_stats.page_compression_trim_sect16384;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768:
- value = srv_stats.page_compression_trim_sect32768;
- break;
case MONITOR_OVLD_PAGES_PAGE_COMPRESSED:
value = srv_stats.pages_page_compressed;
break;
case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP:
value = srv_stats.page_compressed_trim_op;
break;
- case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED:
- value = srv_stats.page_compressed_trim_op_saved;
- break;
case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED:
value = srv_stats.pages_page_decompressed;
break;
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index 5d478e4529f..bd4dd1c80af 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -184,7 +184,7 @@ my_bool srv_use_native_aio = TRUE;
my_bool srv_numa_interleave = FALSE;
/* If this flag is TRUE, then we will use fallocate(PUCH_HOLE)
to the pages */
-UNIV_INTERN my_bool srv_use_trim = FALSE;
+UNIV_INTERN my_bool srv_use_trim;
/* If this flag is TRUE, then we disable doublewrite buffer */
UNIV_INTERN my_bool srv_use_atomic_writes = FALSE;
/* If this flag IS TRUE, then we use this algorithm for page compressing the pages */
@@ -1617,13 +1617,10 @@ srv_export_innodb_status(void)
export_vars.innodb_available_undo_logs = srv_available_undo_logs;
export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved;
- export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512;
- export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096;
export_vars.innodb_index_pages_written = srv_stats.index_pages_written;
export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written;
export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed;
export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op;
- export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved;
export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed;
export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error;
export_vars.innodb_pages_decrypted = srv_stats.pages_decrypted;
diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc
index d43735bba2e..f597147d50f 100644
--- a/storage/innobase/ut/ut0ut.cc
+++ b/storage/innobase/ut/ut0ut.cc
@@ -761,6 +761,8 @@ ut_strerr(
case DB_NO_FK_ON_S_BASE_COL:
return("Cannot add foreign key on the base column "
"of stored column");
+ case DB_IO_NO_PUNCH_HOLE:
+ return ("File system does not support punch hole (trim) operation.");
/* do not add default: in order to produce a warning if new code
is added to the enum but not added here */